From be7f640257e832a5161645f3dfcbd9acace6931e Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Mon, 31 Aug 2015 16:38:09 -0700
Subject: mesa/glformats: recognize ASTC formats as color formats

ASTC formats contain RGBA components.

Reviewed-by: Chad Versace <chad.versace@intel.com>
Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
---
 src/mesa/main/glformats.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'src')

diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index ce66699db8f..3d12a01777c 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -1044,6 +1044,34 @@ _mesa_is_color_format(GLenum format)
       case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
       case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
       case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
+      case GL_COMPRESSED_RGBA_ASTC_4x4_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_5x4_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_5x5_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_6x5_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_6x6_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_8x5_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_8x6_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_8x8_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_10x5_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_10x6_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_10x8_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_10x10_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_12x10_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_12x12_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR:
       /* generic integer formats */
       case GL_RED_INTEGER_EXT:
       case GL_GREEN_INTEGER_EXT:
-- 
cgit v1.2.3


From f3a483069a6f0d38c10cec88417ae2b5b5aa3392 Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Tue, 19 May 2015 12:28:20 -0700
Subject: i965: advertise ASTC support for Skylake

v2: remove OES ASTC extension reference.

Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
---
 src/mesa/drivers/dri/i965/intel_extensions.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index 4365b719801..3c764be07fb 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -354,6 +354,11 @@ intelInitExtensions(struct gl_context *ctx)
       ctx->Extensions.ARB_stencil_texturing = true;
    }
 
+   if (brw->gen >= 9) {
+      ctx->Extensions.KHR_texture_compression_astc_ldr = true;
+      ctx->Extensions.KHR_texture_compression_astc_hdr = true;
+   }
+
    if (ctx->API == API_OPENGL_CORE)
       ctx->Extensions.ARB_base_instance = true;
    if (ctx->API != API_OPENGL_CORE)
-- 
cgit v1.2.3


From e16531fbe3a79bc27cf00701616a260b452597ef Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Mon, 31 Aug 2015 16:54:02 -0700
Subject: nir/builder: Use nir_after_instr to advance the cursor

This *should* ensure that the cursor gets properly advanced in all cases.
We had a problem before where, if the cursor was created using
nir_after_cf_node on a non-block cf_node, that would call nir_before_block
on the block following the cf node.  Instructions would then get inserted
in backwards order at the top of the block which is not at all what you
would expect from nir_after_cf_node.  By just resetting to after_instr, we
avoid all these problems.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/nir_builder.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h
index 08b40f8ea7c..ba988d71dde 100644
--- a/src/glsl/nir/nir_builder.h
+++ b/src/glsl/nir/nir_builder.h
@@ -49,8 +49,7 @@ nir_builder_instr_insert(nir_builder *build, nir_instr *instr)
    nir_instr_insert(build->cursor, instr);
 
    /* Move the cursor forward. */
-   if (build->cursor.option == nir_cursor_after_instr)
-      build->cursor.instr = instr;
+   build->cursor = nir_after_instr(instr);
 }
 
 static inline void
-- 
cgit v1.2.3


From d770e2746c0b63e47dfd4ab1733ab304dd7222da Mon Sep 17 00:00:00 2001
From: Marta Lofstedt <marta.lofstedt@intel.com>
Date: Tue, 1 Sep 2015 08:19:11 +0300
Subject: mesa: Expose GL_ARB_framebuffer_no_attachments to GLES 3.1

V2: Conform to new standard for exposing enums for OpenGL ES 3.1.

Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml | 4 ++--
 src/mapi/glapi/gen/apiexec.py                         | 4 ++--
 src/mesa/main/get_hash_params.py                      | 8 +++++---
 src/mesa/main/tests/dispatch_sanity.cpp               | 6 ++----
 src/mesa/main/version.c                               | 2 +-
 5 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'src')

diff --git a/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml b/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml
index 59839a044be..55ad7642ff5 100644
--- a/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml
+++ b/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml
@@ -15,13 +15,13 @@
    <enum name="MAX_FRAMEBUFFER_LAYERS"                     value="0x9317" />
    <enum name="MAX_FRAMEBUFFER_SAMPLES"                    value="0x9318" />
 
-    <function name="FramebufferParameteri">
+    <function name="FramebufferParameteri" es2="3.1">
        <param name="target" type="GLenum"/>
        <param name="pname"  type="GLenum"/>
        <param name="param"  type="GLint" />
     </function>
 
-    <function name="GetFramebufferParameteriv">
+    <function name="GetFramebufferParameteriv" es2="3.1">
        <param name="target" type="GLenum" />
        <param name="pname"  type="GLenum" />
        <param name="params" type="GLint *" output="true" />
diff --git a/src/mapi/glapi/gen/apiexec.py b/src/mapi/glapi/gen/apiexec.py
index 3a0eb1869a8..58ec08be466 100644
--- a/src/mapi/glapi/gen/apiexec.py
+++ b/src/mapi/glapi/gen/apiexec.py
@@ -151,8 +151,8 @@ functions = {
 
     # OpenGL 4.3 / GL_ARB_framebuffer_no_attachments.  Mesa can expose the
     # extension with OpenGL 3.0.
-    "FramebufferParameteri": exec_info(compatibility=30, core=31),
-    "GetFramebufferParameteri": exec_info(compatibility=30, core=31),
+    "FramebufferParameteri": exec_info(compatibility=30, core=31, es2=31),
+    "GetFramebufferParameteri": exec_info(compatibility=30, core=31, es2=31),
 
     # OpenGL 4.5 / GL_ARB_direct_state_access.   Mesa can expose the extension
     # with core profile.
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index b3c337e9d45..dc5ba6fc904 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -451,6 +451,11 @@ descriptor=[
   [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONST(MAX_COMPUTE_UNIFORM_COMPONENTS), extra_ARB_compute_shader_es31" ],
   [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader_es31" ],
 
+# GL_ARB_framebuffer_no_attachments / GLES 3.1
+  ["MAX_FRAMEBUFFER_WIDTH", "CONTEXT_INT(Const.MaxFramebufferWidth), extra_ARB_framebuffer_no_attachments"],
+  ["MAX_FRAMEBUFFER_HEIGHT", "CONTEXT_INT(Const.MaxFramebufferHeight), extra_ARB_framebuffer_no_attachments"],
+  ["MAX_FRAMEBUFFER_SAMPLES", "CONTEXT_INT(Const.MaxFramebufferSamples), extra_ARB_framebuffer_no_attachments"],
+
 # GL_ARB_explicit_uniform_location / GLES 3.1
   [ "MAX_UNIFORM_LOCATIONS", "CONTEXT_INT(Const.MaxUserAssignableUniformLocations), extra_ARB_explicit_uniform_location" ],
 ]},
@@ -811,10 +816,7 @@ descriptor=[
   [ "MAX_GEOMETRY_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms), extra_ARB_shader_image_load_store_and_geometry_shader"],
 
 # GL_ARB_framebuffer_no_attachments
-  ["MAX_FRAMEBUFFER_WIDTH", "CONTEXT_INT(Const.MaxFramebufferWidth), extra_ARB_framebuffer_no_attachments"],
-  ["MAX_FRAMEBUFFER_HEIGHT", "CONTEXT_INT(Const.MaxFramebufferHeight), extra_ARB_framebuffer_no_attachments"],
   ["MAX_FRAMEBUFFER_LAYERS", "CONTEXT_INT(Const.MaxFramebufferLayers), extra_ARB_framebuffer_no_attachments"],
-  ["MAX_FRAMEBUFFER_SAMPLES", "CONTEXT_INT(Const.MaxFramebufferSamples), extra_ARB_framebuffer_no_attachments"],
 
 # GL_EXT_polygon_offset_clamp
   [ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ],
diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp
index b941f3e522e..0ddda59410e 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -2405,10 +2405,8 @@ const struct function gles31_functions_possible[] = {
    { "glDrawArraysIndirect", 31, -1 },
    { "glDrawElementsIndirect", 31, -1 },
 
-   // FINISHME: These two functions have not been implemented yet.  They come
-   // FINISHME: from the ARB_framebuffer_no_attachments extension.
-   // { "glFramebufferParameteri", 31, -1 },
-   // { "glGetFramebufferParameteriv", 31, -1 },
+   { "glFramebufferParameteri", 31, -1 },
+   { "glGetFramebufferParameteriv", 31, -1 },
 
    { "glGetProgramInterfaceiv", 31, -1 },
    { "glGetProgramResourceIndex", 31, -1 },
diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c
index f811c1ade4e..498b2f867d0 100644
--- a/src/mesa/main/version.c
+++ b/src/mesa/main/version.c
@@ -451,7 +451,7 @@ compute_version_es2(const struct gl_extensions *extensions)
                          extensions->ARB_compute_shader &&
                          extensions->ARB_draw_indirect &&
                          extensions->ARB_explicit_uniform_location &&
-                         false /*extensions->ARB_framebuffer_no_attachments*/ &&
+                         extensions->ARB_framebuffer_no_attachments &&
                          extensions->ARB_shader_atomic_counters &&
                          extensions->ARB_shader_image_load_store &&
                          extensions->ARB_shader_image_size &&
-- 
cgit v1.2.3


From f8a938814e8cdbf65153c277f257df8e22fae9c5 Mon Sep 17 00:00:00 2001
From: Marta Lofstedt <marta.lofstedt@intel.com>
Date: Mon, 24 Aug 2015 13:01:53 +0200
Subject: mesa: Limit Framebuffer Parameter OpenGL ES 3.1 usage

According to OpenGL ES 3.1 specification, section 9.2.1 for
glFramebufferParameter and section 9.2.3 for glGetFramebufferParameteriv:

"An INVALID_ENUM error is generated if pname is not FRAMEBUFFER_DEFAULT_WIDTH,
FRAMEBUFFER_DEFAULT_HEIGHT, FRAMEBUFFER_DEFAULT_SAMPLES, or
FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS."

Therefore exclude OpenGL ES 3.1 from using the GL_FRAMEBUFFER_DEFAULT_LAYERS
parameter.

Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
Reviewed-by: Kevin Rogovin <kevin.rogovin at intel.com>
---
 src/mesa/main/fbobject.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 07db1950bbb..6b6ebb78f75 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -1389,8 +1389,16 @@ framebuffer_parameteri(struct gl_context *ctx, struct gl_framebuffer *fb,
          fb->DefaultGeometry.Height = param;
       break;
    case GL_FRAMEBUFFER_DEFAULT_LAYERS:
+     /*
+      * According to the OpenGL ES 3.1 specification section 9.2.1, the
+      * GL_FRAMEBUFFER_DEFAULT_LAYERS parameter name is not supported.
+      */
+      if (_mesa_is_gles31(ctx)) {
+         _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=0x%x)", func, pname);
+         break;
+      }
       if (param < 0 || param > ctx->Const.MaxFramebufferLayers)
-        _mesa_error(ctx, GL_INVALID_VALUE, "%s", func);
+         _mesa_error(ctx, GL_INVALID_VALUE, "%s", func);
       else
          fb->DefaultGeometry.Layers = param;
       break;
@@ -1451,6 +1459,14 @@ get_framebuffer_parameteriv(struct gl_context *ctx, struct gl_framebuffer *fb,
       *params = fb->DefaultGeometry.Height;
       break;
    case GL_FRAMEBUFFER_DEFAULT_LAYERS:
+      /*
+       * According to the OpenGL ES 3.1 specification section 9.2.3, the
+       * GL_FRAMEBUFFER_DEFAULT_LAYERS parameter name is not supported.
+       */
+      if (_mesa_is_gles31(ctx)) {
+         _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=0x%x)", func, pname);
+         break;
+      }
       *params = fb->DefaultGeometry.Layers;
       break;
    case GL_FRAMEBUFFER_DEFAULT_SAMPLES:
-- 
cgit v1.2.3


From 0de53ccc8cbee0f63ba25c9e72664b3cbd31be54 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 1 Sep 2015 12:29:58 +1000
Subject: r600g: fix calculation for gpr allocation

I've been chasing a geom shader hang on rv635 since I wrote
r600 geom code, and finally I hacked some values from fglrx
in and I could run texelfetch without failures.

This is totally my fault as well, maths fail 101.

This makes geom shaders on r600 not fail heavily.

Cc: "10.6" "11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r600/r600_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 51527631efd..89e959b6b0f 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2051,7 +2051,7 @@ bool r600_adjust_gprs(struct r600_context *rctx)
 			/* always privilege vs stage so that at worst we have the
 			 * pixel stage producing wrong output (not the vertex
 			 * stage) */
-			new_num_ps_gprs = max_gprs - ((new_num_vs_gprs - new_num_es_gprs - new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
+			new_num_ps_gprs = max_gprs - ((new_num_vs_gprs + new_num_es_gprs + new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
 			new_num_vs_gprs = num_vs_gprs;
 			new_num_gs_gprs = num_gs_gprs;
 			new_num_es_gprs = num_es_gprs;
-- 
cgit v1.2.3


From 1e6ad4b0270fa524e7a6374e1b15540e02668019 Mon Sep 17 00:00:00 2001
From: Connor Abbott <cwabbott0@gmail.com>
Date: Fri, 1 May 2015 02:38:17 -0400
Subject: nir: add an optimization for removing dead control flow

v2: use nir_cf_node_remove_after() instead of our own broken thing.
v3: use the new control flow modification helpers.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/Makefile.sources      |   1 +
 src/glsl/nir/nir.h             |   2 +
 src/glsl/nir/nir_opt_dead_cf.c | 155 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 158 insertions(+)
 create mode 100644 src/glsl/nir/nir_opt_dead_cf.c

(limited to 'src')

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 05a12bc0721..c422303ddac 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -54,6 +54,7 @@ NIR_FILES = \
 	nir/nir_opt_copy_propagate.c \
 	nir/nir_opt_cse.c \
 	nir/nir_opt_dce.c \
+	nir/nir_opt_dead_cf.c \
 	nir/nir_opt_gcm.c \
 	nir/nir_opt_global_to_local.c \
 	nir/nir_opt_peephole_ffma.c \
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 9703372fcc0..c65d090eac1 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1825,6 +1825,8 @@ bool nir_opt_cse(nir_shader *shader);
 bool nir_opt_dce_impl(nir_function_impl *impl);
 bool nir_opt_dce(nir_shader *shader);
 
+bool nir_opt_dead_cf(nir_shader *shader);
+
 void nir_opt_gcm(nir_shader *shader);
 
 bool nir_opt_peephole_select(nir_shader *shader);
diff --git a/src/glsl/nir/nir_opt_dead_cf.c b/src/glsl/nir/nir_opt_dead_cf.c
new file mode 100644
index 00000000000..fca842caedd
--- /dev/null
+++ b/src/glsl/nir/nir_opt_dead_cf.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include "nir_control_flow.h"
+
+/*
+ * This file implements an optimization that deletes statically unreachable
+ * code. In NIR, one way this can happen if if an if statement has a constant
+ * condition:
+ *
+ * if (true) {
+ *    ...
+ * }
+ *
+ * We delete the if statement and paste the contents of the always-executed
+ * branch into the surrounding control flow, possibly removing more code if
+ * the branch had a jump at the end.
+ */
+
+static void
+remove_after_cf_node(nir_cf_node *node)
+{
+   nir_cf_node *end = node;
+   while (!nir_cf_node_is_last(end))
+      end = nir_cf_node_next(end);
+
+   nir_cf_list list;
+   nir_cf_extract(&list, nir_after_cf_node(node), nir_after_cf_node(end));
+   nir_cf_delete(&list);
+}
+
+static void
+opt_constant_if(nir_if *if_stmt, bool condition)
+{
+   void *mem_ctx = ralloc_parent(if_stmt);
+
+   /* First, we need to remove any phi nodes after the if by rewriting uses to
+    * point to the correct source.
+    */
+   nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&if_stmt->cf_node));
+   nir_block *last_block =
+      nir_cf_node_as_block(condition ? nir_if_last_then_node(if_stmt)
+                                     : nir_if_last_else_node(if_stmt));
+
+   nir_foreach_instr_safe(after, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      nir_ssa_def *def = NULL;
+      nir_foreach_phi_src(phi, phi_src) {
+         if (phi_src->pred != last_block)
+            continue;
+
+         assert(phi_src->src.is_ssa);
+         def = phi_src->src.ssa;
+      }
+
+      assert(def);
+      assert(phi->dest.is_ssa);
+      nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def), mem_ctx);
+      nir_instr_remove(instr);
+   }
+
+   /* The control flow list we're about to paste in may include a jump at the
+    * end, and in that case we have to delete the rest of the control flow
+    * list after the if since it's unreachable and the validator will balk if
+    * we don't.
+    */
+
+   if (!exec_list_is_empty(&last_block->instr_list)) {
+      nir_instr *last_instr = nir_block_last_instr(last_block);
+      if (last_instr->type == nir_instr_type_jump)
+         remove_after_cf_node(&if_stmt->cf_node);
+   }
+
+   /* Finally, actually paste in the then or else branch and delete the if. */
+   struct exec_list *cf_list = condition ? &if_stmt->then_list
+                                         : &if_stmt->else_list;
+
+   nir_cf_list list;
+   nir_cf_extract(&list, nir_before_cf_list(cf_list),
+                  nir_after_cf_list(cf_list));
+   nir_cf_reinsert(&list, nir_after_cf_node(&if_stmt->cf_node));
+   nir_cf_node_remove(&if_stmt->cf_node);
+}
+
+static bool
+dead_cf_cb(nir_block *block, void *state)
+{
+   bool *progress = state;
+
+   nir_if *following_if = nir_block_get_following_if(block);
+   if (!following_if)
+      return true;
+
+  nir_const_value *const_value =
+     nir_src_as_const_value(following_if->condition);
+
+  if (!const_value)
+     return true;
+
+   opt_constant_if(following_if, const_value->u[0] != 0);
+   *progress = true;
+   return true;
+}
+
+static bool
+opt_dead_cf_impl(nir_function_impl *impl)
+{
+   bool progress = false;
+   nir_foreach_block(impl, dead_cf_cb, &progress);
+
+   if (progress)
+      nir_metadata_preserve(impl, nir_metadata_none);
+
+   return progress;
+}
+
+bool
+nir_opt_dead_cf(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_overload(shader, overload)
+      if (overload->impl)
+         progress |= opt_dead_cf_impl(overload->impl);
+
+   return progress;
+}
-- 
cgit v1.2.3


From f649afc9ddabfc70a5f9861e6daa2cb996461f45 Mon Sep 17 00:00:00 2001
From: Connor Abbott <cwabbott0@gmail.com>
Date: Fri, 8 May 2015 01:44:24 -0400
Subject: nir/dead_cf: delete code that's unreachable due to jumps

v2: use nir_cf_node_remove_after().
v2: use foreach_list_typed() instead of hardcoding a list walk.
v3: update to new control flow modification helpers.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/nir_opt_dead_cf.c | 123 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 115 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_opt_dead_cf.c b/src/glsl/nir/nir_opt_dead_cf.c
index fca842caedd..66961cea961 100644
--- a/src/glsl/nir/nir_opt_dead_cf.c
+++ b/src/glsl/nir/nir_opt_dead_cf.c
@@ -40,6 +40,26 @@
  * We delete the if statement and paste the contents of the always-executed
  * branch into the surrounding control flow, possibly removing more code if
  * the branch had a jump at the end.
+ *
+ * The other way is that control flow can end in a jump so that code after it
+ * never gets executed. In particular, this can happen after optimizing
+ * something like:
+ *
+ * if (true) {
+ *    ...
+ *    break;
+ * }
+ * ...
+ *
+ * We also consider the case where both branches of an if end in a jump, e.g.:
+ *
+ * if (...) {
+ *    break;
+ * } else {
+ *    continue;
+ * }
+ * ...
+ *
  */
 
 static void
@@ -111,30 +131,117 @@ opt_constant_if(nir_if *if_stmt, bool condition)
 }
 
 static bool
-dead_cf_cb(nir_block *block, void *state)
+dead_cf_block(nir_block *block)
 {
-   bool *progress = state;
-
    nir_if *following_if = nir_block_get_following_if(block);
    if (!following_if)
-      return true;
+      return false;
 
   nir_const_value *const_value =
      nir_src_as_const_value(following_if->condition);
 
   if (!const_value)
-     return true;
+     return false;
 
    opt_constant_if(following_if, const_value->u[0] != 0);
-   *progress = true;
    return true;
 }
 
 static bool
-opt_dead_cf_impl(nir_function_impl *impl)
+ends_in_jump(nir_block *block)
+{
+   if (exec_list_is_empty(&block->instr_list))
+      return false;
+
+   nir_instr *instr = nir_block_last_instr(block);
+   return instr->type == nir_instr_type_jump;
+}
+
+static bool
+dead_cf_list(struct exec_list *list, bool *list_ends_in_jump)
 {
    bool progress = false;
-   nir_foreach_block(impl, dead_cf_cb, &progress);
+   *list_ends_in_jump = false;
+
+   nir_cf_node *prev = NULL;
+
+   foreach_list_typed(nir_cf_node, cur, node, list) {
+      switch (cur->type) {
+      case nir_cf_node_block: {
+         nir_block *block = nir_cf_node_as_block(cur);
+         if (dead_cf_block(block)) {
+            /* We just deleted the if after this block, so we may have
+             * deleted the block before or after it -- which one is an
+             * implementation detail. Therefore, to recover the place we were
+             * at, we have to use the previous cf_node.
+             */
+
+            if (prev) {
+               cur = nir_cf_node_next(prev);
+            } else {
+               cur = exec_node_data(nir_cf_node, exec_list_get_head(list),
+                                    node);
+            }
+
+            block = nir_cf_node_as_block(cur);
+
+            progress = true;
+         }
+
+         if (ends_in_jump(block)) {
+            *list_ends_in_jump = true;
+
+            if (!exec_node_is_tail_sentinel(cur->node.next)) {
+               remove_after_cf_node(cur);
+               return true;
+            }
+         }
+
+         break;
+      }
+
+      case nir_cf_node_if: {
+         nir_if *if_stmt = nir_cf_node_as_if(cur);
+         bool then_ends_in_jump, else_ends_in_jump;
+         progress |= dead_cf_list(&if_stmt->then_list, &then_ends_in_jump);
+         progress |= dead_cf_list(&if_stmt->else_list, &else_ends_in_jump);
+
+         if (then_ends_in_jump && else_ends_in_jump) {
+            *list_ends_in_jump = true;
+            nir_block *next = nir_cf_node_as_block(nir_cf_node_next(cur));
+            if (!exec_list_is_empty(&next->instr_list) ||
+                !exec_node_is_tail_sentinel(next->cf_node.node.next)) {
+               remove_after_cf_node(cur);
+               return true;
+            }
+         }
+
+         break;
+      }
+
+      case nir_cf_node_loop: {
+         nir_loop *loop = nir_cf_node_as_loop(cur);
+         bool dummy;
+         progress |= dead_cf_list(&loop->body, &dummy);
+
+         break;
+      }
+
+      default:
+         unreachable("unknown cf node type");
+      }
+
+      prev = cur;
+   }
+
+   return progress;
+}
+
+static bool
+opt_dead_cf_impl(nir_function_impl *impl)
+{
+   bool dummy;
+   bool progress = dead_cf_list(&impl->body, &dummy);
 
    if (progress)
       nir_metadata_preserve(impl, nir_metadata_none);
-- 
cgit v1.2.3


From 89dc0626bd71343312049bdca291704cb2f7fffe Mon Sep 17 00:00:00 2001
From: Connor Abbott <cwabbott0@gmail.com>
Date: Fri, 8 May 2015 13:17:10 -0400
Subject: nir: add nir_block_get_following_loop() helper

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/nir.c | 16 ++++++++++++++++
 src/glsl/nir/nir.h |  2 ++
 2 files changed, 18 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index bf001312121..acb251afe8b 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1335,6 +1335,22 @@ nir_block_get_following_if(nir_block *block)
    return nir_cf_node_as_if(next_node);
 }
 
+nir_loop *
+nir_block_get_following_loop(nir_block *block)
+{
+   if (exec_node_is_tail_sentinel(&block->cf_node.node))
+      return NULL;
+
+   if (nir_cf_node_is_last(&block->cf_node))
+      return NULL;
+
+   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
+
+   if (next_node->type != nir_cf_node_loop)
+      return NULL;
+
+   return nir_cf_node_as_loop(next_node);
+}
 static bool
 index_block(nir_block *block, void *state)
 {
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index c65d090eac1..6853b0e9145 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1732,6 +1732,8 @@ bool nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb,
  */
 nir_if *nir_block_get_following_if(nir_block *block);
 
+nir_loop *nir_block_get_following_loop(nir_block *block);
+
 void nir_index_local_regs(nir_function_impl *impl);
 void nir_index_global_regs(nir_shader *shader);
 void nir_index_ssa_defs(nir_function_impl *impl);
-- 
cgit v1.2.3


From 019eea1c4fadaa86ce88ab81ac2f14613381738b Mon Sep 17 00:00:00 2001
From: Connor Abbott <cwabbott0@gmail.com>
Date: Fri, 8 May 2015 14:40:58 -0400
Subject: nir: add a helper for iterating over blocks in a cf node

We were already doing this internally for iterating over a function
implementation, so just expose it directly.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/nir.c | 7 +++++++
 src/glsl/nir/nir.h | 2 ++
 2 files changed, 9 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index acb251afe8b..3f4bec4625d 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1292,6 +1292,13 @@ foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
    return false;
 }
 
+bool
+nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+                             void *state)
+{
+   return foreach_cf_node(node, cb, false, state);
+}
+
 bool
 nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb, void *state)
 {
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 6853b0e9145..e73b7fbc4e1 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1726,6 +1726,8 @@ bool nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb,
                        void *state);
 bool nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb,
                                void *state);
+bool nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+                                  void *state);
 
 /* If the following CF node is an if, this function returns that if.
  * Otherwise, it returns NULL.
-- 
cgit v1.2.3


From aec6744501be06e37dbcb620eba0deef54fcad31 Mon Sep 17 00:00:00 2001
From: Connor Abbott <cwabbott0@gmail.com>
Date: Fri, 8 May 2015 14:42:14 -0400
Subject: nir/dead_cf: add support for removing useless loops

v2: fix detecting if the loop has any phi nodes after it.
v2: use nir_foreach_ssa_def() instead of nir_foreach_dest() when
    checking for values live after the loop to catch const_load
    instructions.
v2: fix handling return instructions
v2: add some documentation to loop_is_dead()

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/glsl/nir/nir_opt_dead_cf.c | 121 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 109 insertions(+), 12 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_opt_dead_cf.c b/src/glsl/nir/nir_opt_dead_cf.c
index 66961cea961..5c5510484af 100644
--- a/src/glsl/nir/nir_opt_dead_cf.c
+++ b/src/glsl/nir/nir_opt_dead_cf.c
@@ -29,9 +29,9 @@
 #include "nir_control_flow.h"
 
 /*
- * This file implements an optimization that deletes statically unreachable
- * code. In NIR, one way this can happen if if an if statement has a constant
- * condition:
+ * This file implements an optimization that deletes statically
+ * unreachable/dead code. In NIR, one way this can happen is if an if
+ * statement has a constant condition:
  *
  * if (true) {
  *    ...
@@ -41,7 +41,7 @@
  * branch into the surrounding control flow, possibly removing more code if
  * the branch had a jump at the end.
  *
- * The other way is that control flow can end in a jump so that code after it
+ * Another way is that control flow can end in a jump so that code after it
  * never gets executed. In particular, this can happen after optimizing
  * something like:
  *
@@ -60,6 +60,12 @@
  * }
  * ...
  *
+ * Finally, we also handle removing useless loops, i.e. loops with no side
+ * effects and without any definitions that are used elsewhere. This case is a
+ * little different from the first two in that the code is actually run (it
+ * just never does anything), but there are similar issues with needing to
+ * be careful with restarting after deleting the cf_node (see dead_cf_list())
+ * so this is a convenient place to remove them.
  */
 
 static void
@@ -130,20 +136,108 @@ opt_constant_if(nir_if *if_stmt, bool condition)
    nir_cf_node_remove(&if_stmt->cf_node);
 }
 
+/* Block callback: returns false if any instruction may have a side effect. */
+static bool
+block_has_no_side_effects(nir_block *block, void *state)
+{
+   (void) state;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type == nir_instr_type_call)
+         return false;
+
+      /* Return instructions can cause us to skip over other side-effecting
+       * instructions after the loop, so consider them to have side effects.
+       */
+      if (instr->type == nir_instr_type_jump &&
+          nir_instr_as_jump(instr)->type == nir_jump_return)
+         return false;
+
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      /* Parenthesized so that "!" applies to the masked flag, not to flags. */
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      if (!(nir_intrinsic_infos[intrin->intrinsic].flags &
+            NIR_INTRINSIC_CAN_ELIMINATE))
+         return false;
+   }
+
+   return true;
+}
+
+static bool
+def_not_live_out(nir_ssa_def *def, void *state)
+{
+   nir_block *after = state;
+   /* True when def's liveness bit is not set in after->live_in. */
+   return !BITSET_TEST(after->live_in, def->live_index);
+}
+
+/*
+ * Test if a loop is dead. A loop is dead if:
+ *
+ * 1) It has no side effects: no calls, no returns, and no intrinsics that
+ * could affect the state of the program aside from producing an SSA value
+ * (indicated by a lack of NIR_INTRINSIC_CAN_ELIMINATE).
+ *
+ * 2) It has no phi nodes after it, since those indicate values inside the
+ * loop being used after the loop.
+ *
+ * 3) If there are no phi nodes after the loop, then the only way a value
+ * defined inside the loop can be used outside the loop is if its definition
+ * dominates the block after the loop. So we walk the immediate dominators of
+ * that block back to the block before the loop, and the loop is dead if none
+ * of their definitions are live into the block after the loop.
+ */
+
+static bool
+loop_is_dead(nir_loop *loop)
+{
+   nir_block *before = nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
+   nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
+
+   if (!exec_list_is_empty(&after->instr_list) &&
+       nir_block_first_instr(after)->type == nir_instr_type_phi)
+      return false;
+
+   if (!nir_foreach_block_in_cf_node(&loop->cf_node, block_has_no_side_effects,
+                                     NULL))
+      return false;
+
+   for (nir_block *cur = after->imm_dom; cur != before; cur = cur->imm_dom) {
+      nir_foreach_instr(cur, instr) {
+         if (!nir_foreach_ssa_def(instr, def_not_live_out, after))
+            return false;
+      }
+   }
+
+   return true;
+}
+
 static bool
 dead_cf_block(nir_block *block)
 {
    nir_if *following_if = nir_block_get_following_if(block);
-   if (!following_if)
-      return false;
+   if (following_if) {
+     nir_const_value *const_value =
+        nir_src_as_const_value(following_if->condition);
+
+     if (!const_value)
+        return false;
 
-  nir_const_value *const_value =
-     nir_src_as_const_value(following_if->condition);
+      opt_constant_if(following_if, const_value->u[0] != 0);
+      return true;
+   }
 
-  if (!const_value)
-     return false;
+   nir_loop *following_loop = nir_block_get_following_loop(block);
+   if (!following_loop)
+      return false;
 
-   opt_constant_if(following_if, const_value->u[0] != 0);
+   if (!loop_is_dead(following_loop))
+      return false;
+
+   nir_cf_node_remove(&following_loop->cf_node);
    return true;
 }
 
@@ -170,7 +264,7 @@ dead_cf_list(struct exec_list *list, bool *list_ends_in_jump)
       case nir_cf_node_block: {
          nir_block *block = nir_cf_node_as_block(cur);
          if (dead_cf_block(block)) {
-            /* We just deleted the if after this block, so we may have
+            /* We just deleted the if or loop after this block, so we may have
              * deleted the block before or after it -- which one is an
              * implementation detail. Therefore, to recover the place we were
              * at, we have to use the previous cf_node.
@@ -240,6 +334,9 @@ dead_cf_list(struct exec_list *list, bool *list_ends_in_jump)
 static bool
 opt_dead_cf_impl(nir_function_impl *impl)
 {
+   nir_metadata_require(impl, nir_metadata_live_variables |
+                              nir_metadata_dominance);
+
    bool dummy;
    bool progress = dead_cf_list(&impl->body, &dummy);
 
-- 
cgit v1.2.3


From 1484d8c9aa2e7e78462ffb5c207394bef77af89b Mon Sep 17 00:00:00 2001
From: Connor Abbott <cwabbott0@gmail.com>
Date: Fri, 1 May 2015 02:51:12 -0400
Subject: i965/nir: enable the dead control flow optimization

total instructions in shared programs: 7541551 -> 7541381 (-0.00%)
instructions in affected programs:     3054 -> 2884 (-5.57%)
helped:                                29

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_nir.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 247b223f2e2..8f3edc5cf01 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -61,6 +61,8 @@ nir_optimize(nir_shader *nir, bool is_scalar)
       nir_validate_shader(nir);
       progress |= nir_opt_constant_folding(nir);
       nir_validate_shader(nir);
+      progress |= nir_opt_dead_cf(nir);
+      nir_validate_shader(nir);
       progress |= nir_opt_remove_phis(nir);
       nir_validate_shader(nir);
       progress |= nir_opt_undef(nir);
-- 
cgit v1.2.3


From d38a5601068ae1d923efece8f28757777f4474e4 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat, 6 Jun 2015 09:33:33 +0100
Subject: i965: Prevent coordinate overflow in intel_emit_linear_blit

Fixes regression from
commit 8c17d53823c77ac1c56b0548e4e54f69a33285f1
Author: Kenneth Graunke <kenneth@whitecape.org>
Date:   Wed Apr 15 03:04:33 2015 -0700

    i965: Make intel_emit_linear_blit handle Gen8+ alignment restrictions.

which adjusted the coordinates to be relative to the nearest cacheline.
However, this then offsets the coordinates by up to 63 and this may then
cause them to overflow the BLT limits. For the well aligned large
transfer case, we can use 32bpp pixels and so reduce the coordinates by
4 (versus the current 8bpp pixels). We also have to be more careful
doing the last line just in case it may exceed the coordinate limit.

Reported-and-tested-by: kaillasse91@hotmail.fr
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90734
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Ian Romanick <ian.d.romanick@intel.com>
Cc: Anuj Phogat <anuj.phogat@gmail.com>
Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
---
 src/mesa/drivers/dri/i965/intel_blit.c | 72 ++++++++++++++++------------------
 1 file changed, 34 insertions(+), 38 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 6d92580e725..5a1da128888 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -796,47 +796,43 @@ intel_emit_linear_blit(struct brw_context *brw,
    int16_t src_x, dst_x;
    bool ok;
 
-   /* The pitch given to the GPU must be DWORD aligned, and
-    * we want width to match pitch. Max width is (1 << 15 - 1),
-    * rounding that down to the nearest DWORD is 1 << 15 - 4
-    */
-   pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4);
-   height = (pitch == 0) ? 1 : size / pitch;
-   src_x = src_offset % 64;
-   dst_x = dst_offset % 64;
-   ok = intelEmitCopyBlit(brw, 1,
-			  pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
-                          INTEL_MIPTREE_TRMODE_NONE,
-			  pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
-                          INTEL_MIPTREE_TRMODE_NONE,
-			  src_x, 0, /* src x/y */
-			  dst_x, 0, /* dst x/y */
-			  pitch, height, /* w, h */
-			  GL_COPY);
-   if (!ok)
-      _mesa_problem(ctx, "Failed to linear blit %dx%d\n", pitch, height);
-
-   src_offset += pitch * height;
-   dst_offset += pitch * height;
-   src_x = src_offset % 64;
-   dst_x = dst_offset % 64;
-   size -= pitch * height;
-   assert (size < (1 << 15));
-   pitch = ALIGN(size, 4);
-
-   if (size != 0) {
+   do {
+      /* The pitch given to the GPU must be DWORD aligned, and
+       * we want width to match pitch. Max width is (1 << 15) - 1; leave 64
+       * of slack for the cacheline-relative x offsets (up to 63) added below.
+       */
+      pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 64), 4);
+      height = (size < pitch || pitch == 0) ? 1 : size / pitch;
+
+      src_x = src_offset % 64;
+      dst_x = dst_offset % 64;
+      pitch = ALIGN(MIN2(size, (1 << 15) - 64), 4);
+      assert(src_x + pitch < 1 << 15);
+      assert(dst_x + pitch < 1 << 15);
+
       ok = intelEmitCopyBlit(brw, 1,
-			     pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
+                             pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
                              INTEL_MIPTREE_TRMODE_NONE,
-			     pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
+                             pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
                              INTEL_MIPTREE_TRMODE_NONE,
-			     src_x, 0, /* src x/y */
-			     dst_x, 0, /* dst x/y */
-			     size, 1, /* w, h */
-			     GL_COPY);
-      if (!ok)
-         _mesa_problem(ctx, "Failed to linear blit %dx%d\n", size, 1);
-   }
+                             src_x, 0, /* src x/y */
+                             dst_x, 0, /* dst x/y */
+                             MIN2(size, pitch), height, /* w, h */
+                             GL_COPY);
+      if (!ok) {
+         _mesa_problem(ctx, "Failed to linear blit %dx%d\n",
+                       MIN2(size, pitch), height);
+         return;
+      }
+
+      pitch *= height;
+      if (size <= pitch)
+         return;
+
+      src_offset += pitch;
+      dst_offset += pitch;
+      size -= pitch;
+   } while (1);
 }
 
 /**
-- 
cgit v1.2.3


From fc292b5821ca2d21cf5ebc83994138b87085d878 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Fri, 28 Aug 2015 20:53:08 +0200
Subject: gallium/util: add u_bit_scan_consecutive_range
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/auxiliary/util/u_math.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'src')

diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 56bd185f527..7175d1d4ee8 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -483,6 +483,26 @@ u_bit_scan64(uint64_t *mask)
 }
 #endif
 
+/* Take the lowest run of consecutive set bits out of a non-zero *mask and
+ * return its first bit in "start" and its length in "count". For example:
+ *
+ * while (mask) {
+ *    int start, count, i;
+ *    u_bit_scan_consecutive_range(&mask, &start, &count);
+ *
+ *    for (i = 0; i < count; i++)
+ *       ... process element (start+i)
+ * }
+ */
+static inline void
+u_bit_scan_consecutive_range(unsigned *mask, int *start, int *count)
+{
+   *start = ffs(*mask) - 1;
+   /* With all 32 bits set there is no zero bit for ffs() to find. */
+   *count = *mask == 0xffffffff ? 32 : ffs(~(*mask >> *start)) - 1;
+   *mask = *count == 32 ? 0 : *mask & ~(((1u << *count) - 1) << *start);
+}
+
 /**
  * Return float bits.
  */
-- 
cgit v1.2.3


From b1e54512117e0269095273514e5ab70b697a41f4 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 11:59:23 +0200
Subject: winsys/amdgpu: use small IBs for better performance on VI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 0f42298c2ad..3e9fc4023b3 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -200,17 +200,19 @@ amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx)
 
 static bool amdgpu_get_new_ib(struct amdgpu_cs *cs)
 {
-   /* The maximum size is 4MB - 1B, which is unaligned.
-    * Use aligned size 4MB - 16B. */
-   const unsigned max_ib_size = (1024 * 1024 - 16) * 4;
-   const unsigned min_ib_size = 24 * 1024 * 4;
+   /* Small IBs are better than big IBs, because the GPU goes idle quicker
+    * and there is less waiting for buffers and fences. Proof:
+    *   http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
+    */
+   const unsigned buffer_size = 128 * 1024 * 4;
+   const unsigned ib_size = 20 * 1024 * 4;
 
    cs->base.cdw = 0;
    cs->base.buf = NULL;
 
    /* Allocate a new buffer for IBs if the current buffer is all used. */
    if (!cs->big_ib_buffer ||
-       cs->used_ib_space + min_ib_size > cs->big_ib_buffer->size) {
+       cs->used_ib_space + ib_size > cs->big_ib_buffer->size) {
       struct radeon_winsys *ws = &cs->ctx->ws->base;
       struct radeon_winsys_cs_handle *winsys_bo;
 
@@ -219,7 +221,7 @@ static bool amdgpu_get_new_ib(struct amdgpu_cs *cs)
       cs->ib_mapped = NULL;
       cs->used_ib_space = 0;
 
-      cs->big_ib_buffer = ws->buffer_create(ws, max_ib_size,
+      cs->big_ib_buffer = ws->buffer_create(ws, buffer_size,
                                             4096, true,
                                             RADEON_DOMAIN_GTT,
                                             RADEON_FLAG_CPU_ACCESS);
@@ -239,7 +241,7 @@ static bool amdgpu_get_new_ib(struct amdgpu_cs *cs)
 
    cs->ib.ib_mc_address = cs->big_ib_winsys_buffer->va + cs->used_ib_space;
    cs->base.buf = (uint32_t*)(cs->ib_mapped + cs->used_ib_space);
-   cs->base.max_dw = (cs->big_ib_buffer->size - cs->used_ib_space) / 4;
+   cs->base.max_dw = ib_size / 4;
    return true;
 }
 
-- 
cgit v1.2.3


From 9b510a9652297a63677f1d55b2bf444694fd94e1 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sat, 29 Aug 2015 22:59:23 +0200
Subject: radeonsi: fix a Unigine Heaven hang when drirc is missing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 10.6 11.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.h        |  1 +
 src/gallium/drivers/radeonsi/si_state.c         | 16 +++++++++++++++-
 src/gallium/drivers/radeonsi/si_state.h         |  2 ++
 src/gallium/drivers/radeonsi/si_state_shaders.c | 10 ++++++++++
 4 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 423b849b7c3..ad32473b91e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -190,6 +190,7 @@ struct si_shader_selector {
 	uint64_t	inputs_read;
 	uint64_t	outputs_written;
 	uint32_t	patch_outputs_written;
+	uint32_t	ps_colors_written;
 };
 
 /* Valid shader configurations:
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 806ab5f0e22..1ca5e4667b2 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -29,6 +29,7 @@
 #include "sid.h"
 #include "radeon/r600_cs.h"
 
+#include "util/u_dual_blend.h"
 #include "util/u_format.h"
 #include "util/u_format_s3tc.h"
 #include "util/u_memory.h"
@@ -233,8 +234,10 @@ static unsigned si_pack_float_12p4(float x)
  * - The COLOR1 format isn't INVALID because of possible dual-source blending,
  *   so COLOR1 is enabled pretty much all the time.
  * So CB_TARGET_MASK is the only register that can disable COLOR1.
+ *
+ * Another reason is to avoid a hang with dual source blending.
  */
-static void si_update_fb_blend_state(struct si_context *sctx)
+void si_update_fb_blend_state(struct si_context *sctx)
 {
 	struct si_pm4_state *pm4;
 	struct si_state_blend *blend = sctx->queued.named.blend;
@@ -252,6 +255,16 @@ static void si_update_fb_blend_state(struct si_context *sctx)
 			mask |= 0xf << (4*i);
 	mask &= blend->cb_target_mask;
 
+	/* Avoid a hang that happens when dual source blending is enabled
+	 * but there are not enough color outputs. This is undefined behavior,
+	 * so disable color writes completely.
+	 *
+	 * Reproducible with Unigine Heaven 4.0 and drirc missing.
+	 */
+	if (blend->dual_src_blend &&
+	    (sctx->ps_shader->ps_colors_written & 0x3) != 0x3)
+		mask = 0;
+
 	si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
 	si_pm4_set_state(sctx, fb_blend, pm4);
 }
@@ -343,6 +356,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
 		return NULL;
 
 	blend->alpha_to_one = state->alpha_to_one;
+	blend->dual_src_blend = util_blend_state_is_dual(state, 0);
 
 	if (state->logicop_enable) {
 		color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 118c5622c62..242db8afd45 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -39,6 +39,7 @@ struct si_state_blend {
 	struct si_pm4_state	pm4;
 	uint32_t		cb_target_mask;
 	bool			alpha_to_one;
+	bool			dual_src_blend;
 };
 
 struct si_state_sample_mask {
@@ -251,6 +252,7 @@ void si_shader_change_notify(struct si_context *sctx);
 /* si_state.c */
 struct si_shader_selector;
 
+void si_update_fb_blend_state(struct si_context *sctx);
 boolean si_is_format_supported(struct pipe_screen *screen,
                                enum pipe_format format,
                                enum pipe_texture_target target,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index a09f588b356..b223e060be3 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -713,6 +713,15 @@ static void *si_create_shader_state(struct pipe_context *ctx,
 			}
 		}
 		break;
+	case PIPE_SHADER_FRAGMENT:
+		for (i = 0; i < sel->info.num_outputs; i++) {
+			unsigned name = sel->info.output_semantic_name[i];
+			unsigned index = sel->info.output_semantic_index[i];
+
+			if (name == TGSI_SEMANTIC_COLOR)
+				sel->ps_colors_written |= 1 << index;
+		}
+		break;
 	}
 
 	if (sscreen->b.debug_flags & DBG_PRECOMPILE)
@@ -840,6 +849,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
 	}
 
 	sctx->ps_shader = sel;
+	si_update_fb_blend_state(sctx);
 }
 
 static void si_delete_shader_selector(struct pipe_context *ctx,
-- 
cgit v1.2.3


From 08775a219628611989ab87c621255ac3c841dcda Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 00:12:03 +0200
Subject: radeonsi: set all 16 viewport Z bounds for GL 4.1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_state.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 1ca5e4667b2..b3a18267af1 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3180,6 +3180,7 @@ static void si_init_config(struct si_context *sctx)
 	unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
 	unsigned raster_config, raster_config_1;
 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+	int i;
 
 	if (pm4 == NULL)
 		return;
@@ -3210,6 +3211,11 @@ static void si_init_config(struct si_context *sctx)
 
 	si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
 
+	for (i = 0; i < 16; i++) {
+		si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0);
+		si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0));
+	}
+
 	switch (sctx->screen->b.family) {
 	case CHIP_TAHITI:
 	case CHIP_PITCAIRN:
@@ -3296,8 +3302,6 @@ static void si_init_config(struct si_context *sctx)
 	si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
 	/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
 	si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
-	si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0);
-	si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, fui(1.0));
 	si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
 	si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0));
 	si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
-- 
cgit v1.2.3


From 05af645a951fd985d0dbe3c22614e1dee8dfb3f0 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 03:44:03 +0200
Subject: radeonsi: fix memory usage checking for big IBs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_hw_context.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 110e3163021..563251ddd4b 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -33,14 +33,6 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
 	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
 	int i;
 
-	/* If the CS is sufficiently large, don't count the space needed
-	 * and just flush if there is less than 8096 dwords left. */
-	if (cs->max_dw >= 24 * 1024) {
-		if (cs->cdw > cs->max_dw - 8 * 1024)
-			ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
-		return;
-	}
-
 	/* There are two memory usage counters in the winsys for all buffers
 	 * that have been added (cs_add_reloc) and two counters in the pipe
 	 * driver for those that haven't been added yet.
@@ -54,6 +46,15 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
 	ctx->b.gtt = 0;
 	ctx->b.vram = 0;
 
+	/* If the CS is sufficiently large, don't count the space needed
+	 * and just flush if there is less than 8192 dwords left.
+	 */
+	if (cs->max_dw >= 24 * 1024) {
+		if (cs->cdw > cs->max_dw - 8 * 1024)
+			ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+		return;
+	}
+
 	/* The number of dwords we already used in the CS so far. */
 	num_dw += cs->cdw;
 
-- 
cgit v1.2.3


From 02c8e06497c14bed37dc1780585348bb2675cab6 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Fri, 28 Aug 2015 22:33:02 +0200
Subject: radeonsi: add SI_MAX_ATTRIBS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PIPE_MAX_ATTRIBS is 32, but we currently only support 16.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_state.c | 2 +-
 src/gallium/drivers/radeonsi/si_state.h | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index b3a18267af1..62eda97dc7e 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2784,7 +2784,7 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
 	struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
 	int i;
 
-	assert(count < PIPE_MAX_ATTRIBS);
+	assert(count < SI_MAX_ATTRIBS);
 	if (!v)
 		return NULL;
 
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 242db8afd45..bf713c42e86 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -31,6 +31,7 @@
 #include "radeon/r600_pipe_common.h"
 
 #define SI_NUM_SHADERS (PIPE_SHADER_TESS_EVAL+1)
+#define SI_MAX_ATTRIBS 16
 
 struct si_screen;
 struct si_shader;
@@ -84,9 +85,9 @@ struct si_state_dsa {
 struct si_vertex_element
 {
 	unsigned			count;
-	uint32_t			rsrc_word3[PIPE_MAX_ATTRIBS];
-	uint32_t			format_size[PIPE_MAX_ATTRIBS];
-	struct pipe_vertex_element	elements[PIPE_MAX_ATTRIBS];
+	uint32_t			rsrc_word3[SI_MAX_ATTRIBS];
+	uint32_t			format_size[SI_MAX_ATTRIBS];
+	struct pipe_vertex_element	elements[SI_MAX_ATTRIBS];
 };
 
 union si_state {
@@ -156,7 +157,7 @@ struct si_shader_data {
 #define SI_SO_BUF_OFFSET	SI_NUM_RING_BUFFERS
 #define SI_NUM_RW_BUFFERS	(SI_SO_BUF_OFFSET + 4)
 
-#define SI_NUM_VERTEX_BUFFERS	16
+#define SI_NUM_VERTEX_BUFFERS	SI_MAX_ATTRIBS
 
 
 /* This represents descriptors in memory, such as buffer resources,
-- 
cgit v1.2.3


From f6a10f60b75821c20ce7cf338b519b92ed0330fc Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Fri, 28 Aug 2015 21:08:49 +0200
Subject: radeonsi: optimize scissor states
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- convert 16 states to 1 atom
- only emit 1 scissor if VIEWPORT_INDEX isn't written
- use only one packet when emitting consecutive scissors

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_blit.c          |  4 +-
 src/gallium/drivers/radeonsi/si_hw_context.c    |  3 ++
 src/gallium/drivers/radeonsi/si_pipe.c          |  2 +-
 src/gallium/drivers/radeonsi/si_pipe.h          | 10 +++++
 src/gallium/drivers/radeonsi/si_shader.h        |  4 +-
 src/gallium/drivers/radeonsi/si_state.c         | 57 ++++++++++++++++++-------
 src/gallium/drivers/radeonsi/si_state.h         |  6 ---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 20 +++++++++
 8 files changed, 79 insertions(+), 27 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index b7450b6fcec..b2f342f4fa1 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -68,9 +68,7 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
 	if (sctx->queued.named.viewport[0]) {
 		util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport[0]->viewport);
 	}
-	if (sctx->queued.named.scissor[0]) {
-		util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor[0]->scissor);
-	}
+	util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]);
 	util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
 	util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
 				     (struct pipe_stream_output_target**)sctx->b.streamout.targets);
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 563251ddd4b..873a4727976 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -194,6 +194,9 @@ void si_begin_new_cs(struct si_context *ctx)
 	si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
 	si_all_descriptors_begin_new_cs(ctx);
 
+	ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+	si_mark_atom_dirty(ctx, &ctx->scissors.atom);
+
 	r600_postflush_resume_features(&ctx->b);
 
 	ctx->b.initial_gfx_cs_size = ctx->b.rings.gfx.cs->cdw;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 92c6ae3de2b..330b94665b3 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -369,7 +369,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 		return 8;
 
 	case PIPE_CAP_MAX_VIEWPORTS:
-		return 16;
+		return SI_MAX_VIEWPORTS;
 
 	/* Timer queries, present when the clock frequency is non zero. */
 	case PIPE_CAP_QUERY_TIMESTAMP:
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 52167f24a95..9060f948971 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -85,6 +85,8 @@
 #define SI_IS_TRACE_POINT(x)		(((x) & 0xcafe0000) == 0xcafe0000)
 #define SI_GET_TRACE_POINT_ID(x)	((x) & 0xffff)
 
+#define SI_MAX_VIEWPORTS	16
+
 struct si_compute;
 
 struct si_screen {
@@ -127,6 +129,12 @@ struct si_framebuffer {
 	unsigned			export_16bpc;
 };
 
+struct si_scissors {
+	struct r600_atom		atom;
+	unsigned			dirty_mask; /* bit i: states[i] awaits emission */
+	struct pipe_scissor_state	states[SI_MAX_VIEWPORTS];
+};
+
 #define SI_NUM_ATOMS(sctx) (sizeof((sctx)->atoms)/sizeof((sctx)->atoms.array[0]))
 
 struct si_context {
@@ -154,6 +162,7 @@ struct si_context {
 			struct r600_atom *msaa_config;
 			struct r600_atom *clip_regs;
 			struct r600_atom *shader_userdata;
+			struct r600_atom *scissors;
 		} s;
 		struct r600_atom *array[0];
 	} atoms;
@@ -181,6 +190,7 @@ struct si_context {
 	struct r600_resource		*border_color_table;
 	unsigned			border_color_offset;
 
+	struct si_scissors		scissors;
 	struct r600_atom		clip_regs;
 	struct r600_atom		msaa_sample_locs;
 	struct r600_atom		msaa_config;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index ad32473b91e..c748f71430b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -278,8 +278,10 @@ static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
 		return &sctx->gs_shader->info;
 	else if (sctx->tes_shader)
 		return &sctx->tes_shader->info;
-	else
+	else if (sctx->vs_shader)
 		return &sctx->vs_shader->info;
+	else
+		return NULL;
 }
 
 static inline struct si_shader* si_get_vs_state(struct si_context *sctx)
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 62eda97dc7e..8bd35a8422e 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -526,26 +526,50 @@ static void si_set_scissor_states(struct pipe_context *ctx,
                                   const struct pipe_scissor_state *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
-	struct si_state_scissor *scissor;
-	struct si_pm4_state *pm4;
 	int i;
 
-	for (i = start_slot; i < start_slot + num_scissors; i++) {
-		int idx = i - start_slot;
-		int offset = i * 4 * 2;
+	for (i = 0; i < num_scissors; i++)
+		sctx->scissors.states[start_slot + i] = state[i];
 
-		scissor = CALLOC_STRUCT(si_state_scissor);
-		if (scissor == NULL)
-			return;
-		pm4 = &scissor->pm4;
-		scissor->scissor = state[idx];
-		si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset,
-			       S_028250_TL_X(state[idx].minx) | S_028250_TL_Y(state[idx].miny) |
-			       S_028250_WINDOW_OFFSET_DISABLE(1));
-		si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR + offset,
-			       S_028254_BR_X(state[idx].maxx) | S_028254_BR_Y(state[idx].maxy));
-		si_pm4_set_state(sctx, scissor[i], scissor);
+	sctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
+	si_mark_atom_dirty(sctx, &sctx->scissors.atom);
+}
+
+static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom)
+{
+	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct pipe_scissor_state *states = sctx->scissors.states;
+	unsigned mask = sctx->scissors.dirty_mask;
+	/* Emit the dirty scissor rectangles into the gfx command stream. */
+	/* The simple case: Only 1 viewport is active. */
+	if (mask & 1 &&
+	    !si_get_vs_info(sctx)->writes_viewport_index) {
+		r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
+		radeon_emit(cs, S_028250_TL_X(states[0].minx) |
+				S_028250_TL_Y(states[0].miny) |
+				S_028250_WINDOW_OFFSET_DISABLE(1));
+		radeon_emit(cs, S_028254_BR_X(states[0].maxx) |
+				S_028254_BR_Y(states[0].maxy));
+		sctx->scissors.dirty_mask &= ~1; /* clear only bit 0; others stay dirty */
+		return;
+	}
+	/* NOTE(review): si_get_vs_info() can return NULL and is unchecked above. */
+	while (mask) {
+		int start, count, i;
+
+		u_bit_scan_consecutive_range(&mask, &start, &count);
+		/* Emit the whole run with a single r600_write_context_reg_seq. */
+		r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
+					       start * 4 * 2, count * 2);
+		for (i = start; i < start+count; i++) {
+			radeon_emit(cs, S_028250_TL_X(states[i].minx) |
+					S_028250_TL_Y(states[i].miny) |
+					S_028250_WINDOW_OFFSET_DISABLE(1));
+			radeon_emit(cs, S_028254_BR_X(states[i].maxx) |
+					S_028254_BR_Y(states[i].maxy));
+		}
+	}
+	sctx->scissors.dirty_mask = 0;
+}
 
 static void si_set_viewport_states(struct pipe_context *ctx,
@@ -2986,6 +3010,7 @@ void si_init_state_functions(struct si_context *sctx)
 	si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0);
 	si_init_atom(&sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);
 	si_init_atom(&sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6);
+	si_init_atom(&sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors, 16*4);
 
 	sctx->b.b.create_blend_state = si_create_blend_state;
 	sctx->b.b.bind_blend_state = si_bind_blend_state;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index bf713c42e86..34dbba48050 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -48,11 +48,6 @@ struct si_state_sample_mask {
 	uint16_t		sample_mask;
 };
 
-struct si_state_scissor {
-	struct si_pm4_state		pm4;
-	struct pipe_scissor_state	scissor;
-};
-
 struct si_state_viewport {
 	struct si_pm4_state		pm4;
 	struct pipe_viewport_state	viewport;
@@ -96,7 +91,6 @@ union si_state {
 		struct si_pm4_state		*blend_color;
 		struct si_pm4_state		*clip;
 		struct si_state_sample_mask	*sample_mask;
-		struct si_state_scissor		*scissor[16];
 		struct si_state_viewport	*viewport[16];
 		struct si_state_rasterizer	*rasterizer;
 		struct si_state_dsa		*dsa;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index b223e060be3..5a9ef29a549 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -760,6 +760,23 @@ static void *si_create_tes_state(struct pipe_context *ctx,
 	return si_create_shader_state(ctx, state, PIPE_SHADER_TESS_EVAL);
 }
 
+/**
+ * Normally, we only emit 1 viewport and 1 scissor if no shader is using
+ * the VIEWPORT_INDEX output, and emitting the other viewports and scissors
+ * is delayed. When a shader with VIEWPORT_INDEX appears, this should be
+ * called to emit the rest.
+ */
+static void si_update_viewports_and_scissors(struct si_context *sctx)
+{
+	struct tgsi_shader_info *info = si_get_vs_info(sctx);
+
+	if (!info || !info->writes_viewport_index)
+		return;
+
+	if (sctx->scissors.dirty_mask)
+	    si_mark_atom_dirty(sctx, &sctx->scissors.atom);
+}
+
 static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
@@ -770,6 +787,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
 
 	sctx->vs_shader = sel;
 	si_mark_atom_dirty(sctx, &sctx->clip_regs);
+	si_update_viewports_and_scissors(sctx);
 }
 
 static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
@@ -787,6 +805,7 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
 
 	if (enable_changed)
 		si_shader_change_notify(sctx);
+	si_update_viewports_and_scissors(sctx);
 }
 
 static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
@@ -821,6 +840,7 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
 		si_shader_change_notify(sctx);
 		sctx->last_tes_sh_base = -1; /* invalidate derived tess state */
 	}
+	si_update_viewports_and_scissors(sctx);
 }
 
 static void si_make_dummy_ps(struct si_context *sctx)
-- 
cgit v1.2.3


From 8a97528b3a97a430a887e9044b938b349585f4ab Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Fri, 28 Aug 2015 21:48:37 +0200
Subject: radeonsi: optimize viewport states
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

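Same approach as the scissor states: keep the viewport states in the context
together with a dirty mask and emit them from a single atom, instead of
allocating one pm4 state per viewport. Only viewport 0 is emitted when the
shader does not write the viewport index.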

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_blit.c          |  4 +-
 src/gallium/drivers/radeonsi/si_hw_context.c    |  2 +
 src/gallium/drivers/radeonsi/si_pipe.h          |  8 ++++
 src/gallium/drivers/radeonsi/si_state.c         | 58 +++++++++++++++++--------
 src/gallium/drivers/radeonsi/si_state.h         |  6 ---
 src/gallium/drivers/radeonsi/si_state_shaders.c |  2 +
 6 files changed, 54 insertions(+), 26 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index b2f342f4fa1..c28b2a80088 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -65,9 +65,7 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
 		util_blitter_save_sample_mask(sctx->blitter,
 					      sctx->queued.named.sample_mask->sample_mask);
 	}
-	if (sctx->queued.named.viewport[0]) {
-		util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport[0]->viewport);
-	}
+	util_blitter_save_viewport(sctx->blitter, &sctx->viewports.states[0]);
 	util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]);
 	util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
 	util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 873a4727976..8284306f30a 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -195,7 +195,9 @@ void si_begin_new_cs(struct si_context *ctx)
 	si_all_descriptors_begin_new_cs(ctx);
 
 	ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+	ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 	si_mark_atom_dirty(ctx, &ctx->scissors.atom);
+	si_mark_atom_dirty(ctx, &ctx->viewports.atom);
 
 	r600_postflush_resume_features(&ctx->b);
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 9060f948971..a1845ba893d 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -135,6 +135,12 @@ struct si_scissors {
 	struct pipe_scissor_state	states[SI_MAX_VIEWPORTS];
 };
 
+struct si_viewports {
+	struct r600_atom		atom;
+	unsigned			dirty_mask;
+	struct pipe_viewport_state	states[SI_MAX_VIEWPORTS];
+};
+
 #define SI_NUM_ATOMS(sctx) (sizeof((sctx)->atoms)/sizeof((sctx)->atoms.array[0]))
 
 struct si_context {
@@ -163,6 +169,7 @@ struct si_context {
 			struct r600_atom *clip_regs;
 			struct r600_atom *shader_userdata;
 			struct r600_atom *scissors;
+			struct r600_atom *viewports;
 		} s;
 		struct r600_atom *array[0];
 	} atoms;
@@ -191,6 +198,7 @@ struct si_context {
 	unsigned			border_color_offset;
 
 	struct si_scissors		scissors;
+	struct si_viewports		viewports;
 	struct r600_atom		clip_regs;
 	struct r600_atom		msaa_sample_locs;
 	struct r600_atom		msaa_config;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 8bd35a8422e..940aaa0b247 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -578,29 +578,52 @@ static void si_set_viewport_states(struct pipe_context *ctx,
                                    const struct pipe_viewport_state *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
-	struct si_state_viewport *viewport;
-	struct si_pm4_state *pm4;
 	int i;
 
-	for (i = start_slot; i < start_slot + num_viewports; i++) {
-		int idx = i - start_slot;
-		int offset = i * 4 * 6;
+	for (i = 0; i < num_viewports; i++)
+		sctx->viewports.states[start_slot + i] = state[i];
 
-		viewport = CALLOC_STRUCT(si_state_viewport);
-		if (!viewport)
-			return;
-		pm4 = &viewport->pm4;
+	sctx->viewports.dirty_mask |= ((1 << num_viewports) - 1) << start_slot;
+	si_mark_atom_dirty(sctx, &sctx->viewports.atom);
+}
+
+static void si_emit_viewports(struct si_context *sctx, struct r600_atom *atom)
+{
+	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct pipe_viewport_state *states = sctx->viewports.states;
+	unsigned mask = sctx->viewports.dirty_mask;
+
+	/* The simple case: Only 1 viewport is active. */
+	if (mask & 1 &&
+	    !si_get_vs_info(sctx)->writes_viewport_index) {
+		r600_write_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
+		radeon_emit(cs, fui(states[0].scale[0]));
+		radeon_emit(cs, fui(states[0].translate[0]));
+		radeon_emit(cs, fui(states[0].scale[1]));
+		radeon_emit(cs, fui(states[0].translate[1]));
+		radeon_emit(cs, fui(states[0].scale[2]));
+		radeon_emit(cs, fui(states[0].translate[2]));
+		sctx->viewports.dirty_mask &= ~1; /* clear one bit */
+		return;
+	}
 
-		viewport->viewport = state[idx];
-		si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE + offset, fui(state[idx].scale[0]));
-		si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET + offset, fui(state[idx].translate[0]));
-		si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE + offset, fui(state[idx].scale[1]));
-		si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET + offset, fui(state[idx].translate[1]));
-		si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE + offset, fui(state[idx].scale[2]));
-		si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET + offset, fui(state[idx].translate[2]));
+	while (mask) {
+		int start, count, i;
 
-		si_pm4_set_state(sctx, viewport[i], viewport);
+		u_bit_scan_consecutive_range(&mask, &start, &count);
+
+		r600_write_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
+					       start * 4 * 6, count * 6);
+		for (i = start; i < start+count; i++) {
+			radeon_emit(cs, fui(states[i].scale[0]));
+			radeon_emit(cs, fui(states[i].translate[0]));
+			radeon_emit(cs, fui(states[i].scale[1]));
+			radeon_emit(cs, fui(states[i].translate[1]));
+			radeon_emit(cs, fui(states[i].scale[2]));
+			radeon_emit(cs, fui(states[i].translate[2]));
+		}
 	}
+	sctx->viewports.dirty_mask = 0;
 }
 
 /*
@@ -3011,6 +3034,7 @@ void si_init_state_functions(struct si_context *sctx)
 	si_init_atom(&sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);
 	si_init_atom(&sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6);
 	si_init_atom(&sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors, 16*4);
+	si_init_atom(&sctx->viewports.atom, &sctx->atoms.s.viewports, si_emit_viewports, 16*8);
 
 	sctx->b.b.create_blend_state = si_create_blend_state;
 	sctx->b.b.bind_blend_state = si_bind_blend_state;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 34dbba48050..32147831699 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -48,11 +48,6 @@ struct si_state_sample_mask {
 	uint16_t		sample_mask;
 };
 
-struct si_state_viewport {
-	struct si_pm4_state		pm4;
-	struct pipe_viewport_state	viewport;
-};
-
 struct si_state_rasterizer {
 	struct si_pm4_state	pm4;
 	bool			flatshade;
@@ -91,7 +86,6 @@ union si_state {
 		struct si_pm4_state		*blend_color;
 		struct si_pm4_state		*clip;
 		struct si_state_sample_mask	*sample_mask;
-		struct si_state_viewport	*viewport[16];
 		struct si_state_rasterizer	*rasterizer;
 		struct si_state_dsa		*dsa;
 		struct si_pm4_state		*fb_rs;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 5a9ef29a549..4ca9aa50814 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -775,6 +775,8 @@ static void si_update_viewports_and_scissors(struct si_context *sctx)
 
 	if (sctx->scissors.dirty_mask)
 	    si_mark_atom_dirty(sctx, &sctx->scissors.atom);
+	if (sctx->viewports.dirty_mask)
+	    si_mark_atom_dirty(sctx, &sctx->viewports.atom);
 }
 
 static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
-- 
cgit v1.2.3


From ba7a6cf6264dbb747f5b897d09bf1b98b232c1d0 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Fri, 28 Aug 2015 21:59:22 +0200
Subject: radeonsi: define the state atom array separately
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_hw_context.c |  2 +-
 src/gallium/drivers/radeonsi/si_pipe.h       | 20 +-------------------
 src/gallium/drivers/radeonsi/si_state.h      | 20 ++++++++++++++++++++
 src/gallium/drivers/radeonsi/si_state_draw.c |  2 +-
 4 files changed, 23 insertions(+), 21 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 8284306f30a..5ef5db73aeb 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -59,7 +59,7 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
 	num_dw += cs->cdw;
 
 	if (count_draw_in) {
-		for (i = 0; i < SI_NUM_ATOMS(ctx); i++) {
+		for (i = 0; i < SI_NUM_ATOMS; i++) {
 			if (ctx->atoms.array[i]->dirty) {
 				num_dw += ctx->atoms.array[i]->num_dw;
 			}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index a1845ba893d..5ca83e7a2d2 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -141,8 +141,6 @@ struct si_viewports {
 	struct pipe_viewport_state	states[SI_MAX_VIEWPORTS];
 };
 
-#define SI_NUM_ATOMS(sctx) (sizeof((sctx)->atoms)/sizeof((sctx)->atoms.array[0]))
-
 struct si_context {
 	struct r600_common_context	b;
 	struct blitter_context		*blitter;
@@ -156,23 +154,7 @@ struct si_context {
 	struct pipe_fence_handle	*last_gfx_fence;
 	struct si_shader_selector	*fixed_func_tcs_shader;
 
-	union {
-		struct {
-			/* The order matters. */
-			struct r600_atom *cache_flush;
-			struct r600_atom *streamout_begin;
-			struct r600_atom *streamout_enable; /* must be after streamout_begin */
-			struct r600_atom *framebuffer;
-			struct r600_atom *msaa_sample_locs;
-			struct r600_atom *db_render_state;
-			struct r600_atom *msaa_config;
-			struct r600_atom *clip_regs;
-			struct r600_atom *shader_userdata;
-			struct r600_atom *scissors;
-			struct r600_atom *viewports;
-		} s;
-		struct r600_atom *array[0];
-	} atoms;
+	union si_state_atoms		atoms;
 
 	struct si_framebuffer		framebuffer;
 	struct si_vertex_element	*vertex_elements;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 32147831699..881f6a55251 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -106,6 +106,26 @@ union si_state {
 	struct si_pm4_state	*array[0];
 };
 
+union si_state_atoms {
+	struct {
+		/* The order matters. */
+		struct r600_atom *cache_flush;
+		struct r600_atom *streamout_begin;
+		struct r600_atom *streamout_enable; /* must be after streamout_begin */
+		struct r600_atom *framebuffer;
+		struct r600_atom *msaa_sample_locs;
+		struct r600_atom *db_render_state;
+		struct r600_atom *msaa_config;
+		struct r600_atom *clip_regs;
+		struct r600_atom *shader_userdata;
+		struct r600_atom *scissors;
+		struct r600_atom *viewports;
+	} s;
+	struct r600_atom *array[0];
+};
+
+#define SI_NUM_ATOMS (sizeof(union si_state_atoms)/sizeof(struct r600_atom*))
+
 struct si_shader_data {
 	struct r600_atom	atom;
 	uint32_t		sh_base[SI_NUM_SHADERS];
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index fd2fecaa741..87ee746ce6f 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -822,7 +822,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	si_need_cs_space(sctx, 0, TRUE);
 
 	/* Emit states. */
-	for (i = 0; i < SI_NUM_ATOMS(sctx); i++) {
+	for (i = 0; i < SI_NUM_ATOMS; i++) {
 		if (sctx->atoms.array[i]->dirty) {
 			sctx->atoms.array[i]->emit(&sctx->b, sctx->atoms.array[i]);
 			sctx->atoms.array[i]->dirty = false;
-- 
cgit v1.2.3


From e191c58324ebd5c37223a5a2c16701d236bd9cb4 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Fri, 28 Aug 2015 23:26:50 +0200
Subject: radeonsi: initialize atom IDs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_state.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 940aaa0b247..f95684ec07f 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -35,13 +35,15 @@
 #include "util/u_memory.h"
 #include "util/u_pstipple.h"
 
-static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem,
+static void si_init_atom(struct si_context *sctx,
+			 struct r600_atom *atom, struct r600_atom **list_elem,
 			 void (*emit_func)(struct si_context *ctx, struct r600_atom *state),
 			 unsigned num_dw)
 {
 	atom->emit = (void*)emit_func;
 	atom->num_dw = num_dw;
 	atom->dirty = false;
+	atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */
 	*list_elem = atom;
 }
 
@@ -3030,11 +3032,11 @@ static void si_init_config(struct si_context *sctx);
 
 void si_init_state_functions(struct si_context *sctx)
 {
-	si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0);
-	si_init_atom(&sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);
-	si_init_atom(&sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6);
-	si_init_atom(&sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors, 16*4);
-	si_init_atom(&sctx->viewports.atom, &sctx->atoms.s.viewports, si_emit_viewports, 16*8);
+	si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0);
+	si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);
+	si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6);
+	si_init_atom(sctx, &sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors, 16*4);
+	si_init_atom(sctx, &sctx->viewports.atom, &sctx->atoms.s.viewports, si_emit_viewports, 16*8);
 
 	sctx->b.b.create_blend_state = si_create_blend_state;
 	sctx->b.b.bind_blend_state = si_bind_blend_state;
-- 
cgit v1.2.3


From 5bb0ad7ccc74e3aa69a1d55d2f7935587288312c Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Fri, 28 Aug 2015 23:52:47 +0200
Subject: radeonsi: call si_init_atom for remaining radeonsi atoms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I need to initialize more atom IDs.

This adds 4 more si_init_atom calls, which simplifies the code.
(si_init_atom expects emit functions that take a struct si_context, so the
affected emit functions now take that context type instead of
struct r600_common_context.)

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_compute.c     |  4 ++--
 src/gallium/drivers/radeonsi/si_cp_dma.c      |  4 ++--
 src/gallium/drivers/radeonsi/si_descriptors.c | 15 +++++++--------
 src/gallium/drivers/radeonsi/si_hw_context.c  |  2 +-
 src/gallium/drivers/radeonsi/si_pipe.c        | 10 ----------
 src/gallium/drivers/radeonsi/si_state.c       | 20 +++++++++-----------
 src/gallium/drivers/radeonsi/si_state.h       | 10 ++++++----
 src/gallium/drivers/radeonsi/si_state_draw.c  |  5 ++---
 8 files changed, 29 insertions(+), 41 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 0cdecd6da79..e3caf5e0183 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -262,7 +262,7 @@ static void si_launch_grid(
 			 SI_CONTEXT_INV_KCACHE |
 			 SI_CONTEXT_FLUSH_WITH_INV_L2 |
 			 SI_CONTEXT_FLAG_COMPUTE;
-	si_emit_cache_flush(&sctx->b, NULL);
+	si_emit_cache_flush(sctx, NULL);
 
 	pm4->compute_pkt = true;
 
@@ -453,7 +453,7 @@ static void si_launch_grid(
 			 SI_CONTEXT_INV_ICACHE |
 			 SI_CONTEXT_INV_KCACHE |
 			 SI_CONTEXT_FLAG_COMPUTE;
-	si_emit_cache_flush(&sctx->b, NULL);
+	si_emit_cache_flush(sctx, NULL);
 }
 
 
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 7bdac97eaa4..8dd12f63e63 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -166,7 +166,7 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
 		/* Flush the caches for the first copy only.
 		 * Also wait for the previous CP DMA operations. */
 		if (sctx->b.flags) {
-			si_emit_cache_flush(&sctx->b, NULL);
+			si_emit_cache_flush(sctx, NULL);
 			dma_flags |= SI_CP_DMA_RAW_WAIT; /* same as WAIT_UNTIL=CP_DMA_IDLE */
 		}
 
@@ -230,7 +230,7 @@ void si_copy_buffer(struct si_context *sctx,
 
 		/* Flush the caches for the first copy only. Also wait for old CP DMA packets to complete. */
 		if (sctx->b.flags) {
-			si_emit_cache_flush(&sctx->b, NULL);
+			si_emit_cache_flush(sctx, NULL);
 			sync_flags |= SI_CP_DMA_RAW_WAIT;
 		}
 
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index b74c893c7d5..558814352aa 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -960,8 +960,7 @@ static void si_emit_shader_pointer(struct si_context *sctx,
 	desc->pointer_dirty = keep_dirty;
 }
 
-static void si_emit_shader_userdata(struct si_context *sctx,
-				    struct r600_atom *atom)
+void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
 {
 	unsigned i;
 	uint32_t *sh_base = sctx->shader_userdata.sh_base;
@@ -1028,12 +1027,12 @@ void si_init_all_descriptors(struct si_context *sctx)
 	sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
 	sctx->b.invalidate_buffer = si_invalidate_buffer;
 
-	/* Shader user data. */
-	sctx->atoms.s.shader_userdata = &sctx->shader_userdata.atom;
-	sctx->shader_userdata.atom.emit = (void*)si_emit_shader_userdata;
-
-	/* Upper bound, 4 pointers per shader, +1 for vertex buffers, +2 for the VS copy shader. */
-	sctx->shader_userdata.atom.num_dw = (SI_NUM_SHADERS * 4 + 1 + 2) * 4;
+	/* Shader user data.
+	 * The number of dwords is set to the upper bound:
+	 *   4 pointers per shader, +1 for vertex buffers, +2 for the VS copy shader.
+	 */
+	si_init_atom(sctx, &sctx->shader_userdata.atom, &sctx->atoms.s.shader_userdata,
+		     si_emit_shader_userdata, (SI_NUM_SHADERS * 4 + 1 + 2) * 4);
 
 	/* Set default and immutable mappings. */
 	si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 5ef5db73aeb..f821ad33ff3 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -123,7 +123,7 @@ void si_context_gfx_flush(void *context, unsigned flags,
 			SI_CONTEXT_INV_TC_L2 |
 			/* this is probably not needed anymore */
 			SI_CONTEXT_PS_PARTIAL_FLUSH;
-	si_emit_cache_flush(&ctx->b, NULL);
+	si_emit_cache_flush(ctx, NULL);
 
 	/* force to keep tiling flags */
 	flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 330b94665b3..03b0ee4a465 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -143,16 +143,6 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 
 	si_init_all_descriptors(sctx);
 
-	/* Initialize cache_flush. */
-	sctx->cache_flush = si_atom_cache_flush;
-	sctx->atoms.s.cache_flush = &sctx->cache_flush;
-
-	sctx->msaa_sample_locs = si_atom_msaa_sample_locs;
-	sctx->atoms.s.msaa_sample_locs = &sctx->msaa_sample_locs;
-
-	sctx->msaa_config = si_atom_msaa_config;
-	sctx->atoms.s.msaa_config = &sctx->msaa_config;
-
 	sctx->atoms.s.streamout_begin = &sctx->b.streamout.begin_atom;
 	sctx->atoms.s.streamout_enable = &sctx->b.streamout.enable_atom;
 
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index f95684ec07f..cc34f3bc4f6 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -35,10 +35,10 @@
 #include "util/u_memory.h"
 #include "util/u_pstipple.h"
 
-static void si_init_atom(struct si_context *sctx,
-			 struct r600_atom *atom, struct r600_atom **list_elem,
-			 void (*emit_func)(struct si_context *ctx, struct r600_atom *state),
-			 unsigned num_dw)
+void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
+		  struct r600_atom **list_elem,
+		  void (*emit_func)(struct si_context *ctx, struct r600_atom *state),
+		  unsigned num_dw)
 {
 	atom->emit = (void*)emit_func;
 	atom->num_dw = num_dw;
@@ -2306,10 +2306,9 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 			       S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
 }
 
-static void si_emit_msaa_sample_locs(struct r600_common_context *rctx,
+static void si_emit_msaa_sample_locs(struct si_context *sctx,
 				     struct r600_atom *atom)
 {
-	struct si_context *sctx = (struct si_context *)rctx;
 	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
 	unsigned nr_samples = sctx->framebuffer.nr_samples;
 
@@ -2317,11 +2316,8 @@ static void si_emit_msaa_sample_locs(struct r600_common_context *rctx,
 						SI_NUM_SMOOTH_AA_SAMPLES);
 }
 
-const struct r600_atom si_atom_msaa_sample_locs = { si_emit_msaa_sample_locs, 18 }; /* number of CS dwords */
-
-static void si_emit_msaa_config(struct r600_common_context *rctx, struct r600_atom *atom)
+static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct si_context *sctx = (struct si_context *)rctx;
 	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
 
 	cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
@@ -2329,7 +2325,6 @@ static void si_emit_msaa_config(struct r600_common_context *rctx, struct r600_at
 				sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0);
 }
 
-const struct r600_atom si_atom_msaa_config = { si_emit_msaa_config, 10 }; /* number of CS dwords */
 
 static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
 {
@@ -3032,8 +3027,11 @@ static void si_init_config(struct si_context *sctx);
 
 void si_init_state_functions(struct si_context *sctx)
 {
+	si_init_atom(sctx, &sctx->cache_flush, &sctx->atoms.s.cache_flush, si_emit_cache_flush, 24);
 	si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0);
+	si_init_atom(sctx, &sctx->msaa_sample_locs, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs, 18);
 	si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);
+	si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config, 10);
 	si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6);
 	si_init_atom(sctx, &sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors, 16*4);
 	si_init_atom(sctx, &sctx->viewports.atom, &sctx->atoms.s.viewports, si_emit_viewports, 16*8);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 881f6a55251..a38a0f2571b 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -257,10 +257,15 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx);
 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
 			    const uint8_t *ptr, unsigned size, uint32_t *const_offset);
 void si_shader_change_notify(struct si_context *sctx);
+void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom);
 
 /* si_state.c */
 struct si_shader_selector;
 
+void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
+		  struct r600_atom **list_elem,
+		  void (*emit_func)(struct si_context *ctx, struct r600_atom *state),
+		  unsigned num_dw);
 void si_update_fb_blend_state(struct si_context *sctx);
 boolean si_is_format_supported(struct pipe_screen *screen,
                                enum pipe_format format,
@@ -287,10 +292,7 @@ void si_update_shaders(struct si_context *sctx);
 void si_init_shader_functions(struct si_context *sctx);
 
 /* si_state_draw.c */
-extern const struct r600_atom si_atom_cache_flush;
-extern const struct r600_atom si_atom_msaa_sample_locs;
-extern const struct r600_atom si_atom_msaa_config;
-void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *atom);
+void si_emit_cache_flush(struct si_context *sctx, struct r600_atom *atom);
 void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo);
 void si_trace_emit(struct si_context *sctx);
 
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 87ee746ce6f..2ff58d1f332 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -595,8 +595,9 @@ static void si_emit_draw_packets(struct si_context *sctx,
 
 #define BOTH_ICACHE_KCACHE (SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_KCACHE)
 
-void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *atom)
+void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom)
 {
+	struct r600_common_context *sctx = &si_ctx->b;
 	struct radeon_winsys_cs *cs = sctx->rings.gfx.cs;
 	uint32_t cp_coher_cntl = 0;
 	uint32_t compute =
@@ -706,8 +707,6 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
 	sctx->flags = 0;
 }
 
-const struct r600_atom si_atom_cache_flush = { si_emit_cache_flush, 24 }; /* number of CS dwords */
-
 static void si_get_draw_start_count(struct si_context *sctx,
 				    const struct pipe_draw_info *info,
 				    unsigned *start, unsigned *count)
-- 
cgit v1.2.3


From 2fe040ee61d3c08e8d38c3552ad4e7b5060074a1 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sat, 29 Aug 2015 00:03:02 +0200
Subject: radeonsi: initialize atom IDs for external atoms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_pipe.c  |  4 ----
 src/gallium/drivers/radeonsi/si_state.c | 13 +++++++++++++
 2 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 03b0ee4a465..e95824add40 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -142,10 +142,6 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	sctx->b.rings.gfx.flush = si_context_gfx_flush;
 
 	si_init_all_descriptors(sctx);
-
-	sctx->atoms.s.streamout_begin = &sctx->b.streamout.begin_atom;
-	sctx->atoms.s.streamout_enable = &sctx->b.streamout.enable_atom;
-
 	si_init_state_functions(sctx);
 	si_init_shader_functions(sctx);
 
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index cc34f3bc4f6..88964e1a545 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -35,6 +35,16 @@
 #include "util/u_memory.h"
 #include "util/u_pstipple.h"
 
+/* Initialize an external atom (owned by ../radeon). */
+static void
+si_init_external_atom(struct si_context *sctx, struct r600_atom *atom,
+		      struct r600_atom **list_elem)
+{
+	atom->id = list_elem - sctx->atoms.array + 1;
+	*list_elem = atom;
+}
+
+/* Initialize an atom owned by radeonsi.  */
 void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
 		  struct r600_atom **list_elem,
 		  void (*emit_func)(struct si_context *ctx, struct r600_atom *state),
@@ -3027,6 +3037,9 @@ static void si_init_config(struct si_context *sctx);
 
 void si_init_state_functions(struct si_context *sctx)
 {
+	si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin);
+	si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable);
+
 	si_init_atom(sctx, &sctx->cache_flush, &sctx->atoms.s.cache_flush, si_emit_cache_flush, 24);
 	si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0);
 	si_init_atom(sctx, &sctx->msaa_sample_locs, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs, 18);
-- 
cgit v1.2.3


From 87c1e9e19c6baa8c6fb03b0894c72744a07cde63 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sat, 29 Aug 2015 00:49:40 +0200
Subject: radeonsi: use a bitmask for tracking dirty atoms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This mainly removes the cache misses when checking the dirty flags.
Not much else though.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_hw_context.c | 10 ++++------
 src/gallium/drivers/radeonsi/si_pipe.h       |  8 +++++++-
 src/gallium/drivers/radeonsi/si_state_draw.c | 13 +++++++------
 3 files changed, 18 insertions(+), 13 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index f821ad33ff3..2381b6c0004 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -31,7 +31,6 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
 			boolean count_draw_in)
 {
 	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
-	int i;
 
 	/* There are two memory usage counters in the winsys for all buffers
 	 * that have been added (cs_add_reloc) and two counters in the pipe
@@ -59,11 +58,10 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
 	num_dw += cs->cdw;
 
 	if (count_draw_in) {
-		for (i = 0; i < SI_NUM_ATOMS; i++) {
-			if (ctx->atoms.array[i]->dirty) {
-				num_dw += ctx->atoms.array[i]->num_dw;
-			}
-		}
+		unsigned mask = ctx->dirty_atoms;
+
+		while (mask)
+			num_dw += ctx->atoms.array[u_bit_scan(&mask)]->num_dw;
 
 		/* The number of dwords all the dirty states would take. */
 		num_dw += si_pm4_dirty_dw(ctx);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 5ca83e7a2d2..35104cf7848 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -155,6 +155,7 @@ struct si_context {
 	struct si_shader_selector	*fixed_func_tcs_shader;
 
 	union si_state_atoms		atoms;
+	unsigned			dirty_atoms; /* mask */
 
 	struct si_framebuffer		framebuffer;
 	struct si_vertex_element	*vertex_elements;
@@ -339,7 +340,12 @@ static inline void
 si_set_atom_dirty(struct si_context *sctx,
 		  struct r600_atom *atom, bool dirty)
 {
-	atom->dirty = dirty;
+	unsigned bit = 1 << (atom->id - 1);
+
+	if (dirty)
+		sctx->dirty_atoms |= bit;
+	else
+		sctx->dirty_atoms &= ~bit;
 }
 
 static inline void
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 2ff58d1f332..81575b53dd8 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -729,7 +729,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct pipe_index_buffer ib = {};
-	unsigned i;
+	unsigned mask;
 
 	if (!info->count && !info->indirect &&
 	    (info->indexed || !info->count_from_stream_output))
@@ -821,12 +821,13 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	si_need_cs_space(sctx, 0, TRUE);
 
 	/* Emit states. */
-	for (i = 0; i < SI_NUM_ATOMS; i++) {
-		if (sctx->atoms.array[i]->dirty) {
-			sctx->atoms.array[i]->emit(&sctx->b, sctx->atoms.array[i]);
-			sctx->atoms.array[i]->dirty = false;
-		}
+	mask = sctx->dirty_atoms;
+	while (mask) {
+		struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)];
+
+		atom->emit(&sctx->b, atom);
 	}
+	sctx->dirty_atoms = 0;
 
 	si_pm4_emit_dirty(sctx);
 	si_emit_scratch_reloc(sctx);
-- 
cgit v1.2.3


From 0d46c3bc9d09b376d74f7399e1a2d1b0a923640b Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sat, 29 Aug 2015 01:45:28 +0200
Subject: radeonsi: remove the gs_rings state, add the registers to init_config
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_pipe.c          |  1 -
 src/gallium/drivers/radeonsi/si_pipe.h          |  1 -
 src/gallium/drivers/radeonsi/si_state.h         |  1 -
 src/gallium/drivers/radeonsi/si_state_shaders.c | 27 +++++++++++++------------
 4 files changed, 14 insertions(+), 16 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index e95824add40..c3f96e02d08 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -49,7 +49,6 @@ static void si_destroy_context(struct pipe_context *context)
 	sctx->b.ws->fence_reference(&sctx->last_gfx_fence, NULL);
 
 	si_pm4_free_state(sctx, sctx->init_config, ~0);
-	si_pm4_delete_state(sctx, gs_rings, sctx->gs_rings);
 	si_pm4_delete_state(sctx, tf_ring, sctx->tf_state);
 	for (i = 0; i < Elements(sctx->vgt_shader_config); i++)
 		si_pm4_delete_state(sctx, vgt_shader_config, sctx->vgt_shader_config[i]);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 35104cf7848..687114b47a1 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -201,7 +201,6 @@ struct si_context {
 
 	/* VGT states. */
 	struct si_pm4_state	*vgt_shader_config[4];
-	struct si_pm4_state	*gs_rings;
 	struct pipe_resource	*esgs_ring;
 	struct pipe_resource	*gsvs_ring;
 	struct si_pm4_state	*tf_state;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index a38a0f2571b..cd01074d778 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -96,7 +96,6 @@ union si_state {
 		struct si_pm4_state		*hs;
 		struct si_pm4_state		*es;
 		struct si_pm4_state		*gs;
-		struct si_pm4_state		*gs_rings;
 		struct si_pm4_state		*tf_ring;
 		struct si_pm4_state		*vgt_shader_config;
 		struct si_pm4_state		*vs;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 4ca9aa50814..43dbe03c2e9 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1050,8 +1050,7 @@ static void si_init_gs_rings(struct si_context *sctx)
 	unsigned esgs_ring_size = 128 * 1024;
 	unsigned gsvs_ring_size = 60 * 1024 * 1024;
 
-	assert(!sctx->gs_rings);
-	sctx->gs_rings = CALLOC_STRUCT(si_pm4_state);
+	assert(!sctx->esgs_ring && !sctx->gsvs_ring);
 
 	sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
 				       PIPE_USAGE_DEFAULT, esgs_ring_size);
@@ -1059,6 +1058,7 @@ static void si_init_gs_rings(struct si_context *sctx)
 	sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
 					     PIPE_USAGE_DEFAULT, gsvs_ring_size);
 
+	/* Append these registers to the init config state. */
 	if (sctx->b.chip_class >= CIK) {
 		if (sctx->b.chip_class >= VI) {
 			/* The maximum sizes are 63.999 MB on VI, because
@@ -1066,17 +1066,23 @@ static void si_init_gs_rings(struct si_context *sctx)
 			assert(esgs_ring_size / 256 < (1 << 18));
 			assert(gsvs_ring_size / 256 < (1 << 18));
 		}
-		si_pm4_set_reg(sctx->gs_rings, R_030900_VGT_ESGS_RING_SIZE,
+		si_pm4_set_reg(sctx->init_config, R_030900_VGT_ESGS_RING_SIZE,
 			       esgs_ring_size / 256);
-		si_pm4_set_reg(sctx->gs_rings, R_030904_VGT_GSVS_RING_SIZE,
+		si_pm4_set_reg(sctx->init_config, R_030904_VGT_GSVS_RING_SIZE,
 			       gsvs_ring_size / 256);
 	} else {
-		si_pm4_set_reg(sctx->gs_rings, R_0088C8_VGT_ESGS_RING_SIZE,
+		si_pm4_set_reg(sctx->init_config, R_0088C8_VGT_ESGS_RING_SIZE,
 			       esgs_ring_size / 256);
-		si_pm4_set_reg(sctx->gs_rings, R_0088CC_VGT_GSVS_RING_SIZE,
+		si_pm4_set_reg(sctx->init_config, R_0088CC_VGT_GSVS_RING_SIZE,
 			       gsvs_ring_size / 256);
 	}
 
+	/* Flush the context to re-emit the init_config state.
+	 * This is done only once in the lifetime of a context.
+	 */
+	sctx->b.initial_gfx_cs_size = 0; /* force flush */
+	si_context_gfx_flush(sctx, RADEON_FLUSH_ASYNC, NULL);
+
 	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_ESGS,
 			   sctx->esgs_ring, 0, esgs_ring_size,
 			   true, true, 4, 64, 0);
@@ -1113,8 +1119,8 @@ static void si_update_gs_rings(struct si_context *sctx)
 	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS_3,
 			   sctx->gsvs_ring, gsvs_itemsize,
 			   64, true, true, 4, 16, offset);
-
 }
+
 /**
  * @returns 1 if \p sel has been updated to use a new scratch buffer and 0
  *          otherwise.
@@ -1412,16 +1418,11 @@ void si_update_shaders(struct si_context *sctx)
 		si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
 		si_update_so(sctx, sctx->gs_shader);
 
-		if (!sctx->gs_rings)
+		if (!sctx->gsvs_ring)
 			si_init_gs_rings(sctx);
 
-		if (sctx->emitted.named.gs_rings != sctx->gs_rings)
-			sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
-		si_pm4_bind_state(sctx, gs_rings, sctx->gs_rings);
-
 		si_update_gs_rings(sctx);
 	} else {
-		si_pm4_bind_state(sctx, gs_rings, NULL);
 		si_pm4_bind_state(sctx, gs, NULL);
 		si_pm4_bind_state(sctx, es, NULL);
 	}
-- 
cgit v1.2.3


From a2c6ae07b443396b43493155c2418de9a5ee9591 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sat, 29 Aug 2015 01:45:28 +0200
Subject: radeonsi: remove the tf_ring state, add the registers to init_config
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

One less state to worry about.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_pipe.c          |  1 -
 src/gallium/drivers/radeonsi/si_pipe.h          |  1 -
 src/gallium/drivers/radeonsi/si_state.h         |  1 -
 src/gallium/drivers/radeonsi/si_state_shaders.c | 25 +++++++++++++------------
 4 files changed, 13 insertions(+), 15 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index c3f96e02d08..f6f77534b93 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -49,7 +49,6 @@ static void si_destroy_context(struct pipe_context *context)
 	sctx->b.ws->fence_reference(&sctx->last_gfx_fence, NULL);
 
 	si_pm4_free_state(sctx, sctx->init_config, ~0);
-	si_pm4_delete_state(sctx, tf_ring, sctx->tf_state);
 	for (i = 0; i < Elements(sctx->vgt_shader_config); i++)
 		si_pm4_delete_state(sctx, vgt_shader_config, sctx->vgt_shader_config[i]);
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 687114b47a1..9fe17985290 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -203,7 +203,6 @@ struct si_context {
 	struct si_pm4_state	*vgt_shader_config[4];
 	struct pipe_resource	*esgs_ring;
 	struct pipe_resource	*gsvs_ring;
-	struct si_pm4_state	*tf_state;
 	struct pipe_resource	*tf_ring;
 
 	LLVMTargetMachineRef		tm;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index cd01074d778..8cf308d14da 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -96,7 +96,6 @@ union si_state {
 		struct si_pm4_state		*hs;
 		struct si_pm4_state		*es;
 		struct si_pm4_state		*gs;
-		struct si_pm4_state		*tf_ring;
 		struct si_pm4_state		*vgt_shader_config;
 		struct si_pm4_state		*vs;
 		struct si_pm4_state		*ps;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 43dbe03c2e9..8ef02c48855 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1255,8 +1255,7 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
 
 static void si_init_tess_factor_ring(struct si_context *sctx)
 {
-	assert(!sctx->tf_state);
-	sctx->tf_state = CALLOC_STRUCT(si_pm4_state);
+	assert(!sctx->tf_ring);
 
 	sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
 					   PIPE_USAGE_DEFAULT,
@@ -1265,26 +1264,28 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
 			     sctx->tf_ring->width0, fui(0), false);
 	assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
 
+	/* Append these registers to the init config state. */
 	if (sctx->b.chip_class >= CIK) {
-		si_pm4_set_reg(sctx->tf_state, R_030938_VGT_TF_RING_SIZE,
+		si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE,
 			       S_030938_SIZE(sctx->tf_ring->width0 / 4));
-		si_pm4_set_reg(sctx->tf_state, R_030940_VGT_TF_MEMORY_BASE,
+		si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE,
 			       r600_resource(sctx->tf_ring)->gpu_address >> 8);
 	} else {
-		si_pm4_set_reg(sctx->tf_state, R_008988_VGT_TF_RING_SIZE,
+		si_pm4_set_reg(sctx->init_config, R_008988_VGT_TF_RING_SIZE,
 			       S_008988_SIZE(sctx->tf_ring->width0 / 4));
-		si_pm4_set_reg(sctx->tf_state, R_0089B8_VGT_TF_MEMORY_BASE,
+		si_pm4_set_reg(sctx->init_config, R_0089B8_VGT_TF_MEMORY_BASE,
 			       r600_resource(sctx->tf_ring)->gpu_address >> 8);
 	}
-	si_pm4_add_bo(sctx->tf_state, r600_resource(sctx->tf_ring),
-		      RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
-	si_pm4_bind_state(sctx, tf_ring, sctx->tf_state);
+
+	/* Flush the context to re-emit the init_config state.
+	 * This is done only once in the lifetime of a context.
+	 */
+	sctx->b.initial_gfx_cs_size = 0; /* force flush */
+	si_context_gfx_flush(sctx, RADEON_FLUSH_ASYNC, NULL);
 
 	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_TESS_CTRL,
 			   SI_RING_TESS_FACTOR, sctx->tf_ring, 0,
 			   sctx->tf_ring->width0, false, false, 0, 0, 0);
-
-	sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
 }
 
 /**
@@ -1373,7 +1374,7 @@ void si_update_shaders(struct si_context *sctx)
 
 	/* Update stages before GS. */
 	if (sctx->tes_shader) {
-		if (!sctx->tf_state)
+		if (!sctx->tf_ring)
 			si_init_tess_factor_ring(sctx);
 
 		/* VS as LS */
-- 
cgit v1.2.3


From c9a3196b14500bcd4330f9e7ba945474440a07f2 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sat, 29 Aug 2015 01:56:27 +0200
Subject: radeonsi: don't clear the tessellation factor ring buffer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Leftover from the bring-up.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 8ef02c48855..a4c33fa6202 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1260,8 +1260,6 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
 	sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
 					   PIPE_USAGE_DEFAULT,
 					   32768 * sctx->screen->b.info.max_se);
-	sctx->b.clear_buffer(&sctx->b.b, sctx->tf_ring, 0,
-			     sctx->tf_ring->width0, fui(0), false);
 	assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
 
 	/* Append these registers to the init config state. */
-- 
cgit v1.2.3


From c2a42d1f9ff104c562822d7ab2cbaa361666266b Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sat, 29 Aug 2015 02:02:29 +0200
Subject: radeonsi: don't rebind GSVS ring buffers every draw call using GS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_pipe.h          |  1 +
 src/gallium/drivers/radeonsi/si_shader.h        |  1 +
 src/gallium/drivers/radeonsi/si_state_shaders.c | 11 ++++++++---
 3 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 9fe17985290..47ad619ccdc 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -234,6 +234,7 @@ struct si_context {
 	int			last_rast_prim;
 	unsigned		last_sc_line_stipple;
 	int			current_rast_prim; /* primitive type after TES, GS */
+	unsigned		last_gsvs_itemsize;
 
 	/* Scratch buffer */
 	boolean                 emit_scratch_reloc;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index c748f71430b..2305b9988b8 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -185,6 +185,7 @@ struct si_shader_selector {
 	unsigned	gs_output_prim;
 	unsigned	gs_max_out_vertices;
 	unsigned	gs_num_invocations;
+	unsigned	gsvs_itemsize;
 
 	/* masks of "get_unique_index" bits */
 	uint64_t	inputs_read;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index a4c33fa6202..af91af97ce8 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -679,6 +679,8 @@ static void *si_create_shader_state(struct pipe_context *ctx,
 			sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
 		sel->gs_num_invocations =
 			sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
+		sel->gsvs_itemsize = sel->info.num_outputs * 16 *
+				     sel->gs_max_out_vertices;
 
 		for (i = 0; i < sel->info.num_inputs; i++) {
 			unsigned name = sel->info.input_semantic_name[i];
@@ -1096,11 +1098,14 @@ static void si_init_gs_rings(struct si_context *sctx)
 
 static void si_update_gs_rings(struct si_context *sctx)
 {
-	unsigned gs_vert_itemsize = sctx->gs_shader->info.num_outputs * 16;
-	unsigned gs_max_vert_out = sctx->gs_shader->gs_max_out_vertices;
-	unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
+	unsigned gsvs_itemsize = sctx->gs_shader->gsvs_itemsize;
 	uint64_t offset;
 
+	if (gsvs_itemsize == sctx->last_gsvs_itemsize)
+		return;
+
+	sctx->last_gsvs_itemsize = gsvs_itemsize;
+
 	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS,
 			   sctx->gsvs_ring, gsvs_itemsize,
 			   64, true, true, 4, 16, 0);
-- 
cgit v1.2.3


From 0c2eed0edec877584c9362bd9cb9004ff10a8b91 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sat, 29 Aug 2015 02:32:13 +0200
Subject: radeonsi: avoid redundant CB and DB register updates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The main idea is to avoid setting CB_COLORi_INFO = 0 for i>0 repeatedly
when those colorbuffers aren't used. This is mainly for glamor.

Same for DB. Z_INFO and STENCIL_INFO need to be cleared only once.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/r600/r600_blit.c          |  2 +-
 src/gallium/drivers/radeon/r600_pipe_common.h |  2 +-
 src/gallium/drivers/radeon/r600_texture.c     |  4 +++-
 src/gallium/drivers/radeonsi/si_blit.c        |  7 +++++--
 src/gallium/drivers/radeonsi/si_hw_context.c  |  5 ++++-
 src/gallium/drivers/radeonsi/si_pipe.h        |  2 ++
 src/gallium/drivers/radeonsi/si_state.c       | 26 ++++++++++++++++++++------
 7 files changed, 36 insertions(+), 12 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 22a0950a491..08b2f644cad 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -395,7 +395,7 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
 
 	if (buffers & PIPE_CLEAR_COLOR && rctx->b.chip_class >= EVERGREEN) {
 		evergreen_do_fast_color_clear(&rctx->b, fb, &rctx->framebuffer.atom,
-					      &buffers, color);
+					      &buffers, NULL, color);
 		if (!buffers)
 			return; /* all buffers have been fast cleared */
 	}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 29db1cc4e07..d22c230ea3c 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -562,7 +562,7 @@ unsigned r600_translate_colorswap(enum pipe_format format);
 void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 				   struct pipe_framebuffer_state *fb,
 				   struct r600_atom *fb_state,
-				   unsigned *buffers,
+				   unsigned *buffers, unsigned *dirty_cbufs,
 				   const union pipe_color_union *color);
 void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
 void r600_init_context_texture_functions(struct r600_common_context *rctx);
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 54696910e43..89f18fb106f 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1217,7 +1217,7 @@ static void evergreen_set_clear_color(struct r600_texture *rtex,
 void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 				   struct pipe_framebuffer_state *fb,
 				   struct r600_atom *fb_state,
-				   unsigned *buffers,
+				   unsigned *buffers, unsigned *dirty_cbufs,
 				   const union pipe_color_union *color)
 {
 	int i;
@@ -1279,6 +1279,8 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 				   tex->cmask.offset, tex->cmask.size, 0, true);
 
 		tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
+		if (dirty_cbufs)
+			*dirty_cbufs |= 1 << i;
 		rctx->set_atom_dirty(rctx, fb_state, true);
 		*buffers &= ~clear_bit;
 	}
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index c28b2a80088..d1486bd822d 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -336,8 +336,10 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
 		zsbuf ? (struct r600_texture*)zsbuf->texture : NULL;
 
 	if (buffers & PIPE_CLEAR_COLOR) {
-		evergreen_do_fast_color_clear(&sctx->b, fb, &sctx->framebuffer.atom,
-					      &buffers, color);
+		evergreen_do_fast_color_clear(&sctx->b, fb,
+					      &sctx->framebuffer.atom, &buffers,
+					      &sctx->framebuffer.dirty_cbufs,
+					      color);
 		if (!buffers)
 			return; /* all buffers have been fast cleared */
 	}
@@ -374,6 +376,7 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
 		}
 
 		zstex->depth_clear_value = depth;
+		sctx->framebuffer.dirty_zsbuf = true;
 		si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
 		sctx->db_depth_clear = true;
 		si_mark_atom_dirty(sctx, &sctx->db_render_state);
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 2381b6c0004..561378196b5 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -184,8 +184,11 @@ void si_begin_new_cs(struct si_context *ctx)
 	/* The CS initialization should be emitted before everything else. */
 	si_pm4_emit(ctx, ctx->init_config);
 
-	si_mark_atom_dirty(ctx, &ctx->clip_regs);
+	ctx->framebuffer.dirty_cbufs = (1 << 8) - 1;
+	ctx->framebuffer.dirty_zsbuf = true;
 	si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
+
+	si_mark_atom_dirty(ctx, &ctx->clip_regs);
 	si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
 	si_mark_atom_dirty(ctx, &ctx->msaa_config);
 	si_mark_atom_dirty(ctx, &ctx->db_render_state);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 47ad619ccdc..9be4aa7d5b5 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -127,6 +127,8 @@ struct si_framebuffer {
 	unsigned			cb0_is_integer;
 	unsigned			compressed_cb_mask;
 	unsigned			export_16bpc;
+	unsigned			dirty_cbufs;
+	bool				dirty_zsbuf;
 };
 
 struct si_scissors {
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 88964e1a545..3c250484e3a 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2109,6 +2109,13 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 			 SI_CONTEXT_INV_TC_L2 |
 			 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
 
+	/* Take the maximum of the old and new count. If the new count is lower,
+	 * dirtying is needed to disable the unbound colorbuffers.
+	 */
+	sctx->framebuffer.dirty_cbufs |=
+		(1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
+	sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
+
 	util_copy_framebuffer_state(&sctx->framebuffer.state, state);
 
 	sctx->framebuffer.export_16bpc = 0;
@@ -2219,6 +2226,9 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 
 	/* Colorbuffers. */
 	for (i = 0; i < nr_cbufs; i++) {
+		if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
+			continue;
+
 		cb = (struct r600_surface*)state->cbufs[i];
 		if (!cb) {
 			r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
@@ -2259,17 +2269,18 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 			radeon_emit(cs, 0);	/* R_028C94_CB_COLOR0_DCC_BASE */
 	}
 	/* set CB_COLOR1_INFO for possible dual-src blending */
-	if (i == 1 && state->cbufs[0]) {
+	if (i == 1 && state->cbufs[0] &&
+	    sctx->framebuffer.dirty_cbufs & (1 << 0)) {
 		r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
 				       cb->cb_color_info | tex->cb_color_info);
 		i++;
 	}
-	for (; i < 8 ; i++) {
-		r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
-	}
+	for (; i < 8 ; i++)
+		if (sctx->framebuffer.dirty_cbufs & (1 << i))
+			r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
 
 	/* ZS buffer. */
-	if (state->zsbuf) {
+	if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
 		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
 		struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;
 
@@ -2304,7 +2315,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 		r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
 		r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
 				       zb->pa_su_poly_offset_db_fmt_cntl);
-	} else {
+	} else if (sctx->framebuffer.dirty_zsbuf) {
 		r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
 		radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
 		radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
@@ -2314,6 +2325,9 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
         /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
 	r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
 			       S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
+
+	sctx->framebuffer.dirty_cbufs = 0;
+	sctx->framebuffer.dirty_zsbuf = false;
 }
 
 static void si_emit_msaa_sample_locs(struct si_context *sctx,
-- 
cgit v1.2.3


From 12b205341acd2d95887099e14a217902fe21a476 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sat, 29 Aug 2015 14:54:58 +0200
Subject: radeonsi: convert clip state into an atom
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reducing calloc overhead.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_hw_context.c |  1 +
 src/gallium/drivers/radeonsi/si_pipe.h       |  6 ++++++
 src/gallium/drivers/radeonsi/si_state.c      | 24 +++++++++++-------------
 src/gallium/drivers/radeonsi/si_state.h      |  2 +-
 4 files changed, 19 insertions(+), 14 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 561378196b5..689f8166b65 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -189,6 +189,7 @@ void si_begin_new_cs(struct si_context *ctx)
 	si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
 
 	si_mark_atom_dirty(ctx, &ctx->clip_regs);
+	si_mark_atom_dirty(ctx, &ctx->clip_state.atom);
 	si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
 	si_mark_atom_dirty(ctx, &ctx->msaa_config);
 	si_mark_atom_dirty(ctx, &ctx->db_render_state);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 9be4aa7d5b5..829acf735f6 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -131,6 +131,11 @@ struct si_framebuffer {
 	bool				dirty_zsbuf;
 };
 
+struct si_clip_state {
+	struct r600_atom		atom;
+	struct pipe_clip_state		state;
+};
+
 struct si_scissors {
 	struct r600_atom		atom;
 	unsigned			dirty_mask;
@@ -184,6 +189,7 @@ struct si_context {
 
 	struct si_scissors		scissors;
 	struct si_viewports		viewports;
+	struct si_clip_state		clip_state;
 	struct r600_atom		clip_regs;
 	struct r600_atom		msaa_sample_locs;
 	struct r600_atom		msaa_config;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 3c250484e3a..d3e05b5e024 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -473,22 +473,13 @@ static void si_set_clip_state(struct pipe_context *ctx,
 			      const struct pipe_clip_state *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
-	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
 	struct pipe_constant_buffer cb;
 
-	if (pm4 == NULL)
+	if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0)
 		return;
 
-	for (int i = 0; i < 6; i++) {
-		si_pm4_set_reg(pm4, R_0285BC_PA_CL_UCP_0_X + i * 16,
-			       fui(state->ucp[i][0]));
-		si_pm4_set_reg(pm4, R_0285C0_PA_CL_UCP_0_Y + i * 16,
-			       fui(state->ucp[i][1]));
-		si_pm4_set_reg(pm4, R_0285C4_PA_CL_UCP_0_Z + i * 16,
-			       fui(state->ucp[i][2]));
-		si_pm4_set_reg(pm4, R_0285C8_PA_CL_UCP_0_W + i * 16,
-			       fui(state->ucp[i][3]));
-        }
+	sctx->clip_state.state = *state;
+	si_mark_atom_dirty(sctx, &sctx->clip_state.atom);
 
 	cb.buffer = NULL;
 	cb.user_buffer = state->ucp;
@@ -496,8 +487,14 @@ static void si_set_clip_state(struct pipe_context *ctx,
 	cb.buffer_size = 4*4*8;
 	ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, SI_DRIVER_STATE_CONST_BUF, &cb);
 	pipe_resource_reference(&cb.buffer, NULL);
+}
+
+static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
+{
+	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
 
-	si_pm4_set_state(sctx, clip, pm4);
+	r600_write_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
+	radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4);
 }
 
 #define SIX_BITS 0x3F
@@ -3060,6 +3057,7 @@ void si_init_state_functions(struct si_context *sctx)
 	si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);
 	si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config, 10);
 	si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6);
+	si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state, 2+6*4);
 	si_init_atom(sctx, &sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors, 16*4);
 	si_init_atom(sctx, &sctx->viewports.atom, &sctx->atoms.s.viewports, si_emit_viewports, 16*8);
 
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 8cf308d14da..a8a22eb5dfc 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -84,7 +84,6 @@ union si_state {
 	struct {
 		struct si_state_blend		*blend;
 		struct si_pm4_state		*blend_color;
-		struct si_pm4_state		*clip;
 		struct si_state_sample_mask	*sample_mask;
 		struct si_state_rasterizer	*rasterizer;
 		struct si_state_dsa		*dsa;
@@ -115,6 +114,7 @@ union si_state_atoms {
 		struct r600_atom *db_render_state;
 		struct r600_atom *msaa_config;
 		struct r600_atom *clip_regs;
+		struct r600_atom *clip_state;
 		struct r600_atom *shader_userdata;
 		struct r600_atom *scissors;
 		struct r600_atom *viewports;
-- 
cgit v1.2.3


From 74aa64876b54bc2d0088bc9ed2d390eaa2b73349 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sat, 29 Aug 2015 15:05:53 +0200
Subject: radeonsi: convert sample mask state into an atom
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_blit.c       |  5 +----
 src/gallium/drivers/radeonsi/si_hw_context.c |  1 +
 src/gallium/drivers/radeonsi/si_pipe.c       |  2 ++
 src/gallium/drivers/radeonsi/si_pipe.h       |  6 ++++++
 src/gallium/drivers/radeonsi/si_state.c      | 23 ++++++++++++++---------
 src/gallium/drivers/radeonsi/si_state.h      |  7 +------
 6 files changed, 25 insertions(+), 19 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index d1486bd822d..08828ac8446 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -61,10 +61,7 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
 	util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader);
 	util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader);
 	util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
-	if (sctx->queued.named.sample_mask) {
-		util_blitter_save_sample_mask(sctx->blitter,
-					      sctx->queued.named.sample_mask->sample_mask);
-	}
+	util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask.sample_mask);
 	util_blitter_save_viewport(sctx->blitter, &sctx->viewports.states[0]);
 	util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]);
 	util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 689f8166b65..c706d75c161 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -192,6 +192,7 @@ void si_begin_new_cs(struct si_context *ctx)
 	si_mark_atom_dirty(ctx, &ctx->clip_state.atom);
 	si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
 	si_mark_atom_dirty(ctx, &ctx->msaa_config);
+	si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
 	si_mark_atom_dirty(ctx, &ctx->db_render_state);
 	si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
 	si_all_descriptors_begin_new_cs(ctx);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index f6f77534b93..da774789525 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -151,6 +151,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 		goto fail;
 	sctx->blitter->draw_rectangle = r600_draw_rectangle;
 
+	sctx->sample_mask.sample_mask = 0xffff;
+
 	/* these must be last */
 	si_begin_new_cs(sctx);
 	r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 829acf735f6..4d479e0990e 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -136,6 +136,11 @@ struct si_clip_state {
 	struct pipe_clip_state		state;
 };
 
+struct si_sample_mask {
+	struct r600_atom	atom;
+	uint16_t		sample_mask;
+};
+
 struct si_scissors {
 	struct r600_atom		atom;
 	unsigned			dirty_mask;
@@ -191,6 +196,7 @@ struct si_context {
 	struct si_viewports		viewports;
 	struct si_clip_state		clip_state;
 	struct r600_atom		clip_regs;
+	struct si_sample_mask		sample_mask;
 	struct r600_atom		msaa_sample_locs;
 	struct r600_atom		msaa_config;
 	int				ps_iter_samples;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index d3e05b5e024..80c7bde6db0 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2819,18 +2819,22 @@ static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
 static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
-	struct si_state_sample_mask *state = CALLOC_STRUCT(si_state_sample_mask);
-	struct si_pm4_state *pm4 = &state->pm4;
-	uint16_t mask = sample_mask;
 
-        if (state == NULL)
-                return;
+	if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask)
+		return;
 
-	state->sample_mask = mask;
-	si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16));
-	si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16));
+	sctx->sample_mask.sample_mask = sample_mask;
+	si_mark_atom_dirty(sctx, &sctx->sample_mask.atom);
+}
+
+static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom)
+{
+	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	unsigned mask = sctx->sample_mask.sample_mask;
 
-	si_pm4_set_state(sctx, sample_mask, state);
+	r600_write_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
+	radeon_emit(cs, mask | (mask << 16));
+	radeon_emit(cs, mask | (mask << 16));
 }
 
 static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
@@ -3056,6 +3060,7 @@ void si_init_state_functions(struct si_context *sctx)
 	si_init_atom(sctx, &sctx->msaa_sample_locs, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs, 18);
 	si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);
 	si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config, 10);
+	si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask, 4);
 	si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6);
 	si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state, 2+6*4);
 	si_init_atom(sctx, &sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors, 16*4);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index a8a22eb5dfc..77c4dd118dc 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -43,11 +43,6 @@ struct si_state_blend {
 	bool			dual_src_blend;
 };
 
-struct si_state_sample_mask {
-	struct si_pm4_state	pm4;
-	uint16_t		sample_mask;
-};
-
 struct si_state_rasterizer {
 	struct si_pm4_state	pm4;
 	bool			flatshade;
@@ -84,7 +79,6 @@ union si_state {
 	struct {
 		struct si_state_blend		*blend;
 		struct si_pm4_state		*blend_color;
-		struct si_state_sample_mask	*sample_mask;
 		struct si_state_rasterizer	*rasterizer;
 		struct si_state_dsa		*dsa;
 		struct si_pm4_state		*fb_rs;
@@ -113,6 +107,7 @@ union si_state_atoms {
 		struct r600_atom *msaa_sample_locs;
 		struct r600_atom *db_render_state;
 		struct r600_atom *msaa_config;
+		struct r600_atom *sample_mask;
 		struct r600_atom *clip_regs;
 		struct r600_atom *clip_state;
 		struct r600_atom *shader_userdata;
-- 
cgit v1.2.3


From c44de3097925e0d7b4f310432448a62a681189d5 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sat, 29 Aug 2015 15:05:53 +0200
Subject: radeonsi: convert blend color state into an atom
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_hw_context.c |  1 +
 src/gallium/drivers/radeonsi/si_pipe.h       |  6 ++++++
 src/gallium/drivers/radeonsi/si_state.c      | 20 ++++++++++++--------
 src/gallium/drivers/radeonsi/si_state.h      |  2 +-
 4 files changed, 20 insertions(+), 9 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index c706d75c161..99aa039a0f4 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -193,6 +193,7 @@ void si_begin_new_cs(struct si_context *ctx)
 	si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
 	si_mark_atom_dirty(ctx, &ctx->msaa_config);
 	si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
+	si_mark_atom_dirty(ctx, &ctx->blend_color.atom);
 	si_mark_atom_dirty(ctx, &ctx->db_render_state);
 	si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
 	si_all_descriptors_begin_new_cs(ctx);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 4d479e0990e..60124ea5cb5 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -93,6 +93,11 @@ struct si_screen {
 	struct r600_common_screen	b;
 };
 
+struct si_blend_color {
+	struct r600_atom		atom;
+	struct pipe_blend_color		state;
+};
+
 struct si_sampler_view {
 	struct pipe_sampler_view	base;
 	struct list_head		list;
@@ -192,6 +197,7 @@ struct si_context {
 	struct r600_resource		*border_color_table;
 	unsigned			border_color_offset;
 
+	struct si_blend_color		blend_color;
 	struct si_scissors		scissors;
 	struct si_viewports		viewports;
 	struct si_clip_state		clip_state;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 80c7bde6db0..df1a13c6692 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -452,17 +452,20 @@ static void si_set_blend_color(struct pipe_context *ctx,
 			       const struct pipe_blend_color *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
-	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
 
-        if (pm4 == NULL)
-                return;
+	if (memcmp(&sctx->blend_color.state, state, sizeof(*state)) == 0)
+		return;
 
-	si_pm4_set_reg(pm4, R_028414_CB_BLEND_RED, fui(state->color[0]));
-	si_pm4_set_reg(pm4, R_028418_CB_BLEND_GREEN, fui(state->color[1]));
-	si_pm4_set_reg(pm4, R_02841C_CB_BLEND_BLUE, fui(state->color[2]));
-	si_pm4_set_reg(pm4, R_028420_CB_BLEND_ALPHA, fui(state->color[3]));
+	sctx->blend_color.state = *state;
+	si_mark_atom_dirty(sctx, &sctx->blend_color.atom);
+}
+
+static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom)
+{
+	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
 
-	si_pm4_set_state(sctx, blend_color, pm4);
+	r600_write_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
+	radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4);
 }
 
 /*
@@ -3061,6 +3064,7 @@ void si_init_state_functions(struct si_context *sctx)
 	si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);
 	si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config, 10);
 	si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask, 4);
+	si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color, 6);
 	si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6);
 	si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state, 2+6*4);
 	si_init_atom(sctx, &sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors, 16*4);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 77c4dd118dc..68a7cf9c3b4 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -78,7 +78,6 @@ struct si_vertex_element
 union si_state {
 	struct {
 		struct si_state_blend		*blend;
-		struct si_pm4_state		*blend_color;
 		struct si_state_rasterizer	*rasterizer;
 		struct si_state_dsa		*dsa;
 		struct si_pm4_state		*fb_rs;
@@ -108,6 +107,7 @@ union si_state_atoms {
 		struct r600_atom *db_render_state;
 		struct r600_atom *msaa_config;
 		struct r600_atom *sample_mask;
+		struct r600_atom *blend_color;
 		struct r600_atom *clip_regs;
 		struct r600_atom *clip_state;
 		struct r600_atom *shader_userdata;
-- 
cgit v1.2.3


From e21418f221f645397847c867b5f368ad0753e6fe Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sat, 29 Aug 2015 17:00:11 +0200
Subject: radeonsi: convert stencil ref state into an atom
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_blit.c       |  2 +-
 src/gallium/drivers/radeonsi/si_hw_context.c |  1 +
 src/gallium/drivers/radeonsi/si_pipe.h       |  2 +-
 src/gallium/drivers/radeonsi/si_state.c      | 66 ++++++++++++++--------------
 src/gallium/drivers/radeonsi/si_state.h      | 18 ++++++--
 5 files changed, 50 insertions(+), 39 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 08828ac8446..93fa67a953e 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -53,7 +53,7 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
 
 	util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend);
 	util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
-	util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref);
+	util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
 	util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
 	util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader);
 	util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader);
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 99aa039a0f4..fbac95d5b29 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -195,6 +195,7 @@ void si_begin_new_cs(struct si_context *ctx)
 	si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
 	si_mark_atom_dirty(ctx, &ctx->blend_color.atom);
 	si_mark_atom_dirty(ctx, &ctx->db_render_state);
+	si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
 	si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
 	si_all_descriptors_begin_new_cs(ctx);
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 60124ea5cb5..6eb0cb3bc92 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -177,7 +177,6 @@ struct si_context {
 	struct si_framebuffer		framebuffer;
 	struct si_vertex_element	*vertex_elements;
 	/* for saving when using blitter */
-	struct pipe_stencil_ref		stencil_ref;
 	/* shaders */
 	struct si_shader_selector	*ps_shader;
 	struct si_shader_selector	*gs_shader;
@@ -198,6 +197,7 @@ struct si_context {
 	unsigned			border_color_offset;
 
 	struct si_blend_color		blend_color;
+	struct si_stencil_ref		stencil_ref;
 	struct si_scissors		scissors;
 	struct si_viewports		viewports;
 	struct si_clip_state		clip_state;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index df1a13c6692..20e690b2bcf 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -825,39 +825,33 @@ static void si_delete_rs_state(struct pipe_context *ctx, void *state)
 /*
  * infeered state between dsa and stencil ref
  */
-static void si_update_dsa_stencil_ref(struct si_context *sctx)
+static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct si_pm4_state *pm4;
-	struct pipe_stencil_ref *ref = &sctx->stencil_ref;
-	struct si_state_dsa *dsa = sctx->queued.named.dsa;
-
-	if (!dsa)
-		return;
-
-	pm4 = CALLOC_STRUCT(si_pm4_state);
-	if (pm4 == NULL)
-		return;
-
-	si_pm4_set_reg(pm4, R_028430_DB_STENCILREFMASK,
-		       S_028430_STENCILTESTVAL(ref->ref_value[0]) |
-		       S_028430_STENCILMASK(dsa->valuemask[0]) |
-		       S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
-		       S_028430_STENCILOPVAL(1));
-	si_pm4_set_reg(pm4, R_028434_DB_STENCILREFMASK_BF,
-		       S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
-		       S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
-		       S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
-		       S_028434_STENCILOPVAL_BF(1));
+	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
+	struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;
 
-	si_pm4_set_state(sctx, dsa_stencil_ref, pm4);
+	r600_write_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
+	radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) |
+			S_028430_STENCILMASK(dsa->valuemask[0]) |
+			S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
+			S_028430_STENCILOPVAL(1));
+	radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
+			S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
+			S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
+			S_028434_STENCILOPVAL_BF(1));
 }
 
-static void si_set_pipe_stencil_ref(struct pipe_context *ctx,
-				    const struct pipe_stencil_ref *state)
+static void si_set_stencil_ref(struct pipe_context *ctx,
+			       const struct pipe_stencil_ref *state)
 {
         struct si_context *sctx = (struct si_context *)ctx;
-        sctx->stencil_ref = *state;
-	si_update_dsa_stencil_ref(sctx);
+
+	if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0)
+		return;
+
+	sctx->stencil_ref.state = *state;
+	si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
 }
 
 
@@ -904,10 +898,10 @@ static void *si_create_dsa_state(struct pipe_context *ctx,
 		return NULL;
 	}
 
-	dsa->valuemask[0] = state->stencil[0].valuemask;
-	dsa->valuemask[1] = state->stencil[1].valuemask;
-	dsa->writemask[0] = state->stencil[0].writemask;
-	dsa->writemask[1] = state->stencil[1].writemask;
+	dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask;
+	dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask;
+	dsa->stencil_ref.writemask[0] = state->stencil[0].writemask;
+	dsa->stencil_ref.writemask[1] = state->stencil[1].writemask;
 
 	db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
 		S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
@@ -960,7 +954,12 @@ static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
                 return;
 
 	si_pm4_bind_state(sctx, dsa, dsa);
-	si_update_dsa_stencil_ref(sctx);
+
+	if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
+		   sizeof(struct si_dsa_stencil_ref_part)) != 0) {
+		sctx->stencil_ref.dsa_part = dsa->stencil_ref;
+		si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
+	}
 }
 
 static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
@@ -3069,6 +3068,7 @@ void si_init_state_functions(struct si_context *sctx)
 	si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state, 2+6*4);
 	si_init_atom(sctx, &sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors, 16*4);
 	si_init_atom(sctx, &sctx->viewports.atom, &sctx->atoms.s.viewports, si_emit_viewports, 16*8);
+	si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref, 4);
 
 	sctx->b.b.create_blend_state = si_create_blend_state;
 	sctx->b.b.bind_blend_state = si_bind_blend_state;
@@ -3091,7 +3091,7 @@ void si_init_state_functions(struct si_context *sctx)
 	sctx->b.b.set_clip_state = si_set_clip_state;
 	sctx->b.b.set_scissor_states = si_set_scissor_states;
 	sctx->b.b.set_viewport_states = si_set_viewport_states;
-	sctx->b.b.set_stencil_ref = si_set_pipe_stencil_ref;
+	sctx->b.b.set_stencil_ref = si_set_stencil_ref;
 
 	sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
 	sctx->b.b.get_sample_position = cayman_get_sample_position;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 68a7cf9c3b4..60483db19b5 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -60,13 +60,23 @@ struct si_state_rasterizer {
 	bool			poly_smooth;
 };
 
-struct si_state_dsa {
-	struct si_pm4_state	pm4;
-	unsigned		alpha_func;
+struct si_dsa_stencil_ref_part {
 	uint8_t			valuemask[2];
 	uint8_t			writemask[2];
 };
 
+struct si_state_dsa {
+	struct si_pm4_state		pm4;
+	unsigned			alpha_func;
+	struct si_dsa_stencil_ref_part	stencil_ref;
+};
+
+struct si_stencil_ref {
+	struct r600_atom		atom;
+	struct pipe_stencil_ref		state;
+	struct si_dsa_stencil_ref_part	dsa_part;
+};
+
 struct si_vertex_element
 {
 	unsigned			count;
@@ -82,7 +92,6 @@ union si_state {
 		struct si_state_dsa		*dsa;
 		struct si_pm4_state		*fb_rs;
 		struct si_pm4_state		*fb_blend;
-		struct si_pm4_state		*dsa_stencil_ref;
 		struct si_pm4_state		*ta_bordercolor_base;
 		struct si_pm4_state		*ls;
 		struct si_pm4_state		*hs;
@@ -113,6 +122,7 @@ union si_state_atoms {
 		struct r600_atom *shader_userdata;
 		struct r600_atom *scissors;
 		struct r600_atom *viewports;
+		struct r600_atom *stencil_ref;
 	} s;
 	struct r600_atom *array[0];
 };
-- 
cgit v1.2.3


From 8a67e78bb8f66a5a142222fdb4d193da1a03ed22 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 00:16:01 +0200
Subject: radeonsi: don't set VGT_VTX_CNT_EN twice in init_config
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_state.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 20e690b2bcf..d52b9379bad 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3279,7 +3279,6 @@ static void si_init_config(struct si_context *sctx)
 	si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
 
 	si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
-	si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0);
 	si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
 
 	si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
-- 
cgit v1.2.3


From 45e549fcbc8c2454e242155f0cf4c21360f0b958 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 00:50:42 +0200
Subject: radeonsi: convert CB_TARGET_MASK setup to an atom
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_hw_context.c    |  1 +
 src/gallium/drivers/radeonsi/si_pipe.h          |  1 +
 src/gallium/drivers/radeonsi/si_state.c         | 23 +++++++++--------------
 src/gallium/drivers/radeonsi/si_state.h         |  3 +--
 src/gallium/drivers/radeonsi/si_state_shaders.c |  2 +-
 5 files changed, 13 insertions(+), 17 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index fbac95d5b29..28aed79a0bf 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -193,6 +193,7 @@ void si_begin_new_cs(struct si_context *ctx)
 	si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
 	si_mark_atom_dirty(ctx, &ctx->msaa_config);
 	si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
+	si_mark_atom_dirty(ctx, &ctx->cb_target_mask);
 	si_mark_atom_dirty(ctx, &ctx->blend_color.atom);
 	si_mark_atom_dirty(ctx, &ctx->db_render_state);
 	si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 6eb0cb3bc92..e5d5d10851f 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -203,6 +203,7 @@ struct si_context {
 	struct si_clip_state		clip_state;
 	struct r600_atom		clip_regs;
 	struct si_sample_mask		sample_mask;
+	struct r600_atom		cb_target_mask;
 	struct r600_atom		msaa_sample_locs;
 	struct r600_atom		msaa_config;
 	int				ps_iter_samples;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index d52b9379bad..0c1448eb2d5 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -249,23 +249,18 @@ static unsigned si_pack_float_12p4(float x)
  *
  * Another reason is to avoid a hang with dual source blending.
  */
-void si_update_fb_blend_state(struct si_context *sctx)
+static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct si_pm4_state *pm4;
+	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
 	struct si_state_blend *blend = sctx->queued.named.blend;
 	uint32_t mask = 0, i;
 
-	if (blend == NULL)
-		return;
-
-	pm4 = CALLOC_STRUCT(si_pm4_state);
-	if (pm4 == NULL)
-		return;
-
 	for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++)
 		if (sctx->framebuffer.state.cbufs[i])
 			mask |= 0xf << (4*i);
-	mask &= blend->cb_target_mask;
+
+	if (blend)
+		mask &= blend->cb_target_mask;
 
 	/* Avoid a hang that happens when dual source blending is enabled
 	 * but there is not enough color outputs. This is undefined behavior,
@@ -277,8 +272,7 @@ void si_update_fb_blend_state(struct si_context *sctx)
 	    (sctx->ps_shader->ps_colors_written & 0x3) != 0x3)
 		mask = 0;
 
-	si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
-	si_pm4_set_state(sctx, fb_blend, pm4);
+	r600_write_context_reg(cs, R_028238_CB_TARGET_MASK, mask);
 }
 
 /*
@@ -439,7 +433,7 @@ static void si_bind_blend_state(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
-	si_update_fb_blend_state(sctx);
+	si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
 }
 
 static void si_delete_blend_state(struct pipe_context *ctx, void *state)
@@ -2164,7 +2158,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 	}
 
 	si_update_fb_rs_state(sctx);
-	si_update_fb_blend_state(sctx);
+	si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
 
 	sctx->framebuffer.atom.num_dw = state->nr_cbufs*16 + (8 - state->nr_cbufs)*3;
 	sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4;
@@ -3063,6 +3057,7 @@ void si_init_state_functions(struct si_context *sctx)
 	si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);
 	si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config, 10);
 	si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask, 4);
+	si_init_atom(sctx, &sctx->cb_target_mask, &sctx->atoms.s.cb_target_mask, si_emit_cb_target_mask, 3);
 	si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color, 6);
 	si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6);
 	si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state, 2+6*4);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 60483db19b5..40be40a6fa4 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -91,7 +91,6 @@ union si_state {
 		struct si_state_rasterizer	*rasterizer;
 		struct si_state_dsa		*dsa;
 		struct si_pm4_state		*fb_rs;
-		struct si_pm4_state		*fb_blend;
 		struct si_pm4_state		*ta_bordercolor_base;
 		struct si_pm4_state		*ls;
 		struct si_pm4_state		*hs;
@@ -116,6 +115,7 @@ union si_state_atoms {
 		struct r600_atom *db_render_state;
 		struct r600_atom *msaa_config;
 		struct r600_atom *sample_mask;
+		struct r600_atom *cb_target_mask;
 		struct r600_atom *blend_color;
 		struct r600_atom *clip_regs;
 		struct r600_atom *clip_state;
@@ -269,7 +269,6 @@ void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
 		  struct r600_atom **list_elem,
 		  void (*emit_func)(struct si_context *ctx, struct r600_atom *state),
 		  unsigned num_dw);
-void si_update_fb_blend_state(struct si_context *sctx);
 boolean si_is_format_supported(struct pipe_screen *screen,
                                enum pipe_format format,
                                enum pipe_texture_target target,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index af91af97ce8..702af8c803e 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -873,7 +873,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
 	}
 
 	sctx->ps_shader = sel;
-	si_update_fb_blend_state(sctx);
+	si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
 }
 
 static void si_delete_shader_selector(struct pipe_context *ctx,
-- 
cgit v1.2.3


From 0da159ecacbc2dc89e7866679912fdc3e73e20a1 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 01:35:03 +0200
Subject: radeonsi: rename and precalculate polygon offset states
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This saves one calloc and one state construction while drawing.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_state.c | 79 ++++++++++++++++++---------------
 src/gallium/drivers/radeonsi/si_state.h |  6 +--
 2 files changed, 45 insertions(+), 40 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 0c1448eb2d5..c20ea94d7df 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -635,48 +635,25 @@ static void si_emit_viewports(struct si_context *sctx, struct r600_atom *atom)
 /*
  * inferred state between framebuffer and rasterizer
  */
-static void si_update_fb_rs_state(struct si_context *sctx)
+static void si_update_poly_offset_state(struct si_context *sctx)
 {
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
-	struct si_pm4_state *pm4;
-	float offset_units;
 
 	if (!rs || !sctx->framebuffer.state.zsbuf)
 		return;
 
-	offset_units = sctx->queued.named.rasterizer->offset_units;
 	switch (sctx->framebuffer.state.zsbuf->texture->format) {
-	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
-	case PIPE_FORMAT_X8Z24_UNORM:
-	case PIPE_FORMAT_Z24X8_UNORM:
-	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-		offset_units *= 2.0f;
+	case PIPE_FORMAT_Z16_UNORM:
+		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
+		break;
+	default: /* 24-bit */
+		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]);
 		break;
 	case PIPE_FORMAT_Z32_FLOAT:
 	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
-		offset_units *= 1.0f;
-		break;
-	case PIPE_FORMAT_Z16_UNORM:
-		offset_units *= 4.0f;
+		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]);
 		break;
-	default:
-		return;
 	}
-
-	pm4 = CALLOC_STRUCT(si_pm4_state);
-
-	if (pm4 == NULL)
-		return;
-
-	/* FIXME some of those reg can be computed with cso */
-	si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
-		       fui(sctx->queued.named.rasterizer->offset_scale));
-	si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units));
-	si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
-		       fui(sctx->queued.named.rasterizer->offset_scale));
-	si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units));
-
-	si_pm4_set_state(sctx, fb_rs, pm4);
 }
 
 /*
@@ -703,7 +680,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
 {
 	struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);
 	struct si_pm4_state *pm4 = &rs->pm4;
-	unsigned tmp;
+	unsigned tmp, i;
 	float psize_min, psize_max;
 
 	if (rs == NULL) {
@@ -731,10 +708,6 @@ static void *si_create_rs_state(struct pipe_context *ctx,
 		S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
 		S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
 
-	/* offset */
-	rs->offset_units = state->offset_units;
-	rs->offset_scale = state->offset_scale * 16.0f;
-
 	si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0,
 		S_0286D4_FLAT_SHADE_ENA(1) |
 		S_0286D4_PNT_SPRITE_ENA(1) |
@@ -787,6 +760,35 @@ static void *si_create_rs_state(struct pipe_context *ctx,
 				   state->fill_back != PIPE_POLYGON_MODE_FILL) |
 		S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
 		S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)));
+
+	/* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */
+	for (i = 0; i < 3; i++) {
+		struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i];
+		float offset_units = state->offset_units;
+		float offset_scale = state->offset_scale * 16.0f;
+
+		switch (i) {
+		case 0: /* 16-bit zbuffer */
+			offset_units *= 4.0f;
+			break;
+		case 1: /* 24-bit zbuffer */
+			offset_units *= 2.0f;
+			break;
+		case 2: /* 32-bit zbuffer */
+			offset_units *= 1.0f;
+			break;
+		}
+
+		si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
+			       fui(offset_scale));
+		si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
+			       fui(offset_units));
+		si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
+			       fui(offset_scale));
+		si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
+			       fui(offset_units));
+	}
+
 	return rs;
 }
 
@@ -805,7 +807,7 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
 		si_mark_atom_dirty(sctx, &sctx->db_render_state);
 
 	si_pm4_bind_state(sctx, rasterizer, rs);
-	si_update_fb_rs_state(sctx);
+	si_update_poly_offset_state(sctx);
 
 	si_mark_atom_dirty(sctx, &sctx->clip_regs);
 }
@@ -813,6 +815,9 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
 static void si_delete_rs_state(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
+
+	if (sctx->queued.named.rasterizer == state)
+		si_pm4_bind_state(sctx, poly_offset, NULL);
 	si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state);
 }
 
@@ -2157,7 +2162,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 		r600_context_add_resource_size(ctx, surf->base.texture);
 	}
 
-	si_update_fb_rs_state(sctx);
+	si_update_poly_offset_state(sctx);
 	si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
 
 	sctx->framebuffer.atom.num_dw = state->nr_cbufs*16 + (8 - state->nr_cbufs)*3;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 40be40a6fa4..a396f07e926 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -45,6 +45,8 @@ struct si_state_blend {
 
 struct si_state_rasterizer {
 	struct si_pm4_state	pm4;
+	/* poly offset states for 16-bit, 24-bit, and 32-bit zbuffers */
+	struct si_pm4_state	pm4_poly_offset[3];
 	bool			flatshade;
 	bool			two_side;
 	bool			multisample_enable;
@@ -53,8 +55,6 @@ struct si_state_rasterizer {
 	unsigned		pa_sc_line_stipple;
 	unsigned		pa_cl_clip_cntl;
 	unsigned		clip_plane_enable;
-	float			offset_units;
-	float			offset_scale;
 	bool			poly_stipple_enable;
 	bool			line_smooth;
 	bool			poly_smooth;
@@ -90,7 +90,7 @@ union si_state {
 		struct si_state_blend		*blend;
 		struct si_state_rasterizer	*rasterizer;
 		struct si_state_dsa		*dsa;
-		struct si_pm4_state		*fb_rs;
+		struct si_pm4_state		*poly_offset;
 		struct si_pm4_state		*ta_bordercolor_base;
 		struct si_pm4_state		*ls;
 		struct si_pm4_state		*hs;
-- 
cgit v1.2.3


From d2e63ac042ce4b0ff7d4645fc9bc8d2d73967b7e Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 01:54:00 +0200
Subject: gallium/radeon: rename write_*_reg functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

e.g. radeon_set_context_reg is nicer and looks consistent next to
radeon_emit().

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/r600/evergreen_compute.c |  20 ++---
 src/gallium/drivers/r600/evergreen_state.c   |  94 +++++++++++-----------
 src/gallium/drivers/r600/r600_hw_context.c   |   4 +-
 src/gallium/drivers/r600/r600_pipe.h         |  20 ++---
 src/gallium/drivers/r600/r600_state.c        | 112 +++++++++++++--------------
 src/gallium/drivers/r600/r600_state_common.c |  32 ++++----
 src/gallium/drivers/radeon/cayman_msaa.c     |  36 ++++-----
 src/gallium/drivers/radeon/r600_cs.h         |  24 +++---
 src/gallium/drivers/radeon/r600_streamout.c  |  14 ++--
 src/gallium/drivers/radeonsi/si_state.c      |  54 ++++++-------
 src/gallium/drivers/radeonsi/si_state_draw.c |  30 +++----
 11 files changed, 220 insertions(+), 220 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index c52e43e9c2a..ede9a1b3edc 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -379,17 +379,17 @@ static void evergreen_emit_direct_dispatch(
 				"allocating %u dwords lds.\n",
 				num_pipes, num_waves, lds_size);
 
-	r600_write_config_reg(cs, R_008970_VGT_NUM_INDICES, group_size);
+	radeon_set_config_reg(cs, R_008970_VGT_NUM_INDICES, group_size);
 
-	r600_write_config_reg_seq(cs, R_00899C_VGT_COMPUTE_START_X, 3);
+	radeon_set_config_reg_seq(cs, R_00899C_VGT_COMPUTE_START_X, 3);
 	radeon_emit(cs, 0); /* R_00899C_VGT_COMPUTE_START_X */
 	radeon_emit(cs, 0); /* R_0089A0_VGT_COMPUTE_START_Y */
 	radeon_emit(cs, 0); /* R_0089A4_VGT_COMPUTE_START_Z */
 
-	r600_write_config_reg(cs, R_0089AC_VGT_COMPUTE_THREAD_GROUP_SIZE,
+	radeon_set_config_reg(cs, R_0089AC_VGT_COMPUTE_THREAD_GROUP_SIZE,
 								group_size);
 
-	r600_write_compute_context_reg_seq(cs, R_0286EC_SPI_COMPUTE_NUM_THREAD_X, 3);
+	radeon_compute_set_context_reg_seq(cs, R_0286EC_SPI_COMPUTE_NUM_THREAD_X, 3);
 	radeon_emit(cs, block_layout[0]); /* R_0286EC_SPI_COMPUTE_NUM_THREAD_X */
 	radeon_emit(cs, block_layout[1]); /* R_0286F0_SPI_COMPUTE_NUM_THREAD_Y */
 	radeon_emit(cs, block_layout[2]); /* R_0286F4_SPI_COMPUTE_NUM_THREAD_Z */
@@ -402,7 +402,7 @@ static void evergreen_emit_direct_dispatch(
 		assert(lds_size <= 8160);
 	}
 
-	r600_write_compute_context_reg(cs, CM_R_0288E8_SQ_LDS_ALLOC,
+	radeon_compute_set_context_reg(cs, CM_R_0288E8_SQ_LDS_ALLOC,
 					lds_size | (num_waves << 14));
 
 	/* Dispatch packet */
@@ -444,7 +444,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
 						       RADEON_USAGE_READWRITE,
 						       RADEON_PRIO_SHADER_RESOURCE_RW);
 
-		r600_write_compute_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
+		radeon_compute_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
 		radeon_emit(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
 		radeon_emit(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
 		radeon_emit(cs, cb->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
@@ -466,17 +466,17 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
 	}
 	if (ctx->keep_tiling_flags) {
 		for (; i < 8 ; i++) {
-			r600_write_compute_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
+			radeon_compute_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
 						       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
 		}
 		for (; i < 12; i++) {
-			r600_write_compute_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C,
+			radeon_compute_set_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C,
 						       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
 		}
 	}
 
 	/* Set CB_TARGET_MASK  XXX: Use cb_misc_state */
-	r600_write_compute_context_reg(cs, R_028238_CB_TARGET_MASK,
+	radeon_compute_set_context_reg(cs, R_028238_CB_TARGET_MASK,
 					ctx->compute_cb_target_mask);
 
 
@@ -556,7 +556,7 @@ void evergreen_emit_cs_shader(
 	nstack = shader->bc.nstack;
 #endif
 
-	r600_write_compute_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3);
+	radeon_compute_set_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3);
 	radeon_emit(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */
 	radeon_emit(cs,           /* R_0288D4_SQ_PGM_RESOURCES_LS */
 			S_0288D4_NUM_GPRS(ngpr)
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 7c82390ba40..5c03f0e6c44 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -857,7 +857,7 @@ static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_ato
 	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
 	struct pipe_clip_state *state = &rctx->clip_state.state;
 
-	r600_write_context_reg_seq(cs, R_0285BC_PA_CL_UCP0_X, 6*4);
+	radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP0_X, 6*4);
 	radeon_emit_array(cs, (unsigned*)state, 6*4);
 }
 
@@ -910,7 +910,7 @@ static void evergreen_emit_scissor_state(struct r600_context *rctx, struct r600_
 
 	evergreen_get_scissor_rect(rctx, state->minx, state->miny, state->maxx, state->maxy, &tl, &br);
 
-	r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2);
+	radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2);
 	radeon_emit(cs, tl);
 	radeon_emit(cs, br);
 }
@@ -1505,34 +1505,34 @@ static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples,
 		nr_samples = 0;
 		break;
 	case 2:
-		r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, Elements(eg_sample_locs_2x));
+		radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, Elements(eg_sample_locs_2x));
 		radeon_emit_array(cs, eg_sample_locs_2x, Elements(eg_sample_locs_2x));
 		max_dist = eg_max_dist_2x;
 		break;
 	case 4:
-		r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, Elements(eg_sample_locs_4x));
+		radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, Elements(eg_sample_locs_4x));
 		radeon_emit_array(cs, eg_sample_locs_4x, Elements(eg_sample_locs_4x));
 		max_dist = eg_max_dist_4x;
 		break;
 	case 8:
-		r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, Elements(sample_locs_8x));
+		radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, Elements(sample_locs_8x));
 		radeon_emit_array(cs, sample_locs_8x, Elements(sample_locs_8x));
 		max_dist = max_dist_8x;
 		break;
 	}
 
 	if (nr_samples > 1) {
-		r600_write_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
+		radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
 		radeon_emit(cs, S_028C00_LAST_PIXEL(1) |
 				     S_028C00_EXPAND_LINE_WIDTH(1)); /* R_028C00_PA_SC_LINE_CNTL */
 		radeon_emit(cs, S_028C04_MSAA_NUM_SAMPLES(util_logbase2(nr_samples)) |
 				     S_028C04_MAX_SAMPLE_DIST(max_dist)); /* R_028C04_PA_SC_AA_CONFIG */
-		r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
+		radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
 	} else {
-		r600_write_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
+		radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
 		radeon_emit(cs, S_028C00_LAST_PIXEL(1)); /* R_028C00_PA_SC_LINE_CNTL */
 		radeon_emit(cs, 0); /* R_028C04_PA_SC_AA_CONFIG */
-		r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
+		radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
 	}
 }
 
@@ -1556,7 +1556,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 
 		cb = (struct r600_surface*)state->cbufs[i];
 		if (!cb) {
-			r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
+			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
 					       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
 			continue;
 		}
@@ -1578,7 +1578,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 			cmask_reloc = reloc;
 		}
 
-		r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13);
+		radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13);
 		radeon_emit(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
 		radeon_emit(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
 		radeon_emit(cs, cb->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
@@ -1612,7 +1612,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 	}
 	/* set CB_COLOR1_INFO for possible dual-src blending */
 	if (i == 1 && state->cbufs[0]) {
-		r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
+		radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
 				       cb->cb_color_info | tex->cb_color_info);
 
 		if (!rctx->keep_tiling_flags) {
@@ -1629,10 +1629,10 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 	}
 	if (rctx->keep_tiling_flags) {
 		for (; i < 8 ; i++) {
-			r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
+			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
 		}
 		for (; i < 12; i++) {
-			r600_write_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C, 0);
+			radeon_set_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C, 0);
 		}
 	}
 
@@ -1647,11 +1647,11 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 							       RADEON_PRIO_DEPTH_BUFFER_MSAA :
 							       RADEON_PRIO_DEPTH_BUFFER);
 
-		r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
+		radeon_set_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
 				       zb->pa_su_poly_offset_db_fmt_cntl);
-		r600_write_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
+		radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
 
-		r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 8);
+		radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 8);
 		radeon_emit(cs, zb->db_z_info);		/* R_028040_DB_Z_INFO */
 		radeon_emit(cs, zb->db_stencil_info);	/* R_028044_DB_STENCIL_INFO */
 		radeon_emit(cs, zb->db_depth_base);	/* R_028048_DB_Z_READ_BASE */
@@ -1680,7 +1680,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 	} else if (rctx->screen->b.info.drm_minor >= 18) {
 		/* DRM 2.6.18 allows the INVALID format to disable depth/stencil.
 		 * Older kernels are out of luck. */
-		r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
+		radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
 		radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
 		radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
 	}
@@ -1688,7 +1688,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 	/* Framebuffer dimensions. */
 	evergreen_get_scissor_rect(rctx, 0, 0, state->width, state->height, &tl, &br);
 
-	r600_write_context_reg_seq(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, 2);
+	radeon_set_context_reg_seq(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, 2);
 	radeon_emit(cs, tl); /* R_028204_PA_SC_WINDOW_SCISSOR_TL */
 	radeon_emit(cs, br); /* R_028208_PA_SC_WINDOW_SCISSOR_BR */
 
@@ -1720,7 +1720,7 @@ static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600
 	default:;
 	}
 
-	r600_write_context_reg_seq(cs, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 4);
+	radeon_set_context_reg_seq(cs, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 4);
 	radeon_emit(cs, fui(offset_scale));
 	radeon_emit(cs, fui(offset_units));
 	radeon_emit(cs, fui(offset_scale));
@@ -1734,7 +1734,7 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_
 	unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1;
 	unsigned ps_colormask = (1ULL << ((unsigned)a->nr_ps_color_outputs * 4)) - 1;
 
-	r600_write_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
+	radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
 	radeon_emit(cs, a->blend_colormask & fb_colormask); /* R_028238_CB_TARGET_MASK */
 	/* This must match the used export instructions exactly.
 	 * Other values may lead to undefined behavior and hangs.
@@ -1751,17 +1751,17 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom
 		struct r600_texture *rtex = (struct r600_texture *)a->rsurf->base.texture;
 		unsigned reloc_idx;
 
-		r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
-		r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
-		r600_write_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
-		r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
+		radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
+		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
+		radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
+		radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
 		reloc_idx = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
 						  RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META);
 		cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
 		cs->buf[cs->cdw++] = reloc_idx;
 	} else {
-		r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, 0);
-		r600_write_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0);
+		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, 0);
+		radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0);
 	}
 }
 
@@ -1822,11 +1822,11 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
 		db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(1);
 	}
 
-	r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
+	radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
 	radeon_emit(cs, db_render_control); /* R_028000_DB_RENDER_CONTROL */
 	radeon_emit(cs, db_count_control); /* R_028004_DB_COUNT_CONTROL */
-	r600_write_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
-	r600_write_context_reg(cs, R_02880C_DB_SHADER_CONTROL, a->db_shader_control);
+	radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
+	radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, a->db_shader_control);
 }
 
 static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
@@ -1910,9 +1910,9 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 		va = rbuffer->gpu_address + cb->buffer_offset;
 
 		if (!gs_ring_buffer) {
-			r600_write_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
+			radeon_set_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
 						    ALIGN_DIVUP(cb->buffer_size >> 4, 16), pkt_flags);
-			r600_write_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
+			radeon_set_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
 						    pkt_flags);
 		}
 
@@ -2062,7 +2062,7 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
 		radeon_emit_array(cs, rstate->tex_sampler_words, 3);
 
 		if (rstate->border_color_use) {
-			r600_write_config_reg_seq(cs, border_index_reg, 5);
+			radeon_set_config_reg_seq(cs, border_index_reg, 5);
 			radeon_emit(cs, i);
 			radeon_emit_array(cs, rstate->border_color.ui, 4);
 		}
@@ -2100,7 +2100,7 @@ static void evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_at
 	struct r600_sample_mask *s = (struct r600_sample_mask*)a;
 	uint8_t mask = s->sample_mask;
 
-	r600_write_context_reg(rctx->b.rings.gfx.cs, R_028C3C_PA_SC_AA_MASK,
+	radeon_set_context_reg(rctx->b.rings.gfx.cs, R_028C3C_PA_SC_AA_MASK,
 			       mask | (mask << 8) | (mask << 16) | (mask << 24));
 }
 
@@ -2110,7 +2110,7 @@ static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom
 	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
 	uint16_t mask = s->sample_mask;
 
-	r600_write_context_reg_seq(cs, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
+	radeon_set_context_reg_seq(cs, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
 	radeon_emit(cs, mask | (mask << 16)); /* X0Y0_X1Y0 */
 	radeon_emit(cs, mask | (mask << 16)); /* X0Y1_X1Y1 */
 }
@@ -2121,7 +2121,7 @@ static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct
 	struct r600_cso_state *state = (struct r600_cso_state*)a;
 	struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
 
-	r600_write_context_reg(cs, R_0288A4_SQ_PGM_START_FS,
+	radeon_set_context_reg(cs, R_0288A4_SQ_PGM_START_FS,
 			       (shader->buffer->gpu_address + shader->offset) >> 8);
 	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
 	radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
@@ -2162,9 +2162,9 @@ static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_
 			primid = 1;
 	}
 
-	r600_write_context_reg(cs, R_028B54_VGT_SHADER_STAGES_EN, v);
-	r600_write_context_reg(cs, R_028A40_VGT_GS_MODE, v2);
-	r600_write_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, primid);
+	radeon_set_context_reg(cs, R_028B54_VGT_SHADER_STAGES_EN, v);
+	radeon_set_context_reg(cs, R_028A40_VGT_GS_MODE, v2);
+	radeon_set_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, primid);
 }
 
 static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
@@ -2173,36 +2173,36 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom
 	struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a;
 	struct r600_resource *rbuffer;
 
-	r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+	radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
 	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
 
 	if (state->enable) {
 		rbuffer =(struct r600_resource*)state->esgs_ring.buffer;
-		r600_write_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE,
+		radeon_set_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE,
 				rbuffer->gpu_address >> 8);
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
 		radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
 						      RADEON_USAGE_READWRITE,
 						      RADEON_PRIO_SHADER_RESOURCE_RW));
-		r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
+		radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
 				state->esgs_ring.buffer_size >> 8);
 
 		rbuffer =(struct r600_resource*)state->gsvs_ring.buffer;
-		r600_write_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE,
+		radeon_set_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE,
 				rbuffer->gpu_address >> 8);
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
 		radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
 						      RADEON_USAGE_READWRITE,
 						      RADEON_PRIO_SHADER_RESOURCE_RW));
-		r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
+		radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
 				state->gsvs_ring.buffer_size >> 8);
 	} else {
-		r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0);
-		r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0);
+		radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0);
+		radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0);
 	}
 
-	r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+	radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
 	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
 }
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 64451516c23..d5eec15f1fb 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -235,7 +235,7 @@ void r600_flush_emit(struct r600_context *rctx)
 		/* Use of WAIT_UNTIL is deprecated on Cayman+ */
 		if (rctx->b.family < CHIP_CAYMAN) {
 			/* wait for things to settle */
-			r600_write_config_reg(cs, R_008040_WAIT_UNTIL, wait_until);
+			radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, wait_until);
 		}
 	}
 
@@ -269,7 +269,7 @@ void r600_context_gfx_flush(void *context, unsigned flags,
 
 	/* old kernels and userspace don't set SX_MISC, so we must reset it to 0 here */
 	if (ctx->b.chip_class == R600) {
-		r600_write_context_reg(cs, R_028350_SX_MISC, 0);
+		radeon_set_context_reg(cs, R_028350_SX_MISC, 0);
 	}
 
 	/* force to keep tiling flags */
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index ee3e928861b..8d5fd99e65a 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -880,14 +880,14 @@ static inline void eg_store_loop_const(struct r600_command_buffer *cb, unsigned
 void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw);
 void r600_release_command_buffer(struct r600_command_buffer *cb);
 
-static inline void r600_write_compute_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_compute_set_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
 {
-	r600_write_context_reg_seq(cs, reg, num);
+	radeon_set_context_reg_seq(cs, reg, num);
 	/* Set the compute bit on the packet header */
 	cs->buf[cs->cdw - 2] |= RADEON_CP_PACKET3_COMPUTE_MODE;
 }
 
-static inline void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_set_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
 {
 	assert(reg >= R600_CTL_CONST_OFFSET);
 	assert(cs->cdw+2+num <= cs->max_dw);
@@ -895,24 +895,24 @@ static inline void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigne
 	cs->buf[cs->cdw++] = (reg - R600_CTL_CONST_OFFSET) >> 2;
 }
 
-static inline void r600_write_compute_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void radeon_compute_set_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
-	r600_write_compute_context_reg_seq(cs, reg, 1);
+	radeon_compute_set_context_reg_seq(cs, reg, 1);
 	radeon_emit(cs, value);
 }
 
-static inline void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag)
+static inline void radeon_set_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag)
 {
 	if (flag & RADEON_CP_PACKET3_COMPUTE_MODE) {
-		r600_write_compute_context_reg(cs, reg, value);
+		radeon_compute_set_context_reg(cs, reg, value);
 	} else {
-		r600_write_context_reg(cs, reg, value);
+		radeon_set_context_reg(cs, reg, value);
 	}
 }
 
-static inline void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void radeon_set_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
-	r600_write_ctl_const_seq(cs, reg, 1);
+	radeon_set_ctl_const_seq(cs, reg, 1);
 	radeon_emit(cs, value);
 }
 
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 89e959b6b0f..1af96f64d40 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -260,7 +260,7 @@ static void r600_emit_polygon_offset(struct r600_context *rctx, struct r600_atom
 	default:;
 	}
 
-	r600_write_context_reg_seq(cs, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, 4);
+	radeon_set_context_reg_seq(cs, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, 4);
 	radeon_emit(cs, fui(offset_scale));
 	radeon_emit(cs, fui(offset_units));
 	radeon_emit(cs, fui(offset_scale));
@@ -757,7 +757,7 @@ static void r600_emit_clip_state(struct r600_context *rctx, struct r600_atom *at
 	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
 	struct pipe_clip_state *state = &rctx->clip_state.state;
 
-	r600_write_context_reg_seq(cs, R_028E20_PA_CL_UCP0_X, 6*4);
+	radeon_set_context_reg_seq(cs, R_028E20_PA_CL_UCP0_X, 6*4);
 	radeon_emit_array(cs, (unsigned*)state, 6*4);
 }
 
@@ -774,12 +774,12 @@ static void r600_emit_scissor_state(struct r600_context *rctx, struct r600_atom
 	unsigned offset = rstate->idx * 4 * 2;
 
 	if (rctx->b.chip_class != R600 || rctx->scissor[0].enable) {
-		r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2);
+		radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2);
 		radeon_emit(cs, S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) |
 				     S_028240_WINDOW_OFFSET_DISABLE(1));
 		radeon_emit(cs, S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy));
 	} else {
-		r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
+		radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
 		radeon_emit(cs, S_028240_TL_X(0) | S_028240_TL_Y(0) |
 				     S_028240_WINDOW_OFFSET_DISABLE(1));
 		radeon_emit(cs, S_028244_BR_X(8192) | S_028244_BR_Y(8192));
@@ -1322,15 +1322,15 @@ static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples)
 			nr_samples = 0;
 			break;
 		case 2:
-			r600_write_config_reg(cs, R_008B40_PA_SC_AA_SAMPLE_LOCS_2S, sample_locs_2x[0]);
+			radeon_set_config_reg(cs, R_008B40_PA_SC_AA_SAMPLE_LOCS_2S, sample_locs_2x[0]);
 			max_dist = max_dist_2x;
 			break;
 		case 4:
-			r600_write_config_reg(cs, R_008B44_PA_SC_AA_SAMPLE_LOCS_4S, sample_locs_4x[0]);
+			radeon_set_config_reg(cs, R_008B44_PA_SC_AA_SAMPLE_LOCS_4S, sample_locs_4x[0]);
 			max_dist = max_dist_4x;
 			break;
 		case 8:
-			r600_write_config_reg_seq(cs, R_008B48_PA_SC_AA_SAMPLE_LOCS_8S_WD0, 2);
+			radeon_set_config_reg_seq(cs, R_008B48_PA_SC_AA_SAMPLE_LOCS_8S_WD0, 2);
 			radeon_emit(cs, sample_locs_8x[0]); /* R_008B48_PA_SC_AA_SAMPLE_LOCS_8S_WD0 */
 			radeon_emit(cs, sample_locs_8x[1]); /* R_008B4C_PA_SC_AA_SAMPLE_LOCS_8S_WD1 */
 			max_dist = max_dist_8x;
@@ -1339,25 +1339,25 @@ static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples)
 	} else {
 		switch (nr_samples) {
 		default:
-			r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
+			radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
 			radeon_emit(cs, 0); /* R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX */
 			radeon_emit(cs, 0); /* R_028C20_PA_SC_AA_SAMPLE_LOCS_8D_WD1_MCTX */
 			nr_samples = 0;
 			break;
 		case 2:
-			r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
+			radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
 			radeon_emit(cs, sample_locs_2x[0]); /* R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX */
 			radeon_emit(cs, sample_locs_2x[1]); /* R_028C20_PA_SC_AA_SAMPLE_LOCS_8D_WD1_MCTX */
 			max_dist = max_dist_2x;
 			break;
 		case 4:
-			r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
+			radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
 			radeon_emit(cs, sample_locs_4x[0]); /* R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX */
 			radeon_emit(cs, sample_locs_4x[1]); /* R_028C20_PA_SC_AA_SAMPLE_LOCS_8D_WD1_MCTX */
 			max_dist = max_dist_4x;
 			break;
 		case 8:
-			r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
+			radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
 			radeon_emit(cs, sample_locs_8x[0]); /* R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX */
 			radeon_emit(cs, sample_locs_8x[1]); /* R_028C20_PA_SC_AA_SAMPLE_LOCS_8D_WD1_MCTX */
 			max_dist = max_dist_8x;
@@ -1366,13 +1366,13 @@ static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples)
 	}
 
 	if (nr_samples > 1) {
-		r600_write_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
+		radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
 		radeon_emit(cs, S_028C00_LAST_PIXEL(1) |
 				     S_028C00_EXPAND_LINE_WIDTH(1)); /* R_028C00_PA_SC_LINE_CNTL */
 		radeon_emit(cs, S_028C04_MSAA_NUM_SAMPLES(util_logbase2(nr_samples)) |
 				     S_028C04_MAX_SAMPLE_DIST(max_dist)); /* R_028C04_PA_SC_AA_CONFIG */
 	} else {
-		r600_write_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
+		radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
 		radeon_emit(cs, S_028C00_LAST_PIXEL(1)); /* R_028C00_PA_SC_LINE_CNTL */
 		radeon_emit(cs, 0); /* R_028C04_PA_SC_AA_CONFIG */
 	}
@@ -1387,7 +1387,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 	unsigned i, sbu = 0;
 
 	/* Colorbuffers. */
-	r600_write_context_reg_seq(cs, R_0280A0_CB_COLOR0_INFO, 8);
+	radeon_set_context_reg_seq(cs, R_0280A0_CB_COLOR0_INFO, 8);
 	for (i = 0; i < nr_cbufs; i++) {
 		radeon_emit(cs, cb[i] ? cb[i]->cb_color_info : 0);
 	}
@@ -1408,7 +1408,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 				continue;
 
 			/* COLOR_BASE */
-			r600_write_context_reg(cs, R_028040_CB_COLOR0_BASE + i*4, cb[i]->cb_color_base);
+			radeon_set_context_reg(cs, R_028040_CB_COLOR0_BASE + i*4, cb[i]->cb_color_base);
 
 			reloc = r600_context_bo_reloc(&rctx->b,
 						      &rctx->b.rings.gfx,
@@ -1421,7 +1421,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 			radeon_emit(cs, reloc);
 
 			/* FMASK */
-			r600_write_context_reg(cs, R_0280E0_CB_COLOR0_FRAG + i*4, cb[i]->cb_color_fmask);
+			radeon_set_context_reg(cs, R_0280E0_CB_COLOR0_FRAG + i*4, cb[i]->cb_color_fmask);
 
 			reloc = r600_context_bo_reloc(&rctx->b,
 						      &rctx->b.rings.gfx,
@@ -1434,7 +1434,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 			radeon_emit(cs, reloc);
 
 			/* CMASK */
-			r600_write_context_reg(cs, R_0280C0_CB_COLOR0_TILE + i*4, cb[i]->cb_color_cmask);
+			radeon_set_context_reg(cs, R_0280C0_CB_COLOR0_TILE + i*4, cb[i]->cb_color_cmask);
 
 			reloc = r600_context_bo_reloc(&rctx->b,
 						      &rctx->b.rings.gfx,
@@ -1447,17 +1447,17 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 			radeon_emit(cs, reloc);
 		}
 
-		r600_write_context_reg_seq(cs, R_028060_CB_COLOR0_SIZE, nr_cbufs);
+		radeon_set_context_reg_seq(cs, R_028060_CB_COLOR0_SIZE, nr_cbufs);
 		for (i = 0; i < nr_cbufs; i++) {
 			radeon_emit(cs, cb[i] ? cb[i]->cb_color_size : 0);
 		}
 
-		r600_write_context_reg_seq(cs, R_028080_CB_COLOR0_VIEW, nr_cbufs);
+		radeon_set_context_reg_seq(cs, R_028080_CB_COLOR0_VIEW, nr_cbufs);
 		for (i = 0; i < nr_cbufs; i++) {
 			radeon_emit(cs, cb[i] ? cb[i]->cb_color_view : 0);
 		}
 
-		r600_write_context_reg_seq(cs, R_028100_CB_COLOR0_MASK, nr_cbufs);
+		radeon_set_context_reg_seq(cs, R_028100_CB_COLOR0_MASK, nr_cbufs);
 		for (i = 0; i < nr_cbufs; i++) {
 			radeon_emit(cs, cb[i] ? cb[i]->cb_color_mask : 0);
 		}
@@ -1483,26 +1483,26 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 							       RADEON_PRIO_DEPTH_BUFFER_MSAA :
 							       RADEON_PRIO_DEPTH_BUFFER);
 
-		r600_write_context_reg(cs, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
+		radeon_set_context_reg(cs, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
 				       surf->pa_su_poly_offset_db_fmt_cntl);
 
-		r600_write_context_reg_seq(cs, R_028000_DB_DEPTH_SIZE, 2);
+		radeon_set_context_reg_seq(cs, R_028000_DB_DEPTH_SIZE, 2);
 		radeon_emit(cs, surf->db_depth_size); /* R_028000_DB_DEPTH_SIZE */
 		radeon_emit(cs, surf->db_depth_view); /* R_028004_DB_DEPTH_VIEW */
-		r600_write_context_reg_seq(cs, R_02800C_DB_DEPTH_BASE, 2);
+		radeon_set_context_reg_seq(cs, R_02800C_DB_DEPTH_BASE, 2);
 		radeon_emit(cs, surf->db_depth_base); /* R_02800C_DB_DEPTH_BASE */
 		radeon_emit(cs, surf->db_depth_info); /* R_028010_DB_DEPTH_INFO */
 
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
 		radeon_emit(cs, reloc);
 
-		r600_write_context_reg(cs, R_028D34_DB_PREFETCH_LIMIT, surf->db_prefetch_limit);
+		radeon_set_context_reg(cs, R_028D34_DB_PREFETCH_LIMIT, surf->db_prefetch_limit);
 
 		sbu |= SURFACE_BASE_UPDATE_DEPTH;
 	} else if (rctx->screen->b.info.drm_minor >= 18) {
 		/* DRM 2.6.18 allows the INVALID format to disable depth/stencil.
 		 * Older kernels are out of luck. */
-		r600_write_context_reg(cs, R_028010_DB_DEPTH_INFO, S_028010_FORMAT(V_028010_DEPTH_INVALID));
+		radeon_set_context_reg(cs, R_028010_DB_DEPTH_INFO, S_028010_FORMAT(V_028010_DEPTH_INVALID));
 	}
 
 	/* SURFACE_BASE_UPDATE */
@@ -1513,19 +1513,19 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 	}
 
 	/* Framebuffer dimensions. */
-	r600_write_context_reg_seq(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, 2);
+	radeon_set_context_reg_seq(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, 2);
 	radeon_emit(cs, S_028240_TL_X(0) | S_028240_TL_Y(0) |
 			     S_028240_WINDOW_OFFSET_DISABLE(1)); /* R_028204_PA_SC_WINDOW_SCISSOR_TL */
 	radeon_emit(cs, S_028244_BR_X(state->width) |
 			     S_028244_BR_Y(state->height)); /* R_028208_PA_SC_WINDOW_SCISSOR_BR */
 
 	if (rctx->framebuffer.is_msaa_resolve) {
-		r600_write_context_reg(cs, R_0287A0_CB_SHADER_CONTROL, 1);
+		radeon_set_context_reg(cs, R_0287A0_CB_SHADER_CONTROL, 1);
 	} else {
 		/* Always enable the first colorbuffer in CB_SHADER_CONTROL. This
 		 * will assure that the alpha-test will work even if there is
 		 * no colorbuffer bound. */
-		r600_write_context_reg(cs, R_0287A0_CB_SHADER_CONTROL,
+		radeon_set_context_reg(cs, R_0287A0_CB_SHADER_CONTROL,
 				       (1ull << MAX2(nr_cbufs, 1)) - 1);
 	}
 
@@ -1553,7 +1553,7 @@ static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom
 	struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
 
 	if (G_028808_SPECIAL_OP(a->cb_color_control) == V_028808_SPECIAL_RESOLVE_BOX) {
-		r600_write_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
+		radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
 		if (rctx->b.chip_class == R600) {
 			radeon_emit(cs, 0xff); /* R_028238_CB_TARGET_MASK */
 			radeon_emit(cs, 0xff); /* R_02823C_CB_SHADER_MASK */
@@ -1561,17 +1561,17 @@ static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom
 			radeon_emit(cs, 0xf); /* R_028238_CB_TARGET_MASK */
 			radeon_emit(cs, 0xf); /* R_02823C_CB_SHADER_MASK */
 		}
-		r600_write_context_reg(cs, R_028808_CB_COLOR_CONTROL, a->cb_color_control);
+		radeon_set_context_reg(cs, R_028808_CB_COLOR_CONTROL, a->cb_color_control);
 	} else {
 		unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1;
 		unsigned ps_colormask = (1ULL << ((unsigned)a->nr_ps_color_outputs * 4)) - 1;
 		unsigned multiwrite = a->multiwrite && a->nr_cbufs > 1;
 
-		r600_write_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
+		radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
 		radeon_emit(cs, a->blend_colormask & fb_colormask); /* R_028238_CB_TARGET_MASK */
 		/* Always enable the first color output to make sure alpha-test works even without one. */
 		radeon_emit(cs, 0xf | (multiwrite ? fb_colormask : ps_colormask)); /* R_02823C_CB_SHADER_MASK */
-		r600_write_context_reg(cs, R_028808_CB_COLOR_CONTROL,
+		radeon_set_context_reg(cs, R_028808_CB_COLOR_CONTROL,
 				       a->cb_color_control |
 				       S_028808_MULTIWRITE_ENABLE(multiwrite));
 	}
@@ -1586,15 +1586,15 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom
 		struct r600_texture *rtex = (struct r600_texture *)a->rsurf->base.texture;
 		unsigned reloc_idx;
 
-		r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
-		r600_write_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
-		r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
+		radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
+		radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
+		radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
 		reloc_idx = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
 						  RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META);
 		cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
 		cs->buf[cs->cdw++] = reloc_idx;
 	} else {
-		r600_write_context_reg(cs, R_028D24_DB_HTILE_SURFACE, 0);
+		radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, 0);
 	}
 }
 
@@ -1658,10 +1658,10 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
 		db_render_override |= S_028D10_MAX_TILES_IN_DTT(6);
 	}
 
-	r600_write_context_reg_seq(cs, R_028D0C_DB_RENDER_CONTROL, 2);
+	radeon_set_context_reg_seq(cs, R_028D0C_DB_RENDER_CONTROL, 2);
 	radeon_emit(cs, db_render_control); /* R_028D0C_DB_RENDER_CONTROL */
 	radeon_emit(cs, db_render_override); /* R_028D10_DB_RENDER_OVERRIDE */
-	r600_write_context_reg(cs, R_02880C_DB_SHADER_CONTROL, a->db_shader_control);
+	radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, a->db_shader_control);
 }
 
 static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *atom)
@@ -1669,8 +1669,8 @@ static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *
 	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
 	struct r600_config_state *a = (struct r600_config_state*)atom;
 
-	r600_write_config_reg(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, a->sq_gpr_resource_mgmt_1);
-	r600_write_config_reg(cs, R_008C08_SQ_GPR_RESOURCE_MGMT_2, a->sq_gpr_resource_mgmt_2);
+	radeon_set_config_reg(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, a->sq_gpr_resource_mgmt_1);
+	radeon_set_config_reg(cs, R_008C08_SQ_GPR_RESOURCE_MGMT_2, a->sq_gpr_resource_mgmt_2);
 }
 
 static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom)
@@ -1731,9 +1731,9 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
 		offset = cb->buffer_offset;
 
 		if (!gs_ring_buffer) {
-			r600_write_context_reg(cs, reg_alu_constbuf_size + buffer_index * 4,
+			radeon_set_context_reg(cs, reg_alu_constbuf_size + buffer_index * 4,
 					       ALIGN_DIVUP(cb->buffer_size >> 4, 16));
-			r600_write_context_reg(cs, reg_alu_const_cache + buffer_index * 4, offset >> 8);
+			radeon_set_context_reg(cs, reg_alu_const_cache + buffer_index * 4, offset >> 8);
 		}
 
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
@@ -1878,7 +1878,7 @@ static void r600_emit_sampler_states(struct r600_context *rctx,
 
 			offset = border_color_reg;
 			offset += i * 16;
-			r600_write_config_reg_seq(cs, offset, 4);
+			radeon_set_config_reg_seq(cs, offset, 4);
 			radeon_emit_array(cs, rstate->border_color.ui, 4);
 		}
 	}
@@ -1912,7 +1912,7 @@ static void r600_emit_seamless_cube_map(struct r600_context *rctx, struct r600_a
 	if (!rctx->seamless_cube_map.enabled) {
 		tmp |= S_009508_DISABLE_CUBE_WRAP(1);
 	}
-	r600_write_config_reg(cs, R_009508_TA_CNTL_AUX, tmp);
+	radeon_set_config_reg(cs, R_009508_TA_CNTL_AUX, tmp);
 }
 
 static void r600_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a)
@@ -1920,7 +1920,7 @@ static void r600_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a
 	struct r600_sample_mask *s = (struct r600_sample_mask*)a;
 	uint8_t mask = s->sample_mask;
 
-	r600_write_context_reg(rctx->b.rings.gfx.cs, R_028C48_PA_SC_AA_MASK,
+	radeon_set_context_reg(rctx->b.rings.gfx.cs, R_028C48_PA_SC_AA_MASK,
 			       mask | (mask << 8) | (mask << 16) | (mask << 24));
 }
 
@@ -1930,7 +1930,7 @@ static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600
 	struct r600_cso_state *state = (struct r600_cso_state*)a;
 	struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
 
-	r600_write_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8);
+	radeon_set_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8);
 	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
 	radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
 					      RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
@@ -1967,8 +1967,8 @@ static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom
 			primid = 1;
 	}
 
-	r600_write_context_reg(cs, R_028A40_VGT_GS_MODE, v2);
-	r600_write_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, primid);
+	radeon_set_context_reg(cs, R_028A40_VGT_GS_MODE, v2);
+	radeon_set_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, primid);
 }
 
 static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
@@ -1977,34 +1977,34 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
 	struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a;
 	struct r600_resource *rbuffer;
 
-	r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+	radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
 	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
 
 	if (state->enable) {
 		rbuffer =(struct r600_resource*)state->esgs_ring.buffer;
-		r600_write_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE, 0);
+		radeon_set_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE, 0);
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
 		radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
 						      RADEON_USAGE_READWRITE,
 						      RADEON_PRIO_SHADER_RESOURCE_RW));
-		r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
+		radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
 				state->esgs_ring.buffer_size >> 8);
 
 		rbuffer =(struct r600_resource*)state->gsvs_ring.buffer;
-		r600_write_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE, 0);
+		radeon_set_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE, 0);
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
 		radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
 						      RADEON_USAGE_READWRITE,
 						      RADEON_PRIO_SHADER_RESOURCE_RW));
-		r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
+		radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
 				state->gsvs_ring.buffer_size >> 8);
 	} else {
-		r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0);
-		r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0);
+		radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0);
+		radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0);
 	}
 
-	r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+	radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
 	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
 }
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index a65064945cf..9f6884d2109 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -85,10 +85,10 @@ void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom
 		alpha_ref &= ~0x1FFF;
 	}
 
-	r600_write_context_reg(cs, R_028410_SX_ALPHA_TEST_CONTROL,
+	radeon_set_context_reg(cs, R_028410_SX_ALPHA_TEST_CONTROL,
 			       a->sx_alpha_test_control |
 			       S_028410_ALPHA_TEST_BYPASS(a->bypass));
-	r600_write_context_reg(cs, R_028438_SX_ALPHA_REF, alpha_ref);
+	radeon_set_context_reg(cs, R_028438_SX_ALPHA_REF, alpha_ref);
 }
 
 static void r600_texture_barrier(struct pipe_context *ctx)
@@ -215,7 +215,7 @@ void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom)
 	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
 	struct pipe_blend_color *state = &rctx->blend_color.state;
 
-	r600_write_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
+	radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
 	radeon_emit(cs, fui(state->color[0])); /* R_028414_CB_BLEND_RED */
 	radeon_emit(cs, fui(state->color[1])); /* R_028418_CB_BLEND_GREEN */
 	radeon_emit(cs, fui(state->color[2])); /* R_02841C_CB_BLEND_BLUE */
@@ -227,13 +227,13 @@ void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom)
 	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
 	struct r600_vgt_state *a = (struct r600_vgt_state *)atom;
 
-	r600_write_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, a->vgt_multi_prim_ib_reset_en);
-	r600_write_context_reg_seq(cs, R_028408_VGT_INDX_OFFSET, 2);
+	radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, a->vgt_multi_prim_ib_reset_en);
+	radeon_set_context_reg_seq(cs, R_028408_VGT_INDX_OFFSET, 2);
 	radeon_emit(cs, a->vgt_indx_offset); /* R_028408_VGT_INDX_OFFSET */
 	radeon_emit(cs, a->vgt_multi_prim_ib_reset_indx); /* R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX */
 	if (a->last_draw_was_indirect) {
 		a->last_draw_was_indirect = false;
-		r600_write_ctl_const(cs, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
+		radeon_set_ctl_const(cs, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
 	}
 }
 
@@ -268,7 +268,7 @@ void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom)
 	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
 	struct r600_stencil_ref_state *a = (struct r600_stencil_ref_state*)atom;
 
-	r600_write_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
+	radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
 	radeon_emit(cs, /* R_028430_DB_STENCILREFMASK */
 			 S_028430_STENCILREF(a->state.ref_value[0]) |
 			 S_028430_STENCILMASK(a->state.valuemask[0]) |
@@ -718,7 +718,7 @@ void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom)
 	struct pipe_viewport_state *state = &rstate->state;
 	int offset = rstate->idx * 6 * 4;
 
-	r600_write_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE_0 + offset, 6);
+	radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE_0 + offset, 6);
 	radeon_emit(cs, fui(state->scale[0]));     /* R_02843C_PA_CL_VPORT_XSCALE_0  */
 	radeon_emit(cs, fui(state->translate[0])); /* R_028440_PA_CL_VPORT_XOFFSET_0 */
 	radeon_emit(cs, fui(state->scale[1]));     /* R_028444_PA_CL_VPORT_YSCALE_0  */
@@ -1401,11 +1401,11 @@ void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom
 	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
 	struct r600_clip_misc_state *state = &rctx->clip_misc_state;
 
-	r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
+	radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
 			       state->pa_cl_clip_cntl |
 			       (state->clip_dist_write ? 0 : state->clip_plane_enable & 0x3F) |
                                S_028810_CLIP_DISABLE(state->clip_disable));
-	r600_write_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
+	radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
 			       state->pa_cl_vs_out_cntl |
 			       (state->clip_plane_enable & state->clip_dist_write));
 }
@@ -1550,7 +1550,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		    rctx->b.streamout.prims_gen_query_enabled)
 			partial_vs_wave = true;
 
-		r600_write_context_reg(cs, CM_R_028AA8_IA_MULTI_VGT_PARAM,
+		radeon_set_context_reg(cs, CM_R_028AA8_IA_MULTI_VGT_PARAM,
 				       S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
 				       S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
 				       S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1));
@@ -1572,12 +1572,12 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		    info.mode == R600_PRIM_RECTANGLE_LIST) {
 			su_sc_mode_cntl &= C_028814_CULL_FRONT;
 		}
-		r600_write_context_reg(cs, R_028814_PA_SU_SC_MODE_CNTL, su_sc_mode_cntl);
+		radeon_set_context_reg(cs, R_028814_PA_SU_SC_MODE_CNTL, su_sc_mode_cntl);
 	}
 
 	/* Update start instance. */
 	if (!info.indirect && rctx->last_start_instance != info.start_instance) {
-		r600_write_ctl_const(cs, R_03CFF4_SQ_VTX_START_INST_LOC, info.start_instance);
+		radeon_set_ctl_const(cs, R_03CFF4_SQ_VTX_START_INST_LOC, info.start_instance);
 		rctx->last_start_instance = info.start_instance;
 	}
 
@@ -1591,10 +1591,10 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 			 info.mode == PIPE_PRIM_LINE_LOOP)
 			ls_mask = 2;
 
-		r600_write_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
+		radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
 				       S_028A0C_AUTO_RESET_CNTL(ls_mask) |
 				       (rctx->rasterizer ? rctx->rasterizer->pa_sc_line_stipple : 0));
-		r600_write_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE,
+		radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE,
 				      r600_conv_pipe_prim(info.mode));
 
 		rctx->last_primitive_type = info.mode;
@@ -1678,7 +1678,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 			struct r600_so_target *t = (struct r600_so_target*)info.count_from_stream_output;
 			uint64_t va = t->buf_filled_size->gpu_address + t->buf_filled_size_offset;
 
-			r600_write_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw);
+			radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw);
 
 			cs->buf[cs->cdw++] = PKT3(PKT3_COPY_DW, 4, 0);
 			cs->buf[cs->cdw++] = COPY_DW_SRC_IS_MEM | COPY_DW_DST_IS_REG;
diff --git a/src/gallium/drivers/radeon/cayman_msaa.c b/src/gallium/drivers/radeon/cayman_msaa.c
index 12a5f604755..c6afa8256db 100644
--- a/src/gallium/drivers/radeon/cayman_msaa.c
+++ b/src/gallium/drivers/radeon/cayman_msaa.c
@@ -144,19 +144,19 @@ void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
 {
 	switch (nr_samples) {
 	case 2:
-		r600_write_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
-		r600_write_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
-		r600_write_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
-		r600_write_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
+		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
+		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
+		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
+		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
 		break;
 	case 4:
-		r600_write_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
-		r600_write_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
-		r600_write_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
-		r600_write_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
+		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
+		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
+		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
+		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
 		break;
 	case 8:
-		r600_write_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
+		radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
 		radeon_emit(cs, cm_sample_locs_8x[0]);
 		radeon_emit(cs, cm_sample_locs_8x[4]);
 		radeon_emit(cs, 0);
@@ -173,7 +173,7 @@ void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
 		radeon_emit(cs, cm_sample_locs_8x[7]);
 		break;
 	case 16:
-		r600_write_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
+		radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
 		radeon_emit(cs, cm_sample_locs_16x[0]);
 		radeon_emit(cs, cm_sample_locs_16x[4]);
 		radeon_emit(cs, cm_sample_locs_16x[8]);
@@ -213,7 +213,7 @@ void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
 		unsigned log_ps_iter_samples =
 			util_logbase2(util_next_power_of_two(ps_iter_samples));
 
-		r600_write_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
+		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
 		radeon_emit(cs, S_028BDC_LAST_PIXEL(1) |
 			    S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
 		radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
@@ -221,30 +221,30 @@ void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
 			    S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */
 
 		if (nr_samples > 1) {
-			r600_write_context_reg(cs, CM_R_028804_DB_EQAA,
+			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
 					       S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
 					       S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
 					       S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
 					       S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
 					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
 					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
-			r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
+			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
 					     EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
 		} else if (overrast_samples > 1) {
-			r600_write_context_reg(cs, CM_R_028804_DB_EQAA,
+			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
 					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
 					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
 					       S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
-			r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
+			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
 		}
 	} else {
-		r600_write_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
+		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
 		radeon_emit(cs, S_028BDC_LAST_PIXEL(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
 		radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
 
-		r600_write_context_reg(cs, CM_R_028804_DB_EQAA,
+		radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
 				       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
 				       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
-		r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
+		radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
 	}
 }
diff --git a/src/gallium/drivers/radeon/r600_cs.h b/src/gallium/drivers/radeon/r600_cs.h
index 03a04b754d6..188abccb507 100644
--- a/src/gallium/drivers/radeon/r600_cs.h
+++ b/src/gallium/drivers/radeon/r600_cs.h
@@ -74,7 +74,7 @@ static inline void r600_emit_reloc(struct r600_common_context *rctx,
 	}
 }
 
-static inline void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_set_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
 {
 	assert(reg < R600_CONTEXT_REG_OFFSET);
 	assert(cs->cdw+2+num <= cs->max_dw);
@@ -82,13 +82,13 @@ static inline void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, unsign
 	radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
 }
 
-static inline void r600_write_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void radeon_set_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
-	r600_write_config_reg_seq(cs, reg, 1);
+	radeon_set_config_reg_seq(cs, reg, 1);
 	radeon_emit(cs, value);
 }
 
-static inline void r600_write_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_set_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
 {
 	assert(reg >= R600_CONTEXT_REG_OFFSET);
 	assert(cs->cdw+2+num <= cs->max_dw);
@@ -96,13 +96,13 @@ static inline void r600_write_context_reg_seq(struct radeon_winsys_cs *cs, unsig
 	radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2);
 }
 
-static inline void r600_write_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void radeon_set_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
-	r600_write_context_reg_seq(cs, reg, 1);
+	radeon_set_context_reg_seq(cs, reg, 1);
 	radeon_emit(cs, value);
 }
 
-static inline void si_write_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_set_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
 {
 	assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
 	assert(cs->cdw+2+num <= cs->max_dw);
@@ -110,13 +110,13 @@ static inline void si_write_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg
 	radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
 }
 
-static inline void si_write_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void radeon_set_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
-	si_write_sh_reg_seq(cs, reg, 1);
+	radeon_set_sh_reg_seq(cs, reg, 1);
 	radeon_emit(cs, value);
 }
 
-static inline void cik_write_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_set_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
 {
 	assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
 	assert(cs->cdw+2+num <= cs->max_dw);
@@ -124,9 +124,9 @@ static inline void cik_write_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsign
 	radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
 }
 
-static inline void cik_write_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void radeon_set_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
-	cik_write_uconfig_reg_seq(cs, reg, 1);
+	radeon_set_uconfig_reg_seq(cs, reg, 1);
 	radeon_emit(cs, value);
 }
 
diff --git a/src/gallium/drivers/radeon/r600_streamout.c b/src/gallium/drivers/radeon/r600_streamout.c
index 0853f636a27..5198f1e041d 100644
--- a/src/gallium/drivers/radeon/r600_streamout.c
+++ b/src/gallium/drivers/radeon/r600_streamout.c
@@ -165,9 +165,9 @@ static void r600_flush_vgt_streamout(struct r600_common_context *rctx)
 	}
 
 	if (rctx->chip_class >= CIK) {
-		cik_write_uconfig_reg(cs, reg_strmout_cntl, 0);
+		radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0);
 	} else {
-		r600_write_config_reg(cs, reg_strmout_cntl, 0);
+		radeon_set_config_reg(cs, reg_strmout_cntl, 0);
 	}
 
 	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
@@ -201,7 +201,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
 			/* SI binds streamout buffers as shader resources.
 			 * VGT only counts primitives and tells the shader
 			 * through SGPRs what to do. */
-			r600_write_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
+			radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
 			radeon_emit(cs, (t[i]->b.buffer_offset +
 					 t[i]->b.buffer_size) >> 2);	/* BUFFER_SIZE (in DW) */
 			radeon_emit(cs, stride_in_dw[i]);		/* VTX_STRIDE (in DW) */
@@ -210,7 +210,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
 
 			update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);
 
-			r600_write_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
+			radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
 			radeon_emit(cs, (t[i]->b.buffer_offset +
 					 t[i]->b.buffer_size) >> 2);	/* BUFFER_SIZE (in DW) */
 			radeon_emit(cs, stride_in_dw[i]);		/* VTX_STRIDE (in DW) */
@@ -295,7 +295,7 @@ void r600_emit_streamout_end(struct r600_common_context *rctx)
 		 * primitives emitted) may be enabled even if there is not
 		 * buffer bound. This ensures that the primitives-emitted query
 		 * won't increment. */
-		r600_write_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
+		radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
 
 		t[i]->buf_filled_size_valid = true;
 	}
@@ -336,8 +336,8 @@ static void r600_emit_streamout_enable(struct r600_common_context *rctx,
 			S_028B94_STREAMOUT_2_EN(r600_get_strmout_en(rctx)) |
 			S_028B94_STREAMOUT_3_EN(r600_get_strmout_en(rctx));
 	}
-	r600_write_context_reg(rctx->rings.gfx.cs, strmout_buffer_reg, strmout_buffer_val);
-	r600_write_context_reg(rctx->rings.gfx.cs, strmout_config_reg, strmout_config_val);
+	radeon_set_context_reg(rctx->rings.gfx.cs, strmout_buffer_reg, strmout_buffer_val);
+	radeon_set_context_reg(rctx->rings.gfx.cs, strmout_config_reg, strmout_config_val);
 }
 
 static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable)
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index c20ea94d7df..5c922b04c0a 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -272,7 +272,7 @@ static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *at
 	    (sctx->ps_shader->ps_colors_written & 0x3) != 0x3)
 		mask = 0;
 
-	r600_write_context_reg(cs, R_028238_CB_TARGET_MASK, mask);
+	radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, mask);
 }
 
 /*
@@ -458,7 +458,7 @@ static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
 
-	r600_write_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
+	radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
 	radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4);
 }
 
@@ -490,7 +490,7 @@ static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
 
-	r600_write_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
+	radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
 	radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4);
 }
 
@@ -505,7 +505,7 @@ static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
 	unsigned clipdist_mask =
 		info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask;
 
-	r600_write_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
+	radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
 		S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
 		S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
 		S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
@@ -519,7 +519,7 @@ static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
 		S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) |
 		(sctx->queued.named.rasterizer->clip_plane_enable &
 		 clipdist_mask));
-	r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
+	radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
 		sctx->queued.named.rasterizer->pa_cl_clip_cntl |
 		(clipdist_mask ? 0 :
 		 sctx->queued.named.rasterizer->clip_plane_enable & SIX_BITS) |
@@ -550,7 +550,7 @@ static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom)
 	/* The simple case: Only 1 viewport is active. */
 	if (mask & 1 &&
 	    !si_get_vs_info(sctx)->writes_viewport_index) {
-		r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
+		radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
 		radeon_emit(cs, S_028250_TL_X(states[0].minx) |
 				S_028250_TL_Y(states[0].miny) |
 				S_028250_WINDOW_OFFSET_DISABLE(1));
@@ -565,7 +565,7 @@ static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom)
 
 		u_bit_scan_consecutive_range(&mask, &start, &count);
 
-		r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
+		radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
 					       start * 4 * 2, count * 2);
 		for (i = start; i < start+count; i++) {
 			radeon_emit(cs, S_028250_TL_X(states[i].minx) |
@@ -602,7 +602,7 @@ static void si_emit_viewports(struct si_context *sctx, struct r600_atom *atom)
 	/* The simple case: Only 1 viewport is active. */
 	if (mask & 1 &&
 	    !si_get_vs_info(sctx)->writes_viewport_index) {
-		r600_write_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
+		radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
 		radeon_emit(cs, fui(states[0].scale[0]));
 		radeon_emit(cs, fui(states[0].translate[0]));
 		radeon_emit(cs, fui(states[0].scale[1]));
@@ -618,7 +618,7 @@ static void si_emit_viewports(struct si_context *sctx, struct r600_atom *atom)
 
 		u_bit_scan_consecutive_range(&mask, &start, &count);
 
-		r600_write_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
+		radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
 					       start * 4 * 6, count * 6);
 		for (i = start; i < start+count; i++) {
 			radeon_emit(cs, fui(states[i].scale[0]));
@@ -830,7 +830,7 @@ static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom)
 	struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
 	struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;
 
-	r600_write_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
+	radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
 	radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) |
 			S_028430_STENCILMASK(dsa->valuemask[0]) |
 			S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
@@ -989,7 +989,7 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 	unsigned db_shader_control;
 
-	r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
+	radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
 
 	/* DB_RENDER_CONTROL */
 	if (sctx->dbcb_depth_copy_enabled ||
@@ -1034,10 +1034,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
 
 	/* DB_RENDER_OVERRIDE2 */
 	if (sctx->db_depth_disable_expclear) {
-		r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
+		radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
 			S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(1));
 	} else {
-		r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0);
+		radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0);
 	}
 
 	db_shader_control = S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) |
@@ -1053,7 +1053,7 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
 	if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable))
 		db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
 
-	r600_write_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
+	radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
 			       db_shader_control);
 }
 
@@ -2229,7 +2229,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 
 		cb = (struct r600_surface*)state->cbufs[i];
 		if (!cb) {
-			r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
+			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
 					       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
 			continue;
 		}
@@ -2247,7 +2247,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 				RADEON_PRIO_COLOR_META);
 		}
 
-		r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
+		radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
 					   sctx->b.chip_class >= VI ? 14 : 13);
 		radeon_emit(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
 		radeon_emit(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
@@ -2269,13 +2269,13 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 	/* set CB_COLOR1_INFO for possible dual-src blending */
 	if (i == 1 && state->cbufs[0] &&
 	    sctx->framebuffer.dirty_cbufs & (1 << 0)) {
-		r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
+		radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
 				       cb->cb_color_info | tex->cb_color_info);
 		i++;
 	}
 	for (; i < 8 ; i++)
 		if (sctx->framebuffer.dirty_cbufs & (1 << i))
-			r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
+			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
 
 	/* ZS buffer. */
 	if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
@@ -2294,10 +2294,10 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 					      RADEON_PRIO_DEPTH_META);
 		}
 
-		r600_write_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
-		r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
+		radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
+		radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
 
-		r600_write_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
+		radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
 		radeon_emit(cs, zb->db_depth_info);	/* R_02803C_DB_DEPTH_INFO */
 		radeon_emit(cs, zb->db_z_info |		/* R_028040_DB_Z_INFO */
 			    S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0));
@@ -2309,19 +2309,19 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 		radeon_emit(cs, zb->db_depth_size);	/* R_028058_DB_DEPTH_SIZE */
 		radeon_emit(cs, zb->db_depth_slice);	/* R_02805C_DB_DEPTH_SLICE */
 
-		r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
-		r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
-		r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
+		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
+		radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
+		radeon_set_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
 				       zb->pa_su_poly_offset_db_fmt_cntl);
 	} else if (sctx->framebuffer.dirty_zsbuf) {
-		r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
+		radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
 		radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
 		radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
 	}
 
 	/* Framebuffer dimensions. */
         /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
-	r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
+	radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
 			       S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
 
 	sctx->framebuffer.dirty_cbufs = 0;
@@ -2833,7 +2833,7 @@ static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom)
 	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
 	unsigned mask = sctx->sample_mask.sample_mask;
 
-	r600_write_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
+	radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
 	radeon_emit(cs, mask | (mask << 16));
 	radeon_emit(cs, mask | (mask << 16));
 }
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 81575b53dd8..ebcc2691773 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -176,8 +176,8 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 	/* Due to a hw bug, RSRC2_LS must be written twice with another
 	 * LS register written in between. */
 	if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
-		si_write_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
-	si_write_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
+		radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
+	radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
 	radeon_emit(cs, ls->current->ls_rsrc1);
 	radeon_emit(cs, ls_rsrc2);
 
@@ -199,19 +199,19 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 			  ((perpatch_output_offset / 16) << 16);
 
 	/* Set them for LS. */
-	si_write_sh_reg(cs,
+	radeon_set_sh_reg(cs,
 		R_00B530_SPI_SHADER_USER_DATA_LS_0 + SI_SGPR_LS_OUT_LAYOUT * 4,
 		tcs_in_layout);
 
 	/* Set them for TCS. */
-	si_write_sh_reg_seq(cs,
+	radeon_set_sh_reg_seq(cs,
 		R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OUT_OFFSETS * 4, 3);
 	radeon_emit(cs, tcs_out_offsets);
 	radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
 	radeon_emit(cs, tcs_in_layout);
 
 	/* Set them for TES. */
-	si_write_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OUT_OFFSETS * 4, 2);
+	radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OUT_OFFSETS * 4, 2);
 	radeon_emit(cs, tcs_out_offsets);
 	radeon_emit(cs, tcs_out_layout | (num_tcs_output_cp << 26));
 }
@@ -347,7 +347,7 @@ static void si_emit_scratch_reloc(struct si_context *sctx)
 	if (!sctx->emit_scratch_reloc)
 		return;
 
-	r600_write_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
+	radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
 			       sctx->spi_tmpring_size);
 
 	if (sctx->scratch_buffer) {
@@ -378,7 +378,7 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
 	    rs->pa_sc_line_stipple == sctx->last_sc_line_stipple)
 		return;
 
-	r600_write_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
+	radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
 		rs->pa_sc_line_stipple |
 		S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 :
 					 rast_prim == PIPE_PRIM_LINE_STRIP ? 2 : 0));
@@ -411,9 +411,9 @@ static void si_emit_draw_registers(struct si_context *sctx,
 			radeon_emit(cs, ia_multi_vgt_param); /* IA_MULTI_VGT_PARAM */
 			radeon_emit(cs, ls_hs_config); /* VGT_LS_HS_CONFIG */
 		} else {
-			r600_write_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim);
-			r600_write_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
-			r600_write_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
+			radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim);
+			radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
+			radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
 		}
 		sctx->last_prim = prim;
 		sctx->last_multi_vgt_param = ia_multi_vgt_param;
@@ -421,19 +421,19 @@ static void si_emit_draw_registers(struct si_context *sctx,
 	}
 
 	if (gs_out_prim != sctx->last_gs_out_prim) {
-		r600_write_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out_prim);
+		radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out_prim);
 		sctx->last_gs_out_prim = gs_out_prim;
 	}
 
 	/* Primitive restart. */
 	if (info->primitive_restart != sctx->last_primitive_restart_en) {
-		r600_write_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, info->primitive_restart);
+		radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, info->primitive_restart);
 		sctx->last_primitive_restart_en = info->primitive_restart;
 
 		if (info->primitive_restart &&
 		    (info->restart_index != sctx->last_restart_index ||
 		     sctx->last_restart_index == SI_RESTART_INDEX_UNKNOWN)) {
-			r600_write_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
+			radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
 					       info->restart_index);
 			sctx->last_restart_index = info->restart_index;
 		}
@@ -453,7 +453,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 		uint64_t va = t->buf_filled_size->gpu_address +
 			      t->buf_filled_size_offset;
 
-		r600_write_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
+		radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
 				       t->stride_in_dw);
 
 		radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
@@ -508,7 +508,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 		    sctx->last_base_vertex == SI_BASE_VERTEX_UNKNOWN ||
 		    info->start_instance != sctx->last_start_instance ||
 		    sh_base_reg != sctx->last_sh_base_reg) {
-			si_write_sh_reg_seq(cs, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, 2);
+			radeon_set_sh_reg_seq(cs, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, 2);
 			radeon_emit(cs, base_vertex);
 			radeon_emit(cs, info->start_instance);
 
-- 
cgit v1.2.3


From 7ff2991e344130c8eb6e4be0b146320b3f02c1e6 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 02:04:37 +0200
Subject: gallium/radeon: rename r600_context_bo_reloc ->
 radeon_add_to_buffer_list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The new name should be easy to understand without any other knowledge.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/r600/evergreen_compute.c    |  4 +--
 src/gallium/drivers/r600/evergreen_hw_context.c |  6 ++---
 src/gallium/drivers/r600/evergreen_state.c      | 28 ++++++++++----------
 src/gallium/drivers/r600/r600_hw_context.c      |  8 +++---
 src/gallium/drivers/r600/r600_state.c           | 28 ++++++++++----------
 src/gallium/drivers/r600/r600_state_common.c    | 12 ++++-----
 src/gallium/drivers/radeon/r600_cs.h            | 22 +++++++++++-----
 src/gallium/drivers/radeonsi/cik_sdma.c         |  8 +++---
 src/gallium/drivers/radeonsi/si_cp_dma.c        |  6 ++---
 src/gallium/drivers/radeonsi/si_descriptors.c   | 34 ++++++++++++-------------
 src/gallium/drivers/radeonsi/si_dma.c           |  8 +++---
 src/gallium/drivers/radeonsi/si_pm4.c           |  2 +-
 src/gallium/drivers/radeonsi/si_state.c         |  8 +++---
 src/gallium/drivers/radeonsi/si_state_draw.c    | 10 ++++----
 14 files changed, 97 insertions(+), 87 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index ede9a1b3edc..33009c16f68 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -439,7 +439,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
 	/* XXX support more than 8 colorbuffers (the offsets are not a multiple of 0x3C for CB8-11) */
 	for (i = 0; i < 8 && i < ctx->framebuffer.state.nr_cbufs; i++) {
 		struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
-		unsigned reloc = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx,
+		unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.gfx,
 						       (struct r600_resource*)cb->base.texture,
 						       RADEON_USAGE_READWRITE,
 						       RADEON_PRIO_SHADER_RESOURCE_RW);
@@ -564,7 +564,7 @@ void evergreen_emit_cs_shader(
 	radeon_emit(cs, 0);	/* R_0288D8_SQ_PGM_RESOURCES_LS_2 */
 
 	radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
-	radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+	radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
 					      code_bo, RADEON_USAGE_READ,
 					      RADEON_PRIO_SHADER_DATA));
 }
diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index e2728565489..29bdd9daddb 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -64,9 +64,9 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
 	for (i = 0; i < ncopy; i++) {
 		csize = size < EG_DMA_COPY_MAX_SIZE ? size : EG_DMA_COPY_MAX_SIZE;
 		/* emit reloc before writing cs so that cs is always in consistent state */
-		r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+		radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
 				      RADEON_PRIO_MIN);
-		r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+		radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
 				      RADEON_PRIO_MIN);
 		cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, csize);
 		cs->buf[cs->cdw++] = dst_offset & 0xffffffff;
@@ -129,7 +129,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
 		}
 
 		/* This must be done after r600_need_cs_space. */
-		reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+		reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
 					      (struct r600_resource*)dst, RADEON_USAGE_WRITE,
 					      RADEON_PRIO_MIN);
 
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 5c03f0e6c44..9ef92741879 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1562,7 +1562,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 		}
 
 		tex = (struct r600_texture *)cb->base.texture;
-		reloc = r600_context_bo_reloc(&rctx->b,
+		reloc = radeon_add_to_buffer_list(&rctx->b,
 					      &rctx->b.rings.gfx,
 					      (struct r600_resource*)cb->base.texture,
 					      RADEON_USAGE_READWRITE,
@@ -1571,7 +1571,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 						      RADEON_PRIO_COLOR_BUFFER);
 
 		if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
-			cmask_reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+			cmask_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
 				tex->cmask_buffer, RADEON_USAGE_READWRITE,
 				RADEON_PRIO_COLOR_META);
 		} else {
@@ -1616,7 +1616,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 				       cb->cb_color_info | tex->cb_color_info);
 
 		if (!rctx->keep_tiling_flags) {
-			unsigned reloc = r600_context_bo_reloc(&rctx->b,
+			unsigned reloc = radeon_add_to_buffer_list(&rctx->b,
 							       &rctx->b.rings.gfx,
 							       (struct r600_resource*)state->cbufs[0]->texture,
 							       RADEON_USAGE_READWRITE,
@@ -1639,7 +1639,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 	/* ZS buffer. */
 	if (state->zsbuf) {
 		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
-		unsigned reloc = r600_context_bo_reloc(&rctx->b,
+		unsigned reloc = radeon_add_to_buffer_list(&rctx->b,
 						       &rctx->b.rings.gfx,
 						       (struct r600_resource*)state->zsbuf->texture,
 						       RADEON_USAGE_READWRITE,
@@ -1755,7 +1755,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom
 		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
 		radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
 		radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
-		reloc_idx = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
+		reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
 						  RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META);
 		cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
 		cs->buf[cs->cdw++] = reloc_idx;
@@ -1869,7 +1869,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 		radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD7 */
 
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
-		radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
 						      RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
 	}
 	state->dirty_mask = 0;
@@ -1917,7 +1917,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 		}
 
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
-		radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
 						      RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
 
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
@@ -1942,7 +1942,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 			    S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER));
 
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
-		radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
 						      RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
 
 		dirty_mask &= ~(1 << buffer_index);
@@ -2001,7 +2001,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
 		radeon_emit(cs, (resource_id_base + resource_index) * 8);
 		radeon_emit_array(cs, rview->tex_resource_words, 8);
 
-		reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
+		reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
 					      RADEON_USAGE_READ,
 					      rview->tex_resource->b.b.nr_samples > 1 ?
 						      RADEON_PRIO_SHADER_TEXTURE_MSAA :
@@ -2124,7 +2124,7 @@ static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct
 	radeon_set_context_reg(cs, R_0288A4_SQ_PGM_START_FS,
 			       (shader->buffer->gpu_address + shader->offset) >> 8);
 	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-	radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
+	radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
 					      RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
 }
 
@@ -2182,7 +2182,7 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom
 		radeon_set_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE,
 				rbuffer->gpu_address >> 8);
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-		radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
 						      RADEON_USAGE_READWRITE,
 						      RADEON_PRIO_SHADER_RESOURCE_RW));
 		radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
@@ -2192,7 +2192,7 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom
 		radeon_set_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE,
 				rbuffer->gpu_address >> 8);
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-		radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
 						      RADEON_USAGE_READWRITE,
 						      RADEON_PRIO_SHADER_RESOURCE_RW));
 		radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
@@ -3307,9 +3307,9 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
 		}
 		size = (cheight * pitch) / 4;
 		/* emit reloc before writing cs so that cs is always in consistent state */
-		r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rsrc->resource,
+		radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource,
 				      RADEON_USAGE_READ, RADEON_PRIO_MIN);
-		r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rdst->resource,
+		radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource,
 				      RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
 		cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, size);
 		cs->buf[cs->cdw++] = base >> 8;
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index d5eec15f1fb..2fe29e91c4f 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -417,9 +417,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
 		}
 
 		/* This must be done after r600_need_cs_space. */
-		src_reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)src,
+		src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)src,
 						  RADEON_USAGE_READ, RADEON_PRIO_MIN);
-		dst_reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst,
+		dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst,
 						  RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
 
 		radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
@@ -470,9 +470,9 @@ void r600_dma_copy_buffer(struct r600_context *rctx,
 	for (i = 0; i < ncopy; i++) {
 		csize = size < R600_DMA_COPY_MAX_SIZE_DW ? size : R600_DMA_COPY_MAX_SIZE_DW;
 		/* emit reloc before writing cs so that cs is always in consistent state */
-		r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+		radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
 				      RADEON_PRIO_MIN);
-		r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+		radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
 				      RADEON_PRIO_MIN);
 		cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 0, 0, csize);
 		cs->buf[cs->cdw++] = dst_offset & 0xfffffffc;
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 1af96f64d40..aff8f03f9b1 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1410,7 +1410,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 			/* COLOR_BASE */
 			radeon_set_context_reg(cs, R_028040_CB_COLOR0_BASE + i*4, cb[i]->cb_color_base);
 
-			reloc = r600_context_bo_reloc(&rctx->b,
+			reloc = radeon_add_to_buffer_list(&rctx->b,
 						      &rctx->b.rings.gfx,
 						      (struct r600_resource*)cb[i]->base.texture,
 						      RADEON_USAGE_READWRITE,
@@ -1423,7 +1423,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 			/* FMASK */
 			radeon_set_context_reg(cs, R_0280E0_CB_COLOR0_FRAG + i*4, cb[i]->cb_color_fmask);
 
-			reloc = r600_context_bo_reloc(&rctx->b,
+			reloc = radeon_add_to_buffer_list(&rctx->b,
 						      &rctx->b.rings.gfx,
 						      cb[i]->cb_buffer_fmask,
 						      RADEON_USAGE_READWRITE,
@@ -1436,7 +1436,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 			/* CMASK */
 			radeon_set_context_reg(cs, R_0280C0_CB_COLOR0_TILE + i*4, cb[i]->cb_color_cmask);
 
-			reloc = r600_context_bo_reloc(&rctx->b,
+			reloc = radeon_add_to_buffer_list(&rctx->b,
 						      &rctx->b.rings.gfx,
 						      cb[i]->cb_buffer_cmask,
 						      RADEON_USAGE_READWRITE,
@@ -1475,7 +1475,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 	/* Zbuffer. */
 	if (state->zsbuf) {
 		struct r600_surface *surf = (struct r600_surface*)state->zsbuf;
-		unsigned reloc = r600_context_bo_reloc(&rctx->b,
+		unsigned reloc = radeon_add_to_buffer_list(&rctx->b,
 						       &rctx->b.rings.gfx,
 						       (struct r600_resource*)state->zsbuf->texture,
 						       RADEON_USAGE_READWRITE,
@@ -1589,7 +1589,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom
 		radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
 		radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
 		radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
-		reloc_idx = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
+		reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
 						  RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META);
 		cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
 		cs->buf[cs->cdw++] = reloc_idx;
@@ -1704,7 +1704,7 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom
 		radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD6 */
 
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-		radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
 						      RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
 	}
 }
@@ -1737,7 +1737,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
 		}
 
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-		radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
 						      RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
 
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
@@ -1753,7 +1753,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
 		radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD6 */
 
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-		radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
 						      RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
 
 		dirty_mask &= ~(1 << buffer_index);
@@ -1801,7 +1801,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
 		radeon_emit(cs, (resource_id_base + resource_index) * 7);
 		radeon_emit_array(cs, rview->tex_resource_words, 7);
 
-		reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
+		reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
 					      RADEON_USAGE_READ,
 					      rview->tex_resource->b.b.nr_samples > 1 ?
 						      RADEON_PRIO_SHADER_TEXTURE_MSAA :
@@ -1932,7 +1932,7 @@ static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600
 
 	radeon_set_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8);
 	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-	radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
+	radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
 					      RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
 }
 
@@ -1985,7 +1985,7 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
 		rbuffer =(struct r600_resource*)state->esgs_ring.buffer;
 		radeon_set_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE, 0);
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-		radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
 						      RADEON_USAGE_READWRITE,
 						      RADEON_PRIO_SHADER_RESOURCE_RW));
 		radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
@@ -1994,7 +1994,7 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
 		rbuffer =(struct r600_resource*)state->gsvs_ring.buffer;
 		radeon_set_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE, 0);
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-		radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
 						      RADEON_USAGE_READWRITE,
 						      RADEON_PRIO_SHADER_RESOURCE_RW));
 		radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
@@ -2901,9 +2901,9 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
 		cheight = cheight > copy_height ? copy_height : cheight;
 		size = (cheight * pitch) / 4;
 		/* emit reloc before writing cs so that cs is always in consistent state */
-		r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ,
+		radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ,
 				      RADEON_PRIO_MIN);
-		r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rdst->resource, RADEON_USAGE_WRITE,
+		radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource, RADEON_USAGE_WRITE,
 				      RADEON_PRIO_MIN);
 		cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, size);
 		cs->buf[cs->cdw++] = base >> 8;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 9f6884d2109..24ed74b40d6 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1620,7 +1620,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
 
 		cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
-		cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+		cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
 							   (struct r600_resource*)info.indirect,
 							   RADEON_USAGE_READ, RADEON_PRIO_MIN);
 	}
@@ -1649,7 +1649,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 				cs->buf[cs->cdw++] = info.count;
 				cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
 				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
-				cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+				cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
 									   (struct r600_resource*)ib.buffer,
 									   RADEON_USAGE_READ, RADEON_PRIO_MIN);
 			}
@@ -1661,7 +1661,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 				cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
 
 				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
-				cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+				cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
 									   (struct r600_resource*)ib.buffer,
 									   RADEON_USAGE_READ, RADEON_PRIO_MIN);
 
@@ -1688,7 +1688,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 			cs->buf[cs->cdw++] = 0; /* unused */
 
 			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-			cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+			cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
 								   t->buf_filled_size, RADEON_USAGE_READ,
 								   RADEON_PRIO_MIN);
 		}
@@ -1879,7 +1879,7 @@ void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a)
 
 	r600_emit_command_buffer(cs, &shader->command_buffer);
 	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-	radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->bo,
+	radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->bo,
 					      RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
 }
 
@@ -2607,7 +2607,7 @@ void r600_trace_emit(struct r600_context *rctx)
 	uint32_t reloc;
 
 	va = rscreen->b.trace_bo->gpu_address;
-	reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rscreen->b.trace_bo,
+	reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rscreen->b.trace_bo,
 				      RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
 	radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
 	radeon_emit(cs, va & 0xFFFFFFFFUL);
diff --git a/src/gallium/drivers/radeon/r600_cs.h b/src/gallium/drivers/radeon/r600_cs.h
index 188abccb507..fa40dc42a31 100644
--- a/src/gallium/drivers/radeon/r600_cs.h
+++ b/src/gallium/drivers/radeon/r600_cs.h
@@ -33,11 +33,21 @@
 #include "r600_pipe_common.h"
 #include "r600d_common.h"
 
-static inline unsigned r600_context_bo_reloc(struct r600_common_context *rctx,
-					     struct r600_ring *ring,
-					     struct r600_resource *rbo,
-					     enum radeon_bo_usage usage,
-					     enum radeon_bo_priority priority)
+/**
+ * Add a buffer to the buffer list for the given command stream (CS).
+ *
+ * All buffers used by a CS must be added to the list. This tells the kernel
+ * driver which buffers are used by GPU commands. Other buffers can
+ * be swapped out (not accessible) during execution.
+ *
+ * The buffer list becomes empty after every context flush and must be
+ * rebuilt.
+ */
+static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rctx,
+						 struct r600_ring *ring,
+						 struct r600_resource *rbo,
+						 enum radeon_bo_usage usage,
+						 enum radeon_bo_priority priority)
 {
 	assert(usage);
 
@@ -66,7 +76,7 @@ static inline void r600_emit_reloc(struct r600_common_context *rctx,
 {
 	struct radeon_winsys_cs *cs = ring->cs;
 	bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_virtual_address;
-	unsigned reloc = r600_context_bo_reloc(rctx, ring, rbo, usage, priority);
+	unsigned reloc = radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
 
 	if (!has_vm) {
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index 47b586f171e..8b0ce9f1bb8 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -61,9 +61,9 @@ static void cik_sdma_do_copy_buffer(struct si_context *ctx,
 	ncopy = (size + CIK_SDMA_COPY_MAX_SIZE - 1) / CIK_SDMA_COPY_MAX_SIZE;
 	r600_need_dma_space(&ctx->b, ncopy * 7);
 
-	r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+	radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
 			      RADEON_PRIO_MIN);
-	r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+	radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
 			      RADEON_PRIO_MIN);
 
 	for (i = 0; i < ncopy; i++) {
@@ -171,9 +171,9 @@ static void cik_sdma_copy_tile(struct si_context *ctx,
 	ncopy = (copy_height + cheight - 1) / cheight;
 	r600_need_dma_space(&ctx->b, ncopy * 12);
 
-	r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
+	radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
 			      RADEON_USAGE_READ, RADEON_PRIO_MIN);
-	r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
+	radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
 			      RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
 
 	copy_height = size * 4 / pitch;
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 8dd12f63e63..7b8a8433cc6 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -159,7 +159,7 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
 				 FALSE);
 
 		/* This must be done after need_cs_space. */
-		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 				      (struct r600_resource*)dst, RADEON_USAGE_WRITE,
 				      RADEON_PRIO_MIN);
 
@@ -240,9 +240,9 @@ void si_copy_buffer(struct si_context *sctx,
 		}
 
 		/* This must be done after r600_need_cs_space. */
-		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src,
 				      RADEON_USAGE_READ, RADEON_PRIO_MIN);
-		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
 				      RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
 
 		si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, sync_flags);
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 558814352aa..762a4b77e5d 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -117,7 +117,7 @@ static bool si_upload_descriptors(struct si_context *sctx,
 
 	util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
 
-	r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
 			      RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
 
 	desc->list_dirty = false;
@@ -163,14 +163,14 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx,
 		if (!rview->resource)
 			continue;
 
-		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 				      rview->resource, RADEON_USAGE_READ,
 				      si_get_resource_ro_priority(rview->resource));
 	}
 
 	if (!views->desc.buffer)
 		return;
-	r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer,
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer,
 			      RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
 }
 
@@ -188,7 +188,7 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
 			(struct si_sampler_view*)view;
 
 		if (rview->resource)
-			r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+			radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 				rview->resource, RADEON_USAGE_READ,
 				si_get_resource_ro_priority(rview->resource));
 
@@ -269,7 +269,7 @@ static void si_sampler_states_begin_new_cs(struct si_context *sctx,
 {
 	if (!states->desc.buffer)
 		return;
-	r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer,
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer,
 			      RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
 }
 
@@ -335,14 +335,14 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
 	while (mask) {
 		int i = u_bit_scan64(&mask);
 
-		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 				      (struct r600_resource*)buffers->buffers[i],
 				      buffers->shader_usage, buffers->priority);
 	}
 
 	if (!buffers->desc.buffer)
 		return;
-	r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 			      buffers->desc.buffer, RADEON_USAGE_READWRITE,
 			      RADEON_PRIO_SHADER_DATA);
 }
@@ -363,14 +363,14 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
 		if (!sctx->vertex_buffer[vb].buffer)
 			continue;
 
-		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 				      (struct r600_resource*)sctx->vertex_buffer[vb].buffer,
 				      RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
 	}
 
 	if (!desc->buffer)
 		return;
-	r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 			      desc->buffer, RADEON_USAGE_READ,
 			      RADEON_PRIO_SHADER_DATA);
 }
@@ -397,7 +397,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 	if (!desc->buffer)
 		return false;
 
-	r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 			      desc->buffer, RADEON_USAGE_READ,
 			      RADEON_PRIO_SHADER_DATA);
 
@@ -441,7 +441,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 		desc[3] = sctx->vertex_elements->rsrc_word3[i];
 
 		if (!bound[ve->vertex_buffer_index]) {
-			r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+			radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 					      (struct r600_resource*)vb->buffer,
 					      RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
 			bound[ve->vertex_buffer_index] = true;
@@ -520,7 +520,7 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
 			  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
 
 		buffers->buffers[slot] = buffer;
-		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 				      (struct r600_resource*)buffer,
 				      buffers->shader_usage, buffers->priority);
 		buffers->desc.enabled_mask |= 1llu << slot;
@@ -615,7 +615,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
 			  S_008F0C_ADD_TID_ENABLE(add_tid);
 
 		pipe_resource_reference(&buffers->buffers[slot], buffer);
-		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 				      (struct r600_resource*)buffer,
 				      buffers->shader_usage, buffers->priority);
 		buffers->desc.enabled_mask |= 1llu << slot;
@@ -705,7 +705,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
 			/* Set the resource. */
 			pipe_resource_reference(&buffers->buffers[bufidx],
 						buffer);
-			r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+			radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 					      (struct r600_resource*)buffer,
 					      buffers->shader_usage, buffers->priority);
 			buffers->desc.enabled_mask |= 1llu << bufidx;
@@ -804,7 +804,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
 							    old_va, buf);
 				buffers->desc.list_dirty = true;
 
-				r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+				radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 						      rbuffer, buffers->shader_usage,
 						      buffers->priority);
 
@@ -833,7 +833,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
 							    old_va, buf);
 				buffers->desc.list_dirty = true;
 
-				r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+				radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 						      rbuffer, buffers->shader_usage,
 						      buffers->priority);
 			}
@@ -858,7 +858,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
 							    old_va, buf);
 				views->desc.list_dirty = true;
 
-				r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+				radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 						      rbuffer, RADEON_USAGE_READ,
 						      RADEON_PRIO_SHADER_BUFFER_RO);
 			}
diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
index 1a7eeaecf9e..309ae04424a 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -78,9 +78,9 @@ static void si_dma_copy_buffer(struct si_context *ctx,
 
 	r600_need_dma_space(&ctx->b, ncopy * 5);
 
-	r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+	radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
 			      RADEON_PRIO_MIN);
-	r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+	radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
 			      RADEON_PRIO_MIN);
 
 	for (i = 0; i < ncopy; i++) {
@@ -177,9 +177,9 @@ static void si_dma_copy_tile(struct si_context *ctx,
 	ncopy = (size / SI_DMA_COPY_MAX_SIZE_DW) + !!(size % SI_DMA_COPY_MAX_SIZE_DW);
 	r600_need_dma_space(&ctx->b, ncopy * 9);
 
-	r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
+	radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
 			      RADEON_USAGE_READ, RADEON_PRIO_MIN);
-	r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
+	radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
 			      RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
 
 	for (i = 0; i < ncopy; i++) {
diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c
index 036d90cabb1..9c4d7ec8ba3 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -144,7 +144,7 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
 {
 	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
 	for (int i = 0; i < state->nbo; ++i) {
-		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, state->bo[i],
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, state->bo[i],
 				      state->bo_usage[i], state->bo_priority[i]);
 	}
 
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 5c922b04c0a..dd0fe0e2edf 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2235,14 +2235,14 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 		}
 
 		tex = (struct r600_texture *)cb->base.texture;
-		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 				      &tex->resource, RADEON_USAGE_READWRITE,
 				      tex->surface.nsamples > 1 ?
 					      RADEON_PRIO_COLOR_BUFFER_MSAA :
 					      RADEON_PRIO_COLOR_BUFFER);
 
 		if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
-			r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+			radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 				tex->cmask_buffer, RADEON_USAGE_READWRITE,
 				RADEON_PRIO_COLOR_META);
 		}
@@ -2282,14 +2282,14 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
 		struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;
 
-		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 				      &rtex->resource, RADEON_USAGE_READWRITE,
 				      zb->base.texture->nr_samples > 1 ?
 					      RADEON_PRIO_DEPTH_BUFFER_MSAA :
 					      RADEON_PRIO_DEPTH_BUFFER);
 
 		if (zb->db_htile_data_base) {
-			r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+			radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 					      rtex->htile_buffer, RADEON_USAGE_READWRITE,
 					      RADEON_PRIO_DEPTH_META);
 		}
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index ebcc2691773..36f15852843 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -351,7 +351,7 @@ static void si_emit_scratch_reloc(struct si_context *sctx)
 			       sctx->spi_tmpring_size);
 
 	if (sctx->scratch_buffer) {
-		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 				      sctx->scratch_buffer, RADEON_USAGE_READWRITE,
 				      RADEON_PRIO_SHADER_RESOURCE_RW);
 
@@ -465,7 +465,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 		radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
 		radeon_emit(cs, 0); /* unused */
 
-		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 				      t->buf_filled_size, RADEON_USAGE_READ,
 				      RADEON_PRIO_MIN);
 	}
@@ -519,7 +519,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 	} else {
 		si_invalidate_draw_sh_constants(sctx);
 
-		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 				      (struct r600_resource *)info->indirect,
 				      RADEON_USAGE_READ, RADEON_PRIO_MIN);
 	}
@@ -529,7 +529,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 					  ib->index_size;
 		uint64_t index_va = r600_resource(ib->buffer)->gpu_address + ib->offset;
 
-		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
 				      (struct r600_resource *)ib->buffer,
 				      RADEON_USAGE_READ, RADEON_PRIO_MIN);
 
@@ -876,7 +876,7 @@ void si_trace_emit(struct si_context *sctx)
 	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
 
 	sctx->trace_id++;
-	r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
 			      RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
 	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
 	radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
-- 
cgit v1.2.3


From fc95058add3d7a90220548e0bb5679d97264f3d2 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 03:17:30 +0200
Subject: radeonsi: convert SPI state to an atom
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_hw_context.c    |  1 +
 src/gallium/drivers/radeonsi/si_pipe.h          |  1 +
 src/gallium/drivers/radeonsi/si_state.h         |  2 +-
 src/gallium/drivers/radeonsi/si_state_shaders.c | 25 ++++++++++++++++---------
 4 files changed, 19 insertions(+), 10 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 28aed79a0bf..86d7140b327 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -197,6 +197,7 @@ void si_begin_new_cs(struct si_context *ctx)
 	si_mark_atom_dirty(ctx, &ctx->blend_color.atom);
 	si_mark_atom_dirty(ctx, &ctx->db_render_state);
 	si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
+	si_mark_atom_dirty(ctx, &ctx->spi_map);
 	si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
 	si_all_descriptors_begin_new_cs(ctx);
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e5d5d10851f..ba5dd1dbf77 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -204,6 +204,7 @@ struct si_context {
 	struct r600_atom		clip_regs;
 	struct si_sample_mask		sample_mask;
 	struct r600_atom		cb_target_mask;
+	struct r600_atom		spi_map;
 	struct r600_atom		msaa_sample_locs;
 	struct r600_atom		msaa_config;
 	int				ps_iter_samples;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index a396f07e926..b15f971f369 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -99,7 +99,6 @@ union si_state {
 		struct si_pm4_state		*vgt_shader_config;
 		struct si_pm4_state		*vs;
 		struct si_pm4_state		*ps;
-		struct si_pm4_state		*spi;
 	} named;
 	struct si_pm4_state	*array[0];
 };
@@ -123,6 +122,7 @@ union si_state_atoms {
 		struct r600_atom *scissors;
 		struct r600_atom *viewports;
 		struct r600_atom *stencil_ref;
+		struct r600_atom *spi_map;
 	} s;
 	struct r600_atom *array[0];
 };
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 702af8c803e..cef32822e54 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -28,6 +28,7 @@
 #include "si_pipe.h"
 #include "si_shader.h"
 #include "sid.h"
+#include "radeon/r600_cs.h"
 
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_ureg.h"
@@ -980,14 +981,19 @@ static void si_delete_tes_shader(struct pipe_context *ctx, void *state)
 	si_delete_shader_selector(ctx, sel);
 }
 
-static void si_update_spi_map(struct si_context *sctx)
+static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
 {
+	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
 	struct si_shader *ps = sctx->ps_shader->current;
 	struct si_shader *vs = si_get_vs_state(sctx);
 	struct tgsi_shader_info *psinfo = &ps->selector->info;
 	struct tgsi_shader_info *vsinfo = &vs->selector->info;
-	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
-	unsigned i, j, tmp;
+	unsigned i, j, tmp, num_written = 0;
+
+	if (!ps->nparam)
+		return;
+
+	radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, ps->nparam);
 
 	for (i = 0; i < psinfo->num_inputs; i++) {
 		unsigned name = psinfo->input_semantic_name[i];
@@ -1031,9 +1037,9 @@ bcolor:
 			tmp = S_028644_OFFSET(0x20);
 		}
 
-		si_pm4_set_reg(pm4,
-			       R_028644_SPI_PS_INPUT_CNTL_0 + param_offset * 4,
-			       tmp);
+		assert(param_offset == num_written);
+		radeon_emit(cs, tmp);
+		num_written++;
 
 		if (name == TGSI_SEMANTIC_COLOR &&
 		    ps->key.ps.color_two_side) {
@@ -1042,8 +1048,7 @@ bcolor:
 			goto bcolor;
 		}
 	}
-
-	si_pm4_set_state(sctx, spi, pm4);
+	assert(ps->nparam == num_written);
 }
 
 /* Initialize state related to ESGS / GSVS ring buffers */
@@ -1452,7 +1457,7 @@ void si_update_shaders(struct si_context *sctx)
 	    sctx->flatshade != rs->flatshade) {
 		sctx->sprite_coord_enable = rs->sprite_coord_enable;
 		sctx->flatshade = rs->flatshade;
-		si_update_spi_map(sctx);
+		si_mark_atom_dirty(sctx, &sctx->spi_map);
 	}
 
 	if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
@@ -1476,6 +1481,8 @@ void si_update_shaders(struct si_context *sctx)
 
 void si_init_shader_functions(struct si_context *sctx)
 {
+	si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map, 2+31);
+
 	sctx->b.b.create_vs_state = si_create_vs_state;
 	sctx->b.b.create_tcs_state = si_create_tcs_state;
 	sctx->b.b.create_tes_state = si_create_tes_state;
-- 
cgit v1.2.3


From ec9d5e181e3ae30e00abed64762945beda6d3c0e Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 03:49:15 +0200
Subject: radeonsi: don't count IB space for states, just use an upper bound
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since we don't put any resource descriptors in IBs, the space used by draw
calls is quite small.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_hw_context.c | 53 +++-------------------------
 src/gallium/drivers/radeonsi/si_pipe.h       |  7 ----
 2 files changed, 5 insertions(+), 55 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 86d7140b327..dffc6df6c81 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -35,8 +35,9 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
 	/* There are two memory usage counters in the winsys for all buffers
 	 * that have been added (cs_add_reloc) and two counters in the pipe
 	 * driver for those that haven't been added yet.
-	 * */
-	if (!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs, ctx->b.vram, ctx->b.gtt)) {
+	 */
+	if (unlikely(!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs,
+						       ctx->b.vram, ctx->b.gtt))) {
 		ctx->b.gtt = 0;
 		ctx->b.vram = 0;
 		ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
@@ -46,54 +47,10 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
 	ctx->b.vram = 0;
 
 	/* If the CS is sufficiently large, don't count the space needed
-	 * and just flush if there is less than 8096 dwords left.
+	 * and just flush if there is not enough space left.
 	 */
-	if (cs->max_dw >= 24 * 1024) {
-		if (cs->cdw > cs->max_dw - 8 * 1024)
-			ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
-		return;
-	}
-
-	/* The number of dwords we already used in the CS so far. */
-	num_dw += cs->cdw;
-
-	if (count_draw_in) {
-		unsigned mask = ctx->dirty_atoms;
-
-		while (mask)
-			num_dw += ctx->atoms.array[u_bit_scan(&mask)]->num_dw;
-
-		/* The number of dwords all the dirty states would take. */
-		num_dw += si_pm4_dirty_dw(ctx);
-
-		/* The upper-bound of how much a draw command would take. */
-		num_dw += SI_MAX_DRAW_CS_DWORDS;
-	}
-
-	/* Count in queries_suspend. */
-	num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend +
-		  ctx->b.num_cs_dw_timer_queries_suspend;
-
-	/* Count in streamout_end at the end of CS. */
-	if (ctx->b.streamout.begin_emitted) {
-		num_dw += ctx->b.streamout.num_dw_for_end;
-	}
-
-	/* Count in render_condition(NULL) at the end of CS. */
-	if (ctx->b.predicate_drawing) {
-		num_dw += 3;
-	}
-
-	/* Count in framebuffer cache flushes at the end of CS. */
-	num_dw += ctx->atoms.s.cache_flush->num_dw;
-
-	if (ctx->screen->b.trace_bo)
-		num_dw += SI_TRACE_CS_DWORDS * 2;
-
-	/* Flush if there's not enough space. */
-	if (num_dw > cs->max_dw) {
+	if (unlikely(cs->cdw > cs->max_dw - 2048))
 		ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
-	}
 }
 
 void si_context_gfx_flush(void *context, unsigned flags,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index ba5dd1dbf77..abdc900ece6 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -43,13 +43,6 @@
 #define SI_RESTART_INDEX_UNKNOWN INT_MIN
 #define SI_NUM_SMOOTH_AA_SAMPLES 8
 
-#define SI_TRACE_CS_DWORDS		7
-
-#define SI_MAX_DRAW_CS_DWORDS \
-	(/*scratch:*/ 3 + /*derived prim state:*/ 3 + \
-	 /*draw regs:*/ 18 + /*draw packets:*/ 31 +\
-	 /*derived tess state:*/ 19)
-
 /* Instruction cache. */
 #define SI_CONTEXT_INV_ICACHE		(R600_CONTEXT_PRIVATE_FLAG << 0)
 /* Cache used by scalar memory (SMEM) instructions. They also use TC
-- 
cgit v1.2.3


From aad43f0768edc0711d5f54ea79b052fb4f1d3321 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 03:53:39 +0200
Subject: radeonsi: don't set number of IB dwords for states
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_descriptors.c   |  7 ++---
 src/gallium/drivers/radeonsi/si_state.c         | 35 ++++++++++---------------
 src/gallium/drivers/radeonsi/si_state.h         |  3 +--
 src/gallium/drivers/radeonsi/si_state_shaders.c |  2 +-
 4 files changed, 18 insertions(+), 29 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 762a4b77e5d..3041da621c3 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1027,12 +1027,9 @@ void si_init_all_descriptors(struct si_context *sctx)
 	sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
 	sctx->b.invalidate_buffer = si_invalidate_buffer;
 
-	/* Shader user data.
-	 * The number of dwords is set to the upper bound:
-	 *   4 pointers per shader, +1 for vertex buffers, +2 for the VS copy shader.
-	 */
+	/* Shader user data. */
 	si_init_atom(sctx, &sctx->shader_userdata.atom, &sctx->atoms.s.shader_userdata,
-		     si_emit_shader_userdata, (SI_NUM_SHADERS * 4 + 1 + 2) * 4);
+		     si_emit_shader_userdata);
 
 	/* Set default and immutable mappings. */
 	si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index dd0fe0e2edf..183ae4c5bfd 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -47,11 +47,9 @@ si_init_external_atom(struct si_context *sctx, struct r600_atom *atom,
 /* Initialize an atom owned by radeonsi.  */
 void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
 		  struct r600_atom **list_elem,
-		  void (*emit_func)(struct si_context *ctx, struct r600_atom *state),
-		  unsigned num_dw)
+		  void (*emit_func)(struct si_context *ctx, struct r600_atom *state))
 {
 	atom->emit = (void*)emit_func;
-	atom->num_dw = num_dw;
 	atom->dirty = false;
 	atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */
 	*list_elem = atom;
@@ -2164,11 +2162,6 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 
 	si_update_poly_offset_state(sctx);
 	si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
-
-	sctx->framebuffer.atom.num_dw = state->nr_cbufs*16 + (8 - state->nr_cbufs)*3;
-	sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4;
-	sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */
-	sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */
 	si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
 
 	if (sctx->framebuffer.nr_samples != old_nr_samples) {
@@ -3056,19 +3049,19 @@ void si_init_state_functions(struct si_context *sctx)
 	si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin);
 	si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable);
 
-	si_init_atom(sctx, &sctx->cache_flush, &sctx->atoms.s.cache_flush, si_emit_cache_flush, 24);
-	si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0);
-	si_init_atom(sctx, &sctx->msaa_sample_locs, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs, 18);
-	si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);
-	si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config, 10);
-	si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask, 4);
-	si_init_atom(sctx, &sctx->cb_target_mask, &sctx->atoms.s.cb_target_mask, si_emit_cb_target_mask, 3);
-	si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color, 6);
-	si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6);
-	si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state, 2+6*4);
-	si_init_atom(sctx, &sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors, 16*4);
-	si_init_atom(sctx, &sctx->viewports.atom, &sctx->atoms.s.viewports, si_emit_viewports, 16*8);
-	si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref, 4);
+	si_init_atom(sctx, &sctx->cache_flush, &sctx->atoms.s.cache_flush, si_emit_cache_flush);
+	si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
+	si_init_atom(sctx, &sctx->msaa_sample_locs, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
+	si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
+	si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
+	si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
+	si_init_atom(sctx, &sctx->cb_target_mask, &sctx->atoms.s.cb_target_mask, si_emit_cb_target_mask);
+	si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color);
+	si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs);
+	si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state);
+	si_init_atom(sctx, &sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors);
+	si_init_atom(sctx, &sctx->viewports.atom, &sctx->atoms.s.viewports, si_emit_viewports);
+	si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref);
 
 	sctx->b.b.create_blend_state = si_create_blend_state;
 	sctx->b.b.bind_blend_state = si_bind_blend_state;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index b15f971f369..9a1d08752c2 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -267,8 +267,7 @@ struct si_shader_selector;
 
 void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
 		  struct r600_atom **list_elem,
-		  void (*emit_func)(struct si_context *ctx, struct r600_atom *state),
-		  unsigned num_dw);
+		  void (*emit_func)(struct si_context *ctx, struct r600_atom *state));
 boolean si_is_format_supported(struct pipe_screen *screen,
                                enum pipe_format format,
                                enum pipe_texture_target target,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index cef32822e54..78b68bc3dfe 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1481,7 +1481,7 @@ void si_update_shaders(struct si_context *sctx)
 
 void si_init_shader_functions(struct si_context *sctx)
 {
-	si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map, 2+31);
+	si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map);
 
 	sctx->b.b.create_vs_state = si_create_vs_state;
 	sctx->b.b.create_tcs_state = si_create_tcs_state;
-- 
cgit v1.2.3


From 28b34b474e330be881d15a34859811e9f5e36eb5 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 03:56:13 +0200
Subject: radeonsi: don't send IB dword usage to si_need_cs_space
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_cp_dma.c     | 5 ++---
 src/gallium/drivers/radeonsi/si_hw_context.c | 3 +--
 src/gallium/drivers/radeonsi/si_pipe.h       | 2 +-
 src/gallium/drivers/radeonsi/si_state.c      | 2 +-
 src/gallium/drivers/radeonsi/si_state_draw.c | 2 +-
 5 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 7b8a8433cc6..32ab6a9dcbf 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -155,8 +155,7 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
 		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
 		unsigned dma_flags = tc_l2_flag;
 
-		si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0),
-				 FALSE);
+		si_need_cs_space(sctx);
 
 		/* This must be done after need_cs_space. */
 		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
@@ -226,7 +225,7 @@ void si_copy_buffer(struct si_context *sctx,
 		unsigned sync_flags = tc_l2_flag;
 		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
 
-		si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0), FALSE);
+		si_need_cs_space(sctx);
 
 		/* Flush the caches for the first copy only. Also wait for old CP DMA packets to complete. */
 		if (sctx->b.flags) {
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index dffc6df6c81..1d5d42657e4 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -27,8 +27,7 @@
 #include "si_pipe.h"
 
 /* initialize */
-void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
-			boolean count_draw_in)
+void si_need_cs_space(struct si_context *ctx)
 {
 	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index abdc900ece6..1c4c46d916a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -317,7 +317,7 @@ void si_dma_copy(struct pipe_context *ctx,
 void si_context_gfx_flush(void *context, unsigned flags,
 			  struct pipe_fence_handle **fence);
 void si_begin_new_cs(struct si_context *ctx);
-void si_need_cs_space(struct si_context *ctx, unsigned num_dw, boolean count_draw_in);
+void si_need_cs_space(struct si_context *ctx);
 
 /* si_compute.c */
 void si_init_compute_functions(struct si_context *sctx);
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 183ae4c5bfd..9019b3596e6 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3039,7 +3039,7 @@ static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
 static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
 				 bool include_draw_vbo)
 {
-	si_need_cs_space((struct si_context*)ctx, num_dw, include_draw_vbo);
+	si_need_cs_space((struct si_context*)ctx);
 }
 
 static void si_init_config(struct si_context *sctx);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 36f15852843..8cb98d7317c 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -818,7 +818,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	if (sctx->b.flags)
 		si_mark_atom_dirty(sctx, sctx->atoms.s.cache_flush);
 
-	si_need_cs_space(sctx, 0, TRUE);
+	si_need_cs_space(sctx);
 
 	/* Emit states. */
 	mask = sctx->dirty_atoms;
-- 
cgit v1.2.3


From 228e80123ad7e9006c56f688ea065a0ec525bf33 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 12:25:03 +0200
Subject: radeonsi: reorder si_context variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_pipe.h | 85 ++++++++++++++++++----------------
 1 file changed, 45 insertions(+), 40 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 1c4c46d916a..02d75f00f98 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -160,16 +160,41 @@ struct si_context {
 	void				*custom_blend_fastclear;
 	void				*pstipple_sampler_state;
 	struct si_screen		*screen;
-	struct si_pm4_state		*init_config;
 	struct pipe_fence_handle	*last_gfx_fence;
 	struct si_shader_selector	*fixed_func_tcs_shader;
+	LLVMTargetMachineRef		tm;
 
+	/* Atoms (direct states). */
 	union si_state_atoms		atoms;
 	unsigned			dirty_atoms; /* mask */
+	/* PM4 states (precomputed immutable states) */
+	union si_state			queued;
+	union si_state			emitted;
 
+	/* Atom declarations. */
+	struct r600_atom		cache_flush;
 	struct si_framebuffer		framebuffer;
-	struct si_vertex_element	*vertex_elements;
-	/* for saving when using blitter */
+	struct r600_atom		msaa_sample_locs;
+	struct r600_atom		db_render_state;
+	struct r600_atom		msaa_config;
+	struct si_sample_mask		sample_mask;
+	struct r600_atom		cb_target_mask;
+	struct si_blend_color		blend_color;
+	struct r600_atom		clip_regs;
+	struct si_clip_state		clip_state;
+	struct si_shader_data		shader_userdata;
+	struct si_scissors		scissors;
+	struct si_viewports		viewports;
+	struct si_stencil_ref		stencil_ref;
+	struct r600_atom		spi_map;
+
+	/* Precomputed states. */
+	struct si_pm4_state		*init_config;
+	struct si_pm4_state		*vgt_shader_config[4];
+	/* With rasterizer discard, there doesn't have to be a pixel shader.
+	 * In that case, we bind this one: */
+	void				*dummy_pixel_shader;
+
 	/* shaders */
 	struct si_shader_selector	*ps_shader;
 	struct si_shader_selector	*gs_shader;
@@ -177,57 +202,36 @@ struct si_context {
 	struct si_shader_selector	*tcs_shader;
 	struct si_shader_selector	*tes_shader;
 	struct si_cs_shader_state	cs_shader_state;
-	struct si_shader_data		shader_userdata;
+
 	/* shader information */
+	struct si_vertex_element	*vertex_elements;
 	unsigned			sprite_coord_enable;
 	bool				flatshade;
+
+	/* shader descriptors */
 	struct si_descriptors		vertex_buffers;
 	struct si_buffer_resources	const_buffers[SI_NUM_SHADERS];
 	struct si_buffer_resources	rw_buffers[SI_NUM_SHADERS];
 	struct si_textures_info		samplers[SI_NUM_SHADERS];
-	struct r600_resource		*scratch_buffer;
+
+	/* other shader resources */
+	struct pipe_constant_buffer	null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
+	struct pipe_resource		*esgs_ring;
+	struct pipe_resource		*gsvs_ring;
+	struct pipe_resource		*tf_ring;
 	struct r600_resource		*border_color_table;
 	unsigned			border_color_offset;
 
-	struct si_blend_color		blend_color;
-	struct si_stencil_ref		stencil_ref;
-	struct si_scissors		scissors;
-	struct si_viewports		viewports;
-	struct si_clip_state		clip_state;
-	struct r600_atom		clip_regs;
-	struct si_sample_mask		sample_mask;
-	struct r600_atom		cb_target_mask;
-	struct r600_atom		spi_map;
-	struct r600_atom		msaa_sample_locs;
-	struct r600_atom		msaa_config;
-	int				ps_iter_samples;
-	bool				smoothing_enabled;
-
 	/* Vertex and index buffers. */
-	bool			vertex_buffers_dirty;
-	struct pipe_index_buffer index_buffer;
-	struct pipe_vertex_buffer vertex_buffer[SI_NUM_VERTEX_BUFFERS];
-
-	/* With rasterizer discard, there doesn't have to be a pixel shader.
-	 * In that case, we bind this one: */
-	void			*dummy_pixel_shader;
-	struct r600_atom	cache_flush;
-	struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
+	bool				vertex_buffers_dirty;
+	struct pipe_index_buffer	index_buffer;
+	struct pipe_vertex_buffer	vertex_buffer[SI_NUM_VERTEX_BUFFERS];
 
-	/* VGT states. */
-	struct si_pm4_state	*vgt_shader_config[4];
-	struct pipe_resource	*esgs_ring;
-	struct pipe_resource	*gsvs_ring;
-	struct pipe_resource	*tf_ring;
-
-	LLVMTargetMachineRef		tm;
-
-	/* SI state handling */
-	union si_state	queued;
-	union si_state	emitted;
+	/* MSAA config state. */
+	int				ps_iter_samples;
+	bool				smoothing_enabled;
 
 	/* DB render state. */
-	struct r600_atom	db_render_state;
 	bool			dbcb_depth_copy_enabled;
 	bool			dbcb_stencil_copy_enabled;
 	unsigned		dbcb_copy_sample;
@@ -252,6 +256,7 @@ struct si_context {
 	unsigned		last_gsvs_itemsize;
 
 	/* Scratch buffer */
+	struct r600_resource	*scratch_buffer;
 	boolean                 emit_scratch_reloc;
 	unsigned		scratch_waves;
 	unsigned		spi_tmpring_size;
-- 
cgit v1.2.3


From 77f80a20be9bbd8a51885e2e946106b45abf198a Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 12:35:02 +0200
Subject: radeonsi: remove unused si_pm4_state code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_pm4.c | 25 ++-----------------------
 src/gallium/drivers/radeonsi/si_pm4.h |  5 -----
 2 files changed, 2 insertions(+), 28 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c
index 9c4d7ec8ba3..1ae252a8629 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -124,37 +124,16 @@ void si_pm4_free_state(struct si_context *sctx,
 	si_pm4_free_state_simple(state);
 }
 
-unsigned si_pm4_dirty_dw(struct si_context *sctx)
-{
-	unsigned count = 0;
-
-	for (int i = 0; i < NUMBER_OF_STATES; ++i) {
-		struct si_pm4_state *state = sctx->queued.array[i];
-
-		if (!state || sctx->emitted.array[i] == state)
-			continue;
-
-		count += state->ndw;
-	}
-
-	return count;
-}
-
 void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
 {
 	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+
 	for (int i = 0; i < state->nbo; ++i) {
 		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, state->bo[i],
 				      state->bo_usage[i], state->bo_priority[i]);
 	}
 
-	memcpy(&cs->buf[cs->cdw], state->pm4, state->ndw * 4);
-
-	for (int i = 0; i < state->nrelocs; ++i) {
-		cs->buf[cs->cdw + state->relocs[i]] += cs->cdw << 2;
-	}
-
-	cs->cdw += state->ndw;
+	radeon_emit_array(cs, state->pm4, state->ndw);
 }
 
 void si_pm4_emit_dirty(struct si_context *sctx)
diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
index d2158827f58..7a8f5a02550 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -54,10 +54,6 @@ struct si_pm4_state
 	enum radeon_bo_usage	bo_usage[SI_PM4_MAX_BO];
 	enum radeon_bo_priority	bo_priority[SI_PM4_MAX_BO];
 
-	/* relocs for shader data */
-	unsigned	nrelocs;
-	unsigned	relocs[SI_PM4_MAX_RELOCS];
-
 	bool compute_pkt;
 };
 
@@ -76,7 +72,6 @@ void si_pm4_free_state(struct si_context *sctx,
 		       struct si_pm4_state *state,
 		       unsigned idx);
 
-unsigned si_pm4_dirty_dw(struct si_context *sctx);
 void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state);
 void si_pm4_emit_dirty(struct si_context *sctx);
 void si_pm4_reset_emitted(struct si_context *sctx);
-- 
cgit v1.2.3


From fbbebeae10f85e6fe9b81cf4187b8eb8ecba6da5 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 12:39:45 +0200
Subject: radeonsi: inline si_cmd_context_control
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/Makefile.sources |  1 -
 src/gallium/drivers/radeonsi/si_commands.c    | 36 ---------------------------
 src/gallium/drivers/radeonsi/si_state.c       |  5 +++-
 src/gallium/drivers/radeonsi/si_state.h       |  3 ---
 4 files changed, 4 insertions(+), 41 deletions(-)
 delete mode 100644 src/gallium/drivers/radeonsi/si_commands.c

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources
index fd44807408e..5f5eac12be0 100644
--- a/src/gallium/drivers/radeonsi/Makefile.sources
+++ b/src/gallium/drivers/radeonsi/Makefile.sources
@@ -1,7 +1,6 @@
 C_SOURCES := \
 	cik_sdma.c \
 	si_blit.c \
-	si_commands.c \
 	si_compute.c \
 	si_cp_dma.c \
 	si_debug.c \
diff --git a/src/gallium/drivers/radeonsi/si_commands.c b/src/gallium/drivers/radeonsi/si_commands.c
deleted file mode 100644
index 04bc5b9b7fc..00000000000
--- a/src/gallium/drivers/radeonsi/si_commands.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2012 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *      Christian König <christian.koenig@amd.com>
- */
-
-#include "sid.h"
-#include "si_pipe.h"
-
-void si_cmd_context_control(struct si_pm4_state *pm4)
-{
-	si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
-	si_pm4_cmd_add(pm4, 0x80000000);
-	si_pm4_cmd_add(pm4, 0x80000000);
-	si_pm4_cmd_end(pm4, false);
-}
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 9019b3596e6..7f4e846cd16 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3261,7 +3261,10 @@ static void si_init_config(struct si_context *sctx)
 	if (pm4 == NULL)
 		return;
 
-	si_cmd_context_control(pm4);
+	si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
+	si_pm4_cmd_add(pm4, 0x80000000);
+	si_pm4_cmd_add(pm4, 0x80000000);
+	si_pm4_cmd_end(pm4, false);
 
 	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
 	si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 9a1d08752c2..49f9f65bc14 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -297,7 +297,4 @@ void si_emit_cache_flush(struct si_context *sctx, struct r600_atom *atom);
 void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo);
 void si_trace_emit(struct si_context *sctx);
 
-/* si_commands.c */
-void si_cmd_context_control(struct si_pm4_state *pm4);
-
 #endif
-- 
cgit v1.2.3


From 5e2619ef3078fe4f9c3e0780ee520fbfb727ee54 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 13:17:15 +0200
Subject: radeonsi: use all built-in border colors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_state.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 7f4e846cd16..52fa8fec033 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2709,10 +2709,25 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
 		return NULL;
 	}
 
-	if (sampler_state_needs_border_color(state))
-		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
-	else
+	if (!sampler_state_needs_border_color(state))
+		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
+	else if (state->border_color.f[0] == 0 &&
+		 state->border_color.f[1] == 0 &&
+		 state->border_color.f[2] == 0 &&
+		 state->border_color.f[3] == 0)
 		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
+	else if (state->border_color.f[0] == 0 &&
+		 state->border_color.f[1] == 0 &&
+		 state->border_color.f[2] == 0 &&
+		 state->border_color.f[3] == 1)
+		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
+	else if (state->border_color.f[0] == 1 &&
+		 state->border_color.f[1] == 1 &&
+		 state->border_color.f[2] == 1 &&
+		 state->border_color.f[3] == 1)
+		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
+	else
+		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
 
 	rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
 			  S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
-- 
cgit v1.2.3


From a9971e85d9a4038645bdc7496d73906fc324b805 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 14:13:10 +0200
Subject: radeonsi: rework uploading border colors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The border colors are uploaded only once when the state is created.

This brings truly immutable sampler descriptors, because they don't have
to be updated every time a sampler state is re-bound.

It also moves the TA_BC_BASE_ADDR registers to init_config, removing one
more state. The catch is there is now a limit: only 4096 border colors can
be used by one context. I don't think that will be a problem.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_descriptors.c |   9 +-
 src/gallium/drivers/radeonsi/si_pipe.c        |  23 ++++-
 src/gallium/drivers/radeonsi/si_pipe.h        |   8 +-
 src/gallium/drivers/radeonsi/si_state.c       | 124 +++++++++-----------------
 src/gallium/drivers/radeonsi/si_state.h       |   3 -
 5 files changed, 75 insertions(+), 92 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 3041da621c3..92a7068e715 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -273,13 +273,17 @@ static void si_sampler_states_begin_new_cs(struct si_context *sctx,
 			      RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
 }
 
-void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
-				unsigned start, unsigned count, void **states)
+static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
+                                   unsigned start, unsigned count, void **states)
 {
+	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_sampler_states *samplers = &sctx->samplers[shader].states;
 	struct si_sampler_state **sstates = (struct si_sampler_state**)states;
 	int i;
 
+	if (!count || shader >= SI_NUM_SHADERS)
+		return;
+
 	if (start == 0)
 		samplers->saved_states[0] = states[0];
 	if (start == 1)
@@ -1022,6 +1026,7 @@ void si_init_all_descriptors(struct si_context *sctx)
 			    4, SI_NUM_VERTEX_BUFFERS);
 
 	/* Set pipe_context functions. */
+	sctx->b.b.bind_sampler_states = si_bind_sampler_states;
 	sctx->b.b.set_constant_buffer = si_set_constant_buffer;
 	sctx->b.b.set_sampler_views = si_set_sampler_views;
 	sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index da774789525..d68ea5fb31d 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -44,7 +44,8 @@ static void si_destroy_context(struct pipe_context *context)
 	pipe_resource_reference(&sctx->gsvs_ring, NULL);
 	pipe_resource_reference(&sctx->tf_ring, NULL);
 	pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
-	r600_resource_reference(&sctx->border_color_table, NULL);
+	r600_resource_reference(&sctx->border_color_buffer, NULL);
+	free(sctx->border_color_table);
 	r600_resource_reference(&sctx->scratch_buffer, NULL);
 	sctx->b.ws->fence_reference(&sctx->last_gfx_fence, NULL);
 
@@ -139,6 +140,25 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 						sscreen->b.trace_bo->cs_buf : NULL);
 	sctx->b.rings.gfx.flush = si_context_gfx_flush;
 
+	/* Border colors. */
+	sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
+					  sizeof(*sctx->border_color_table));
+	if (!sctx->border_color_table)
+		goto fail;
+
+	sctx->border_color_buffer = (struct r600_resource*)
+		pipe_buffer_create(screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT,
+				   SI_MAX_BORDER_COLORS *
+				   sizeof(*sctx->border_color_table));
+	if (!sctx->border_color_buffer)
+		goto fail;
+
+	sctx->border_color_map =
+		ws->buffer_map(sctx->border_color_buffer->cs_buf,
+			       NULL, PIPE_TRANSFER_WRITE);
+	if (!sctx->border_color_map)
+		goto fail;
+
 	si_init_all_descriptors(sctx);
 	si_init_state_functions(sctx);
 	si_init_shader_functions(sctx);
@@ -197,6 +217,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 
 	return &sctx->b.b;
 fail:
+	fprintf(stderr, "radeonsi: Failed to create a context.\n");
 	si_destroy_context(&sctx->b.b);
 	return NULL;
 }
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 02d75f00f98..847853e59e9 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -79,6 +79,7 @@
 #define SI_GET_TRACE_POINT_ID(x)	((x) & 0xffff)
 
 #define SI_MAX_VIEWPORTS	16
+#define SI_MAX_BORDER_COLORS	4096
 
 struct si_compute;
 
@@ -103,7 +104,6 @@ struct si_sampler_view {
 
 struct si_sampler_state {
 	uint32_t			val[4];
-	uint32_t			border_color[4];
 };
 
 struct si_cs_shader_state {
@@ -219,8 +219,10 @@ struct si_context {
 	struct pipe_resource		*esgs_ring;
 	struct pipe_resource		*gsvs_ring;
 	struct pipe_resource		*tf_ring;
-	struct r600_resource		*border_color_table;
-	unsigned			border_color_offset;
+	union pipe_color_union		*border_color_table; /* in CPU memory, any endian */
+	struct r600_resource		*border_color_buffer;
+	union pipe_color_union		*border_color_map; /* in VRAM (slow access), little endian */
+	unsigned			border_color_count;
 
 	/* Vertex and index buffers. */
 	bool				vertex_buffers_dirty;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 52fa8fec033..e31895d6933 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2701,9 +2701,10 @@ static bool sampler_state_needs_border_color(const struct pipe_sampler_state *st
 static void *si_create_sampler_state(struct pipe_context *ctx,
 				     const struct pipe_sampler_state *state)
 {
+	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
 	unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
-	unsigned border_color_type;
+	unsigned border_color_type, border_color_index = 0;
 
 	if (rstate == NULL) {
 		return NULL;
@@ -2726,9 +2727,38 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
 		 state->border_color.f[2] == 1 &&
 		 state->border_color.f[3] == 1)
 		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
-	else
+	else {
+		int i;
+
 		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
 
+		/* Check if the border has been uploaded already. */
+		for (i = 0; i < sctx->border_color_count; i++)
+			if (memcmp(&sctx->border_color_table[i], &state->border_color,
+				   sizeof(state->border_color)) == 0)
+				break;
+
+		if (i >= SI_MAX_BORDER_COLORS) {
+			/* Getting 4096 unique border colors is very unlikely. */
+			fprintf(stderr, "radeonsi: The border color table is full. "
+				"Any new border colors will be just black. "
+				"Please file a bug.\n");
+			border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
+		} else {
+			if (i == sctx->border_color_count) {
+				/* Upload a new border color. */
+				memcpy(&sctx->border_color_table[i], &state->border_color,
+				       sizeof(state->border_color));
+				util_memcpy_cpu_to_le32(&sctx->border_color_map[i],
+							&state->border_color,
+							sizeof(state->border_color));
+				sctx->border_color_count++;
+			}
+
+			border_color_index = i;
+		}
+	}
+
 	rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
 			  S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
 			  S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
@@ -2742,89 +2772,11 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
 			  S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
 			  S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) |
 			  S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
-	rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type);
-
-	if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
-		memcpy(rstate->border_color, state->border_color.ui,
-		       sizeof(rstate->border_color));
-	}
-
+	rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) |
+			 S_008F3C_BORDER_COLOR_TYPE(border_color_type);
 	return rstate;
 }
 
-/* Upload border colors and update the pointers in resource descriptors.
- * There can only be 4096 border colors per context.
- *
- * XXX: This is broken if the buffer gets reallocated.
- */
-static void si_set_border_colors(struct si_context *sctx, unsigned count,
-				 void **states)
-{
-	struct si_sampler_state **rstates = (struct si_sampler_state **)states;
-	uint32_t *border_color_table = NULL;
-	int i, j;
-
-	for (i = 0; i < count; i++) {
-		if (rstates[i] &&
-		    G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) ==
-		    V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
-			if (!sctx->border_color_table ||
-			    ((sctx->border_color_offset + count - i) &
-			     C_008F3C_BORDER_COLOR_PTR)) {
-				r600_resource_reference(&sctx->border_color_table, NULL);
-				sctx->border_color_offset = 0;
-
-				sctx->border_color_table =
-					si_resource_create_custom(&sctx->screen->b.b,
-								  PIPE_USAGE_DYNAMIC,
-								  4096 * 4 * 4);
-			}
-
-			if (!border_color_table) {
-			        border_color_table =
-					sctx->b.ws->buffer_map(sctx->border_color_table->cs_buf,
-							     sctx->b.rings.gfx.cs,
-							     PIPE_TRANSFER_WRITE |
-							     PIPE_TRANSFER_UNSYNCHRONIZED);
-			}
-
-			for (j = 0; j < 4; j++) {
-				border_color_table[4 * sctx->border_color_offset + j] =
-					util_le32_to_cpu(rstates[i]->border_color[j]);
-			}
-
-			rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR;
-			rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(sctx->border_color_offset++);
-		}
-	}
-
-	if (border_color_table) {
-		struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
-
-		uint64_t va_offset = sctx->border_color_table->gpu_address;
-
-		si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8);
-		if (sctx->b.chip_class >= CIK)
-			si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40);
-		si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ,
-			      RADEON_PRIO_SHADER_DATA);
-		si_pm4_set_state(sctx, ta_bordercolor_base, pm4);
-	}
-}
-
-static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
-                                   unsigned start, unsigned count,
-                                   void **states)
-{
-	struct si_context *sctx = (struct si_context *)ctx;
-
-	if (!count || shader >= SI_NUM_SHADERS)
-		return;
-
-	si_set_border_colors(sctx, count, states);
-	si_set_sampler_descriptors(sctx, shader, start, count, states);
-}
-
 static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
@@ -3105,7 +3057,6 @@ void si_init_state_functions(struct si_context *sctx)
 	sctx->b.b.get_sample_position = cayman_get_sample_position;
 
 	sctx->b.b.create_sampler_state = si_create_sampler_state;
-	sctx->b.b.bind_sampler_states = si_bind_sampler_states;
 	sctx->b.b.delete_sampler_state = si_delete_sampler_state;
 
 	sctx->b.b.create_sampler_view = si_create_sampler_view;
@@ -3270,6 +3221,7 @@ static void si_init_config(struct si_context *sctx)
 	unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
 	unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
 	unsigned raster_config, raster_config_1;
+	uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
 	int i;
 
@@ -3434,5 +3386,11 @@ static void si_init_config(struct si_context *sctx)
 		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
 	}
 
+	si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
+	if (sctx->b.chip_class >= CIK)
+		si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
+	si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
+		      RADEON_PRIO_SHADER_DATA);
+
 	sctx->init_config = pm4;
 }
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 49f9f65bc14..f5726f0c5de 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -91,7 +91,6 @@ union si_state {
 		struct si_state_rasterizer	*rasterizer;
 		struct si_state_dsa		*dsa;
 		struct si_pm4_state		*poly_offset;
-		struct si_pm4_state		*ta_bordercolor_base;
 		struct si_pm4_state		*ls;
 		struct si_pm4_state		*hs;
 		struct si_pm4_state		*es;
@@ -246,8 +245,6 @@ struct si_buffer_resources {
 	} while(0)
 
 /* si_descriptors.c */
-void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
-				unsigned start, unsigned count, void **states);
 void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
 			struct pipe_resource *buffer,
 			unsigned stride, unsigned num_records,
-- 
cgit v1.2.3


From b89fa63d45cc5c8b3eec0d38ba30c57a5ecf5565 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 14:39:54 +0200
Subject: radeonsi: remove si_pm4_cleanup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All remaining pm4 states are created and destroyed by state trackers.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_pipe.c | 2 --
 src/gallium/drivers/radeonsi/si_pm4.c  | 7 -------
 src/gallium/drivers/radeonsi/si_pm4.h  | 1 -
 3 files changed, 10 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index d68ea5fb31d..7dbb2e30422 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -72,8 +72,6 @@ static void si_destroy_context(struct pipe_context *context)
 	if (sctx->blitter)
 		util_blitter_destroy(sctx->blitter);
 
-	si_pm4_cleanup(sctx);
-
 	r600_common_context_cleanup(&sctx->b);
 
 #if HAVE_LLVM >= 0x0306
diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c
index 1ae252a8629..b06e92b1fef 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -153,10 +153,3 @@ void si_pm4_reset_emitted(struct si_context *sctx)
 {
 	memset(&sctx->emitted, 0, sizeof(sctx->emitted));
 }
-
-void si_pm4_cleanup(struct si_context *sctx)
-{
-	for (int i = 0; i < NUMBER_OF_STATES; ++i) {
-		si_pm4_free_state(sctx, sctx->queued.array[i], i);
-	}
-}
diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
index 7a8f5a02550..efa20628aab 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -75,6 +75,5 @@ void si_pm4_free_state(struct si_context *sctx,
 void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state);
 void si_pm4_emit_dirty(struct si_context *sctx);
 void si_pm4_reset_emitted(struct si_context *sctx);
-void si_pm4_cleanup(struct si_context *sctx);
 
 #endif
-- 
cgit v1.2.3


From 0f4688fbe7237c4bc5b9849e90452a9731cd3748 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 14:43:59 +0200
Subject: radeonsi: remove unused macro si_pm4_set_state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_state.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index f5726f0c5de..3718e05f3b3 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -234,16 +234,6 @@ struct si_buffer_resources {
 				  si_pm4_block_idx(member)); \
 	} while(0)
 
-#define si_pm4_set_state(sctx, member, value) \
-	do { \
-		if ((sctx)->queued.named.member != (value)) { \
-			si_pm4_free_state(sctx, \
-				(struct si_pm4_state *)(sctx)->queued.named.member, \
-				si_pm4_block_idx(member)); \
-			(sctx)->queued.named.member = (value); \
-		} \
-	} while(0)
-
 /* si_descriptors.c */
 void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
 			struct pipe_resource *buffer,
-- 
cgit v1.2.3


From ba79ff7fa8c202ea9c55c0764118be481beb8a83 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 17:39:03 +0200
Subject: winsys/amdgpu: remove IB padding for SI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SI is unsupported by amdgpu

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

(limited to 'src')

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 3e9fc4023b3..84fc40b923d 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -601,25 +601,13 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
    switch (cs->base.ring_type) {
    case RING_DMA:
       /* pad DMA ring to 8 DWs */
-      if (ws->info.chip_class <= SI) {
-         while (rcs->cdw & 7)
-            OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
-      } else {
-         while (rcs->cdw & 7)
-            OUT_CS(&cs->base, 0x00000000); /* NOP packet */
-      }
+      while (rcs->cdw & 7)
+         OUT_CS(&cs->base, 0x00000000); /* NOP packet */
       break;
    case RING_GFX:
-      /* pad DMA ring to 8 DWs to meet CP fetch alignment requirements
-             * r6xx, requires at least 4 dw alignment to avoid a hw bug.
-             */
-      if (ws->info.chip_class <= SI) {
-         while (rcs->cdw & 7)
-            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
-      } else {
-         while (rcs->cdw & 7)
-            OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
-      }
+      /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements */
+      while (rcs->cdw & 7)
+         OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
       break;
    case RING_UVD:
       while (rcs->cdw & 15)
-- 
cgit v1.2.3


From 8a9ab86ca6d510763bfe8532071c5fcfd977e3c4 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 17:41:23 +0200
Subject: winsys/radeon: add a flag telling how gfx IBs should be padded
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is always false on amdgpu (set by calloc).

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeon/radeon_winsys.h        | 1 +
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c     | 8 ++------
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 7 +++++++
 3 files changed, 10 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index a4a2ae15acd..00accd5b3e6 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -223,6 +223,7 @@ struct radeon_info {
     boolean                     has_uvd;
     uint32_t                    vce_fw_version;
     boolean                     has_userptr;
+    bool                        gfx_ib_pad_with_type2;
 
     uint32_t                    r300_num_gb_pipes;
     uint32_t                    r300_num_z_pipes;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 341af55df8b..2c4f990944c 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -466,14 +466,10 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
         }
         break;
     case RING_GFX:
-        /* pad DMA ring to 8 DWs to meet CP fetch alignment requirements
+        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements
          * r6xx, requires at least 4 dw alignment to avoid a hw bug.
-         * hawaii with old firmware needs type2 nop packet.
-         * accel_working2 with value 3 indicates the new firmware.
          */
-        if (cs->ws->info.chip_class <= SI ||
-            (cs->ws->info.family == CHIP_HAWAII &&
-             cs->ws->accel_working2 < 3)) {
+        if (cs->ws->info.gfx_ib_pad_with_type2) {
             while (rcs->cdw & 7)
                 OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
         } else {
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 384d7280380..5d440eb7696 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -469,6 +469,13 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
         ws->info.cik_macrotile_mode_array_valid = TRUE;
     }
 
+    /* SI and older chips, and Hawaii with old firmware, need gfx IBs padded
+     * with type2 nop packets; accel_working2 >= 3 indicates the new firmware.
+     */
+    ws->info.gfx_ib_pad_with_type2 = ws->info.chip_class <= SI ||
+				     (ws->info.family == CHIP_HAWAII &&
+				      ws->accel_working2 < 3);
+
     return TRUE;
 }
 
-- 
cgit v1.2.3


From df12ddb55dc5c3d1887e7742bb9e2d4d4011f3fd Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 18:39:19 +0200
Subject: radeonsi: add IB2 indirect buffer support for pm4 states
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_pm4.c | 48 ++++++++++++++++++++++++++++++++++-
 src/gallium/drivers/radeonsi/si_pm4.h |  5 ++++
 src/gallium/drivers/radeonsi/sid.h    |  3 ++-
 3 files changed, 54 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c
index b06e92b1fef..b1834afa796 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -107,6 +107,7 @@ void si_pm4_free_state_simple(struct si_pm4_state *state)
 {
 	for (int i = 0; i < state->nbo; ++i)
 		r600_resource_reference(&state->bo[i], NULL);
+	r600_resource_reference(&state->indirect_buffer, NULL);
 	FREE(state);
 }
 
@@ -133,7 +134,19 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
 				      state->bo_usage[i], state->bo_priority[i]);
 	}
 
-	radeon_emit_array(cs, state->pm4, state->ndw);
+	if (!state->indirect_buffer) {
+		radeon_emit_array(cs, state->pm4, state->ndw);
+	} else {
+		struct r600_resource *ib = state->indirect_buffer;
+
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, ib,
+					  RADEON_USAGE_READ, RADEON_PRIO_MIN);
+
+		radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
+		radeon_emit(cs, ib->gpu_address);
+		radeon_emit(cs, (ib->gpu_address >> 32) & 0xffff);
+		radeon_emit(cs, (ib->b.b.width0 >> 2) & 0xfffff);
+	}
 }
 
 void si_pm4_emit_dirty(struct si_context *sctx)
@@ -153,3 +166,36 @@ void si_pm4_reset_emitted(struct si_context *sctx)
 {
 	memset(&sctx->emitted, 0, sizeof(sctx->emitted));
 }
+
+void si_pm4_upload_indirect_buffer(struct si_context *sctx,
+				   struct si_pm4_state *state)
+{
+	struct pipe_screen *screen = sctx->b.b.screen;
+	unsigned aligned_ndw = align(state->ndw, 8);
+
+	/* only supported on CIK and later */
+	if (sctx->b.chip_class < CIK)
+		return;
+
+	assert(state->ndw);
+	assert(aligned_ndw <= SI_PM4_MAX_DW);
+
+	r600_resource_reference(&state->indirect_buffer, NULL);
+	state->indirect_buffer = (struct r600_resource*)
+		pipe_buffer_create(screen, PIPE_BIND_CUSTOM,
+				   PIPE_USAGE_DEFAULT, aligned_ndw * 4);
+	if (!state->indirect_buffer)
+		return;
+
+	/* Pad the IB to 8 DWs to meet CP fetch alignment requirements. */
+	if (sctx->screen->b.info.gfx_ib_pad_with_type2) {
+		for (int i = state->ndw; i < aligned_ndw; i++)
+			state->pm4[i] = 0x80000000; /* type2 nop packet */
+	} else {
+		for (int i = state->ndw; i < aligned_ndw; i++)
+			state->pm4[i] = 0xffff1000; /* type3 nop packet */
+	}
+
+	pipe_buffer_write(&sctx->b.b, &state->indirect_buffer->b.b,
+			  0, aligned_ndw *4, state->pm4);
+}
diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
index efa20628aab..5282d00fe46 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -39,6 +39,9 @@ enum chip_class;
 
 struct si_pm4_state
 {
+	/* optional indirect buffer */
+	struct r600_resource	*indirect_buffer;
+
 	/* PKT3_SET_*_REG handling */
 	unsigned	last_opcode;
 	unsigned	last_reg;
@@ -66,6 +69,8 @@ void si_pm4_add_bo(struct si_pm4_state *state,
 		   struct r600_resource *bo,
 		   enum radeon_bo_usage usage,
 		   enum radeon_bo_priority priority);
+void si_pm4_upload_indirect_buffer(struct si_context *sctx,
+				   struct si_pm4_state *state);
 
 void si_pm4_free_state_simple(struct si_pm4_state *state);
 void si_pm4_free_state(struct si_context *sctx,
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index cd6be73f66c..4bb24572b90 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -94,7 +94,7 @@
 #define PKT3_DRAW_INDEX_IMMD                   0x2E /* not on CIK */
 #define PKT3_NUM_INSTANCES                     0x2F
 #define PKT3_DRAW_INDEX_MULTI_AUTO             0x30
-#define PKT3_INDIRECT_BUFFER                   0x32
+#define PKT3_INDIRECT_BUFFER_SI                0x32 /* not on CIK */
 #define PKT3_STRMOUT_BUFFER_UPDATE             0x34
 #define PKT3_DRAW_INDEX_OFFSET_2               0x35
 #define PKT3_DRAW_PREAMBLE                     0x36 /* new on CIK, required on GFX7.2 and later */
@@ -122,6 +122,7 @@
 #define PKT3_WAIT_REG_MEM                      0x3C
 #define		WAIT_REG_MEM_EQUAL		3
 #define PKT3_MEM_WRITE                         0x3D /* not on CIK */
+#define PKT3_INDIRECT_BUFFER_CIK               0x3F /* new on CIK */
 #define PKT3_COPY_DATA			       0x40
 #define		COPY_DATA_SRC_SEL(x)		((x) & 0xf)
 #define			COPY_DATA_REG		0
-- 
cgit v1.2.3


From 2d8f7d3c153bf73fa7137b89e194d4e0e79d943d Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 30 Aug 2015 18:46:06 +0200
Subject: radeonsi: use an indirect buffer for init_config
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeonsi/si_state.c         | 1 +
 src/gallium/drivers/radeonsi/si_state_shaders.c | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index e31895d6933..1e2f32a8799 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3392,5 +3392,6 @@ static void si_init_config(struct si_context *sctx)
 	si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
 		      RADEON_PRIO_SHADER_DATA);
 
+	si_pm4_upload_indirect_buffer(sctx, pm4);
 	sctx->init_config = pm4;
 }
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 78b68bc3dfe..db45cc181c6 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1087,6 +1087,7 @@ static void si_init_gs_rings(struct si_context *sctx)
 	/* Flush the context to re-emit the init_config state.
 	 * This is done only once in a lifetime of a context.
 	 */
+	si_pm4_upload_indirect_buffer(sctx, sctx->init_config);
 	sctx->b.initial_gfx_cs_size = 0; /* force flush */
 	si_context_gfx_flush(sctx, RADEON_FLUSH_ASYNC, NULL);
 
@@ -1288,6 +1289,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
 	/* Flush the context to re-emit the init_config state.
 	 * This is done only once in a lifetime of a context.
 	 */
+	si_pm4_upload_indirect_buffer(sctx, sctx->init_config);
 	sctx->b.initial_gfx_cs_size = 0; /* force flush */
 	si_context_gfx_flush(sctx, RADEON_FLUSH_ASYNC, NULL);
 
-- 
cgit v1.2.3


From d351bab9c52e96026b2d49cdfd0dc68fdd9a499e Mon Sep 17 00:00:00 2001
From: Edward O'Callaghan <eocallaghan@alterapraxis.com>
Date: Tue, 1 Sep 2015 18:38:34 +1000
Subject: r600g: Simplify out a couple of unnecessary branches

Signed-off-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
---
 src/gallium/drivers/r600/r600_shader.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index b7d7828a9c2..1ab389c830b 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1966,13 +1966,9 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 
 	ctx.nliterals = 0;
 	ctx.literals = NULL;
-	shader->fs_write_all = FALSE;
-	if (ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
-		shader->fs_write_all = TRUE;
 
-	shader->vs_position_window_space = FALSE;
-	if (ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION])
-		shader->vs_position_window_space = TRUE;
+	shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS];
+	shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
 
 	if (shader->vs_as_gs_a)
 		vs_add_primid_output(&ctx, key.vs.prim_id_out);
-- 
cgit v1.2.3


From e34834f059c68fc8cc6fc941bbde6d7a460595a7 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Thu, 25 Sep 2014 17:28:20 -0700
Subject: glapi: Inline x86_64_current_tls().

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mapi/entry_x86-64_tls.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/mapi/entry_x86-64_tls.h b/src/mapi/entry_x86-64_tls.h
index 5c03b045606..38facccc870 100644
--- a/src/mapi/entry_x86-64_tls.h
+++ b/src/mapi/entry_x86-64_tls.h
@@ -46,13 +46,6 @@ __asm__(".text\n"
 
 #ifndef MAPI_MODE_BRIDGE
 
-__asm__("x86_64_current_tls:\n\t"
-	"movq " ENTRY_CURRENT_TABLE "@GOTTPOFF(%rip), %rax\n\t"
-	"ret");
-
-extern unsigned long
-x86_64_current_tls();
-
 #include <string.h>
 #include "u_execmem.h"
 
@@ -90,7 +83,8 @@ entry_generate(int slot)
    char *code;
    mapi_func entry;
 
-   addr = x86_64_current_tls();
+   __asm__("movq " ENTRY_CURRENT_TABLE "@GOTTPOFF(%%rip), %0"
+           : "=r" (addr));
    if ((addr >> 32) != 0xffffffff)
       return NULL;
    addr &= 0xffffffff;
-- 
cgit v1.2.3


From 47b4efc710defee5a2bf81ad7c7626eee4e9aba5 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Mon, 31 Aug 2015 14:55:49 -0700
Subject: mesa: Move gl_vert_attrib from mtypes.h to shader_enums.h

It is a shader enum after all...

Acked-by: Brian Paul <brianp@vmware.com>
---
 src/glsl/shader_enums.h | 108 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/mesa/main/mtypes.h  | 107 -----------------------------------------------
 2 files changed, 108 insertions(+), 107 deletions(-)

(limited to 'src')

diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h
index c6f4678f56f..9bb163f3bb0 100644
--- a/src/glsl/shader_enums.h
+++ b/src/glsl/shader_enums.h
@@ -45,6 +45,114 @@ typedef enum
 
 #define MESA_SHADER_STAGES (MESA_SHADER_COMPUTE + 1)
 
+
+/**
+ * Indexes for vertex program attributes.
+ * GL_NV_vertex_program aliases generic attributes over the conventional
+ * attributes.  In GL_ARB_vertex_program shader the aliasing is optional.
+ * In GL_ARB_vertex_shader / OpenGL 2.0 the aliasing is disallowed (the
+ * generic attributes are distinct/separate).
+ */
+typedef enum
+{
+   VERT_ATTRIB_POS = 0,
+   VERT_ATTRIB_WEIGHT = 1,
+   VERT_ATTRIB_NORMAL = 2,
+   VERT_ATTRIB_COLOR0 = 3,
+   VERT_ATTRIB_COLOR1 = 4,
+   VERT_ATTRIB_FOG = 5,
+   VERT_ATTRIB_COLOR_INDEX = 6,
+   VERT_ATTRIB_EDGEFLAG = 7,
+   VERT_ATTRIB_TEX0 = 8,
+   VERT_ATTRIB_TEX1 = 9,
+   VERT_ATTRIB_TEX2 = 10,
+   VERT_ATTRIB_TEX3 = 11,
+   VERT_ATTRIB_TEX4 = 12,
+   VERT_ATTRIB_TEX5 = 13,
+   VERT_ATTRIB_TEX6 = 14,
+   VERT_ATTRIB_TEX7 = 15,
+   VERT_ATTRIB_POINT_SIZE = 16,
+   VERT_ATTRIB_GENERIC0 = 17,
+   VERT_ATTRIB_GENERIC1 = 18,
+   VERT_ATTRIB_GENERIC2 = 19,
+   VERT_ATTRIB_GENERIC3 = 20,
+   VERT_ATTRIB_GENERIC4 = 21,
+   VERT_ATTRIB_GENERIC5 = 22,
+   VERT_ATTRIB_GENERIC6 = 23,
+   VERT_ATTRIB_GENERIC7 = 24,
+   VERT_ATTRIB_GENERIC8 = 25,
+   VERT_ATTRIB_GENERIC9 = 26,
+   VERT_ATTRIB_GENERIC10 = 27,
+   VERT_ATTRIB_GENERIC11 = 28,
+   VERT_ATTRIB_GENERIC12 = 29,
+   VERT_ATTRIB_GENERIC13 = 30,
+   VERT_ATTRIB_GENERIC14 = 31,
+   VERT_ATTRIB_GENERIC15 = 32,
+   VERT_ATTRIB_MAX = 33
+} gl_vert_attrib;
+
+/**
+ * Symbolic constants to help iterate over
+ * specific blocks of vertex attributes.
+ *
+ * VERT_ATTRIB_FF
+ *   includes all fixed function attributes as well as
+ *   the aliased GL_NV_vertex_program shader attributes.
+ * VERT_ATTRIB_TEX
+ *   include the classic texture coordinate attributes.
+ *   Is a subset of VERT_ATTRIB_FF.
+ * VERT_ATTRIB_GENERIC
+ *   include the OpenGL 2.0+ GLSL generic shader attributes.
+ *   These alias the generic GL_ARB_vertex_shader attributes.
+ */
+#define VERT_ATTRIB_FF(i)           (VERT_ATTRIB_POS + (i))
+#define VERT_ATTRIB_FF_MAX          VERT_ATTRIB_GENERIC0
+
+#define VERT_ATTRIB_TEX(i)          (VERT_ATTRIB_TEX0 + (i))
+#define VERT_ATTRIB_TEX_MAX         MAX_TEXTURE_COORD_UNITS
+
+#define VERT_ATTRIB_GENERIC(i)      (VERT_ATTRIB_GENERIC0 + (i))
+#define VERT_ATTRIB_GENERIC_MAX     MAX_VERTEX_GENERIC_ATTRIBS
+
+/**
+ * Bitflags for vertex attributes.
+ * These are used in bitfields in many places.
+ */
+/*@{*/
+#define VERT_BIT_POS             BITFIELD64_BIT(VERT_ATTRIB_POS)
+#define VERT_BIT_WEIGHT          BITFIELD64_BIT(VERT_ATTRIB_WEIGHT)
+#define VERT_BIT_NORMAL          BITFIELD64_BIT(VERT_ATTRIB_NORMAL)
+#define VERT_BIT_COLOR0          BITFIELD64_BIT(VERT_ATTRIB_COLOR0)
+#define VERT_BIT_COLOR1          BITFIELD64_BIT(VERT_ATTRIB_COLOR1)
+#define VERT_BIT_FOG             BITFIELD64_BIT(VERT_ATTRIB_FOG)
+#define VERT_BIT_COLOR_INDEX     BITFIELD64_BIT(VERT_ATTRIB_COLOR_INDEX)
+#define VERT_BIT_EDGEFLAG        BITFIELD64_BIT(VERT_ATTRIB_EDGEFLAG)
+#define VERT_BIT_TEX0            BITFIELD64_BIT(VERT_ATTRIB_TEX0)
+#define VERT_BIT_TEX1            BITFIELD64_BIT(VERT_ATTRIB_TEX1)
+#define VERT_BIT_TEX2            BITFIELD64_BIT(VERT_ATTRIB_TEX2)
+#define VERT_BIT_TEX3            BITFIELD64_BIT(VERT_ATTRIB_TEX3)
+#define VERT_BIT_TEX4            BITFIELD64_BIT(VERT_ATTRIB_TEX4)
+#define VERT_BIT_TEX5            BITFIELD64_BIT(VERT_ATTRIB_TEX5)
+#define VERT_BIT_TEX6            BITFIELD64_BIT(VERT_ATTRIB_TEX6)
+#define VERT_BIT_TEX7            BITFIELD64_BIT(VERT_ATTRIB_TEX7)
+#define VERT_BIT_POINT_SIZE      BITFIELD64_BIT(VERT_ATTRIB_POINT_SIZE)
+#define VERT_BIT_GENERIC0        BITFIELD64_BIT(VERT_ATTRIB_GENERIC0)
+
+#define VERT_BIT(i)              BITFIELD64_BIT(i)
+#define VERT_BIT_ALL             BITFIELD64_RANGE(0, VERT_ATTRIB_MAX)
+
+#define VERT_BIT_FF(i)           VERT_BIT(i)
+#define VERT_BIT_FF_ALL          BITFIELD64_RANGE(0, VERT_ATTRIB_FF_MAX)
+#define VERT_BIT_TEX(i)          VERT_BIT(VERT_ATTRIB_TEX(i))
+#define VERT_BIT_TEX_ALL         \
+   BITFIELD64_RANGE(VERT_ATTRIB_TEX(0), VERT_ATTRIB_TEX_MAX)
+
+#define VERT_BIT_GENERIC(i)      VERT_BIT(VERT_ATTRIB_GENERIC(i))
+#define VERT_BIT_GENERIC_ALL     \
+   BITFIELD64_RANGE(VERT_ATTRIB_GENERIC(0), VERT_ATTRIB_GENERIC_MAX)
+/*@}*/
+
+
 /**
  * Indexes for vertex shader outputs, geometry shader inputs/outputs, and
  * fragment shader inputs.
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index a172952c1fb..85a9f5dc5f1 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -94,113 +94,6 @@ struct vbo_context;
 #define PRIM_OUTSIDE_BEGIN_END   (PRIM_MAX + 1)
 #define PRIM_UNKNOWN             (PRIM_MAX + 2)
 
-/**
- * Indexes for vertex program attributes.
- * GL_NV_vertex_program aliases generic attributes over the conventional
- * attributes.  In GL_ARB_vertex_program shader the aliasing is optional.
- * In GL_ARB_vertex_shader / OpenGL 2.0 the aliasing is disallowed (the
- * generic attributes are distinct/separate).
- */
-typedef enum
-{
-   VERT_ATTRIB_POS = 0,
-   VERT_ATTRIB_WEIGHT = 1,
-   VERT_ATTRIB_NORMAL = 2,
-   VERT_ATTRIB_COLOR0 = 3,
-   VERT_ATTRIB_COLOR1 = 4,
-   VERT_ATTRIB_FOG = 5,
-   VERT_ATTRIB_COLOR_INDEX = 6,
-   VERT_ATTRIB_EDGEFLAG = 7,
-   VERT_ATTRIB_TEX0 = 8,
-   VERT_ATTRIB_TEX1 = 9,
-   VERT_ATTRIB_TEX2 = 10,
-   VERT_ATTRIB_TEX3 = 11,
-   VERT_ATTRIB_TEX4 = 12,
-   VERT_ATTRIB_TEX5 = 13,
-   VERT_ATTRIB_TEX6 = 14,
-   VERT_ATTRIB_TEX7 = 15,
-   VERT_ATTRIB_POINT_SIZE = 16,
-   VERT_ATTRIB_GENERIC0 = 17,
-   VERT_ATTRIB_GENERIC1 = 18,
-   VERT_ATTRIB_GENERIC2 = 19,
-   VERT_ATTRIB_GENERIC3 = 20,
-   VERT_ATTRIB_GENERIC4 = 21,
-   VERT_ATTRIB_GENERIC5 = 22,
-   VERT_ATTRIB_GENERIC6 = 23,
-   VERT_ATTRIB_GENERIC7 = 24,
-   VERT_ATTRIB_GENERIC8 = 25,
-   VERT_ATTRIB_GENERIC9 = 26,
-   VERT_ATTRIB_GENERIC10 = 27,
-   VERT_ATTRIB_GENERIC11 = 28,
-   VERT_ATTRIB_GENERIC12 = 29,
-   VERT_ATTRIB_GENERIC13 = 30,
-   VERT_ATTRIB_GENERIC14 = 31,
-   VERT_ATTRIB_GENERIC15 = 32,
-   VERT_ATTRIB_MAX = 33
-} gl_vert_attrib;
-
-/**
- * Symbolic constats to help iterating over
- * specific blocks of vertex attributes.
- *
- * VERT_ATTRIB_FF
- *   includes all fixed function attributes as well as
- *   the aliased GL_NV_vertex_program shader attributes.
- * VERT_ATTRIB_TEX
- *   include the classic texture coordinate attributes.
- *   Is a subset of VERT_ATTRIB_FF.
- * VERT_ATTRIB_GENERIC
- *   include the OpenGL 2.0+ GLSL generic shader attributes.
- *   These alias the generic GL_ARB_vertex_shader attributes.
- */
-#define VERT_ATTRIB_FF(i)           (VERT_ATTRIB_POS + (i))
-#define VERT_ATTRIB_FF_MAX          VERT_ATTRIB_GENERIC0
-
-#define VERT_ATTRIB_TEX(i)          (VERT_ATTRIB_TEX0 + (i))
-#define VERT_ATTRIB_TEX_MAX         MAX_TEXTURE_COORD_UNITS
-
-#define VERT_ATTRIB_GENERIC(i)      (VERT_ATTRIB_GENERIC0 + (i))
-#define VERT_ATTRIB_GENERIC_MAX     MAX_VERTEX_GENERIC_ATTRIBS
-
-/**
- * Bitflags for vertex attributes.
- * These are used in bitfields in many places.
- */
-/*@{*/
-#define VERT_BIT_POS             BITFIELD64_BIT(VERT_ATTRIB_POS)
-#define VERT_BIT_WEIGHT          BITFIELD64_BIT(VERT_ATTRIB_WEIGHT)
-#define VERT_BIT_NORMAL          BITFIELD64_BIT(VERT_ATTRIB_NORMAL)
-#define VERT_BIT_COLOR0          BITFIELD64_BIT(VERT_ATTRIB_COLOR0)
-#define VERT_BIT_COLOR1          BITFIELD64_BIT(VERT_ATTRIB_COLOR1)
-#define VERT_BIT_FOG             BITFIELD64_BIT(VERT_ATTRIB_FOG)
-#define VERT_BIT_COLOR_INDEX     BITFIELD64_BIT(VERT_ATTRIB_COLOR_INDEX)
-#define VERT_BIT_EDGEFLAG        BITFIELD64_BIT(VERT_ATTRIB_EDGEFLAG)
-#define VERT_BIT_TEX0            BITFIELD64_BIT(VERT_ATTRIB_TEX0)
-#define VERT_BIT_TEX1            BITFIELD64_BIT(VERT_ATTRIB_TEX1)
-#define VERT_BIT_TEX2            BITFIELD64_BIT(VERT_ATTRIB_TEX2)
-#define VERT_BIT_TEX3            BITFIELD64_BIT(VERT_ATTRIB_TEX3)
-#define VERT_BIT_TEX4            BITFIELD64_BIT(VERT_ATTRIB_TEX4)
-#define VERT_BIT_TEX5            BITFIELD64_BIT(VERT_ATTRIB_TEX5)
-#define VERT_BIT_TEX6            BITFIELD64_BIT(VERT_ATTRIB_TEX6)
-#define VERT_BIT_TEX7            BITFIELD64_BIT(VERT_ATTRIB_TEX7)
-#define VERT_BIT_POINT_SIZE      BITFIELD64_BIT(VERT_ATTRIB_POINT_SIZE)
-#define VERT_BIT_GENERIC0        BITFIELD64_BIT(VERT_ATTRIB_GENERIC0)
-
-#define VERT_BIT(i)              BITFIELD64_BIT(i)
-#define VERT_BIT_ALL             BITFIELD64_RANGE(0, VERT_ATTRIB_MAX)
-
-#define VERT_BIT_FF(i)           VERT_BIT(i)
-#define VERT_BIT_FF_ALL          BITFIELD64_RANGE(0, VERT_ATTRIB_FF_MAX)
-#define VERT_BIT_TEX(i)          VERT_BIT(VERT_ATTRIB_TEX(i))
-#define VERT_BIT_TEX_ALL         \
-   BITFIELD64_RANGE(VERT_ATTRIB_TEX(0), VERT_ATTRIB_TEX_MAX)
-
-#define VERT_BIT_GENERIC(i)      VERT_BIT(VERT_ATTRIB_GENERIC(i))
-#define VERT_BIT_GENERIC_ALL     \
-   BITFIELD64_RANGE(VERT_ATTRIB_GENERIC(0), VERT_ATTRIB_GENERIC_MAX)
-/*@}*/
-
-
 #define VARYING_SLOT_MAX	(VARYING_SLOT_VAR0 + MAX_VARYING)
 #define VARYING_SLOT_PATCH0	(VARYING_SLOT_MAX)
 #define VARYING_SLOT_TESS_MAX	(VARYING_SLOT_PATCH0 + MAX_VARYING)
-- 
cgit v1.2.3


From 84dad65088147fa8c177c3e6aea20c8ae0868fde Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 1 Sep 2015 16:29:17 -0600
Subject: util: added util_set_index_buffer()

Like util_set_vertex_buffers_count(), this basically just copies a
pipe_index_buffer object, taking care of refcounting.
---
 src/gallium/auxiliary/util/u_helpers.c | 15 +++++++++++++++
 src/gallium/auxiliary/util/u_helpers.h |  3 +++
 2 files changed, 18 insertions(+)

(limited to 'src')

diff --git a/src/gallium/auxiliary/util/u_helpers.c b/src/gallium/auxiliary/util/u_helpers.c
index ac1edcdbb82..09619c1c9e9 100644
--- a/src/gallium/auxiliary/util/u_helpers.c
+++ b/src/gallium/auxiliary/util/u_helpers.c
@@ -88,3 +88,18 @@ void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
 
    *dst_count = util_last_bit(enabled_buffers);
 }
+
+
+void
+util_set_index_buffer(struct pipe_index_buffer *dst,
+                      const struct pipe_index_buffer *src)
+{
+   if (src) {
+      pipe_resource_reference(&dst->buffer, src->buffer);
+      memcpy(dst, src, sizeof(*dst));
+   }
+   else {
+      pipe_resource_reference(&dst->buffer, NULL);
+      memset(dst, 0, sizeof(*dst));
+   }
+}
diff --git a/src/gallium/auxiliary/util/u_helpers.h b/src/gallium/auxiliary/util/u_helpers.h
index 09c7116fa6d..f25f2807fe5 100644
--- a/src/gallium/auxiliary/util/u_helpers.h
+++ b/src/gallium/auxiliary/util/u_helpers.h
@@ -44,6 +44,9 @@ void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
                                    const struct pipe_vertex_buffer *src,
                                    unsigned start_slot, unsigned count);
 
+void util_set_index_buffer(struct pipe_index_buffer *dst,
+                           const struct pipe_index_buffer *src);
+
 #ifdef __cplusplus
 }
 #endif
-- 
cgit v1.2.3


From 56852e925e262c9a10454ed59a42ce12fb9c801c Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 1 Sep 2015 16:29:17 -0600
Subject: util: added ffsll() function

v2: fix errant _GNU_SOURCE test, per Matt Turner.

Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/gallium/auxiliary/util/u_math.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'src')

diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 7175d1d4ee8..e92f83a8109 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -389,6 +389,26 @@ unsigned ffs( unsigned u )
 #define ffs __builtin_ffs
 #endif
 
+#ifdef HAVE___BUILTIN_FFSLL
+#define ffsll __builtin_ffsll
+#else
+static inline int
+ffsll(long long int val)
+{
+   int bit;
+
+   bit = ffs((unsigned) (val & 0xffffffff));
+   if (bit != 0)
+      return bit;
+
+   bit = ffs((unsigned) (val >> 32));
+   if (bit != 0)
+      return 32 + bit;
+
+   return 0;
+}
+#endif
+
 #endif /* FFS_DEFINED */
 
 /**
-- 
cgit v1.2.3


From bd883c90708207c6848b7f7b263ce1ef4e6a475b Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 1 Sep 2015 16:29:17 -0600
Subject: tgsi: add negate parameter to tgsi_transform_kill_inst()

Reviewed-by: Charmaine Lee <charmainel@vmware.com>
---
 src/gallium/auxiliary/draw/draw_pipe_aapoint.c  | 3 ++-
 src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 3 ++-
 src/gallium/auxiliary/tgsi/tgsi_transform.h     | 5 +++--
 src/gallium/auxiliary/util/u_pstipple.c         | 2 +-
 4 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index 3918923296d..063e36828d7 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -240,7 +240,8 @@ aa_transform_prolog(struct tgsi_transform_context *ctx)
                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W);
 
    /* KILL_IF -tmp0.yyyy;   # if -tmp0.y < 0, KILL */
-   tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y);
+   tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0,
+                            TGSI_SWIZZLE_Y, TRUE);
 
    /* compute coverage factor = (1-d)/(1-k) */
 
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 186b4cb4935..a51e91fe931 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -280,7 +280,8 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
 
    /* KILL_IF -texTemp.wwww;   # if -texTemp < 0, KILL fragment */
    tgsi_transform_kill_inst(ctx,
-                            TGSI_FILE_TEMPORARY, pctx->texTemp, TGSI_SWIZZLE_W);
+                            TGSI_FILE_TEMPORARY, pctx->texTemp,
+                            TGSI_SWIZZLE_W, TRUE);
 }
 
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h
index ceb7c2e0f46..9b68f6a5feb 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h
@@ -399,7 +399,8 @@ static inline void
 tgsi_transform_kill_inst(struct tgsi_transform_context *ctx,
                          unsigned src_file,
                          unsigned src_index,
-                         unsigned src_swizzle)
+                         unsigned src_swizzle,
+                         boolean negate)
 {
    struct tgsi_full_instruction inst;
 
@@ -413,7 +414,7 @@ tgsi_transform_kill_inst(struct tgsi_transform_context *ctx,
    inst.Src[0].Register.SwizzleY =
    inst.Src[0].Register.SwizzleZ =
    inst.Src[0].Register.SwizzleW = src_swizzle;
-   inst.Src[0].Register.Negate = 1;
+   inst.Src[0].Register.Negate = negate;
 
    ctx->emit_instruction(ctx, &inst);
 }
diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c
index 1f65672221f..0bb46ff8dd1 100644
--- a/src/gallium/auxiliary/util/u_pstipple.c
+++ b/src/gallium/auxiliary/util/u_pstipple.c
@@ -339,7 +339,7 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
    /* KILL_IF -texTemp;   # if -texTemp < 0, kill fragment */
    tgsi_transform_kill_inst(ctx,
                             TGSI_FILE_TEMPORARY, texTemp,
-                            TGSI_SWIZZLE_W);
+                            TGSI_SWIZZLE_W, TRUE);
 }
 
 
-- 
cgit v1.2.3


From f8da1e14599d96ced92a548d56e9110a8a54e772 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 1 Sep 2015 16:29:17 -0600
Subject: tgsi: added tgsi_is_shadow_target() helper

---
 src/gallium/auxiliary/tgsi/tgsi_util.c | 18 ++++++++++++++++++
 src/gallium/auxiliary/tgsi/tgsi_util.h |  3 +++
 2 files changed, 21 insertions(+)

(limited to 'src')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index e5b8427a030..653e650dc4c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -462,3 +462,21 @@ tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample)
 
    return dim;
 }
+
+
+boolean
+tgsi_is_shadow_target(unsigned target)
+{
+   switch (target) {
+   case TGSI_TEXTURE_SHADOW1D:
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
+   case TGSI_TEXTURE_SHADOW1D_ARRAY:
+   case TGSI_TEXTURE_SHADOW2D_ARRAY:
+   case TGSI_TEXTURE_SHADOWCUBE:
+   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+      return TRUE;
+   default:
+      return FALSE;
+   }
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h
index deb1ecc66f9..6175d95fcd6 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.h
@@ -82,6 +82,9 @@ tgsi_util_get_src_from_ind(const struct tgsi_ind_register *reg);
 int
 tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample);
 
+boolean
+tgsi_is_shadow_target(unsigned target);
+
 #if defined __cplusplus
 }
 #endif
-- 
cgit v1.2.3


From 17756876378137003443f9eebb51002564b2ac2a Mon Sep 17 00:00:00 2001
From: Charmaine Lee <charmainel@vmware.com>
Date: Tue, 1 Sep 2015 16:29:17 -0600
Subject: tgsi: add some more helper functions

This patch adds some more helper functions such as
   . tgsi_transform_temps_decl
   . tgsi_transform_output_decl
   . tgsi_transform_dst_reg
   . tgsi_transform_src_reg

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/tgsi/tgsi_transform.h | 69 +++++++++++++++++++++++++++--
 1 file changed, 65 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h
index 9b68f6a5feb..3bd512b6f3e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h
@@ -95,19 +95,38 @@ struct tgsi_transform_context
  * Helper for emitting temporary register declarations.
  */
 static inline void
-tgsi_transform_temp_decl(struct tgsi_transform_context *ctx,
-                         unsigned index)
+tgsi_transform_temps_decl(struct tgsi_transform_context *ctx,
+                          unsigned firstIdx, unsigned lastIdx)
 {
    struct tgsi_full_declaration decl;
 
    decl = tgsi_default_full_declaration();
    decl.Declaration.File = TGSI_FILE_TEMPORARY;
-   decl.Range.First =
-   decl.Range.Last = index;
+   decl.Range.First = firstIdx;
+   decl.Range.Last = lastIdx;
    ctx->emit_declaration(ctx, &decl);
 }
 
+static inline void
+tgsi_transform_temp_decl(struct tgsi_transform_context *ctx,
+                         unsigned index)
+{
+   tgsi_transform_temps_decl(ctx, index, index);
+}
 
+static inline void
+tgsi_transform_const_decl(struct tgsi_transform_context *ctx,
+                          unsigned firstIdx, unsigned lastIdx)
+{
+   struct tgsi_full_declaration decl;
+
+   decl = tgsi_default_full_declaration();
+   decl.Declaration.File = TGSI_FILE_CONSTANT;
+   decl.Range.First = firstIdx;
+   decl.Range.Last = lastIdx;
+   ctx->emit_declaration(ctx, &decl);
+}
+ 
 static inline void
 tgsi_transform_input_decl(struct tgsi_transform_context *ctx,
                           unsigned index,
@@ -129,6 +148,26 @@ tgsi_transform_input_decl(struct tgsi_transform_context *ctx,
    ctx->emit_declaration(ctx, &decl);
 }
 
+static inline void
+tgsi_transform_output_decl(struct tgsi_transform_context *ctx,
+                          unsigned index,
+                          unsigned sem_name, unsigned sem_index,
+                          unsigned interp)
+{
+   struct tgsi_full_declaration decl;
+
+   decl = tgsi_default_full_declaration();
+   decl.Declaration.File = TGSI_FILE_OUTPUT;
+   decl.Declaration.Interpolate = 1;
+   decl.Declaration.Semantic = 1;
+   decl.Semantic.Name = sem_name;
+   decl.Semantic.Index = sem_index;
+   decl.Range.First =
+   decl.Range.Last = index;
+   decl.Interp.Interpolate = interp;
+
+   ctx->emit_declaration(ctx, &decl);
+}
 
 static inline void
 tgsi_transform_sampler_decl(struct tgsi_transform_context *ctx,
@@ -182,6 +221,28 @@ tgsi_transform_immediate_decl(struct tgsi_transform_context *ctx,
    ctx->emit_immediate(ctx, &immed);
 }
 
+static inline void
+tgsi_transform_dst_reg(struct tgsi_full_dst_register *reg,
+                       unsigned file, unsigned index, unsigned writemask)
+{
+   reg->Register.File = file;
+   reg->Register.Index = index;
+   reg->Register.WriteMask = writemask;
+}
+
+static inline void
+tgsi_transform_src_reg(struct tgsi_full_src_register *reg,
+                       unsigned file, unsigned index, 
+                       unsigned swizzleX, unsigned swizzleY,
+                       unsigned swizzleZ, unsigned swizzleW)
+{
+   reg->Register.File = file;
+   reg->Register.Index = index;
+   reg->Register.SwizzleX = swizzleX; 
+   reg->Register.SwizzleY = swizzleY; 
+   reg->Register.SwizzleZ = swizzleZ; 
+   reg->Register.SwizzleW = swizzleW; 
+}
 
 /**
  * Helper for emitting 1-operand instructions.
-- 
cgit v1.2.3


From fec4f5de67b22a7048266fb7e57f49fe6fc3744a Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Tue, 1 Sep 2015 16:29:17 -0600
Subject: gallium/util: return FALSE for intersection if there's empty
 rectangles

It isn't really obvious whether the intersection test should take empty
rectangles into account or whether the caller should do it. It looks like
most callers verified one of the rects but not the other; however, now that
an empty rect is correctly returned when there is no intersection, that other
rect could actually be empty as well, leading to more bugs. Hence just verify
both rects for emptiness in the intersection test itself, which makes the code
simpler in the caller (though it will be slower if the caller already knows
the rectangles are non-empty).

Reviewed-by: Zack Rusin <zackr@vmware.com>
---
 src/gallium/auxiliary/util/u_rect.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h
index b26f671f313..dea1e1ecff3 100644
--- a/src/gallium/auxiliary/util/u_rect.h
+++ b/src/gallium/auxiliary/util/u_rect.h
@@ -42,6 +42,7 @@ struct u_rect {
 };
 
 /* Do two rectangles intersect?
+ * Note: empty rectangles are valid as inputs (and never intersect).
  */
 static inline boolean
 u_rect_test_intersection(const struct u_rect *a,
@@ -50,7 +51,11 @@ u_rect_test_intersection(const struct u_rect *a,
    return (!(a->x1 < b->x0 ||
              b->x1 < a->x0 ||
              a->y1 < b->y0 ||
-             b->y1 < a->y0));
+             b->y1 < a->y0 ||
+             a->x1 < a->x0 ||
+             a->y1 < a->y0 ||
+             b->x1 < b->x0 ||
+             b->y1 < b->y0));
 }
 
 /* Find the intersection of two rectangles known to intersect.
-- 
cgit v1.2.3


From 17542086174ed1c2ea47f3b9b5917ce478442819 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Tue, 1 Sep 2015 16:29:17 -0600
Subject: gallium/util: fix returning empty box for rectangle intersection

These functions deal with inclusive coordinates, hence a 0/0/0/0 rect
returned when there's no intersection doesn't actually represent an empty
rectangle. Hence return 0/-1/0/-1 instead.
This fixes some problems in llvmpipe with empty scissor rects (which up
to now didn't really matter because while the intersect test returned the
wrong result all pixels were scissored away later anyway).
---
 src/gallium/auxiliary/util/u_rect.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h
index dea1e1ecff3..221d9188730 100644
--- a/src/gallium/auxiliary/util/u_rect.h
+++ b/src/gallium/auxiliary/util/u_rect.h
@@ -87,7 +87,12 @@ u_rect_possible_intersection(const struct u_rect *a,
       u_rect_find_intersection(a,b);
    }
    else {
-      b->x0 = b->x1 = b->y0 = b->y1 = 0;
+      /*
+       * Note the u_rect_xx tests deal with inclusive coordinates
+       * hence all-zero would not be an empty box.
+       */
+      b->x0 = b->y0 = 0;
+      b->x1 = b->y1 = -1;
    }
 }
 
-- 
cgit v1.2.3


From 0c4b6215909f140305dfa65ca6b58e8119e229b8 Mon Sep 17 00:00:00 2001
From: Charmaine Lee <charmainel@vmware.com>
Date: Tue, 1 Sep 2015 16:29:17 -0600
Subject: gallium/util: add a utility to create geometry passthrough shader

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/util/u_simple_shaders.c | 51 +++++++++++++++++++++++++++
 src/gallium/auxiliary/util/u_simple_shaders.h |  6 ++++
 2 files changed, 57 insertions(+)

(limited to 'src')

diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c
index 6d29cab9207..6eed33769dd 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -831,3 +831,54 @@ util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe,
 
    return ureg_create_shader_and_destroy(ureg, pipe);
 }
+
+void *
+util_make_geometry_passthrough_shader(struct pipe_context *pipe,
+                                      uint num_attribs,
+                                      const ubyte *semantic_names,
+                                      const ubyte *semantic_indexes)
+{
+   static const unsigned zero[4] = {0, 0, 0, 0};
+
+   struct ureg_program *ureg;
+   struct ureg_dst dst[PIPE_MAX_SHADER_OUTPUTS];
+   struct ureg_src src[PIPE_MAX_SHADER_INPUTS];
+   struct ureg_src imm;
+
+   unsigned i;
+
+   ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY);
+   if (ureg == NULL)
+      return NULL;
+
+   ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_POINTS);
+   ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_POINTS);
+   ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 1);
+   ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS, 1);
+   imm = ureg_DECL_immediate_uint(ureg, zero, 4);
+
+   /**
+    * Loop over all the attribs and declare the corresponding
+    * declarations in the geometry shader
+    */
+   for (i = 0; i < num_attribs; i++) {
+      src[i] = ureg_DECL_input(ureg, semantic_names[i],
+                               semantic_indexes[i], 0, 1);
+      src[i] = ureg_src_dimension(src[i], 0);
+      dst[i] = ureg_DECL_output(ureg, semantic_names[i], semantic_indexes[i]);
+   }
+
+   /* MOV dst[i] src[i] */
+   for (i = 0; i < num_attribs; i++) {
+      ureg_MOV(ureg, dst[i], src[i]);
+   }
+
+   /* EMIT IMM[0] */
+   ureg_insn(ureg, TGSI_OPCODE_EMIT, NULL, 0, &imm, 1);
+
+   /* END */
+   ureg_END(ureg);
+
+   return ureg_create_shader_and_destroy(ureg, pipe);
+}
+
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h
index 08d798ef541..cda0f2e86ec 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.h
+++ b/src/gallium/auxiliary/util/u_simple_shaders.h
@@ -146,6 +146,12 @@ util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe,
                                    unsigned tgsi_tex, unsigned nr_samples,
                                    enum tgsi_return_type stype);
 
+extern void *
+util_make_geometry_passthrough_shader(struct pipe_context *pipe,
+                                      uint num_attribs,
+                                      const ubyte *semantic_names,
+                                      const ubyte *semantic_indexes);
+
 #ifdef __cplusplus
 }
 #endif
-- 
cgit v1.2.3


From da33c2434b1dc4ca24a131a9625166278333b7d3 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 1 Sep 2015 16:29:17 -0600
Subject: util: add util_strcasecmp() wrapper

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
---
 src/gallium/auxiliary/util/u_string.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src')

diff --git a/src/gallium/auxiliary/util/u_string.h b/src/gallium/auxiliary/util/u_string.h
index f7ab09c8f1c..adcdf200ec8 100644
--- a/src/gallium/auxiliary/util/u_string.h
+++ b/src/gallium/auxiliary/util/u_string.h
@@ -199,6 +199,8 @@ util_memmove(void *dest, const void *src, size_t n)
 }
 
 
+#define util_strcasecmp stricmp
+
 #else
 
 #define util_vsnprintf vsnprintf
@@ -211,6 +213,7 @@ util_memmove(void *dest, const void *src, size_t n)
 #define util_strncat strncat
 #define util_strstr strstr
 #define util_memmove memmove
+#define util_strcasecmp strcasecmp
 
 #endif
 
-- 
cgit v1.2.3


From a65bdf5f47689b52ec97181ec870b089872b508e Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 1 Sep 2015 16:29:17 -0600
Subject: tgsi: add new tgsi_two_side.c utility code

This could be used by any driver where the device doesn't directly
support two-sided lighting.  This code modifies a fragment shader
to accept back-face colors and choose between the front/back colors
depending on the triangle's front-face sign.
---
 src/gallium/auxiliary/Makefile.sources     |   2 +
 src/gallium/auxiliary/tgsi/tgsi_two_side.c | 228 +++++++++++++++++++++++++++++
 src/gallium/auxiliary/tgsi/tgsi_two_side.h |  34 +++++
 3 files changed, 264 insertions(+)
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_two_side.c
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_two_side.h

(limited to 'src')

diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index 3616d885b47..d5986b45e06 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -154,6 +154,8 @@ C_SOURCES := \
 	tgsi/tgsi_text.h \
 	tgsi/tgsi_transform.c \
 	tgsi/tgsi_transform.h \
+	tgsi/tgsi_two_side.c \
+	tgsi/tgsi_two_side.h \
 	tgsi/tgsi_ureg.c \
 	tgsi/tgsi_ureg.h \
 	tgsi/tgsi_util.c \
diff --git a/src/gallium/auxiliary/tgsi/tgsi_two_side.c b/src/gallium/auxiliary/tgsi/tgsi_two_side.c
new file mode 100644
index 00000000000..2406e2876f3
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_two_side.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright 2013 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * This utility transforms fragment shaders to facilitate two-sided lighting.
+ *
+ * Basically, if the FS has any color inputs (TGSI_SEMANTIC_COLOR) we'll:
+ * 1. create corresponding back-color inputs (TGSI_SEMANTIC_BCOLOR)
+ * 2. use the FACE register to choose between front/back colors and put the
+ *    selected color in new temp regs.
+ * 3. replace reads of the original color inputs with the new temp regs.
+ *
+ * Then, the driver just needs to link the VS front/back output colors to
+ * the FS front/back input colors.
+ */
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "tgsi_info.h"
+#include "tgsi_two_side.h"
+#include "tgsi_transform.h"
+
+
+#define INVALID_INDEX 9999
+
+
+struct two_side_transform_context
+{
+   struct tgsi_transform_context base;
+   uint num_temps;
+   uint num_inputs;
+   uint face_input;           /**< index of the FACE input */
+   uint front_color_input[2]; /**< INPUT regs */
+   uint front_color_interp[2];/**< TGSI_INTERPOLATE_x */
+   uint back_color_input[2];  /**< INPUT regs */
+   uint new_colors[2];        /**< TEMP regs */
+};
+
+
+static inline struct two_side_transform_context *
+two_side_transform_context(struct tgsi_transform_context *ctx)
+{
+   return (struct two_side_transform_context *) ctx;
+}
+
+
+static void
+xform_decl(struct tgsi_transform_context *ctx,
+           struct tgsi_full_declaration *decl)
+{
+   struct two_side_transform_context *ts = two_side_transform_context(ctx);
+
+   if (decl->Declaration.File == TGSI_FILE_INPUT) {
+      if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR) {
+         /* found a front color */
+         assert(decl->Semantic.Index < 2);
+         ts->front_color_input[decl->Semantic.Index] = decl->Range.First;
+         ts->front_color_interp[decl->Semantic.Index] = decl->Interp.Interpolate;
+      }
+      else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
+         ts->face_input = decl->Range.First;
+      }
+      ts->num_inputs = MAX2(ts->num_inputs, decl->Range.Last + 1);
+   }
+   else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+      ts->num_temps = MAX2(ts->num_temps, decl->Range.Last + 1);
+   }
+
+   ctx->emit_declaration(ctx, decl);
+}
+
+
+static void
+emit_prolog(struct tgsi_transform_context *ctx)
+{
+   struct two_side_transform_context *ts = two_side_transform_context(ctx);
+   struct tgsi_full_declaration decl;
+   struct tgsi_full_instruction inst;
+   uint num_colors = 0;
+   uint i;
+
+   /* Declare 0, 1 or 2 new BCOLOR inputs */
+   for (i = 0; i < 2; i++) {
+      if (ts->front_color_input[i] != INVALID_INDEX) {
+         decl = tgsi_default_full_declaration();
+         decl.Declaration.File = TGSI_FILE_INPUT;
+         decl.Declaration.Interpolate = 1;
+         decl.Declaration.Semantic = 1;
+         decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
+         decl.Semantic.Index = i;
+         decl.Range.First = decl.Range.Last = ts->num_inputs++;
+         decl.Interp.Interpolate = ts->front_color_interp[i];
+         ctx->emit_declaration(ctx, &decl);
+         ts->back_color_input[i] = decl.Range.First;
+         num_colors++;
+      }
+   }
+
+   if (num_colors > 0) {
+      /* Declare 1 or 2 temp registers */
+      decl = tgsi_default_full_declaration();
+      decl.Declaration.File = TGSI_FILE_TEMPORARY;
+      decl.Range.First = ts->num_temps;
+      decl.Range.Last = ts->num_temps + num_colors - 1;
+      ctx->emit_declaration(ctx, &decl);
+      ts->new_colors[0] = ts->num_temps;
+      ts->new_colors[1] = ts->num_temps + 1;
+
+      if (ts->face_input == INVALID_INDEX) {
+         /* declare FACE INPUT register */
+         decl = tgsi_default_full_declaration();
+         decl.Declaration.File = TGSI_FILE_INPUT;
+         decl.Declaration.Semantic = 1;
+         decl.Semantic.Name = TGSI_SEMANTIC_FACE;
+         decl.Semantic.Index = 0;
+         decl.Range.First = decl.Range.Last = ts->num_inputs++;
+         ctx->emit_declaration(ctx, &decl);
+         ts->face_input = decl.Range.First;
+      }
+
+      /* CMP temp[c0], face, bcolor[c0], fcolor[c0]
+       * temp[c0] = face < 0.0 ? bcolor[c0] : fcolor[c0]
+       */
+      for (i = 0; i < 2; i++) {
+         if (ts->front_color_input[i] != INVALID_INDEX) {
+            inst = tgsi_default_full_instruction();
+            inst.Instruction.Opcode = TGSI_OPCODE_CMP;
+            inst.Instruction.NumDstRegs = 1;
+            inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+            inst.Dst[0].Register.Index = ts->new_colors[i];
+            inst.Instruction.NumSrcRegs = 3;
+            inst.Src[0].Register.File = TGSI_FILE_INPUT;
+            inst.Src[0].Register.Index = ts->face_input;
+            inst.Src[1].Register.File = TGSI_FILE_INPUT;
+            inst.Src[1].Register.Index = ts->back_color_input[i];
+            inst.Src[2].Register.File = TGSI_FILE_INPUT;
+            inst.Src[2].Register.Index = ts->front_color_input[i];
+
+            ctx->emit_instruction(ctx, &inst);
+         }
+      }
+   }
+}
+
+
+static void
+xform_inst(struct tgsi_transform_context *ctx,
+           struct tgsi_full_instruction *inst)
+{
+   struct two_side_transform_context *ts = two_side_transform_context(ctx);
+   const struct tgsi_opcode_info *info =
+      tgsi_get_opcode_info(inst->Instruction.Opcode);
+   uint i, j;
+
+   /* Look for src regs which reference the input color and replace
+    * them with the temp color.
+    */
+   for (i = 0; i < info->num_src; i++) {
+      if (inst->Src[i].Register.File == TGSI_FILE_INPUT) {
+         for (j = 0; j < 2; j++) {
+            if (inst->Src[i].Register.Index == ts->front_color_input[j]) {
+               /* replace color input with temp reg */
+               inst->Src[i].Register.File = TGSI_FILE_TEMPORARY;
+               inst->Src[i].Register.Index = ts->new_colors[j];
+               break;
+            }
+         }
+      }
+   }
+
+   ctx->emit_instruction(ctx, inst);
+}
+
+
+struct tgsi_token *
+tgsi_add_two_side(const struct tgsi_token *tokens_in)
+{
+   struct two_side_transform_context transform;
+   const uint num_new_tokens = 100; /* should be enough */
+   const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens;
+   struct tgsi_token *new_tokens;
+
+   /* setup transformation context */
+   memset(&transform, 0, sizeof(transform));
+   transform.base.transform_declaration = xform_decl;
+   transform.base.transform_instruction = xform_inst;
+   transform.base.prolog = emit_prolog;
+   transform.face_input = INVALID_INDEX;
+   transform.front_color_input[0] = INVALID_INDEX;
+   transform.front_color_input[1] = INVALID_INDEX;
+   transform.front_color_interp[0] = TGSI_INTERPOLATE_COLOR;
+   transform.front_color_interp[1] = TGSI_INTERPOLATE_COLOR;
+   transform.back_color_input[0] = INVALID_INDEX;
+   transform.back_color_input[1] = INVALID_INDEX;
+
+   /* allocate new tokens buffer */
+   new_tokens = tgsi_alloc_tokens(new_len);
+   if (!new_tokens)
+      return NULL;
+
+   /* transform the shader */
+   tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base);
+
+   return new_tokens;
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_two_side.h b/src/gallium/auxiliary/tgsi/tgsi_two_side.h
new file mode 100644
index 00000000000..bac239e41af
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_two_side.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2013 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TGSI_TWO_SIDE_H
+#define TGSI_TWO_SIDE_H
+
+struct tgsi_token;
+
+struct tgsi_token *
+tgsi_add_two_side(const struct tgsi_token *tokens_in);
+
+#endif /* TGSI_TWO_SIDE_H */
-- 
cgit v1.2.3


From bca238d4f55dd0a9325132c73b1acade51017ba3 Mon Sep 17 00:00:00 2001
From: Charmaine Lee <charmainel@vmware.com>
Date: Tue, 1 Sep 2015 16:29:17 -0600
Subject: tgsi: adds tgsi utility to transform a shader to support point sprite

This adds a tgsi utility tgsi_add_point_sprite to transform a geometry
shader to emulate wide points by drawing quads. This utility adds an
extra output for the original point position if the point position is
to be written to a stream output buffer. It also assumes the driver will
add a constant for inverse viewport scale after the user defined constants.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/Makefile.sources         |   2 +
 src/gallium/auxiliary/tgsi/tgsi_point_sprite.c | 582 +++++++++++++++++++++++++
 src/gallium/auxiliary/tgsi/tgsi_point_sprite.h |  38 ++
 3 files changed, 622 insertions(+)
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_point_sprite.c
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_point_sprite.h

(limited to 'src')

diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index d5986b45e06..cc6fe7d3ae3 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -144,6 +144,8 @@ C_SOURCES := \
 	tgsi/tgsi_opcode_tmp.h \
 	tgsi/tgsi_parse.c \
 	tgsi/tgsi_parse.h \
+	tgsi/tgsi_point_sprite.c \
+	tgsi/tgsi_point_sprite.h \
 	tgsi/tgsi_sanity.c \
 	tgsi/tgsi_sanity.h \
 	tgsi/tgsi_scan.c \
diff --git a/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c b/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c
new file mode 100644
index 00000000000..cb8dbcb29ec
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c
@@ -0,0 +1,582 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * This utility transforms the geometry shader to emulate point sprite by
+ * drawing a quad. It also adds an extra output for the original point position
+ * if the point position is to be written to a stream output buffer.
+ * Note: It assumes the driver will add a constant for the inverse viewport
+ *       after the user defined constants.
+ */
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "tgsi_info.h"
+#include "tgsi_point_sprite.h"
+#include "tgsi_transform.h"
+#include "pipe/p_state.h"
+
+#define INVALID_INDEX 9999
+
+/* Set swizzle based on the immediates (0, 1, 0.5, -1) */
+static inline unsigned
+set_swizzle(int x, int y, int z, int w)
+{
+   static const unsigned map[3] = {TGSI_SWIZZLE_W, TGSI_SWIZZLE_X,
+                                   TGSI_SWIZZLE_Y};
+   assert(x >= -1);
+   assert(x <= 1);
+   assert(y >= -1);
+   assert(y <= 1);
+   assert(z >= -1);
+   assert(z <= 1);
+   assert(w >= -1);
+   assert(w <= 1);
+
+   return map[x+1] | (map[y+1] << 2) | (map[z+1] << 4) | (map[w+1] << 6);
+}
+
+static inline unsigned
+get_swizzle(unsigned swizzle, unsigned component)
+{
+   assert(component < 4);
+   return (swizzle >> (component * 2)) & 0x3;
+}
+
+struct psprite_transform_context
+{
+   struct tgsi_transform_context base;
+   unsigned num_tmp;
+   unsigned num_out;
+   unsigned num_orig_out;
+   unsigned num_const;
+   unsigned num_imm;
+   unsigned point_size_in;          // point size input
+   unsigned point_size_out;         // point size output
+   unsigned point_size_tmp;         // point size temp
+   unsigned point_pos_in;           // point pos input
+   unsigned point_pos_out;          // point pos output
+   unsigned point_pos_sout;         // original point pos for streamout
+   unsigned point_pos_tmp;          // point pos temp
+   unsigned point_scale_tmp;        // point scale temp
+   unsigned point_color_out;        // point color output
+   unsigned point_color_tmp;        // point color temp
+   unsigned point_imm;              // point immediates
+   unsigned point_ivp;              // point inverseViewport constant
+   unsigned point_dir_swz[4];       // point direction swizzle
+   unsigned point_coord_swz[4];     // point coord swizzle
+   unsigned point_coord_enable;     // point coord enable mask
+   unsigned point_coord_decl;       // point coord output declared mask
+   unsigned point_coord_out;        // point coord output starting index
+   unsigned point_coord_aa;         // aa point coord semantic index
+   unsigned point_coord_k;          // aa point coord threshold distance temp
+   unsigned stream_out_point_pos:1; // set if to stream out original point pos
+   unsigned aa_point:1;             // set if doing aa point
+   unsigned out_tmp_index[PIPE_MAX_SHADER_OUTPUTS];
+   int max_generic;
+};
+
+static inline struct psprite_transform_context *
+psprite_transform_context(struct tgsi_transform_context *ctx)
+{
+   return (struct psprite_transform_context *) ctx;
+}
+
+
+/**
+ * Declaration callback: record point size/pos regs and register file sizes.
+ */
+static void
+psprite_decl(struct tgsi_transform_context *ctx,
+             struct tgsi_full_declaration *decl)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+
+   if (decl->Declaration.File == TGSI_FILE_INPUT) {
+      if (decl->Semantic.Name == TGSI_SEMANTIC_PSIZE) {
+         ts->point_size_in = decl->Range.First;
+      }
+      else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
+         ts->point_pos_in = decl->Range.First;
+      }
+   }
+   else if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
+      if (decl->Semantic.Name == TGSI_SEMANTIC_PSIZE) {
+         ts->point_size_out = decl->Range.First;
+      }
+      else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
+         ts->point_pos_out = decl->Range.First;
+      }
+      else if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
+               decl->Semantic.Index < 32) {
+         ts->point_coord_decl |= 1u << decl->Semantic.Index; /* 1<<31 is UB */
+         ts->max_generic = MAX2(ts->max_generic, decl->Semantic.Index);
+      }
+      ts->num_out = MAX2(ts->num_out, decl->Range.Last + 1);
+   }
+   else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+      ts->num_tmp = MAX2(ts->num_tmp, decl->Range.Last + 1);
+   }
+   else if (decl->Declaration.File == TGSI_FILE_CONSTANT) {
+      ts->num_const = MAX2(ts->num_const, decl->Range.Last + 1);
+   }
+
+   ctx->emit_declaration(ctx, decl);
+}
+
+/**
+ * TGSI immediate declaration transform callback.
+ */
+static void
+psprite_immediate(struct tgsi_transform_context *ctx,
+                  struct tgsi_full_immediate *imm)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+
+   ctx->emit_immediate(ctx, imm);
+   ts->num_imm++;
+}
+
+
+/**
+ * TGSI transform prolog callback: declares the registers this transform adds.
+ */
+static void
+psprite_prolog(struct tgsi_transform_context *ctx)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+   const unsigned first_new_tmp = ts->num_tmp;
+   unsigned point_coord_enable, en;
+   int i;
+
+   /* Replace output registers with temporary registers */
+   for (i = 0; i < ts->num_out; i++) {
+      ts->out_tmp_index[i] = ts->num_tmp++;
+   }
+   ts->num_orig_out = ts->num_out;
+
+   /* Declare a tmp register for point scale */
+   ts->point_scale_tmp = ts->num_tmp++;
+
+   if (ts->point_size_out != INVALID_INDEX)
+      ts->point_size_tmp = ts->out_tmp_index[ts->point_size_out];
+   else
+      ts->point_size_tmp = ts->num_tmp++;
+
+   assert(ts->point_pos_out != INVALID_INDEX);
+   ts->point_pos_tmp = ts->out_tmp_index[ts->point_pos_out];
+   ts->out_tmp_index[ts->point_pos_out] = INVALID_INDEX;
+
+   /* Declare one more tmp register for point coord threshold distance
+    * if we are generating anti-aliased point.
+    */
+   if (ts->aa_point)
+      ts->point_coord_k = ts->num_tmp++;
+
+   /* Declare all temps allocated above, not just point_size_tmp onwards */
+   tgsi_transform_temps_decl(ctx, first_new_tmp, ts->num_tmp - 1);
+
+   /* Declare an extra output for the original point position for stream out */
+   if (ts->stream_out_point_pos) {
+      ts->point_pos_sout = ts->num_out++;
+      tgsi_transform_output_decl(ctx, ts->point_pos_sout,
+                                 TGSI_SEMANTIC_GENERIC, 0, 0);
+   }
+
+   /* point coord outputs to be declared */
+   point_coord_enable = ts->point_coord_enable & ~ts->point_coord_decl;
+
+   /* Declare outputs for those point coord that are enabled but are not
+    * already declared in this shader.  The loop is a no-op for an empty mask.
+    */
+   ts->point_coord_out = ts->num_out;
+   for (i = 0, en = point_coord_enable; en; en>>=1, i++) {
+      if (en & 0x1) {
+         tgsi_transform_output_decl(ctx, ts->num_out++,
+                                    TGSI_SEMANTIC_GENERIC, i, 0);
+         ts->max_generic = MAX2(ts->max_generic, i);
+      }
+   }
+
+   /* add an extra generic output for aa point texcoord */
+   if (ts->aa_point) {
+      ts->point_coord_aa = ts->max_generic + 1;
+      assert((ts->point_coord_enable & (1u << ts->point_coord_aa)) == 0);
+      ts->point_coord_enable |= 1u << (ts->point_coord_aa);
+      tgsi_transform_output_decl(ctx, ts->num_out++, TGSI_SEMANTIC_GENERIC,
+                                 ts->point_coord_aa, 0);
+   }
+
+   /* Declare extra immediates */
+   ts->point_imm = ts->num_imm;
+   tgsi_transform_immediate_decl(ctx, 0, 1, 0.5, -1);
+
+   /* Declare point constant -
+    * constant.xy -- inverseViewport
+    * constant.z -- current point size
+    * constant.w -- max point size
+    * The driver needs to add this constant to the constant buffer
+    */
+   ts->point_ivp = ts->num_const++;
+   tgsi_transform_const_decl(ctx, ts->point_ivp, ts->point_ivp);
+
+   /* If this geometry shader does not specify point size,
+    * get the current point size from the point constant.
+    */
+   if (ts->point_size_out == INVALID_INDEX) {
+      struct tgsi_full_instruction inst;
+
+      inst = tgsi_default_full_instruction();
+      inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+      inst.Instruction.NumDstRegs = 1;
+      tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
+                             ts->point_size_tmp, TGSI_WRITEMASK_XYZW);
+      inst.Instruction.NumSrcRegs = 1;
+      tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_CONSTANT,
+                             ts->point_ivp, TGSI_SWIZZLE_Z,
+                             TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
+      ctx->emit_instruction(ctx, &inst);
+   }
+}
+
+
+/**
+ * Add the point sprite emulation instructions at the emit vertex instruction
+ */
+static void
+psprite_emit_vertex_inst(struct tgsi_transform_context *ctx,
+                         struct tgsi_full_instruction *vert_inst)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+   struct tgsi_full_instruction inst;
+   unsigned point_coord_enable, en;
+   unsigned i, j, s;
+
+   /* new point coord outputs */
+   point_coord_enable = ts->point_coord_enable & ~ts->point_coord_decl;
+
+   /* OUTPUT[pos_sout] = TEMP[pos] */
+   if (ts->point_pos_sout != INVALID_INDEX) {
+      tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
+                              TGSI_FILE_OUTPUT, ts->point_pos_sout,
+                              TGSI_WRITEMASK_XYZW,
+                              TGSI_FILE_TEMPORARY, ts->point_pos_tmp);
+   }
+
+   /**
+    * Set up the point scale vector
+    * scale = pointSize * pos.w * inverseViewport
+    */
+
+   /* MUL point_scale.x, point_size.x, point_pos.w */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL,
+                  TGSI_FILE_TEMPORARY, ts->point_scale_tmp, TGSI_WRITEMASK_X,
+                  TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
+                  TGSI_FILE_TEMPORARY, ts->point_pos_tmp, TGSI_SWIZZLE_W);
+
+   /* MUL point_scale.xy, point_scale.xx, inverseViewport.xy */
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+   inst.Instruction.NumDstRegs = 1;
+   tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
+                          ts->point_scale_tmp, TGSI_WRITEMASK_XY);
+   inst.Instruction.NumSrcRegs = 2;
+   tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY,
+                          ts->point_scale_tmp, TGSI_SWIZZLE_X,
+                          TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+   tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_CONSTANT,
+                          ts->point_ivp, TGSI_SWIZZLE_X,
+                          TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
+   ctx->emit_instruction(ctx, &inst);
+
+   /**
+    * Set up the point coord threshold distance
+    * k = 0.5 - 1 / pointsize
+    */
+   if (ts->aa_point) {
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_DIV,
+                                  TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                  TGSI_WRITEMASK_X,
+                                  TGSI_FILE_IMMEDIATE, ts->point_imm,
+                                  TGSI_SWIZZLE_Y,
+                                  TGSI_FILE_TEMPORARY, ts->point_size_tmp,
+                                  TGSI_SWIZZLE_X);
+
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
+                                  TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                  TGSI_WRITEMASK_X,
+                                  TGSI_FILE_IMMEDIATE, ts->point_imm,
+                                  TGSI_SWIZZLE_Z,
+                                  TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                  TGSI_SWIZZLE_X);
+   }
+
+
+   for (i = 0; i < 4; i++) {
+      unsigned point_dir_swz = ts->point_dir_swz[i];
+      unsigned point_coord_swz = ts->point_coord_swz[i];
+
+      /* All outputs need to be emitted for each vertex */
+      for (j = 0; j < ts->num_orig_out; j++) {
+         if (ts->out_tmp_index[j] != INVALID_INDEX) {
+            tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
+                                    TGSI_FILE_OUTPUT, j,
+                                    TGSI_WRITEMASK_XYZW,
+                                    TGSI_FILE_TEMPORARY, ts->out_tmp_index[j]);
+         }
+      }
+
+      /* pos = point_scale * point_dir + point_pos */
+      inst = tgsi_default_full_instruction();
+      inst.Instruction.Opcode = TGSI_OPCODE_MAD;
+      inst.Instruction.NumDstRegs = 1;
+      tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_OUTPUT, ts->point_pos_out,
+                             TGSI_WRITEMASK_XYZW);
+      inst.Instruction.NumSrcRegs = 3;
+      tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY, ts->point_scale_tmp,
+                             TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X,
+                             TGSI_SWIZZLE_X);
+      tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_IMMEDIATE, ts->point_imm,
+                             get_swizzle(point_dir_swz, 0),
+                             get_swizzle(point_dir_swz, 1),
+                             get_swizzle(point_dir_swz, 2),
+                             get_swizzle(point_dir_swz, 3));
+      tgsi_transform_src_reg(&inst.Src[2], TGSI_FILE_TEMPORARY, ts->point_pos_tmp,
+                             TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
+                             TGSI_SWIZZLE_W);
+      ctx->emit_instruction(ctx, &inst);
+
+      /* point coord */
+      for (j = 0, s = 0, en = point_coord_enable; en; en>>=1, s++) {
+         unsigned dstReg;
+
+         if (en & 0x1) {
+            dstReg = ts->point_coord_out + j;
+
+            inst = tgsi_default_full_instruction();
+            inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+            inst.Instruction.NumDstRegs = 1;
+            tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_OUTPUT,
+                                   dstReg, TGSI_WRITEMASK_XYZW);
+            inst.Instruction.NumSrcRegs = 1;
+            tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_IMMEDIATE, ts->point_imm,
+                                   get_swizzle(point_coord_swz, 0),
+                                   get_swizzle(point_coord_swz, 1),
+                                   get_swizzle(point_coord_swz, 2),
+                                   get_swizzle(point_coord_swz, 3));
+            ctx->emit_instruction(ctx, &inst);
+
+            /* MOV point_coord.z, point_coord_k.x */
+            if (s == ts->point_coord_aa) {
+               tgsi_transform_op1_swz_inst(ctx, TGSI_OPCODE_MOV,
+                                           TGSI_FILE_OUTPUT, dstReg, TGSI_WRITEMASK_Z,
+                                           TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                           TGSI_SWIZZLE_X);
+            }
+            j++;  /* the next point coord output offset */
+         }
+      }
+
+      /* Emit the EMIT instruction for each vertex of the quad */
+      ctx->emit_instruction(ctx, vert_inst);
+   }
+
+   /* Emit the ENDPRIM instruction for the quad */
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = TGSI_OPCODE_ENDPRIM;
+   inst.Instruction.NumDstRegs = 0;
+   inst.Instruction.NumSrcRegs = 1;
+   inst.Src[0] = vert_inst->Src[0];
+   ctx->emit_instruction(ctx, &inst);
+}
+
+
+/**
+ * TGSI instruction transform callback.
+ */
+static void
+psprite_inst(struct tgsi_transform_context *ctx,
+             struct tgsi_full_instruction *inst)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+
+   if (inst->Instruction.Opcode == TGSI_OPCODE_EMIT) {
+      psprite_emit_vertex_inst(ctx, inst);
+   }
+   else if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
+            inst->Dst[0].Register.Index == ts->point_size_out) {
+      /**
+       * Replace point size output reg with tmp reg.
+       * The tmp reg will be later used as a src reg for computing
+       * the point scale factor.
+       */
+      inst->Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      inst->Dst[0].Register.Index = ts->point_size_tmp;
+      ctx->emit_instruction(ctx, inst);
+
+      /* Clamp the point size */
+      /* MAX point_size_tmp.x, point_size_tmp.x, point_imm.y */
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MAX,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
+                 TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Y);
+
+      /* MIN point_size_tmp.x, point_size_tmp.x, point_ivp.w */
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
+                 TGSI_FILE_CONSTANT, ts->point_ivp, TGSI_SWIZZLE_W);
+   }
+   else if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
+            inst->Dst[0].Register.Index == ts->point_pos_out) {
+      /**
+       * Replace point pos output reg with tmp reg.
+       */
+      inst->Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      inst->Dst[0].Register.Index = ts->point_pos_tmp;
+      ctx->emit_instruction(ctx, inst);
+   }
+   else if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
+      /**
+       * Replace output reg with tmp reg.
+       */
+      inst->Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      inst->Dst[0].Register.Index = ts->out_tmp_index[inst->Dst[0].Register.Index];
+      ctx->emit_instruction(ctx, inst);
+   }
+   else {
+      ctx->emit_instruction(ctx, inst);
+   }
+}
+
+
+/**
+ * TGSI property instruction transform callback.
+ * Transforms a point into a 4-vertex triangle strip.
+ */
+static void
+psprite_property(struct tgsi_transform_context *ctx,
+                 struct tgsi_full_property *prop)
+{
+   switch (prop->Property.PropertyName) {
+   case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+       prop->u[0].Data = PIPE_PRIM_TRIANGLE_STRIP;
+       break;
+   case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
+       prop->u[0].Data *= 4;
+       break;
+   default:
+       break;
+   }
+   ctx->emit_property(ctx, prop);
+}
+
+/**
+ * TGSI utility to transform a geometry shader to support point sprite.
+ */
+struct tgsi_token *
+tgsi_add_point_sprite(const struct tgsi_token *tokens_in,
+                      const unsigned point_coord_enable,
+                      const bool sprite_origin_lower_left,
+                      const bool stream_out_point_pos,
+                      int *aa_point_coord_index)
+{
+   struct psprite_transform_context transform;
+   const uint num_new_tokens = 200; /* should be enough */
+   const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens;
+   struct tgsi_token *new_tokens;
+
+   /* setup transformation context */
+   memset(&transform, 0, sizeof(transform));
+   transform.base.transform_declaration = psprite_decl;
+   transform.base.transform_instruction = psprite_inst;
+   transform.base.transform_property = psprite_property;
+   transform.base.transform_immediate = psprite_immediate;
+   transform.base.prolog = psprite_prolog;
+
+   transform.point_size_in = INVALID_INDEX;
+   transform.point_size_out = INVALID_INDEX;
+   transform.point_size_tmp = INVALID_INDEX;
+   transform.point_pos_in = INVALID_INDEX;
+   transform.point_pos_out = INVALID_INDEX;
+   transform.point_pos_sout = INVALID_INDEX;
+   transform.point_pos_tmp = INVALID_INDEX;
+   transform.point_scale_tmp = INVALID_INDEX;
+   transform.point_imm = INVALID_INDEX;
+   transform.point_coord_aa = INVALID_INDEX;
+   transform.point_coord_k = INVALID_INDEX;
+
+   transform.stream_out_point_pos = stream_out_point_pos;
+   transform.point_coord_enable = point_coord_enable;
+   transform.aa_point = aa_point_coord_index != NULL;
+   transform.max_generic = -1;
+
+   /* point sprite directions based on the immediates (0, 1, 0.5, -1) */
+   /* (-1, -1, 0, 0) */
+   transform.point_dir_swz[0] = set_swizzle(-1, -1, 0, 0);
+   /* (-1, 1, 0, 0) */
+   transform.point_dir_swz[1] = set_swizzle(-1, 1, 0, 0);
+   /* (1, -1, 0, 0) */
+   transform.point_dir_swz[2] = set_swizzle(1, -1, 0, 0);
+   /* (1, 1, 0, 0) */
+   transform.point_dir_swz[3] = set_swizzle(1, 1, 0, 0);
+
+   /* point coord based on the immediates (0, 1, 0.5, -1) */
+   if (sprite_origin_lower_left) {
+      /* (0, 0, 0, 1) */
+      transform.point_coord_swz[0] = set_swizzle(0, 0, 0, 1);
+      /* (0, 1, 0, 1) */
+      transform.point_coord_swz[1] = set_swizzle(0, 1, 0, 1);
+      /* (1, 0, 0, 1) */
+      transform.point_coord_swz[2] = set_swizzle(1, 0, 0, 1);
+      /* (1, 1, 0, 1) */
+      transform.point_coord_swz[3] = set_swizzle(1, 1, 0, 1);
+   }
+   else {
+      /* (0, 1, 0, 1) */
+      transform.point_coord_swz[0] = set_swizzle(0, 1, 0, 1);
+      /* (0, 0, 0, 1) */
+      transform.point_coord_swz[1] = set_swizzle(0, 0, 0, 1);
+      /* (1, 1, 0, 1) */
+      transform.point_coord_swz[2] = set_swizzle(1, 1, 0, 1);
+      /* (1, 0, 0, 1) */
+      transform.point_coord_swz[3] = set_swizzle(1, 0, 0, 1);
+   }
+
+
+   /* allocate new tokens buffer */
+   new_tokens = tgsi_alloc_tokens(new_len);
+   if (!new_tokens)
+      return NULL;
+
+   /* transform the shader */
+   tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base);
+
+   if (aa_point_coord_index)
+      *aa_point_coord_index = transform.point_coord_aa;
+
+   return new_tokens;
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_point_sprite.h b/src/gallium/auxiliary/tgsi/tgsi_point_sprite.h
new file mode 100644
index 00000000000..d1958914bc3
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_point_sprite.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TGSI_POINT_SPRITE_H
+#define TGSI_POINT_SPRITE_H
+
+struct tgsi_token;
+
+struct tgsi_token *
+tgsi_add_point_sprite(const struct tgsi_token *tokens_in,
+                      const unsigned point_coord_enable,
+                      const bool sprite_origin_lower_left,
+                      const bool stream_out_point_pos,
+                      int *aa_point_coord_index);
+
+#endif /* TGSI_POINT_SPRITE_H */
-- 
cgit v1.2.3


From 14f35194d89901a04cb2180a788f90e1bedf5399 Mon Sep 17 00:00:00 2001
From: Charmaine Lee <charmainel@vmware.com>
Date: Tue, 1 Sep 2015 16:29:17 -0600
Subject: tgsi: add tgsi utility to transform a fragment shader to support aa
 point

This adds a tgsi utility tgsi_add_aa_point to transform a fragment shader
to support anti-aliased wide point by computing the fragment distance from
the point center. This utility assumes the geometry shader is emitting
an extra generic output with point coord data. The semantic index of
this generic output is passed to the tgsi_add_aa_point utility.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/Makefile.sources     |   2 +
 src/gallium/auxiliary/tgsi/tgsi_aa_point.c | 309 +++++++++++++++++++++++++++++
 src/gallium/auxiliary/tgsi/tgsi_aa_point.h |  35 ++++
 3 files changed, 346 insertions(+)
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_aa_point.c
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_aa_point.h

(limited to 'src')

diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index cc6fe7d3ae3..56fa84055b7 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -129,6 +129,8 @@ C_SOURCES := \
 	rtasm/rtasm_execmem.h \
 	rtasm/rtasm_x86sse.c \
 	rtasm/rtasm_x86sse.h \
+	tgsi/tgsi_aa_point.c \
+	tgsi/tgsi_aa_point.h \
 	tgsi/tgsi_build.c \
 	tgsi/tgsi_build.h \
 	tgsi/tgsi_dump.c \
diff --git a/src/gallium/auxiliary/tgsi/tgsi_aa_point.c b/src/gallium/auxiliary/tgsi/tgsi_aa_point.c
new file mode 100644
index 00000000000..9016effd388
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_aa_point.c
@@ -0,0 +1,309 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * This utility transforms the fragment shader to support anti-aliasing points.
+ */
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "tgsi_info.h"
+#include "tgsi_aa_point.h"
+#include "tgsi_transform.h"
+
+#define INVALID_INDEX 9999
+
+struct aa_transform_context
+{
+   struct tgsi_transform_context base;
+
+   unsigned tmp;           // temp register
+   unsigned color_out;     // frag color out register
+   unsigned color_tmp;     // frag color temp register
+   unsigned num_tmp;       // number of temp registers
+   unsigned num_imm;       // number of immediates
+   unsigned num_input;     // number of inputs
+   unsigned aa_point_coord_index;
+};
+
+static inline struct aa_transform_context *
+aa_transform_context(struct tgsi_transform_context *ctx)
+{
+   return (struct aa_transform_context *) ctx;
+}
+
+/**
+ * TGSI declaration transform callback.
+ */
+static void
+aa_decl(struct tgsi_transform_context *ctx,
+              struct tgsi_full_declaration *decl)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+
+   if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
+       decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
+       decl->Semantic.Index == 0) {
+         ts->color_out = decl->Range.First;
+   }
+   else if (decl->Declaration.File == TGSI_FILE_INPUT) {
+      ts->num_input++;
+   }
+   else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+      ts->num_tmp = MAX2(ts->num_tmp, decl->Range.Last + 1);
+   }
+
+   ctx->emit_declaration(ctx, decl);
+}
+
+/**
+ * TGSI immediate declaration transform callback.
+ */
+static void
+aa_immediate(struct tgsi_transform_context *ctx,
+                  struct tgsi_full_immediate *imm)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+
+   ctx->emit_immediate(ctx, imm);
+   ts->num_imm++;
+}
+
+/**
+ * TGSI transform prolog callback.
+ */
+static void
+aa_prolog(struct tgsi_transform_context *ctx)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+   unsigned tmp0;
+   unsigned texIn;
+   unsigned imm;
+
+   /* Declare two temporary registers, one for temporary and
+    * one for color.
+    */
+   ts->tmp = ts->num_tmp++;
+   ts->color_tmp = ts->num_tmp++;
+
+   tgsi_transform_temps_decl(ctx, ts->tmp, ts->color_tmp);
+
+   /* Declare new generic input/texcoord */
+   texIn = ts->num_input++;
+   tgsi_transform_input_decl(ctx, texIn, TGSI_SEMANTIC_GENERIC,
+                             ts->aa_point_coord_index, TGSI_INTERPOLATE_LINEAR);
+
+   /* Declare extra immediates */
+   imm = ts->num_imm++;
+   tgsi_transform_immediate_decl(ctx, 0.5, 0.5, 0.45, 1.0);
+
+   /*
+    * Emit code to compute fragment coverage.
+    * The point always has radius 0.5.  The threshold value will be a
+    * value less than, but close to 0.5, such as 0.45.
+    * We compute a coverage factor from the distance and threshold.
+    * If the coverage is negative, the fragment is outside the circle and
+    * it's discarded.
+    * If the coverage is >= 1, the fragment is fully inside the threshold
+    * distance.  We limit/clamp the coverage to 1.
+    * Otherwise, the fragment is between the threshold value and 0.5 and we
+    * compute a coverage value in [0,1].
+    *
+    * Input reg (texIn) usage:
+    *  texIn.x = x point coord in [0,1]
+    *  texIn.y = y point coord in [0,1]
+    *  texIn.z = "k" the smoothing threshold distance
+    *  texIn.w = unused
+    *
+    * Temp reg (t0) usage:
+    *  t0.x = distance of fragment from center point
+    *  t0.y = y offset from center, later 0.5 - d (coverage numerator)
+    *  t0.z = unused
+    *  t0.w = 0.5 - k (coverage denominator), then the final coverage value
+    */
+
+   tmp0 = ts->tmp;
+
+   /* SUB t0.xy, texIn, (0.5, 0.5) */
+   tgsi_transform_op2_inst(ctx, TGSI_OPCODE_SUB,
+                           TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_XY,
+                           TGSI_FILE_INPUT, texIn,
+                           TGSI_FILE_IMMEDIATE, imm);
+
+   /* DP2 t0.x, t0.xy, t0.xy;  # t0.x = x^2 + y^2 */
+   tgsi_transform_op2_inst(ctx, TGSI_OPCODE_DP2,
+                           TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
+                           TGSI_FILE_TEMPORARY, tmp0,
+                           TGSI_FILE_TEMPORARY, tmp0);
+
+   /* SQRT t0.x, t0.x */
+   tgsi_transform_op1_inst(ctx, TGSI_OPCODE_SQRT,
+                           TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
+                           TGSI_FILE_TEMPORARY, tmp0);
+
+   /* compute coverage factor = (0.5-d)/(0.5-k) */
+
+   /* SUB t0.w, 0.5, texIn.z;  # t0.w = 0.5-k */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
+                               TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_X,
+                               TGSI_FILE_INPUT, texIn, TGSI_SWIZZLE_Z);
+
+   /* SUB t0.y, 0.5, t0.x;  # t0.y = 0.5-d */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
+                               TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_X,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X);
+
+   /* DIV t0.w, t0.y, t0.w;  # coverage = (0.5-d)/(0.5-k) */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_DIV,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W);
+
+   /* If the coverage value is negative, it means the fragment is outside
+    * the point's circular boundary.  Kill it.
+    */
+   /* KILL_IF tmp0.w;  # if tmp0.w < 0 KILL */
+   tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0,
+                            TGSI_SWIZZLE_W, FALSE);
+
+   /* If the distance is less than the threshold, the coverage/alpha value
+    * will be greater than one.  Clamp to one here.
+    */
+   /* MIN tmp0.w, tmp0.w, 1.0 */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W,
+                               TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_W);
+}
+
+/**
+ * TGSI instruction transform callback.
+ */
+static void
+aa_inst(struct tgsi_transform_context *ctx,
+        struct tgsi_full_instruction *inst)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+   unsigned i;
+
+   /* Look for writes to color output reg and replace it with
+    * color temp reg.
+    */
+   for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
+      struct tgsi_full_dst_register *dst = &inst->Dst[i];
+      if (dst->Register.File == TGSI_FILE_OUTPUT &&
+          dst->Register.Index == ts->color_out) {
+         dst->Register.File = TGSI_FILE_TEMPORARY;
+         dst->Register.Index = ts->color_tmp;
+      }
+   }
+
+   ctx->emit_instruction(ctx, inst);
+}
+
+/**
+ * TGSI transform epilog callback.
+ */
+static void
+aa_epilog(struct tgsi_transform_context *ctx)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+
+   /* add alpha modulation code at tail of program */
+   assert(ts->color_out != INVALID_INDEX);
+   assert(ts->color_tmp != INVALID_INDEX);
+
+   /* MOV output.color.xyz colorTmp */
+   tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
+                           TGSI_FILE_OUTPUT, ts->color_out,
+                           TGSI_WRITEMASK_XYZ,
+                           TGSI_FILE_TEMPORARY, ts->color_tmp);
+
+   /* MUL output.color.w colorTmp.w tmp0.w */
+   tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
+                           TGSI_FILE_OUTPUT, ts->color_out,
+                           TGSI_WRITEMASK_W,
+                           TGSI_FILE_TEMPORARY, ts->color_tmp,
+                           TGSI_FILE_TEMPORARY, ts->tmp);
+}
+
+/**
+ * TGSI utility to transform a fragment shader to support antialiasing point.
+ *
+ * This utility accepts two inputs:
+ *\param tokens_in  -- the original token string of the shader
+ *\param aa_point_coord_index -- the semantic index of the generic register
+ *                            that contains the point sprite texture coord
+ *
+ * For each fragment in the point, we compute the distance of the fragment
+ * from the point center using the point sprite texture coordinates.
+ * If the distance is greater than 0.5, we'll discard the fragment.
+ * Otherwise, we'll compute a coverage value which approximates how much
+ * of the fragment is inside the bounding circle of the point. If the distance
+ * is less than 'k', the coverage is 1. Else, the coverage is between 0 and 1.
+ * The final fragment color's alpha channel is then modulated by the coverage
+ * value.
+ */
+struct tgsi_token *
+tgsi_add_aa_point(const struct tgsi_token *tokens_in,
+                  const int aa_point_coord_index)
+{
+   struct aa_transform_context transform;
+   const uint num_new_tokens = 200; /* should be enough */
+   const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens;
+   struct tgsi_token *new_tokens;
+
+   /* allocate new tokens buffer */
+   new_tokens = tgsi_alloc_tokens(new_len);
+   if (!new_tokens)
+      return NULL;
+
+   /* setup transformation context */
+   memset(&transform, 0, sizeof(transform));
+   transform.base.transform_declaration = aa_decl;
+   transform.base.transform_instruction = aa_inst;
+   transform.base.transform_immediate = aa_immediate;
+   transform.base.prolog = aa_prolog;
+   transform.base.epilog = aa_epilog;
+
+   transform.tmp = INVALID_INDEX;
+   transform.color_out = INVALID_INDEX;
+   transform.color_tmp = INVALID_INDEX;
+
+   assert(aa_point_coord_index != -1);
+   transform.aa_point_coord_index = (unsigned)aa_point_coord_index;
+
+   transform.num_tmp = 0;
+   transform.num_imm = 0;
+   transform.num_input = 0;
+
+   /* transform the shader */
+   tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base);
+
+   return new_tokens;
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_aa_point.h b/src/gallium/auxiliary/tgsi/tgsi_aa_point.h
new file mode 100644
index 00000000000..d89f40cc389
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_aa_point.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TGSI_AA_POINT_H
+#define TGSI_AA_POINT_H
+
+struct tgsi_token;
+
+struct tgsi_token *
+tgsi_add_aa_point(const struct tgsi_token *tokens_in,
+                  const int aa_point_coord_index);
+
+#endif /* TGSI_AA_POINT_H */
-- 
cgit v1.2.3


From 84e71ef2ee4ab4a781aa86a7239036257e5992ce Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 1 Sep 2015 16:29:17 -0600
Subject: util: add a couple primitive restart helper functions

The first function translates prim restart indexes to be 0xffff or
0xffffffff.

The second splits indexed primitives with restart indexes into sub-
primitives without restart indexes.

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/gallium/auxiliary/Makefile.sources      |   2 +
 src/gallium/auxiliary/util/u_prim_restart.c | 267 ++++++++++++++++++++++++++++
 src/gallium/auxiliary/util/u_prim_restart.h |  62 +++++++
 3 files changed, 331 insertions(+)
 create mode 100644 src/gallium/auxiliary/util/u_prim_restart.c
 create mode 100644 src/gallium/auxiliary/util/u_prim_restart.h

(limited to 'src')

diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index 56fa84055b7..1fa36416b8e 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -266,6 +266,8 @@ C_SOURCES := \
 	util/u_pack_color.h \
 	util/u_pointer.h \
 	util/u_prim.h \
+	util/u_prim_restart.c \
+	util/u_prim_restart.h \
 	util/u_pstipple.c \
 	util/u_pstipple.h \
 	util/u_range.h \
diff --git a/src/gallium/auxiliary/util/u_prim_restart.c b/src/gallium/auxiliary/util/u_prim_restart.c
new file mode 100644
index 00000000000..a4d7c1433d9
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_prim_restart.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+
+#include "u_inlines.h"
+#include "u_memory.h"
+#include "u_prim_restart.h"
+
+
+/**
+ * Translate an index buffer for primitive restart.
+ * Create a new index buffer (returned in *dst_buffer) which is a copy of
+ * 'num_indexes' indexes of 'src_buffer', except that instances of
+ * 'restart_index' are converted to 0xffff or 0xffffffff; 1-byte indexes
+ * are widened to 2 bytes.  On failure *dst_buffer is left NULL.
+ */
+enum pipe_error
+util_translate_prim_restart_ib(struct pipe_context *context,
+                               struct pipe_index_buffer *src_buffer,
+                               struct pipe_resource **dst_buffer,
+                               unsigned num_indexes,
+                               unsigned restart_index)
+{
+   struct pipe_screen *screen = context->screen;
+   struct pipe_transfer *src_transfer = NULL, *dst_transfer = NULL;
+   void *src_map = NULL, *dst_map = NULL;
+   const unsigned src_index_size = src_buffer->index_size;
+   unsigned dst_index_size;
+
+   /* 1-byte indexes are converted to 2-byte indexes, 4-byte stays 4-byte */
+   dst_index_size = MAX2(2, src_buffer->index_size);
+   assert(dst_index_size == 2 || dst_index_size == 4);
+
+   /* no user buffers for now */
+   assert(src_buffer->user_buffer == NULL);
+
+   /* Create new index buffer */
+   *dst_buffer = pipe_buffer_create(screen, PIPE_BIND_INDEX_BUFFER,
+                                    PIPE_USAGE_STREAM,
+                                    num_indexes * dst_index_size);
+   if (!*dst_buffer)
+      goto error;
+
+   /* Map new / dest index buffer */
+   dst_map = pipe_buffer_map(context, *dst_buffer,
+                             PIPE_TRANSFER_WRITE, &dst_transfer);
+   if (!dst_map)
+      goto error;
+
+   /* Map original / src index buffer */
+   src_map = pipe_buffer_map_range(context, src_buffer->buffer,
+                                   src_buffer->offset,
+                                   num_indexes * src_index_size,
+                                   PIPE_TRANSFER_READ,
+                                   &src_transfer);
+   if (!src_map)
+      goto error;
+
+   if (src_index_size == 1 && dst_index_size == 2) {
+      uint8_t *src = (uint8_t *) src_map;
+      uint16_t *dst = (uint16_t *) dst_map;
+      unsigned i;
+      for (i = 0; i < num_indexes; i++) {
+         dst[i] = (src[i] == restart_index) ? 0xffff : src[i];
+      }
+   }
+   else if (src_index_size == 2 && dst_index_size == 2) {
+      uint16_t *src = (uint16_t *) src_map;
+      uint16_t *dst = (uint16_t *) dst_map;
+      unsigned i;
+      for (i = 0; i < num_indexes; i++) {
+         dst[i] = (src[i] == restart_index) ? 0xffff : src[i];
+      }
+   }
+   else {
+      uint32_t *src = (uint32_t *) src_map;
+      uint32_t *dst = (uint32_t *) dst_map;
+      unsigned i;
+      assert(src_index_size == 4);
+      assert(dst_index_size == 4);
+      for (i = 0; i < num_indexes; i++) {
+         dst[i] = (src[i] == restart_index) ? 0xffffffff : src[i];
+      }
+   }
+
+   pipe_buffer_unmap(context, src_transfer);
+   pipe_buffer_unmap(context, dst_transfer);
+
+   return PIPE_OK;
+
+error:
+   if (src_transfer)
+      pipe_buffer_unmap(context, src_transfer);
+   if (dst_transfer)
+      pipe_buffer_unmap(context, dst_transfer);
+   if (*dst_buffer)
+      pipe_resource_reference(dst_buffer, NULL); /* unref, clears out-param */
+   return PIPE_ERROR_OUT_OF_MEMORY;
+}
+
+
+/** Helper structs for util_draw_vbo_without_prim_restart() */
+
+struct range {
+   unsigned start, count;
+};
+
+struct range_info {
+   struct range *ranges;
+   unsigned count, max;
+};
+
+
+/**
+ * Helper function for util_draw_vbo_without_prim_restart()
+ * \return true for success, false if out of memory (ranges[] is then freed)
+ */
+static boolean
+add_range(struct range_info *info, unsigned start, unsigned count)
+{
+   if (info->max == 0) {
+      info->max = 10;
+      info->ranges = MALLOC(info->max * sizeof(struct range));
+      if (!info->ranges) {
+         return FALSE;
+      }
+   }
+   else if (info->count == info->max) {
+      /* grow the ranges[] array; keep the old pointer until REALLOC succeeds */
+      struct range *grown = REALLOC(info->ranges,
+                                    info->max * sizeof(struct range),
+                                    2 * info->max * sizeof(struct range));
+      if (!grown) {
+         FREE(info->ranges);  /* the caller bails out; don't leak the array */
+         info->ranges = NULL;
+         return FALSE;
+      }
+      info->ranges = grown;
+      info->max *= 2;
+   }
+   /* save the range */
+   info->ranges[info->count].start = start;
+   info->ranges[info->count].count = count;
+   info->count++;
+   return TRUE;
+}
+
+
+/**
+ * Implement primitive restart by breaking an indexed primitive into
+ * pieces which do not contain restart indexes.  Each piece is then
+ * drawn by calling pipe_context::draw_vbo().
+ * \return PIPE_OK if no error, an error code otherwise.
+ */
+enum pipe_error
+util_draw_vbo_without_prim_restart(struct pipe_context *context,
+                                   const struct pipe_index_buffer *ib,
+                                   const struct pipe_draw_info *info)
+{
+   const void *src_map;
+   struct range_info ranges = {0};
+   struct pipe_draw_info new_info;
+   struct pipe_transfer *src_transfer = NULL;
+   unsigned i, start, count;
+
+   assert(info->indexed);
+   assert(info->primitive_restart);
+
+   /* Get pointer to the index data */
+   if (ib->buffer) {
+      /* map the index buffer (only the range we need to scan) */
+      src_map = pipe_buffer_map_range(context, ib->buffer,
+                                      ib->offset + info->start * ib->index_size,
+                                      info->count * ib->index_size,
+                                      PIPE_TRANSFER_READ,
+                                      &src_transfer);
+      if (!src_map) {
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+   }
+   else {
+      if (!ib->user_buffer) {
+         debug_printf("User-space index buffer is null!");
+         return PIPE_ERROR_BAD_INPUT;
+      }
+      src_map = (const uint8_t *) ib->user_buffer
+         + ib->offset
+         + info->start * ib->index_size;
+   }
+
+#define SCAN_INDEXES(TYPE) \
+   for (i = 0; i <= info->count; i++) { \
+      if (i == info->count || \
+          ((const TYPE *) src_map)[i] == info->restart_index) { \
+         /* cut / restart */ \
+         if (count > 0) { \
+            if (!add_range(&ranges, info->start + start, count)) { \
+               if (src_transfer) \
+                  pipe_buffer_unmap(context, src_transfer); \
+               return PIPE_ERROR_OUT_OF_MEMORY; \
+            } \
+         } \
+         start = i + 1; \
+         count = 0; \
+      } \
+      else { \
+         count++; \
+      } \
+   }
+
+   start = 0;  /* run start, relative to info->start (i.e. to src_map[0]) */
+   count = 0;
+   switch (ib->index_size) {
+   case 1:
+      SCAN_INDEXES(uint8_t);
+      break;
+   case 2:
+      SCAN_INDEXES(uint16_t);
+      break;
+   case 4:
+      SCAN_INDEXES(uint32_t);
+      break;
+   default:
+      assert(!"Bad index size");
+      if (src_transfer)
+         pipe_buffer_unmap(context, src_transfer);
+      return PIPE_ERROR_BAD_INPUT;
+   }
+
+   /* unmap index buffer */
+   if (src_transfer)
+      pipe_buffer_unmap(context, src_transfer);
+
+   /* draw ranges between the restart indexes */
+   new_info = *info;
+   new_info.primitive_restart = FALSE;
+   for (i = 0; i < ranges.count; i++) {
+      new_info.start = ranges.ranges[i].start;
+      new_info.count = ranges.ranges[i].count;
+      context->draw_vbo(context, &new_info);
+   }
+   FREE(ranges.ranges);
+   return PIPE_OK;
+}
diff --git a/src/gallium/auxiliary/util/u_prim_restart.h b/src/gallium/auxiliary/util/u_prim_restart.h
new file mode 100644
index 00000000000..1e98e0e1b22
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_prim_restart.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef U_PRIM_RESTART_H
+#define U_PRIM_RESTART_H
+
+
+#include "pipe/p_defines.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+struct pipe_context;
+struct pipe_draw_info;
+struct pipe_index_buffer;
+struct pipe_resource;
+
+
+enum pipe_error
+util_translate_prim_restart_ib(struct pipe_context *context,
+                               struct pipe_index_buffer *src_buffer,
+                               struct pipe_resource **dst_buffer,
+                               unsigned num_indexes,
+                               unsigned restart_index);
+
+enum pipe_error
+util_draw_vbo_without_prim_restart(struct pipe_context *context,
+                                   const struct pipe_index_buffer *ib,
+                                   const struct pipe_draw_info *info);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
-- 
cgit v1.2.3


From 60aea3011525de8bf6506e08c913cbe24cc17767 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 1 Sep 2015 16:29:17 -0600
Subject: auxiliary/os: Don't implement os_get_option() on embedded builds.

Let it be defined externally instead, allowing setting mechanisms other
than environment variables.

Reviewed-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Matthew McClure <mcclurem@vmware.com>
---
 src/gallium/auxiliary/os/os_misc.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src')

diff --git a/src/gallium/auxiliary/os/os_misc.c b/src/gallium/auxiliary/os/os_misc.c
index c46078bb07d..d6b83e90e3d 100644
--- a/src/gallium/auxiliary/os/os_misc.c
+++ b/src/gallium/auxiliary/os/os_misc.c
@@ -96,11 +96,13 @@ os_log_message(const char *message)
 }
 
 
+#if !defined(PIPE_SUBSYSTEM_EMBEDDED)
 const char *
 os_get_option(const char *name)
 {
    return getenv(name);
 }
+#endif /* !PIPE_SUBSYSTEM_EMBEDDED */
 
 
 /**
-- 
cgit v1.2.3


From 0ad3a475ef81dad3baf607d749b91dfa1700ca23 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 25 Aug 2015 14:36:01 +1000
Subject: mesa: fix SwapBytes handling in numerous places

In a number of places the SwapBytes handling didn't handle cases with
GL_(UN)PACK_ALIGNMENT set and 7 byte width cases aligned to 8 bytes.

This adds a common routine to swap the bytes of a 2D image and uses this
code in:

texture storage
texture get
readpixels
swrast drawpixels.

[airlied: updated with Brian's nitpicks].

Cc: "11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Brian Paul <brianp@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/mesa/main/image.c       | 58 +++++++++++++++++++++++++++++++++++++++++----
 src/mesa/main/image.h       | 20 ++++++----------
 src/mesa/main/readpix.c     | 11 ++-------
 src/mesa/main/texgetimage.c | 14 +++--------
 src/mesa/main/texstore.c    | 28 +++++++++++++---------
 src/mesa/swrast/s_drawpix.c | 14 +++++------
 6 files changed, 89 insertions(+), 56 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c
index 711a19062a9..e79e3e68eac 100644
--- a/src/mesa/main/image.c
+++ b/src/mesa/main/image.c
@@ -49,8 +49,8 @@
  * \param src the array with the source data we want to byte-swap.
  * \param n number of words.
  */
-void
-_mesa_swap2_copy( GLushort *dst, GLushort *src, GLuint n )
+static void
+swap2_copy( GLushort *dst, GLushort *src, GLuint n )
 {
    GLuint i;
    for (i = 0; i < n; i++) {
@@ -58,7 +58,11 @@ _mesa_swap2_copy( GLushort *dst, GLushort *src, GLuint n )
    }
 }
 
-
+void
+_mesa_swap2(GLushort *p, GLuint n)
+{
+   swap2_copy(p, p, n);
+}
 
 /*
  * Flip the order of the 4 bytes in each word in the given array (src) and
@@ -69,8 +73,8 @@ _mesa_swap2_copy( GLushort *dst, GLushort *src, GLuint n )
  * \param src the array with the source data we want to byte-swap.
  * \param n number of words.
  */
-void
-_mesa_swap4_copy( GLuint *dst, GLuint *src, GLuint n )
+static void
+swap4_copy( GLuint *dst, GLuint *src, GLuint n )
 {
    GLuint i, a, b;
    for (i = 0; i < n; i++) {
@@ -83,6 +87,11 @@ _mesa_swap4_copy( GLuint *dst, GLuint *src, GLuint n )
    }
 }
 
+void
+_mesa_swap4(GLuint *p, GLuint n)
+{
+   swap4_copy(p, p, n);
+}
 
 /**
  * Return the byte offset of a specific pixel in an image (1D, 2D or 3D).
@@ -958,3 +967,42 @@ _mesa_clip_blit(struct gl_context *ctx,
 
    return GL_TRUE;
 }
+
+/**
+ * Swap the bytes in a 2D image.
+ *
+ * Using the packing information, this swaps the bytes
+ * according to the format and type of the data being input.
+ * It takes into account various packing parameters like
+ * Alignment and RowLength.
+ */
+void
+_mesa_swap_bytes_2d_image(GLenum format, GLenum type,
+                          const struct gl_pixelstore_attrib *packing,
+                          GLsizei width, GLsizei height,
+                          GLvoid *dst, const GLvoid *src)
+{
+   GLint swapSize = _mesa_sizeof_packed_type(type);
+
+   assert(packing->SwapBytes);
+
+   if (swapSize == 2 || swapSize == 4) {
+      int swapsPerPixel = _mesa_bytes_per_pixel(format, type) / swapSize;
+      int stride = _mesa_image_row_stride(packing, width, format, type);
+      int row;
+      uint8_t *dstrow;
+      const uint8_t *srcrow;
+      assert(swapsPerPixel > 0);
+      assert(_mesa_bytes_per_pixel(format, type) % swapSize == 0);
+      dstrow = dst;
+      srcrow = src;
+      for (row = 0; row < height; row++) {
+         if (swapSize == 2)
+            swap2_copy((GLushort *)dstrow, (GLushort *)srcrow, width * swapsPerPixel);
+         else if (swapSize == 4)
+            swap4_copy((GLuint *)dstrow, (GLuint *)srcrow, width * swapsPerPixel);
+         dstrow += stride;
+         srcrow += stride;
+      }
+   }
+}
diff --git a/src/mesa/main/image.h b/src/mesa/main/image.h
index 501586bfbd3..b5075be4b86 100644
--- a/src/mesa/main/image.h
+++ b/src/mesa/main/image.h
@@ -35,22 +35,11 @@ struct gl_pixelstore_attrib;
 struct gl_framebuffer;
 
 extern void
-_mesa_swap2_copy(GLushort *dst, GLushort *src, GLuint n);
+_mesa_swap2(GLushort *p, GLuint n);
 
 extern void
-_mesa_swap4_copy(GLuint *dst, GLuint *src, GLuint n);
+_mesa_swap4(GLuint *p, GLuint n);
 
-static inline void
-_mesa_swap2(GLushort *p, GLuint n)
-{
-   _mesa_swap2_copy(p, p, n);
-}
-
-static inline void
-_mesa_swap4(GLuint *p, GLuint n)
-{
-   _mesa_swap4_copy(p, p, n);
-}
 
 extern GLintptr
 _mesa_image_offset( GLuint dimensions,
@@ -146,5 +135,10 @@ _mesa_clip_blit(struct gl_context *ctx,
                 GLint *srcX0, GLint *srcY0, GLint *srcX1, GLint *srcY1,
                 GLint *dstX0, GLint *dstY0, GLint *dstX1, GLint *dstY1);
 
+void
+_mesa_swap_bytes_2d_image(GLenum format, GLenum type,
+                          const struct gl_pixelstore_attrib *packing,
+                          GLsizei width, GLsizei height,
+                          GLvoid *dst, const GLvoid *src);
 
 #endif
diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
index 12779446c6d..03a484c438c 100644
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -613,15 +613,8 @@ read_rgba_pixels( struct gl_context *ctx,
 done_swap:
    /* Handle byte swapping if required */
    if (packing->SwapBytes) {
-      GLint swapSize = _mesa_sizeof_packed_type(type);
-      if (swapSize == 2 || swapSize == 4) {
-         int swapsPerPixel = _mesa_bytes_per_pixel(format, type) / swapSize;
-         assert(_mesa_bytes_per_pixel(format, type) % swapSize == 0);
-         if (swapSize == 2)
-            _mesa_swap2((GLushort *) dst, width * height * swapsPerPixel);
-         else if (swapSize == 4)
-            _mesa_swap4((GLuint *) dst, width * height * swapsPerPixel);
-      }
+      _mesa_swap_bytes_2d_image(format, type, packing,
+                                width, height, dst, dst);
    }
 
 done_unmap:
diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index f62553dac24..a1fd33851f2 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -557,17 +557,9 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions,
 
    do_swap:
       /* Handle byte swapping if required */
-      if (ctx->Pack.SwapBytes) {
-         GLint swapSize = _mesa_sizeof_packed_type(type);
-         if (swapSize == 2 || swapSize == 4) {
-            int swapsPerPixel = _mesa_bytes_per_pixel(format, type) / swapSize;
-            assert(_mesa_bytes_per_pixel(format, type) % swapSize == 0);
-            if (swapSize == 2)
-               _mesa_swap2((GLushort *) dest, width * height * swapsPerPixel);
-            else if (swapSize == 4)
-               _mesa_swap4((GLuint *) dest, width * height * swapsPerPixel);
-         }
-      }
+      if (ctx->Pack.SwapBytes)
+         _mesa_swap_bytes_2d_image(format, type, &ctx->Pack,
+                                   width, height, dest, dest);
 
       /* Unmap the src texture buffer */
       ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + img);
diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index 539402668c4..e50964e79e4 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -727,19 +727,25 @@ texstore_rgba(TEXSTORE_PARAMS)
        */
       GLint swapSize = _mesa_sizeof_packed_type(srcType);
       if (swapSize == 2 || swapSize == 4) {
-         int bytesPerPixel = _mesa_bytes_per_pixel(srcFormat, srcType);
-         int swapsPerPixel = bytesPerPixel / swapSize;
-         int elementCount = srcWidth * srcHeight * srcDepth;
-         assert(bytesPerPixel % swapSize == 0);
-         tempImage = malloc(elementCount * bytesPerPixel);
+         int imageStride = _mesa_image_image_stride(srcPacking, srcWidth, srcHeight, srcFormat, srcType);
+         int bufferSize = imageStride * srcDepth;
+         int layer;
+         const uint8_t *src;
+         uint8_t *dst;
+
+         tempImage = malloc(bufferSize);
          if (!tempImage)
             return GL_FALSE;
-         if (swapSize == 2)
-            _mesa_swap2_copy(tempImage, (GLushort *) srcAddr,
-                             elementCount * swapsPerPixel);
-         else
-            _mesa_swap4_copy(tempImage, (GLuint *) srcAddr,
-                             elementCount * swapsPerPixel);
+         src = srcAddr;
+         dst = tempImage;
+         for (layer = 0; layer < srcDepth; layer++) {
+            _mesa_swap_bytes_2d_image(srcFormat, srcType,
+                                      srcPacking,
+                                      srcWidth, srcHeight,
+                                      dst, src);
+            src += imageStride;
+            dst += imageStride;
+         }
          srcAddr = tempImage;
       }
    }
diff --git a/src/mesa/swrast/s_drawpix.c b/src/mesa/swrast/s_drawpix.c
index 5393d50ea02..f05528d0d27 100644
--- a/src/mesa/swrast/s_drawpix.c
+++ b/src/mesa/swrast/s_drawpix.c
@@ -481,17 +481,17 @@ draw_rgba_pixels( struct gl_context *ctx, GLint x, GLint y,
           */
          GLint swapSize = _mesa_sizeof_packed_type(type);
          if (swapSize == 2 || swapSize == 4) {
-            int components = _mesa_components_in_format(format);
-            int elementCount = width * height * components;
-            tempImage = malloc(elementCount * swapSize);
+            int imageStride = _mesa_image_image_stride(unpack, width, height, format, type);
+
+            tempImage = malloc(imageStride);
             if (!tempImage) {
                _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
                return;
             }
-            if (swapSize == 2)
-               _mesa_swap2_copy(tempImage, (GLushort *) pixels, elementCount);
-            else
-               _mesa_swap4_copy(tempImage, (GLuint *) pixels, elementCount);
+
+            _mesa_swap_bytes_2d_image(format, type, unpack,
+                                      width, height, tempImage, pixels);
+
             pixels = tempImage;
          }
       }
-- 
cgit v1.2.3


From 5b6c7da460b8f6c908df7060ec0709a9848ce160 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@gmail.com>
Date: Tue, 25 Aug 2015 21:13:13 +1000
Subject: mesa: handle SwapBytes in compressed texture get code.

This case just wasn't handled, so add support for it.

Cc: "11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Brian Paul <brianp@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/mesa/main/texgetimage.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'src')

diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index a1fd33851f2..682b72755c7 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -361,6 +361,13 @@ get_tex_rgba_compressed(struct gl_context *ctx, GLuint dimensions,
                            tempSlice, RGBA32_FLOAT, srcStride,
                            width, height,
                            needsRebase ? rebaseSwizzle : NULL);
+
+      /* Handle byte swapping if required */
+      if (ctx->Pack.SwapBytes) {
+         _mesa_swap_bytes_2d_image(format, type, &ctx->Pack,
+                                   width, height, dest, dest);
+      }
+
       tempSlice += 4 * width * height;
    }
 
-- 
cgit v1.2.3


From 8185a02316cfb7dc3d64b8772af82ad2bb49754e Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 30 Jul 2015 02:48:36 +0100
Subject: mesa/formats: add some formats from GL3.3

GL3.3 added GL_ARB_texture_rgb10_a2ui, which specifies
a lot more things than just rgb10/a2ui.

While playing with ogl conform, one of the tests must have
attempted all valid formats for GL3.3, hitting the
unreachable here.

This adds the first chunk of formats that hit the
assert.

Cc: "11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Brian Paul <brianp@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/mesa/main/formats.c      | 63 ++++++++++++++++++++++++++++++++++++++++++++
 src/mesa/main/formats.csv    | 12 +++++++++
 src/mesa/main/formats.h      | 12 +++++++++
 src/mesa/main/glformats.c    | 24 +++++++++++++++++
 src/mesa/swrast/s_texfetch.c | 12 +++++++++
 5 files changed, 123 insertions(+)

(limited to 'src')

diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c
index 587221ca5a0..6f4ee0d457a 100644
--- a/src/mesa/main/formats.c
+++ b/src/mesa/main/formats.c
@@ -936,6 +936,8 @@ _mesa_uncompressed_format_to_type_and_comps(mesa_format format,
       return;
    case MESA_FORMAT_B5G6R5_UNORM:
    case MESA_FORMAT_R5G6B5_UNORM:
+   case MESA_FORMAT_B5G6R5_UINT:
+   case MESA_FORMAT_R5G6B5_UINT:
       *datatype = GL_UNSIGNED_SHORT_5_6_5;
       *comps = 3;
       return;
@@ -943,6 +945,8 @@ _mesa_uncompressed_format_to_type_and_comps(mesa_format format,
    case MESA_FORMAT_B4G4R4A4_UNORM:
    case MESA_FORMAT_A4R4G4B4_UNORM:
    case MESA_FORMAT_B4G4R4X4_UNORM:
+   case MESA_FORMAT_B4G4R4A4_UINT:
+   case MESA_FORMAT_A4R4G4B4_UINT:
       *datatype = GL_UNSIGNED_SHORT_4_4_4_4;
       *comps = 4;
       return;
@@ -950,6 +954,8 @@ _mesa_uncompressed_format_to_type_and_comps(mesa_format format,
    case MESA_FORMAT_B5G5R5A1_UNORM:
    case MESA_FORMAT_A1R5G5B5_UNORM:
    case MESA_FORMAT_B5G5R5X1_UNORM:
+   case MESA_FORMAT_B5G5R5A1_UINT:
+   case MESA_FORMAT_A1R5G5B5_UINT:
       *datatype = GL_UNSIGNED_SHORT_1_5_5_5_REV;
       *comps = 4;
       return;
@@ -960,6 +966,7 @@ _mesa_uncompressed_format_to_type_and_comps(mesa_format format,
       return;
 
    case MESA_FORMAT_A1B5G5R5_UNORM:
+   case MESA_FORMAT_A1B5G5R5_UINT:
       *datatype = GL_UNSIGNED_SHORT_5_5_5_1;
       *comps = 4;
       return;
@@ -994,19 +1001,23 @@ _mesa_uncompressed_format_to_type_and_comps(mesa_format format,
       return;
 
    case MESA_FORMAT_R3G3B2_UNORM:
+   case MESA_FORMAT_R3G3B2_UINT:
       *datatype = GL_UNSIGNED_BYTE_2_3_3_REV;
       *comps = 3;
       return;
    case MESA_FORMAT_A4B4G4R4_UNORM:
+   case MESA_FORMAT_A4B4G4R4_UINT:
       *datatype = GL_UNSIGNED_SHORT_4_4_4_4;
       *comps = 4;
       return;
 
    case MESA_FORMAT_R4G4B4A4_UNORM:
+   case MESA_FORMAT_R4G4B4A4_UINT:
       *datatype = GL_UNSIGNED_SHORT_4_4_4_4;
       *comps = 4;
       return;
    case MESA_FORMAT_R5G5B5A1_UNORM:
+   case MESA_FORMAT_R5G5B5A1_UINT:
       *datatype = GL_UNSIGNED_SHORT_1_5_5_5_REV;
       *comps = 4;
       return;
@@ -1022,6 +1033,7 @@ _mesa_uncompressed_format_to_type_and_comps(mesa_format format,
       return;
 
    case MESA_FORMAT_B2G3R3_UNORM:
+   case MESA_FORMAT_B2G3R3_UINT:
       *datatype = GL_UNSIGNED_BYTE_3_3_2;
       *comps = 3;
       return;
@@ -1972,6 +1984,57 @@ _mesa_format_matches_format_and_type(mesa_format mesa_format,
               type == GL_UNSIGNED_INT_2_10_10_10_REV &&
               !swapBytes);
 
+   case MESA_FORMAT_B5G6R5_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_SHORT_5_6_5;
+
+   case MESA_FORMAT_R5G6B5_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_SHORT_5_6_5_REV;
+
+   case MESA_FORMAT_B2G3R3_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_BYTE_3_3_2;
+
+   case MESA_FORMAT_R3G3B2_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_BYTE_2_3_3_REV;
+
+   case MESA_FORMAT_A4B4G4R4_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4 && !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4_REV && swapBytes)
+         return GL_TRUE;
+      return GL_FALSE;
+
+   case MESA_FORMAT_R4G4B4A4_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4_REV && !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4 && swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
+   case MESA_FORMAT_B4G4R4A4_UINT:
+      return format == GL_BGRA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4_REV &&
+         !swapBytes;
+
+   case MESA_FORMAT_A4R4G4B4_UINT:
+      return GL_FALSE;
+
+   case MESA_FORMAT_A1B5G5R5_UINT:
+      return format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_5_5_5_1 &&
+         !swapBytes;
+
+   case MESA_FORMAT_B5G5R5A1_UINT:
+      return format == GL_BGRA_INTEGER && type == GL_UNSIGNED_SHORT_1_5_5_5_REV &&
+         !swapBytes;
+
+   case MESA_FORMAT_A1R5G5B5_UINT:
+      return format == GL_BGRA_INTEGER && type == GL_UNSIGNED_SHORT_5_5_5_1 &&
+         !swapBytes;
+
+   case MESA_FORMAT_R5G5B5A1_UINT:
+      return format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_1_5_5_5_REV;
+
    case MESA_FORMAT_R9G9B9E5_FLOAT:
       return format == GL_RGB && type == GL_UNSIGNED_INT_5_9_9_9_REV &&
          !swapBytes;
diff --git a/src/mesa/main/formats.csv b/src/mesa/main/formats.csv
index 80729d98787..9c54ffe1fa7 100644
--- a/src/mesa/main/formats.csv
+++ b/src/mesa/main/formats.csv
@@ -190,6 +190,18 @@ MESA_FORMAT_B10G10R10A2_UINT              , packed, 1, 1, u10 , u10 , u10 , u2
 MESA_FORMAT_R10G10B10A2_UINT              , packed, 1, 1, u10 , u10 , u10 , u2  , xyzw, rgb
 MESA_FORMAT_A2B10G10R10_UINT              , packed, 1, 1, u2  , u10 , u10 , u10 , wzyx, rgb
 MESA_FORMAT_A2R10G10B10_UINT              , packed, 1, 1, u2  , u10 , u10 , u10 , yzwx, rgb
+MESA_FORMAT_B5G6R5_UINT                   , packed, 1, 1, u5  , u6  ,  u5 ,     , zyx1, rgb
+MESA_FORMAT_R5G6B5_UINT                   , packed, 1, 1, u5  , u6  ,  u5 ,     , xyz1, rgb
+MESA_FORMAT_B2G3R3_UINT                   , packed, 1, 1, u2  , u3  ,  u3 ,     , zyx1, rgb
+MESA_FORMAT_R3G3B2_UINT                   , packed, 1, 1, u3  , u3  ,  u2 ,     , xyz1, rgb
+MESA_FORMAT_A4B4G4R4_UINT                 , packed, 1, 1, u4  , u4  ,  u4 , u4  , wzyx, rgb
+MESA_FORMAT_R4G4B4A4_UINT                 , packed, 1, 1, u4  , u4  ,  u4 , u4  , xyzw, rgb
+MESA_FORMAT_B4G4R4A4_UINT                 , packed, 1, 1, u4  , u4  ,  u4 , u4  , zyxw, rgb
+MESA_FORMAT_A4R4G4B4_UINT                 , packed, 1, 1, u4  , u4  ,  u4 , u4  , yzwx, rgb
+MESA_FORMAT_A1B5G5R5_UINT                 , packed, 1, 1, u1  , u5  ,  u5 , u5  , wzyx, rgb
+MESA_FORMAT_B5G5R5A1_UINT                 , packed, 1, 1, u5  , u5  ,  u5 , u1  , zyxw, rgb
+MESA_FORMAT_A1R5G5B5_UINT                 , packed, 1, 1, u1  , u5  ,  u5 , u5  , yzwx, rgb
+MESA_FORMAT_R5G5B5A1_UINT                 , packed, 1, 1, u5  , u5  ,  u5 , u1  , xyzw, rgb
 
 # Array signed/unsigned non-normalized integer formats
 MESA_FORMAT_A_UINT8                       , array , 1, 1, u8  ,     ,     ,     , 000x, rgb
diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h
index ccb09b263ff..edad103c2cd 100644
--- a/src/mesa/main/formats.h
+++ b/src/mesa/main/formats.h
@@ -475,6 +475,18 @@ typedef enum
    MESA_FORMAT_R10G10B10A2_UINT, /* AABB BBBB BBBB GGGG GGGG GGRR RRRR RRRR */
    MESA_FORMAT_A2B10G10R10_UINT, /* RRRR RRRR RRGG GGGG GGGG BBBB BBBB BBAA */
    MESA_FORMAT_A2R10G10B10_UINT, /* BBBB BBBB BBGG GGGG GGGG RRRR RRRR RRAA */
+   MESA_FORMAT_B5G6R5_UINT,                          /* RRRR RGGG GGGB BBBB */
+   MESA_FORMAT_R5G6B5_UINT,                          /* BBBB BGGG GGGR RRRR */
+   MESA_FORMAT_B2G3R3_UINT,                                    /* RRRG GGBB */
+   MESA_FORMAT_R3G3B2_UINT,                                    /* BBGG GRRR */
+   MESA_FORMAT_A4B4G4R4_UINT,                        /* RRRR GGGG BBBB AAAA */
+   MESA_FORMAT_R4G4B4A4_UINT,                        /* AAAA BBBB GGGG RRRR */
+   MESA_FORMAT_B4G4R4A4_UINT,                        /* AAAA RRRR GGGG BBBB */
+   MESA_FORMAT_A4R4G4B4_UINT,                        /* BBBB GGGG RRRR AAAA */
+   MESA_FORMAT_A1B5G5R5_UINT,                        /* RRRR RGGG GGBB BBBA */
+   MESA_FORMAT_B5G5R5A1_UINT,                        /* ARRR RRGG GGGB BBBB */
+   MESA_FORMAT_A1R5G5B5_UINT,                        /* BBBB BGGG GGRR RRRA */
+   MESA_FORMAT_R5G5B5A1_UINT,                        /* ABBB BBGG GGGR RRRR */
 
    /* Array signed/unsigned non-normalized integer formats */
    MESA_FORMAT_A_UINT8,
diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index 3d12a01777c..18d08696c13 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -2816,12 +2816,16 @@ _mesa_format_from_format_and_type(GLenum format, GLenum type)
          return MESA_FORMAT_B5G6R5_UNORM;
       else if (format == GL_BGR)
          return MESA_FORMAT_R5G6B5_UNORM;
+      else if (format == GL_RGB_INTEGER)
+         return MESA_FORMAT_B5G6R5_UINT;
       break;
    case GL_UNSIGNED_SHORT_5_6_5_REV:
       if (format == GL_RGB)
          return MESA_FORMAT_R5G6B5_UNORM;
       else if (format == GL_BGR)
          return MESA_FORMAT_B5G6R5_UNORM;
+      else if (format == GL_RGB_INTEGER)
+         return MESA_FORMAT_R5G6B5_UINT;
       break;
    case GL_UNSIGNED_SHORT_4_4_4_4:
       if (format == GL_RGBA)
@@ -2830,6 +2834,10 @@ _mesa_format_from_format_and_type(GLenum format, GLenum type)
          return MESA_FORMAT_A4R4G4B4_UNORM;
       else if (format == GL_ABGR_EXT)
          return MESA_FORMAT_R4G4B4A4_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_A4B4G4R4_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_A4R4G4B4_UINT;
       break;
    case GL_UNSIGNED_SHORT_4_4_4_4_REV:
       if (format == GL_RGBA)
@@ -2838,26 +2846,42 @@ _mesa_format_from_format_and_type(GLenum format, GLenum type)
          return MESA_FORMAT_B4G4R4A4_UNORM;
       else if (format == GL_ABGR_EXT)
          return MESA_FORMAT_A4B4G4R4_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_R4G4B4A4_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_B4G4R4A4_UINT;
       break;
    case GL_UNSIGNED_SHORT_5_5_5_1:
       if (format == GL_RGBA)
          return MESA_FORMAT_A1B5G5R5_UNORM;
       else if (format == GL_BGRA)
          return MESA_FORMAT_A1R5G5B5_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_A1B5G5R5_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_A1R5G5B5_UINT;
       break;
    case GL_UNSIGNED_SHORT_1_5_5_5_REV:
       if (format == GL_RGBA)
          return MESA_FORMAT_R5G5B5A1_UNORM;
       else if (format == GL_BGRA)
          return MESA_FORMAT_B5G5R5A1_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_R5G5B5A1_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_B5G5R5A1_UINT;
       break;
    case GL_UNSIGNED_BYTE_3_3_2:
       if (format == GL_RGB)
          return MESA_FORMAT_B2G3R3_UNORM;
+      else if (format == GL_RGB_INTEGER)
+         return MESA_FORMAT_B2G3R3_UINT;
       break;
    case GL_UNSIGNED_BYTE_2_3_3_REV:
       if (format == GL_RGB)
          return MESA_FORMAT_R3G3B2_UNORM;
+      else if (format == GL_RGB_INTEGER)
+         return MESA_FORMAT_R3G3B2_UINT;
       break;
    case GL_UNSIGNED_INT_5_9_9_9_REV:
       if (format == GL_RGB)
diff --git a/src/mesa/swrast/s_texfetch.c b/src/mesa/swrast/s_texfetch.c
index acb06e6ae92..d319b3a5349 100644
--- a/src/mesa/swrast/s_texfetch.c
+++ b/src/mesa/swrast/s_texfetch.c
@@ -294,6 +294,18 @@ texfetch_funcs[] =
    FETCH_NULL(R10G10B10A2_UINT),
    FETCH_NULL(A2B10G10R10_UINT),
    FETCH_NULL(A2R10G10B10_UINT),
+   FETCH_NULL(B5G6R5_UINT),
+   FETCH_NULL(R5G6B5_UINT),
+   FETCH_NULL(B2G3R3_UINT),
+   FETCH_NULL(R3G3B2_UINT),
+   FETCH_NULL(A4B4G4R4_UINT),
+   FETCH_NULL(R4G4B4A4_UINT),
+   FETCH_NULL(B4G4R4A4_UINT),
+   FETCH_NULL(A4R4G4B4_UINT),
+   FETCH_NULL(A1B5G5R5_UINT),
+   FETCH_NULL(B5G5R5A1_UINT),
+   FETCH_NULL(A1R5G5B5_UINT),
+   FETCH_NULL(R5G5B5A1_UINT),
 
    /* Array signed/unsigned non-normalized integer formats */
    FETCH_NULL(A_UINT8),
-- 
cgit v1.2.3


From c3c242070e868225a81e1afe5fb424c33eb94c2f Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 30 Jul 2015 02:48:37 +0100
Subject: mesa/formats: 8-bit channel integer formats addition

Add enough 8-bit channel formats to handle all the
different things CTS throws at us.

Cc: "11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Brian Paul <brianp@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/mesa/main/formats.c      | 43 +++++++++++++++++++++++++++++++++++++++++++
 src/mesa/main/formats.csv    |  4 ++++
 src/mesa/main/formats.h      |  5 +++++
 src/mesa/main/glformats.c    |  8 ++++++++
 src/mesa/swrast/s_texfetch.c |  4 ++++
 5 files changed, 64 insertions(+)

(limited to 'src')

diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c
index 6f4ee0d457a..982103f3798 100644
--- a/src/mesa/main/formats.c
+++ b/src/mesa/main/formats.c
@@ -926,6 +926,10 @@ _mesa_uncompressed_format_to_type_and_comps(mesa_format format,
    case MESA_FORMAT_R8G8B8X8_UNORM:
    case MESA_FORMAT_B8G8R8X8_UNORM:
    case MESA_FORMAT_X8R8G8B8_UNORM:
+   case MESA_FORMAT_A8B8G8R8_UINT:
+   case MESA_FORMAT_R8G8B8A8_UINT:
+   case MESA_FORMAT_B8G8R8A8_UINT:
+   case MESA_FORMAT_A8R8G8B8_UINT:
       *datatype = GL_UNSIGNED_BYTE;
       *comps = 4;
       return;
@@ -2035,6 +2039,45 @@ _mesa_format_matches_format_and_type(mesa_format mesa_format,
    case MESA_FORMAT_R5G5B5A1_UINT:
       return format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_1_5_5_5_REV;
 
+   case MESA_FORMAT_A8B8G8R8_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 && !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV && swapBytes)
+         return GL_TRUE;
+      return GL_FALSE;
+
+   case MESA_FORMAT_A8R8G8B8_UINT:
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 &&
+          !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV &&
+          swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
+   case MESA_FORMAT_R8G8B8A8_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV &&
+          !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 && swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
+   case MESA_FORMAT_B8G8R8A8_UINT:
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV &&
+          !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 && swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
    case MESA_FORMAT_R9G9B9E5_FLOAT:
       return format == GL_RGB && type == GL_UNSIGNED_INT_5_9_9_9_REV &&
          !swapBytes;
diff --git a/src/mesa/main/formats.csv b/src/mesa/main/formats.csv
index 9c54ffe1fa7..11228e0f3df 100644
--- a/src/mesa/main/formats.csv
+++ b/src/mesa/main/formats.csv
@@ -186,6 +186,10 @@ MESA_FORMAT_RGBX_FLOAT32                  , array , 1, 1, f32 , f32 , f32 , x32
 MESA_FORMAT_Z_FLOAT32                     , array , 1, 1, f32 ,     ,     ,     , x___, zs
 
 # Packed signed/unsigned non-normalized integer formats
+MESA_FORMAT_A8B8G8R8_UINT                 , packed, 1, 1, u8  , u8  , u8  , u8  , wzyx, rgb
+MESA_FORMAT_A8R8G8B8_UINT                 , packed, 1, 1, u8  , u8  , u8  , u8  , yzwx, rgb
+MESA_FORMAT_R8G8B8A8_UINT                 , packed, 1, 1, u8  , u8  , u8  , u8  , xyzw, rgb
+MESA_FORMAT_B8G8R8A8_UINT                 , packed, 1, 1, u8  , u8  , u8  , u8  , zyxw, rgb
 MESA_FORMAT_B10G10R10A2_UINT              , packed, 1, 1, u10 , u10 , u10 , u2  , zyxw, rgb
 MESA_FORMAT_R10G10B10A2_UINT              , packed, 1, 1, u10 , u10 , u10 , u2  , xyzw, rgb
 MESA_FORMAT_A2B10G10R10_UINT              , packed, 1, 1, u2  , u10 , u10 , u10 , wzyx, rgb
diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h
index edad103c2cd..2b8146ce932 100644
--- a/src/mesa/main/formats.h
+++ b/src/mesa/main/formats.h
@@ -471,6 +471,11 @@ typedef enum
    MESA_FORMAT_Z_FLOAT32,
 
    /* Packed signed/unsigned non-normalized integer formats */
+
+   MESA_FORMAT_A8B8G8R8_UINT,    /* RRRR RRRR GGGG GGGG BBBB BBBB AAAA AAAA */
+   MESA_FORMAT_A8R8G8B8_UINT,    /* BBBB BBBB GGGG GGGG RRRR RRRR AAAA AAAA */
+   MESA_FORMAT_R8G8B8A8_UINT,    /* AAAA AAAA BBBB BBBB GGGG GGGG RRRR RRRR */
+   MESA_FORMAT_B8G8R8A8_UINT,    /* AAAA AAAA RRRR RRRR GGGG GGGG BBBB BBBB */
    MESA_FORMAT_B10G10R10A2_UINT, /* AARR RRRR RRRR GGGG GGGG GGBB BBBB BBBB */
    MESA_FORMAT_R10G10B10A2_UINT, /* AABB BBBB BBBB GGGG GGGG GGRR RRRR RRRR */
    MESA_FORMAT_A2B10G10R10_UINT, /* RRRR RRRR RRGG GGGG GGGG BBBB BBBB BBAA */
diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index 18d08696c13..c1d3c7df488 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -2916,6 +2916,10 @@ _mesa_format_from_format_and_type(GLenum format, GLenum type)
          return MESA_FORMAT_A8R8G8B8_UNORM;
       else if (format == GL_ABGR_EXT)
          return MESA_FORMAT_R8G8B8A8_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_A8B8G8R8_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_A8R8G8B8_UINT;
       break;
    case GL_UNSIGNED_INT_8_8_8_8_REV:
       if (format == GL_RGBA)
@@ -2924,6 +2928,10 @@ _mesa_format_from_format_and_type(GLenum format, GLenum type)
          return MESA_FORMAT_B8G8R8A8_UNORM;
       else if (format == GL_ABGR_EXT)
          return MESA_FORMAT_A8B8G8R8_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_R8G8B8A8_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_B8G8R8A8_UINT;
       break;
    case GL_UNSIGNED_SHORT_8_8_MESA:
       if (format == GL_YCBCR_MESA)
diff --git a/src/mesa/swrast/s_texfetch.c b/src/mesa/swrast/s_texfetch.c
index d319b3a5349..27de9b3e083 100644
--- a/src/mesa/swrast/s_texfetch.c
+++ b/src/mesa/swrast/s_texfetch.c
@@ -290,6 +290,10 @@ texfetch_funcs[] =
    },
 
    /* Packed signed/unsigned non-normalized integer formats */
+   FETCH_NULL(A8B8G8R8_UINT),
+   FETCH_NULL(A8R8G8B8_UINT),
+   FETCH_NULL(R8G8B8A8_UINT),
+   FETCH_NULL(B8G8R8A8_UINT),
    FETCH_NULL(B10G10R10A2_UINT),
    FETCH_NULL(R10G10B10A2_UINT),
    FETCH_NULL(A2B10G10R10_UINT),
-- 
cgit v1.2.3


From 6a3e1fb958778e00e8fe2d860b6327fc4409c148 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 1 Sep 2015 15:13:45 +1000
Subject: st/readpixels: fix accel path for skipimages.

We don't need to use the 3D image address here, as that will
include SKIP_IMAGES, and we are only blitting a single
2D image anyway, so just use the 2D path.

This fixes some memory overruns under CTS
 packed_pixels.packed_pixels_pixelstore when PACK_SKIP_IMAGES
is used.

Cc: "11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/mesa/state_tracker/st_cb_readpixels.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c
index 6ff6cf6f6d6..bb36e6969d6 100644
--- a/src/mesa/state_tracker/st_cb_readpixels.c
+++ b/src/mesa/state_tracker/st_cb_readpixels.c
@@ -238,9 +238,9 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y,
       GLuint row;
 
       for (row = 0; row < (unsigned) height; row++) {
-         GLvoid *dest = _mesa_image_address3d(pack, pixels,
+         GLvoid *dest = _mesa_image_address2d(pack, pixels,
                                               width, height, format,
-                                              type, 0, row, 0);
+                                              type, row, 0);
          memcpy(dest, map, bytesPerRow);
          map += tex_xfer->stride;
       }
-- 
cgit v1.2.3


From b4a70401f52e5d7e08c94715b250ea1de8f63d15 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 1 Sep 2015 15:44:46 +1000
Subject: texcompress_s3tc/fxt1: fix stride checks (v1.1)

The fastpath currently checks RowLength != width, but
if you have a RowLength of 7 and an Alignment of 4, then
that shouldn't match.

Align the RowLength to the pack alignment before comparing.

This fixes compressed cases in CTS packed_pixels_pixelstore
test when SKIP_PIXELS is enabled, which causes row length
to get set.

v1.1: add fxt1 fix (Iago)

Cc: "11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/mesa/main/texcompress_fxt1.c | 2 +-
 src/mesa/main/texcompress_s3tc.c | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/texcompress_fxt1.c b/src/mesa/main/texcompress_fxt1.c
index f06f048820d..d605e255962 100644
--- a/src/mesa/main/texcompress_fxt1.c
+++ b/src/mesa/main/texcompress_fxt1.c
@@ -65,7 +65,7 @@ _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
    if (srcFormat != GL_RGB ||
        srcType != GL_UNSIGNED_BYTE ||
        ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGB/GLubyte */
       GLubyte *tempImageSlices[1];
diff --git a/src/mesa/main/texcompress_s3tc.c b/src/mesa/main/texcompress_s3tc.c
index 7ce3cb88ec7..6cfe06a9910 100644
--- a/src/mesa/main/texcompress_s3tc.c
+++ b/src/mesa/main/texcompress_s3tc.c
@@ -130,7 +130,7 @@ _mesa_texstore_rgb_dxt1(TEXSTORE_PARAMS)
    if (srcFormat != GL_RGB ||
        srcType != GL_UNSIGNED_BYTE ||
        ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGB/GLubyte */
       GLubyte *tempImageSlices[1];
@@ -187,7 +187,7 @@ _mesa_texstore_rgba_dxt1(TEXSTORE_PARAMS)
    if (srcFormat != GL_RGBA ||
        srcType != GL_UNSIGNED_BYTE ||
        ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGBA/GLubyte */
       GLubyte *tempImageSlices[1];
@@ -244,7 +244,7 @@ _mesa_texstore_rgba_dxt3(TEXSTORE_PARAMS)
    if (srcFormat != GL_RGBA ||
        srcType != GL_UNSIGNED_BYTE ||
        ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGBA/GLubyte */
       GLubyte *tempImageSlices[1];
@@ -300,7 +300,7 @@ _mesa_texstore_rgba_dxt5(TEXSTORE_PARAMS)
    if (srcFormat != GL_RGBA ||
        srcType != GL_UNSIGNED_BYTE ||
        ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGBA/GLubyte */
       GLubyte *tempImageSlices[1];
-- 
cgit v1.2.3


From 32769ac016dee4ce5767a922f91de47df4ce984d Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 1 Sep 2015 15:57:02 +1000
Subject: mesa/readpixels: check strides are equal before skipping conversion

The CTS packed_pixels test checks that readpixels doesn't write
into the space between rows; however, we fail that here unless
we check that both the format and the stride match.

This fixes all the core mesa problems with CTS packed_pixels
tests.

Cc: "11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/mesa/main/readpix.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
index 03a484c438c..76ef8ee9487 100644
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -523,7 +523,8 @@ read_rgba_pixels( struct gl_context *ctx,
        * convert to, then we can convert directly into the dst buffer and avoid
        * the final conversion/copy from the rgba buffer to the dst buffer.
        */
-      if (dst_format == rgba_format) {
+      if (dst_format == rgba_format &&
+          dst_stride == rgba_stride) {
          need_convert = false;
          rgba = dst;
       } else {
-- 
cgit v1.2.3


From 90ac5fb6bb8b663f722b5bc3fed1c10e27b8c050 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 25 Aug 2015 11:18:48 +1000
Subject: r600g/sb: dump sampler/resource index modes for textures.

This just aids debugging.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r600/sb/sb_bc_dump.cpp | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
index 5232782791d..e6a6c16669a 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
@@ -466,6 +466,10 @@ void bc_dump::dump(fetch_node& n) {
 		for (unsigned k = 0; k < 3; ++k)
 			if (n.bc.offset[k])
 				s << " O" << chans[k] << ":" << n.bc.offset[k];
+		if (ctx.is_egcm() && n.bc.resource_index_mode)
+			s << " RIM:SQ_CF_INDEX_" << n.bc.resource_index_mode;
+		if (ctx.is_egcm() && n.bc.sampler_index_mode)
+			s << " SID:SQ_CF_INDEX_" << n.bc.sampler_index_mode;
 	}
 
 	sblog << s.str() << "\n";
-- 
cgit v1.2.3


From d503bbbf3082b5b301019721466d6fd2b41da03a Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 9 Jul 2015 16:30:26 +1000
Subject: r600g: add support for streams to the assembler.

This just adds support to the assembler dumper and allows
stream instructions to be generated. Also fix up the stream
debugging to add stream info.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r600/eg_asm.c          | 1 +
 src/gallium/drivers/r600/r600_asm.c        | 2 ++
 src/gallium/drivers/r600/r600_asm.h        | 1 +
 src/gallium/drivers/r600/r600_shader.c     | 6 ++++--
 src/gallium/drivers/r600/sb/sb_bc_dump.cpp | 3 +++
 5 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 42e8b0b1761..c32d317a1a5 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -115,6 +115,7 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
 					S_SQ_CF_WORD1_BARRIER(1) |
 					S_SQ_CF_WORD1_COND(cf->cond) |
 					S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
+					S_SQ_CF_WORD1_COUNT(cf->count) |
 					S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
 		}
 	}
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index b514c58f9d8..9b356bad4a9 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2029,6 +2029,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
 					fprintf(stderr, "CND:%X ", cf->cond);
 				if (cf->pop_count)
 					fprintf(stderr, "POP:%X ", cf->pop_count);
+				if (cf->count && (cfop->flags & CF_EMIT))
+					fprintf(stderr, "STREAM%d ", cf->count);
 				if (cf->end_of_program)
 					fprintf(stderr, "EOP ");
 				fprintf(stderr, "\n");
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index e37d92672bd..b282907db5f 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -149,6 +149,7 @@ struct r600_bytecode_cf {
 	unsigned			id;
 	unsigned			cond;
 	unsigned			pop_count;
+	unsigned			count;
 	unsigned			cf_addr; /* control flow addr */
 	struct r600_bytecode_kcache		kcache[4];
 	unsigned			r6xx_uses_waterfall;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 1ab389c830b..153f9a22cea 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -93,8 +93,10 @@ static void r600_dump_streamout(struct pipe_stream_output_info *so)
 	for (i = 0; i < so->num_outputs; i++) {
 		unsigned mask = ((1 << so->output[i].num_components) - 1) <<
 				so->output[i].start_component;
-		fprintf(stderr, "  %i: MEM_STREAM0_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n",
-			i, so->output[i].output_buffer,
+		fprintf(stderr, "  %i: MEM_STREAM%d_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n",
+			i,
+			so->output[i].stream,
+			so->output[i].output_buffer,
 			so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1,
 			so->output[i].register_index,
 			mask & 1 ? "x" : "",
diff --git a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
index e6a6c16669a..0fc73c419a6 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
@@ -182,6 +182,9 @@ void bc_dump::dump(cf_node& n) {
 
 		if (n.bc.pop_count)
 			s << " POP:" << n.bc.pop_count;
+
+		if (n.bc.count && (n.bc.op_ptr->flags & CF_EMIT))
+			s << " STREAM" << n.bc.count;
 	}
 
 	if (!n.bc.barrier)
-- 
cgit v1.2.3


From 3d497e0d915df8b71cd845c2cfbc6703db313628 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 9 Jul 2015 16:36:16 +1000
Subject: r600g/sb: add support for multiple streams to SB backend

This adds a peephole and removes an assert that isn't
actually valid with some of the stream emit instructions.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r600/sb/sb_bc_finalize.cpp |  2 --
 src/gallium/drivers/r600/sb/sb_bc_parser.cpp   | 18 +++++++++++++++---
 2 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index dadee456a1f..522ff9d956e 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -764,8 +764,6 @@ void bc_finalizer::finalize_cf(cf_node* c) {
 			mask |= (1 << chan);
 		}
 
-		assert(reg >= 0 && mask);
-
 		if (reg >= 0)
 			update_ngpr(reg);
 
diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
index c4799270d9f..19bd0784a61 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -757,10 +757,22 @@ int bc_parser::prepare_ir() {
 			c->bc.end_of_program = eop;
 
 		} else if (flags & CF_EMIT) {
-			c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE;
+			/* quick peephole */
+			cf_node *prev = static_cast<cf_node *>(c->prev);
+			if (c->bc.op == CF_OP_CUT_VERTEX &&
+				prev && prev->is_valid() &&
+				prev->bc.op == CF_OP_EMIT_VERTEX &&
+				c->bc.count == prev->bc.count) {
+				prev->bc.set_op(CF_OP_EMIT_CUT_VERTEX);
+				prev->bc.end_of_program = c->bc.end_of_program;
+				c->remove();
+			}
+			else {
+				c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE;
 
-			c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
-			c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+				c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+				c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+			}
 		}
 	}
 
-- 
cgit v1.2.3


From 3bfa345c1eb3b0ec008d29fc0c62fe941412e4c6 Mon Sep 17 00:00:00 2001
From: Glenn Kennard <glenn.kennard@gmail.com>
Date: Thu, 9 Jul 2015 16:37:28 +1000
Subject: r600g: add multiple stream support for geom shaders

This patch is taken from work by Glenn and myself,
and I've spent some time making it all work here.

This adds support for the multiple streams part of
ARB_gpu_shader5 to r600g.

It doesn't enable ARB_gpu_shader5 yet.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r600/evergreen_state.c |  29 +++--
 src/gallium/drivers/r600/r600_pipe.c       |   2 +-
 src/gallium/drivers/r600/r600_shader.c     | 203 +++++++++++++++++++++--------
 src/gallium/drivers/r600/r600_shader.h     |   5 +-
 src/gallium/drivers/r600/r600_state.c      |   6 +-
 5 files changed, 171 insertions(+), 74 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 9ef92741879..0d4b5987246 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3012,8 +3012,12 @@ void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader
 	struct r600_command_buffer *cb = &shader->command_buffer;
 	struct r600_shader *rshader = &shader->shader;
 	struct r600_shader *cp_shader = &shader->gs_copy_shader->shader;
-	unsigned gsvs_itemsize =
-			(cp_shader->ring_item_size * shader->selector->gs_max_out_vertices) >> 2;
+	unsigned gsvs_itemsizes[4] = {
+			(cp_shader->ring_item_sizes[0] * shader->selector->gs_max_out_vertices) >> 2,
+			(cp_shader->ring_item_sizes[1] * shader->selector->gs_max_out_vertices) >> 2,
+			(cp_shader->ring_item_sizes[2] * shader->selector->gs_max_out_vertices) >> 2,
+			(cp_shader->ring_item_sizes[3] * shader->selector->gs_max_out_vertices) >> 2
+	};
 
 	r600_init_command_buffer(cb, 64);
 
@@ -3032,21 +3036,24 @@ void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader
 				S_028B90_ENABLE(shader->selector->gs_num_invocations > 0));
 	}
 	r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4);
-	r600_store_value(cb, cp_shader->ring_item_size >> 2);
-	r600_store_value(cb, 0);
-	r600_store_value(cb, 0);
-	r600_store_value(cb, 0);
+	r600_store_value(cb, cp_shader->ring_item_sizes[0] >> 2);
+	r600_store_value(cb, cp_shader->ring_item_sizes[1] >> 2);
+	r600_store_value(cb, cp_shader->ring_item_sizes[2] >> 2);
+	r600_store_value(cb, cp_shader->ring_item_sizes[3] >> 2);
 
 	r600_store_context_reg(cb, R_028900_SQ_ESGS_RING_ITEMSIZE,
-			       (rshader->ring_item_size) >> 2);
+			       (rshader->ring_item_sizes[0]) >> 2);
 
 	r600_store_context_reg(cb, R_028904_SQ_GSVS_RING_ITEMSIZE,
-			       gsvs_itemsize);
+			       gsvs_itemsizes[0] +
+			       gsvs_itemsizes[1] +
+			       gsvs_itemsizes[2] +
+			       gsvs_itemsizes[3]);
 
 	r600_store_context_reg_seq(cb, R_02892C_SQ_GSVS_RING_OFFSET_1, 3);
-	r600_store_value(cb, gsvs_itemsize);
-	r600_store_value(cb, gsvs_itemsize);
-	r600_store_value(cb, gsvs_itemsize);
+	r600_store_value(cb, gsvs_itemsizes[0]);
+	r600_store_value(cb, gsvs_itemsizes[0] + gsvs_itemsizes[1]);
+	r600_store_value(cb, gsvs_itemsizes[0] + gsvs_itemsizes[1] + gsvs_itemsizes[2]);
 
 	/* FIXME calculate these values somehow ??? */
 	r600_store_context_reg_seq(cb, R_028A54_GS_PER_ES, 3);
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index f6efaa312f2..fd9c16c6a96 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -358,7 +358,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
 		return 16384;
 	case PIPE_CAP_MAX_VERTEX_STREAMS:
-		return 1;
+		return family >= CHIP_CEDAR ? 4 : 1;
 
 	case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
 		return 2047;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 153f9a22cea..25a251e8187 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -311,7 +311,8 @@ struct r600_shader_ctx {
 	int					gs_out_ring_offset;
 	int					gs_next_vertex;
 	struct r600_shader	*gs_for_vs;
-	int					gs_export_gpr_treg;
+	int					gs_export_gpr_tregs[4];
+	const struct pipe_stream_output_info	*gs_stream_output_info;
 	unsigned				enabled_stream_buffers_mask;
 };
 
@@ -320,7 +321,7 @@ struct r600_shader_tgsi_instruction {
 	int (*process)(struct r600_shader_ctx *ctx);
 };
 
-static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind);
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind);
 static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
 static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
@@ -1337,9 +1338,11 @@ static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
 	return 0;
 }
 
-static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so)
+static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so,
+						  int stream, unsigned *stream_item_size)
 {
 	unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS];
+	unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
 	int i, j, r;
 
 	/* Sanity checking. */
@@ -1359,8 +1362,9 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
 
 	/* Initialize locations where the outputs are stored. */
 	for (i = 0; i < so->num_outputs; i++) {
-		so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr;
 
+		so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr;
+		start_comp[i] = so->output[i].start_component;
 		/* Lower outputs with dst_offset < start_component.
 		 *
 		 * We can only output 4D vectors with a write mask, e.g. we can
@@ -1386,7 +1390,7 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
 				if (r)
 					return r;
 			}
-			so->output[i].start_component = 0;
+			start_comp[i] = 0;
 			so_gpr[i] = tmp;
 		}
 	}
@@ -1395,18 +1399,21 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
 	for (i = 0; i < so->num_outputs; i++) {
 		struct r600_bytecode_output output;
 
+		if (stream != -1 && stream != so->output[i].output_buffer)
+			continue;
+
 		memset(&output, 0, sizeof(struct r600_bytecode_output));
 		output.gpr = so_gpr[i];
-		output.elem_size = so->output[i].num_components;
-		output.array_base = so->output[i].dst_offset - so->output[i].start_component;
+		output.elem_size = so->output[i].num_components - 1;
+		if (output.elem_size == 2)
+			output.elem_size = 3; // 3 not supported, write 4 with junk at end
+		output.array_base = so->output[i].dst_offset - start_comp[i];
 		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
 		output.burst_count = 1;
 		/* array_size is an upper limit for the burst_count
 		 * with MEM_STREAM instructions */
 		output.array_size = 0xFFF;
-		output.comp_mask = ((1 << so->output[i].num_components) - 1) << so->output[i].start_component;
-
-		ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer);
+		output.comp_mask = ((1 << so->output[i].num_components) - 1) << start_comp[i];
 
 		if (ctx->bc->chip_class >= EVERGREEN) {
 			switch (so->output[i].output_buffer) {
@@ -1423,6 +1430,9 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
 				output.op = CF_OP_MEM_STREAM0_BUF3;
 				break;
 			}
+			output.op += so->output[i].stream * 4;
+			assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3);
+			ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << so->output[i].stream * 4;
 		} else {
 			switch (so->output[i].output_buffer) {
 			case 0:
@@ -1438,6 +1448,7 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
 				output.op = CF_OP_MEM_STREAM3;
 					break;
 			}
+			ctx->enabled_stream_buffers_mask |= 1 << so->output[i].output_buffer;
 		}
 		r = r600_bytecode_add_output(ctx->bc, &output);
 		if (r)
@@ -1490,7 +1501,8 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
 	struct r600_bytecode_output output;
 	struct r600_bytecode_cf *cf_jump, *cf_pop,
 		*last_exp_pos = NULL, *last_exp_param = NULL;
-	int i, next_clip_pos = 61, next_param = 0;
+	int i, j, next_clip_pos = 61, next_param = 0;
+	int ring;
 
 	cshader = calloc(1, sizeof(struct r600_pipe_shader));
 	if (!cshader)
@@ -1510,6 +1522,9 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
 
 	ctx.bc->isa = rctx->isa;
 
+	cf_jump = NULL;
+	memset(cshader->shader.ring_item_sizes, 0, sizeof(cshader->shader.ring_item_sizes));
+
 	/* R0.x = R0.x & 0x3fffffff */
 	memset(&alu, 0, sizeof(alu));
 	alu.op = ALU_OP2_AND_INT;
@@ -1528,22 +1543,10 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
 	alu.last = 1;
 	r600_bytecode_add_alu(ctx.bc, &alu);
 
-	/* PRED_SETE_INT __, R0.y, 0 */
-	memset(&alu, 0, sizeof(alu));
-	alu.op = ALU_OP2_PRED_SETE_INT;
-	alu.src[0].chan = 1;
-	alu.src[1].sel = V_SQ_ALU_SRC_0;
-	alu.execute_mask = 1;
-	alu.update_pred = 1;
-	alu.last = 1;
-	r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);
-
-	r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
-	cf_jump = ctx.bc->cf_last;
-
 	/* fetch vertex data from GSVS ring */
 	for (i = 0; i < ocnt; ++i) {
 		struct r600_shader_io *out = &ctx.shader->output[i];
+
 		out->gpr = i + 1;
 		out->ring_offset = i * 16;
 
@@ -1553,6 +1556,7 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
 		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
 		vtx.offset = out->ring_offset;
 		vtx.dst_gpr = out->gpr;
+		vtx.src_gpr = 0;
 		vtx.dst_sel_x = 0;
 		vtx.dst_sel_y = 1;
 		vtx.dst_sel_z = 2;
@@ -1565,18 +1569,68 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
 
 		r600_bytecode_add_vtx(ctx.bc, &vtx);
 	}
+	ctx.temp_reg = i + 1;
+	for (ring = 3; ring >= 0; --ring) {
+		bool enabled = false;
+		for (i = 0; i < so->num_outputs; i++) {
+			if (so->output[i].stream == ring) {
+				enabled = true;
+				break;
+			}
+		}
+		if (ring != 0 && !enabled) {
+			cshader->shader.ring_item_sizes[ring] = 0;
+			continue;
+		}
+
+		if (cf_jump) {
+			// Patch up jump label
+			r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
+			cf_pop = ctx.bc->cf_last;
+
+			cf_jump->cf_addr = cf_pop->id + 2;
+			cf_jump->pop_count = 1;
+			cf_pop->cf_addr = cf_pop->id + 2;
+			cf_pop->pop_count = 1;
+		}
+
+		/* PRED_SETE_INT __, R0.y, ring */
+		memset(&alu, 0, sizeof(alu));
+		alu.op = ALU_OP2_PRED_SETE_INT;
+		alu.src[0].chan = 1;
+		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+		alu.src[1].value = ring;
+		alu.execute_mask = 1;
+		alu.update_pred = 1;
+		alu.last = 1;
+		r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);
 
-	/* XXX handle clipvertex, streamout? */
-	emit_streamout(&ctx, so);
+		r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
+		cf_jump = ctx.bc->cf_last;
+
+		if (enabled)
+			emit_streamout(&ctx, so, ring, &cshader->shader.ring_item_sizes[ring]);
+		cshader->shader.ring_item_sizes[ring] = ocnt * 16;
+	}
 
 	/* export vertex data */
 	/* XXX factor out common code with r600_shader_from_tgsi ? */
 	for (i = 0; i < ocnt; ++i) {
 		struct r600_shader_io *out = &ctx.shader->output[i];
-
+		bool instream0 = true;
 		if (out->name == TGSI_SEMANTIC_CLIPVERTEX)
 			continue;
 
+		for (j = 0; j < so->num_outputs; j++) {
+			if (so->output[j].register_index == i) {
+				if (so->output[j].stream == 0)
+					break;
+				if (so->output[j].stream > 0)
+					instream0 = false;
+			}
+		}
+		if (!instream0)
+			continue;
 		memset(&output, 0, sizeof(output));
 		output.gpr = out->gpr;
 		output.elem_size = 3;
@@ -1722,19 +1776,19 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
 	}
 
 	gs->gs_copy_shader = cshader;
+	cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
 
 	ctx.bc->nstack = 1;
 
-	cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
-	cshader->shader.ring_item_size = ocnt * 16;
-
 	return r600_bytecode_build(ctx.bc);
 }
 
-static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind)
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind)
 {
 	struct r600_bytecode_output output;
 	int i, k, ring_offset;
+	int effective_stream = stream == -1 ? 0 : stream;
+	int idx = 0;
 
 	for (i = 0; i < ctx->shader->noutput; i++) {
 		if (ctx->gs_for_vs) {
@@ -1751,15 +1805,18 @@ static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind)
 
 			if (ring_offset == -1)
 				continue;
-		} else
-			ring_offset = i * 16;
+		} else {
+			ring_offset = idx * 16;
+			idx++;
+		}
 
+		if (stream > 0 && ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION)
+			continue;
 		/* next_ring_offset after parsing input decls contains total size of
 		 * single vertex data, gs_next_vertex - current vertex index */
 		if (!ind)
 			ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex;
 
-		/* get a temp and add the ring offset to the next vertex base in the shader */
 		memset(&output, 0, sizeof(struct r600_bytecode_output));
 		output.gpr = ctx->shader->output[i].gpr;
 		output.elem_size = 3;
@@ -1770,28 +1827,39 @@ static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind)
 			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
 		else
 			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
-		output.op = CF_OP_MEM_RING;
 
+		switch (stream) {
+		default:
+		case 0:
+			output.op = CF_OP_MEM_RING; break;
+		case 1:
+			output.op = CF_OP_MEM_RING1; break;
+		case 2:
+			output.op = CF_OP_MEM_RING2; break;
+		case 3:
+			output.op = CF_OP_MEM_RING3; break;
+		}
 
 		if (ind) {
 			output.array_base = ring_offset >> 2; /* in dwords */
 			output.array_size = 0xfff;
-			output.index_gpr = ctx->gs_export_gpr_treg;
+			output.index_gpr = ctx->gs_export_gpr_tregs[effective_stream];
 		} else
 			output.array_base = ring_offset >> 2; /* in dwords */
 		r600_bytecode_add_output(ctx->bc, &output);
 	}
 
 	if (ind) {
+		/* get a temp and add the ring offset to the next vertex base in the shader */
 		struct r600_bytecode_alu alu;
 		int r;
 
 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.op = ALU_OP2_ADD_INT;
-		alu.src[0].sel = ctx->gs_export_gpr_treg;
+		alu.src[0].sel = ctx->gs_export_gpr_tregs[effective_stream];
 		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
 		alu.src[1].value = ctx->gs_out_ring_offset >> 4;
-		alu.dst.sel = ctx->gs_export_gpr_treg;
+		alu.dst.sel = ctx->gs_export_gpr_tregs[effective_stream];
 		alu.dst.write = 1;
 		alu.last = 1;
 		r = r600_bytecode_add_alu(ctx->bc, &alu);
@@ -1856,6 +1924,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	ctx.next_ring_offset = 0;
 	ctx.gs_out_ring_offset = 0;
 	ctx.gs_next_vertex = 0;
+	ctx.gs_stream_output_info = &so;
 
 	shader->uses_index_registers = false;
 	ctx.face_gpr = -1;
@@ -1942,8 +2011,11 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2;
 
 	if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
-		ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 3;
-		ctx.temp_reg = ctx.bc->ar_reg + 4;
+		ctx.gs_export_gpr_tregs[0] = ctx.bc->ar_reg + 3;
+		ctx.gs_export_gpr_tregs[1] = ctx.bc->ar_reg + 4;
+		ctx.gs_export_gpr_tregs[2] = ctx.bc->ar_reg + 5;
+		ctx.gs_export_gpr_tregs[3] = ctx.bc->ar_reg + 6;
+		ctx.temp_reg = ctx.bc->ar_reg + 7;
 	} else {
 		ctx.temp_reg = ctx.bc->ar_reg + 3;
 	}
@@ -2006,7 +2078,10 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 		}
 	}
 	
-	shader->ring_item_size = ctx.next_ring_offset;
+	shader->ring_item_sizes[0] = ctx.next_ring_offset;
+	shader->ring_item_sizes[1] = 0;
+	shader->ring_item_sizes[2] = 0;
+	shader->ring_item_sizes[3] = 0;
 
 	/* Process two side if needed */
 	if (shader->two_side && ctx.colors_used) {
@@ -2129,17 +2204,18 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 		if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
 			struct r600_bytecode_alu alu;
 			int r;
-
-			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-			alu.op = ALU_OP1_MOV;
-			alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
-			alu.src[0].value = 0;
-			alu.dst.sel = ctx.gs_export_gpr_treg;
-			alu.dst.write = 1;
-			alu.last = 1;
-			r = r600_bytecode_add_alu(ctx.bc, &alu);
-			if (r)
-				return r;
+			for (j = 0; j < 4; j++) {
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+				alu.op = ALU_OP1_MOV;
+				alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+				alu.src[0].value = 0;
+				alu.dst.sel = ctx.gs_export_gpr_tregs[j];
+				alu.dst.write = 1;
+				alu.last = 1;
+				r = r600_bytecode_add_alu(ctx.bc, &alu);
+				if (r)
+					return r;
+			}
 		}
 		if (shader->two_side && ctx.colors_used) {
 			if ((r = process_twoside_color_inputs(&ctx)))
@@ -2240,14 +2316,20 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	/* Add stream outputs. */
 	if (!ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX &&
 	    so.num_outputs && !use_llvm)
-		emit_streamout(&ctx, &so);
+		emit_streamout(&ctx, &so, -1, NULL);
 
 	pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
 	convert_edgeflag_to_int(&ctx);
 
 	if (ring_outputs) {
-		if (key.vs.as_es)
-			emit_gs_ring_writes(&ctx, FALSE);
+		if (key.vs.as_es) {
+			ctx.gs_export_gpr_tregs[0] = r600_get_temp(&ctx);
+			ctx.gs_export_gpr_tregs[1] = -1;
+			ctx.gs_export_gpr_tregs[2] = -1;
+			ctx.gs_export_gpr_tregs[3] = -1;
+
+			emit_gs_ring_writes(&ctx, &so, -1, FALSE);
+		}
 	} else {
 		/* Export output */
 		next_clip_base = shader->vs_out_misc_write ? 62 : 61;
@@ -7198,10 +7280,17 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
 
 static int tgsi_gs_emit(struct r600_shader_ctx *ctx)
 {
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	int stream = ctx->literals[inst->Src[0].Register.Index * 4 + inst->Src[0].Register.SwizzleX];
+	int r;
+
 	if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
-		emit_gs_ring_writes(ctx, TRUE);
+		emit_gs_ring_writes(ctx, ctx->gs_stream_output_info, stream, TRUE);
 
-	return r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
+	r = r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
+	if (!r)
+		ctx->bc->cf_last->count = stream; // Count field for CUT/EMIT_VERTEX indicates which stream
+	return r;
 }
 
 static int tgsi_umad(struct r600_shader_ctx *ctx)
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index f5ca9d67f1e..f49e1c1d64e 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -78,8 +78,9 @@ struct r600_shader {
 	/* Temporarily workaround SB not handling CF_INDEX_[01] index registers */
 	boolean			uses_index_registers;
 
-	/* size in bytes of a data item in the ring (single vertex data) */
-	unsigned		ring_item_size;
+	/* Size in bytes of a data item in the ring(s) (single vertex data).
+	   For stages with only one ring, items 1, 2 and 3 are set to 0. */
+	unsigned		ring_item_sizes[4];
 
 	unsigned		indirect_files;
 	unsigned		max_arrays;
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index aff8f03f9b1..f2d24a3e37a 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2650,7 +2650,7 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 	struct r600_shader *rshader = &shader->shader;
 	struct r600_shader *cp_shader = &shader->gs_copy_shader->shader;
 	unsigned gsvs_itemsize =
-			(cp_shader->ring_item_size * shader->selector->gs_max_out_vertices) >> 2;
+			(cp_shader->ring_item_sizes[0] * shader->selector->gs_max_out_vertices) >> 2;
 
 	r600_init_command_buffer(cb, 64);
 
@@ -2665,10 +2665,10 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 			       r600_conv_prim_to_gs_out(shader->selector->gs_output_prim));
 
 	r600_store_context_reg(cb, R_0288C8_SQ_GS_VERT_ITEMSIZE,
-	                       cp_shader->ring_item_size >> 2);
+	                       cp_shader->ring_item_sizes[0] >> 2);
 
 	r600_store_context_reg(cb, R_0288A8_SQ_ESGS_RING_ITEMSIZE,
-			       (rshader->ring_item_size) >> 2);
+			       (rshader->ring_item_sizes[0]) >> 2);
 
 	r600_store_context_reg(cb, R_0288AC_SQ_GSVS_RING_ITEMSIZE,
 			       gsvs_itemsize);
-- 
cgit v1.2.3


From ee67fd70c22c6389fd90886ef3ddf5bf6601b3a0 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 20 Feb 2015 10:40:46 +1000
Subject: tgsi/scan: add uses_doubles to tgsi scanner

This allows drivers to work out if a shader contains any
double opcodes easily.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/auxiliary/tgsi/tgsi_scan.c | 4 ++++
 src/gallium/auxiliary/tgsi/tgsi_scan.h | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 7523baf4ce0..9810b5468d9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -100,6 +100,10 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
             assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
             info->opcode_count[fullinst->Instruction.Opcode]++;
 
+            if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D &&
+                fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG)
+               info->uses_doubles = true;
+
             for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
                const struct tgsi_full_src_register *src =
                   &fullinst->Src[i];
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index b81bdd71f14..c5900bc05d4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -95,7 +95,7 @@ struct tgsi_shader_info
    boolean writes_viewport_index;
    boolean writes_layer;
    boolean is_msaa_sampler[PIPE_MAX_SAMPLERS];
-
+   boolean uses_doubles; /**< uses any of the double instructions */
    unsigned clipdist_writemask;
    unsigned culldist_writemask;
    unsigned num_written_culldistance;
-- 
cgit v1.2.3


From 3be5ee1574e0a3494c0550f4b7ae09117855d749 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 20 Feb 2015 10:47:15 +1000
Subject: r600g: add doubles support for CAYMAN

Only a subset of the AMD GPUs supported by r600g support doubles.
CAYMAN and CYPRESS are probably all we'll try to support; however,
I don't have a CYPRESS, so ignore that for now.

This disables SB support for doubles, as we think we need to
make the scheduler smarter to introduce delay slots.

[airlied: pushing this to avoid the pain of rebasing. So far it
mostly works on CAYMAN only; Glenn has some ideas about
delay slot issues we need to look into. Turned off by
default for now.]

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r600/r600_asm.c    |  14 ++
 src/gallium/drivers/r600/r600_asm.h    |  15 ++
 src/gallium/drivers/r600/r600_isa.h    |   8 +-
 src/gallium/drivers/r600/r600_shader.c | 388 ++++++++++++++++++++++++++++++++-
 src/gallium/drivers/r600/r600_shader.h |   2 +
 5 files changed, 421 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 9b356bad4a9..bc6980660a5 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -252,6 +252,12 @@ static int alu_uses_rel(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 	return 0;
 }
 
+static int is_alu_64bit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+{ /* Returns non-zero when the opcode of `alu` carries the AF_64 flag; `bc` is not read here. */
+	const struct alu_op_info *op = r600_isa_alu(alu->op); /* op info looked up from alu->op */
+	return (op->flags & AF_64); /* raw masked flag bits, not normalized to 0/1 */
+}
+
 static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
 	unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
@@ -576,6 +582,12 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
 
 	for (i = 0; i < max_slots; ++i) {
 		if (prev[i] && (prev[i]->dst.write || prev[i]->is_op3) && !prev[i]->dst.rel) {
+
+			if (is_alu_64bit_inst(bc, prev[i])) {
+				gpr[i] = -1;
+				continue;
+			}
+
 			gpr[i] = prev[i]->dst.sel;
 			/* cube writes more than PV.X */
 			if (is_alu_reduction_inst(bc, prev[i]))
@@ -591,6 +603,8 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
 		if(!alu)
 			continue;
 
+		if (is_alu_64bit_inst(bc, alu))
+			continue;
 		num_src = r600_bytecode_get_num_operands(bc, alu);
 		for (src = 0; src < num_src; ++src) {
 			if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index b282907db5f..7cf3a090908 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -280,4 +280,19 @@ void eg_bytecode_export_read(struct r600_bytecode *bc,
 
 void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
 			   unsigned *num_format, unsigned *format_comp, unsigned *endian);
+
+static inline int fp64_switch(int i)
+{ /* Maps an index to its partner within the pairs (0,1) and (2,3). */
+	switch (i) { /* NOTE(review): presumably i is an ALU channel and the pairs are the two halves of a 64-bit value - confirm against callers */
+	case 0:
+		return 1;
+	case 1:
+		return 0;
+	case 2:
+		return 3;
+	case 3:
+		return 2;
+	}
+	return 0; /* any index outside 0..3 falls through to 0 */
+}
 #endif
diff --git a/src/gallium/drivers/r600/r600_isa.h b/src/gallium/drivers/r600/r600_isa.h
index fdbe1c00d20..27fc1e8e8be 100644
--- a/src/gallium/drivers/r600/r600_isa.h
+++ b/src/gallium/drivers/r600/r600_isa.h
@@ -339,11 +339,11 @@ static const struct alu_op_info alu_op_table[] = {
 		{"PRED_SETGT_64",             2, { 0x7C, 0xC7 },{   AF_V,  AF_V,  AF_V,  AF_V},  AF_PRED | AF_CC_GT | AF_64 },
 		{"PRED_SETE_64",              2, { 0x7D, 0xC8 },{   AF_V,  AF_V,  AF_V,  AF_V},  AF_PRED | AF_CC_E | AF_64 },
 		{"PRED_SETGE_64",             2, { 0x7E, 0xC9 },{   AF_V,  AF_V,  AF_V,  AF_V},  AF_PRED | AF_CC_GE | AF_64 },
-		{"MUL_64",                    2, { 0x1B, 0xCA },{   AF_V,  AF_V,  AF_V,  AF_V},  AF_64 },
+		{"MUL_64",                    2, { 0x1B, 0xCA },{   AF_V,  AF_V,  AF_V,  AF_4V}, AF_64 },
 		{"ADD_64",                    2, { 0x17, 0xCB },{   AF_V,  AF_V,  AF_V,  AF_V},  AF_64 },
 		{"MOVA_INT",                  1, { 0x18, 0xCC },{   AF_V,  AF_V,  AF_V,  AF_V},  AF_MOVA },
-		{"FLT64_TO_FLT32",            1, { 0x1C, 0xCD },{   AF_V,  AF_V,  AF_V,  AF_V},  0 },
-		{"FLT32_TO_FLT64",            1, { 0x1D, 0xCE },{   AF_V,  AF_V,  AF_V,  AF_V},  0 },
+		{"FLT64_TO_FLT32",            1, { 0x1C, 0xCD },{   AF_V,  AF_V,  AF_V,  AF_V},  AF_64 },
+		{"FLT32_TO_FLT64",            1, { 0x1D, 0xCE },{   AF_V,  AF_V,  AF_V,  AF_V},  AF_64 },
 		{"SAD_ACCUM_PREV_UINT",       2, {   -1, 0xCF },{      0,     0,  AF_V,  AF_V},  AF_UINT_DST | AF_PREV_NEXT },
 		{"DOT",                       2, {   -1, 0xD0 },{      0,     0,  AF_V,  AF_V},  AF_PREV_NEXT },
 		{"MUL_PREV",                  1, {   -1, 0xD1 },{      0,     0,  AF_V,  AF_V},  AF_PREV_INTERLEAVE },
@@ -369,7 +369,7 @@ static const struct alu_op_info alu_op_table[] = {
 		{"FMA",                       3, {   -1, 0x07 },{      0,     0,  AF_V,  AF_V},  0 },
 		{"MULADD_INT24",              3, {   -1, 0x08 },{      0,     0,     0,  AF_V},  AF_INT_DST | AF_24 },
 		{"CNDNE_64",                  3, {   -1, 0x09 },{      0,     0,  AF_V,  AF_V},  AF_CMOV | AF_64 },
-		{"FMA_64",                    3, {   -1, 0x0A },{      0,     0,  AF_V,  AF_V},  AF_64 },
+		{"FMA_64",                    3, {   -1, 0x0A },{      0,     0,  AF_V,  AF_4V}, AF_64 },
 		{"LERP_UINT",                 3, {   -1, 0x0B },{      0,     0,  AF_V,  AF_V},  AF_UINT_DST },
 		{"BIT_ALIGN_INT",             3, {   -1, 0x0C },{      0,     0,  AF_V,  AF_V},  AF_INT_DST },
 		{"BYTE_ALIGN_INT",            3, {   -1, 0x0D },{      0,     0,  AF_V,  AF_V},  AF_INT_DST },
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 25a251e8187..1489298a097 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -47,7 +47,7 @@ MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
 These 8xx t-slot only opcodes become vector ops, with all four 
 slots expecting the arguments on sources a and b. Result is 
 broadcast to all channels.
-MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
+MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64
 These 8xx t-slot only opcodes become vector ops in the z, y, and 
 x slots.
 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
@@ -167,6 +167,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
     }
 	/* disable SB for shaders using CF_INDEX_0/1 (sampler/ubo array indexing) as it doesn't handle those currently */
 	use_sb &= !shader->shader.uses_index_registers;
+	/* disable SB for shaders using doubles */
+	use_sb &= !shader->shader.uses_doubles;
 
 	/* Check if the bytecode has already been built.  When using the llvm
 	 * backend, r600_shader_from_tgsi() will take care of building the
@@ -343,7 +345,7 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
 	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
 	int j;
 
-	if (i->Instruction.NumDstRegs > 1) {
+	if (i->Instruction.NumDstRegs > 1 && i->Instruction.Opcode != TGSI_OPCODE_DFRACEXP) {
 		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
 		return -EINVAL;
 	}
@@ -1907,6 +1909,9 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	ctx.tokens = tokens;
 	tgsi_scan_shader(tokens, &ctx.info);
 	shader->indirect_files = ctx.info.indirect_files;
+
+	shader->uses_doubles = ctx.info.uses_doubles;
+
 	indirect_gprs = ctx.info.indirect_files & ~(1 << TGSI_FILE_CONSTANT);
 	tgsi_parse_init(&ctx.parse, tokens);
 	ctx.type = ctx.info.processor;
@@ -2679,6 +2684,167 @@ static int tgsi_last_instruction(unsigned writemask)
 	return lasti;
 }
 
+/* Emit one ALU op per written channel for a 64-bit op. With singledest, a one-channel writemask
+ * is widened to its channel pair; y/w results are staged in temp_reg, then MOVed to the dst. */
+static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool swap)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	unsigned write_mask = inst->Dst[0].Register.WriteMask;
+	struct r600_bytecode_alu alu;
+	int i, j, r, lasti = tgsi_last_instruction(write_mask);
+	int use_tmp = 0; /* 0: write dst directly; else (temp_reg channel to copy from) + 1 */
+
+	if (singledest) {
+		switch (write_mask) {
+		case 0x1:
+			write_mask = 0x3;
+			break;
+		case 0x2:
+			use_tmp = 1;
+			write_mask = 0x3;
+			break;
+		case 0x4:
+			write_mask = 0xc;
+			break;
+		case 0x8:
+			write_mask = 0xc;
+			use_tmp = 3;
+			break;
+		}
+	}
+
+	lasti = tgsi_last_instruction(write_mask); /* recomputed: the mask may have been widened above */
+	for (i = 0; i <= lasti; i++) {
+
+		if (!(write_mask & (1 << i)))
+			continue;
+
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+		if (singledest) {
+			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+			if (use_tmp) {
+				alu.dst.sel = ctx->temp_reg;
+				alu.dst.chan = i;
+				alu.dst.write = 1;
+			}
+			if (i == 1 || i == 3)
+				alu.dst.write = 0; /* odd channel of the pair is not written for single-dest ops */
+		} else
+			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+
+		alu.op = ctx->inst_info->op;
+		if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DABS) {
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], i); /* DABS reads channel i without the fp64_switch swap */
+		} else if (!swap) {
+			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+				r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i));
+			}
+		} else { /* swap: the two operands are exchanged */
+			r600_bytecode_src(&alu.src[0], &ctx->src[1], fp64_switch(i));
+			r600_bytecode_src(&alu.src[1], &ctx->src[0], fp64_switch(i));
+		}
+
+		/* handle some special cases: source modifiers are applied on the odd channels only */
+		if (i == 1 || i == 3) {
+			switch (ctx->parse.FullToken.FullInstruction.Instruction.Opcode) {
+			case TGSI_OPCODE_SUB:
+				r600_bytecode_src_toggle_neg(&alu.src[1]);
+				break;
+			case TGSI_OPCODE_DABS:
+				r600_bytecode_src_set_abs(&alu.src[0]);
+				break;
+			default:
+				break;
+			}
+		}
+		if (i == lasti) {
+			alu.last = 1;
+		}
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+
+	if (use_tmp) {
+		write_mask = inst->Dst[0].Register.WriteMask; /* back to the original, un-widened mask */
+
+		/* move result from temp to dst */
+		for (i = 0; i <= lasti; i++) {
+			if (!(write_mask & (1 << i)))
+				continue;
+
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+			alu.op = ALU_OP1_MOV;
+			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+			alu.src[0].sel = ctx->temp_reg;
+			alu.src[0].chan = use_tmp - 1; /* even channel of the pair, where the result was staged */
+			alu.last = (i == lasti);
+
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
+			if (r)
+				return r;
+		}
+	}
+	return 0;
+}
+/* 64-bit op entry point: rejects writemasks holding neither full pair (xy or zw), then emits. */
+static int tgsi_op2_64(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	unsigned write_mask = inst->Dst[0].Register.WriteMask;
+	/* confirm writemaskiing */
+	if ((write_mask & 0x3) != 0x3 &&
+	    (write_mask & 0xc) != 0xc) {
+		fprintf(stderr, "illegal writemask for 64-bit: 0x%x\n", write_mask);
+		return -1;
+	}
+	return tgsi_op2_64_params(ctx, false, false); /* singledest = false, swap = false */
+}
+/* 64-bit op producing a single 32-bit result channel; operands in TGSI order. */
+static int tgsi_op2_64_single_dest(struct r600_shader_ctx *ctx)
+{
+	return tgsi_op2_64_params(ctx, true, false); /* singledest = true, swap = false */
+}
+/* Same as tgsi_op2_64_single_dest, but with the two source operands exchanged. */
+static int tgsi_op2_64_single_dest_s(struct r600_shader_ctx *ctx)
+{
+	return tgsi_op2_64_params(ctx, true, true); /* singledest = true, swap = true */
+}
+/* Three-operand 64-bit op: issued in all four slots; unmasked slots target a scratch temp. */
+static int tgsi_op3_64(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	struct r600_bytecode_alu alu;
+	int i, j, r;
+	int lasti = 3;
+	int tmp = r600_get_temp(ctx);
+
+	for (i = 0; i < lasti + 1; i++) {
+
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ctx->inst_info->op;
+		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+			r600_bytecode_src(&alu.src[j], &ctx->src[j], i == 3 ? 0 : 1); /* slots 0-2 read channel 1, slot 3 reads channel 0 */
+		}
+
+		if (inst->Dst[0].Register.WriteMask & (1 << i))
+			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+		else
+			alu.dst.sel = tmp; /* slot not in the writemask: dst.write stays 0 from the memset */
+
+		alu.dst.chan = i;
+		alu.is_op3 = 1;
+		if (i == lasti) {
+			alu.last = 1;
+		}
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -2795,6 +2961,152 @@ static int tgsi_ineg(struct r600_shader_ctx *ctx)
 
 }
 
+static int tgsi_dneg(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	struct r600_bytecode_alu alu;
+	int i, r;
+	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+
+	for (i = 0; i < lasti + 1; i++) {
+
+		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+			continue;
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP1_MOV; /* DNEG is emitted as a per-channel MOV; ctx->inst_info->op is not used here */
+
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+
+		if (i == 1 || i == 3)
+			r600_bytecode_src_toggle_neg(&alu.src[0]); /* negate only the odd channel of each pair */
+		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+
+		if (i == lasti) {
+			alu.last = 1;
+		}
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+	return 0;
+
+}
+/* DFRACEXP: run the op in four slots into temp_reg, then MOV temp.zw to Dst[0] and temp.y to Dst[1]. */
+static int tgsi_dfracexp(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	struct r600_bytecode_alu alu;
+	unsigned write_mask = inst->Dst[0].Register.WriteMask;
+	int i, j, r;
+	int firsti = write_mask == 0xc ? 2 : 0; /* first Dst[0] channel: z only when the mask is exactly 0xc */
+
+	for (i = 0; i <= 3; i++) {
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ctx->inst_info->op;
+
+		alu.dst.sel = ctx->temp_reg;
+		alu.dst.chan = i;
+		alu.dst.write = 1;
+		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+			r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i));
+		}
+
+		if (i == 3)
+			alu.last = 1;
+
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+
+	/* MOV temp_reg.zw to the Dst[0] channel pair starting at firsti */
+	for (i = 0; i <= 1; i++) {
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP1_MOV;
+		alu.src[0].chan = i + 2;
+		alu.src[0].sel = ctx->temp_reg;
+
+		tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst);
+		alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) & 1;
+		alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+
+	for (i = 0; i <= 3; i++) {
+		if (inst->Dst[1].Register.WriteMask & (1 << i)) {
+			/* MOV temp_reg.y to the first channel enabled in the Dst[1] writemask */
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+			alu.op = ALU_OP1_MOV;
+			alu.src[0].chan = 1;
+			alu.src[0].sel = ctx->temp_reg;
+
+			tgsi_dst(ctx, &inst->Dst[1], i, &alu.dst);
+			alu.last = 1;
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
+			if (r)
+				return r;
+			break; /* only one Dst[1] channel is written */
+		}
+	}
+	return 0;
+}
+/* One-source double op: issued in three slots into temp_reg (x/y written), then MOVed to dst. */
+static int cayman_emit_double_instr(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	int i, r;
+	struct r600_bytecode_alu alu;
+	int last_slot = 3;
+	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+	int t1 = ctx->temp_reg;
+
+	/* these have to write the result to X/Y by the looks of it */
+	for (i = 0 ; i < last_slot; i++) {
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ctx->inst_info->op;
+
+		/* there should be exactly one source register */
+		assert (inst->Instruction.NumSrcRegs == 1);
+
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); /* the single TGSI source feeds both ALU operands: */
+		r600_bytecode_src(&alu.src[1], &ctx->src[0], 0); /* channel 1 as src[0], channel 0 as src[1] */
+
+		/* DRSQ and DSQRT take the absolute value of the source (set on src[1]) */
+		if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ ||
+		    ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT) {
+			r600_bytecode_src_set_abs(&alu.src[1]);
+		}
+		alu.dst.sel = t1;
+		alu.dst.chan = i;
+		alu.dst.write = (i == 0 || i == 1);
+
+		if (i == last_slot - 1)
+			alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+
+	for (i = 0 ; i <= lasti; i++) {
+		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+			continue;
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP1_MOV;
+		alu.src[0].sel = t1;
+		alu.src[0].chan = (i == 0 || i == 2) ? 0 : 1; /* even dst channels take temp.x, odd ones temp.y */
+		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+		alu.dst.write = 1;
+		if (i == lasti)
+			alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -2873,6 +3185,55 @@ static int cayman_mul_int_instr(struct r600_shader_ctx *ctx)
 	return 0;
 }
 
+/* DMUL: for each written channel pair, issue the op in four slots into temp_reg, then MOV to dst. */
+static int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	int i, j, k, r;
+	struct r600_bytecode_alu alu;
+	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+	int t1 = ctx->temp_reg;
+
+	for (k = 0; k < 2; k++) { /* k selects the channel pair: xy (0) or zw (1) */
+		if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2))))
+			continue;
+
+		for (i = 0; i < 4; i++) {
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+			alu.op = ctx->inst_info->op;
+			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+				r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1)); /* slots 0-2: odd channel of the pair, slot 3: even */
+			}
+			alu.dst.sel = t1;
+			alu.dst.chan = i;
+			alu.dst.write = 1;
+			if (i == 3)
+				alu.last = 1;
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
+			if (r)
+				return r;
+		}
+	}
+	/* NOTE(review): both pairs write the same temp_reg channels - confirm a 0xf writemask cannot reach here */
+	for (i = 0; i <= lasti; i++) {
+		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+			continue;
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP1_MOV;
+		alu.src[0].sel = t1;
+		alu.src[0].chan = i;
+		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+		alu.dst.write = 1;
+		if (i == lasti)
+			alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
 /*
  * r600 - trunc to -PI..PI range
  * r700 - normalize by dividing by 2PI
@@ -7965,5 +8326,28 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
 	[TGSI_OPCODE_INTERP_CENTROID]	= { ALU_OP0_NOP, tgsi_interp_egcm},
 	[TGSI_OPCODE_INTERP_SAMPLE]	= { ALU_OP0_NOP, tgsi_interp_egcm},
 	[TGSI_OPCODE_INTERP_OFFSET]	= { ALU_OP0_NOP, tgsi_interp_egcm},
+	[TGSI_OPCODE_F2D]	= { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64},
+	[TGSI_OPCODE_D2F]	= { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest},
+	[TGSI_OPCODE_DABS]	= { ALU_OP1_MOV, tgsi_op2_64},
+	[TGSI_OPCODE_DNEG]	= { ALU_OP2_ADD_64, tgsi_dneg},
+	[TGSI_OPCODE_DADD]	= { ALU_OP2_ADD_64, tgsi_op2_64},
+	[TGSI_OPCODE_DMUL]	= { ALU_OP2_MUL_64, cayman_mul_double_instr},
+	[TGSI_OPCODE_DMAX]	= { ALU_OP2_MAX_64, tgsi_op2_64},
+	[TGSI_OPCODE_DMIN]	= { ALU_OP2_MIN_64, tgsi_op2_64},
+	[TGSI_OPCODE_DSLT]	= { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
+	[TGSI_OPCODE_DSGE]	= { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest},
+	[TGSI_OPCODE_DSEQ]	= { ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
+	[TGSI_OPCODE_DSNE]	= { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
+	[TGSI_OPCODE_DRCP]	= { ALU_OP2_RECIP_64, cayman_emit_double_instr},
+	[TGSI_OPCODE_DSQRT]	= { ALU_OP2_SQRT_64, cayman_emit_double_instr},
+	[TGSI_OPCODE_DMAD]	= { ALU_OP3_FMA_64, tgsi_op3_64},
+	[TGSI_OPCODE_DFRAC]	= { ALU_OP1_FRACT_64, tgsi_op2_64},
+	[TGSI_OPCODE_DLDEXP]	= { ALU_OP2_LDEXP_64, tgsi_op2_64},
+	[TGSI_OPCODE_DFRACEXP]	= { ALU_OP1_FREXP_64, tgsi_dfracexp},
+	[TGSI_OPCODE_D2I]	= { ALU_OP0_NOP, tgsi_unsupported},
+	[TGSI_OPCODE_I2D]	= { ALU_OP0_NOP, tgsi_unsupported},
+	[TGSI_OPCODE_D2U]	= { ALU_OP0_NOP, tgsi_unsupported},
+	[TGSI_OPCODE_U2D]	= { ALU_OP0_NOP, tgsi_unsupported},
+	[TGSI_OPCODE_DRSQ]	= { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr},
 	[TGSI_OPCODE_LAST]	= { ALU_OP0_NOP, tgsi_unsupported},
 };
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index f49e1c1d64e..48de9cdb156 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -89,6 +89,8 @@ struct r600_shader {
 	unsigned		vs_as_gs_a;
 	unsigned                ps_prim_id_input;
 	struct r600_shader_array * arrays;
+
+	boolean			uses_doubles;
 };
 
 union r600_shader_key {
-- 
cgit v1.2.3


From d2cab815b451d2ddc29cb332f96e79cdec25443d Mon Sep 17 00:00:00 2001
From: Glenn Kennard <glenn.kennard@gmail.com>
Date: Wed, 12 Aug 2015 10:27:39 +1000
Subject: r600g: Add doubles support for CYPRESS

This doesn't enable the support, just adds some of
the code, so we don't have to keep rebasing.

Signed-off-by: Glenn Kennard <glenn.kennard@gmail.com>
Signed-off-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r600/r600_shader.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 1489298a097..9043668a532 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -2793,7 +2793,7 @@ static int tgsi_op2_64(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 	unsigned write_mask = inst->Dst[0].Register.WriteMask;
-	/* confirm writemaskiing */
+	/* confirm writemasking */
 	if ((write_mask & 0x3) != 0x3 &&
 	    (write_mask & 0xc) != 0xc) {
 		fprintf(stderr, "illegal writemask for 64-bit: 0x%x\n", write_mask);
@@ -3082,7 +3082,7 @@ static int cayman_emit_double_instr(struct r600_shader_ctx *ctx)
 		alu.dst.chan = i;
 		alu.dst.write = (i == 0 || i == 1);
 
-		if (i == last_slot - 1)
+		if (ctx->bc->chip_class != CAYMAN || i == last_slot - 1)
 			alu.last = 1;
 		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
@@ -8127,6 +8127,29 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
 	[TGSI_OPCODE_INTERP_CENTROID]	= { ALU_OP0_NOP, tgsi_interp_egcm},
 	[TGSI_OPCODE_INTERP_SAMPLE]	= { ALU_OP0_NOP, tgsi_interp_egcm},
 	[TGSI_OPCODE_INTERP_OFFSET]	= { ALU_OP0_NOP, tgsi_interp_egcm},
+	[TGSI_OPCODE_F2D]	= { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64},
+	[TGSI_OPCODE_D2F]	= { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest},
+	[TGSI_OPCODE_DABS]	= { ALU_OP1_MOV, tgsi_op2_64},
+	[TGSI_OPCODE_DNEG]	= { ALU_OP2_ADD_64, tgsi_dneg},
+	[TGSI_OPCODE_DADD]	= { ALU_OP2_ADD_64, tgsi_op2_64},
+	[TGSI_OPCODE_DMUL]	= { ALU_OP2_MUL_64, cayman_mul_double_instr},
+	[TGSI_OPCODE_DMAX]	= { ALU_OP2_MAX_64, tgsi_op2_64},
+	[TGSI_OPCODE_DMIN]	= { ALU_OP2_MIN_64, tgsi_op2_64},
+	[TGSI_OPCODE_DSLT]	= { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
+	[TGSI_OPCODE_DSGE]	= { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest},
+	[TGSI_OPCODE_DSEQ]	= { ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
+	[TGSI_OPCODE_DSNE]	= { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
+	[TGSI_OPCODE_DRCP]	= { ALU_OP2_RECIP_64, cayman_emit_double_instr},
+	[TGSI_OPCODE_DSQRT]	= { ALU_OP2_SQRT_64, cayman_emit_double_instr},
+	[TGSI_OPCODE_DMAD]	= { ALU_OP3_FMA_64, tgsi_op3_64},
+	[TGSI_OPCODE_DFRAC]	= { ALU_OP1_FRACT_64, tgsi_op2_64},
+	[TGSI_OPCODE_DLDEXP]	= { ALU_OP2_LDEXP_64, tgsi_op2_64},
+	[TGSI_OPCODE_DFRACEXP]	= { ALU_OP1_FREXP_64, tgsi_dfracexp},
+	[TGSI_OPCODE_D2I]	= { ALU_OP0_NOP, tgsi_unsupported},
+	[TGSI_OPCODE_I2D]	= { ALU_OP0_NOP, tgsi_unsupported},
+	[TGSI_OPCODE_D2U]	= { ALU_OP0_NOP, tgsi_unsupported},
+	[TGSI_OPCODE_U2D]	= { ALU_OP0_NOP, tgsi_unsupported},
+	[TGSI_OPCODE_DRSQ]	= { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr},
 	[TGSI_OPCODE_LAST]	= { ALU_OP0_NOP, tgsi_unsupported},
 };
 
-- 
cgit v1.2.3


From 4de86e1371b0d59a5b9a787b726be3d373024647 Mon Sep 17 00:00:00 2001
From: Alejandro Piñeiro <apinheiro@igalia.com>
Date: Tue, 1 Sep 2015 17:02:20 +0200
Subject: i965/vec4: fill src_reg type using the constructor type parameter

The src_reg constructor that received the glsl_type was using it
only to build the swizzle, but not to fill this->type as dst_reg
is doing.

This caused type mismatches between movs and alu operations
on the NIR path, so the copy propagation optimization was not applied
to remove unneeded movs when a negate modifier was involved. This was
first detected on minus (negate+add) operations.

Shader DB results (taking into account only vec4):

total instructions in shared programs: 20019 -> 19934 (-0.42%)
instructions in affected programs:     2918 -> 2833 (-2.91%)
helped:                                79
HURT:                                  0
GAINED:                                0
LOST:                                  0

Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index b97b6c13a13..501461c6d94 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -61,6 +61,8 @@ src_reg::src_reg(register_file file, int reg, const glsl_type *type)
       this->swizzle = brw_swizzle_for_size(type->vector_elements);
    else
       this->swizzle = BRW_SWIZZLE_XYZW;
+   if (type)
+      this->type = brw_type_for_base_type(type);
 }
 
 /** Generic unset register constructor. */
-- 
cgit v1.2.3


From f30cf3258e495a583e011e07d5b4a19031c5518f Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 1 Sep 2015 09:31:15 +0100
Subject: meta: Compute correct buffer size with SkipRows/SkipPixels

If the user is specifying a subregion of a buffer using SKIP_ROWS and
SKIP_PIXELS, we must compute the buffer size carefully as the end of the
last row may be much shorter than stride*image_height*depth. The current
code tries to memcpy from beyond the end of the user data, for example
causing:

==28136== Invalid read of size 8
==28136==    at 0x4C2D94E: memcpy@@GLIBC_2.14 (vg_replace_strmem.c:915)
==28136==    by 0xB4ADFE3: brw_bo_write (brw_batch.c:1856)
==28136==    by 0xB5B3531: brw_buffer_data (intel_buffer_objects.c:208)
==28136==    by 0xB0F6275: _mesa_buffer_data (bufferobj.c:1600)
==28136==    by 0xB0F6346: _mesa_BufferData (bufferobj.c:1631)
==28136==    by 0xB37A1EE: create_texture_for_pbo (meta_tex_subimage.c:103)
==28136==    by 0xB37A467: _mesa_meta_pbo_TexSubImage (meta_tex_subimage.c:176)
==28136==    by 0xB5C8D61: intelTexSubImage (intel_tex_subimage.c:195)
==28136==    by 0xB254AB4: _mesa_texture_sub_image (teximage.c:3654)
==28136==    by 0xB254C9F: texsubimage (teximage.c:3712)
==28136==    by 0xB2550E9: _mesa_TexSubImage2D (teximage.c:3853)
==28136==    by 0x401CA0: UploadTexSubImage2D (teximage.c:171)
==28136==  Address 0xd8bfbe0 is 0 bytes after a block of size 1,024 alloc'd
==28136==    at 0x4C28C20: malloc (vg_replace_malloc.c:296)
==28136==    by 0x402014: PerfDraw (teximage.c:270)
==28136==    by 0x402648: Draw (glmain.c:182)
==28136==    by 0x8385E63: ??? (in /usr/lib/x86_64-linux-gnu/libglut.so.3.9.0)
==28136==    by 0x83896C8: fgEnumWindows (in /usr/lib/x86_64-linux-gnu/libglut.so.3.9.0)
==28136==    by 0x838641C: glutMainLoopEvent (in /usr/lib/x86_64-linux-gnu/libglut.so.3.9.0)
==28136==    by 0x8386C1C: glutMainLoop (in /usr/lib/x86_64-linux-gnu/libglut.so.3.9.0)
==28136==    by 0x4019C1: main (glmain.c:262)
==28136==
==28136== Invalid read of size 8
==28136==    at 0x4C2D940: memcpy@@GLIBC_2.14 (vg_replace_strmem.c:915)
==28136==    by 0xB4ADFE3: brw_bo_write (brw_batch.c:1856)
==28136==    by 0xB5B3531: brw_buffer_data (intel_buffer_objects.c:208)
==28136==    by 0xB0F6275: _mesa_buffer_data (bufferobj.c:1600)
==28136==    by 0xB0F6346: _mesa_BufferData (bufferobj.c:1631)
==28136==    by 0xB37A1EE: create_texture_for_pbo (meta_tex_subimage.c:103)
==28136==    by 0xB37A467: _mesa_meta_pbo_TexSubImage (meta_tex_subimage.c:176)
==28136==    by 0xB5C8D61: intelTexSubImage (intel_tex_subimage.c:195)
==28136==    by 0xB254AB4: _mesa_texture_sub_image (teximage.c:3654)
==28136==    by 0xB254C9F: texsubimage (teximage.c:3712)
==28136==    by 0xB2550E9: _mesa_TexSubImage2D (teximage.c:3853)
==28136==    by 0x401CA0: UploadTexSubImage2D (teximage.c:171)
==28136==  Address 0xd8bfbe8 is 8 bytes after a block of size 1,024 alloc'd
==28136==    at 0x4C28C20: malloc (vg_replace_malloc.c:296)
==28136==    by 0x402014: PerfDraw (teximage.c:270)
==28136==    by 0x402648: Draw (glmain.c:182)
==28136==    by 0x8385E63: ??? (in /usr/lib/x86_64-linux-gnu/libglut.so.3.9.0)
==28136==    by 0x83896C8: fgEnumWindows (in /usr/lib/x86_64-linux-gnu/libglut.so.3.9.0)
==28136==    by 0x838641C: glutMainLoopEvent (in /usr/lib/x86_64-linux-gnu/libglut.so.3.9.0)
==28136==    by 0x8386C1C: glutMainLoop (in /usr/lib/x86_64-linux-gnu/libglut.so.3.9.0)
==28136==    by 0x4019C1: main (glmain.c:262)
==28136==

Fixes regression from commit 7f396189f073d626c5f7a2c232dac92b65f5a23f
Author: Jason Ekstrand <jason.ekstrand@intel.com>
Date:   Mon Jan 5 18:17:04 2015 -0800

    meta: Add a BlitFramebuffers-based implementation of TexSubImage

v2: However, the teximage we create does need to be width x full_height x 1

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jason Ekstrand <jason.ekstrand@intel.com>
Cc: Neil Roberts <neil@linux.intel.com>
Reviewed-by: Neil Roberts <neil@linux.intel.com>
---
 src/mesa/drivers/common/meta_tex_subimage.c | 45 +++++++++++++++++++----------
 1 file changed, 30 insertions(+), 15 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/common/meta_tex_subimage.c b/src/mesa/drivers/common/meta_tex_subimage.c
index 16d8f5d4747..33c22aa139d 100644
--- a/src/mesa/drivers/common/meta_tex_subimage.c
+++ b/src/mesa/drivers/common/meta_tex_subimage.c
@@ -46,8 +46,9 @@
 #include "varray.h"
 
 static struct gl_texture_image *
-create_texture_for_pbo(struct gl_context *ctx, bool create_pbo,
-                       GLenum pbo_target, int width, int height,
+create_texture_for_pbo(struct gl_context *ctx,
+                       bool create_pbo, GLenum pbo_target,
+                       int dims, int width, int height, int depth,
                        GLenum format, GLenum type, const void *pixels,
                        const struct gl_pixelstore_attrib *packing,
                        GLuint *tmp_pbo, GLuint *tmp_tex)
@@ -73,13 +74,18 @@ create_texture_for_pbo(struct gl_context *ctx, bool create_pbo,
       return NULL;
 
    /* Account for SKIP_PIXELS, SKIP_ROWS, ALIGNMENT, and SKIP_IMAGES */
-   pixels = _mesa_image_address3d(packing, pixels,
-                                  width, height, format, type, 0, 0, 0);
+   uint32_t first_pixel = _mesa_image_offset(dims, packing, width, height,
+                                             format, type,
+                                             0, 0, 0);
+   uint32_t last_pixel =  _mesa_image_offset(dims, packing, width, height,
+                                             format, type,
+                                             depth-1, height-1, width);
    row_stride = _mesa_image_row_stride(packing, width, format, type);
 
    if (_mesa_is_bufferobj(packing->BufferObj)) {
       *tmp_pbo = 0;
       buffer_obj = packing->BufferObj;
+      first_pixel += (intptr_t)pixels;
    } else {
       bool is_pixel_pack = pbo_target == GL_PIXEL_PACK_BUFFER;
 
@@ -97,14 +103,18 @@ create_texture_for_pbo(struct gl_context *ctx, bool create_pbo,
        * data to avoid unnecessary data copying in _mesa_BufferData().
        */
       if (is_pixel_pack)
-         _mesa_BufferData(pbo_target, row_stride * height, NULL,
+         _mesa_BufferData(pbo_target,
+                          last_pixel - first_pixel,
+                          NULL,
                           GL_STREAM_READ);
       else
-         _mesa_BufferData(pbo_target, row_stride * height, pixels,
+         _mesa_BufferData(pbo_target,
+                          last_pixel - first_pixel,
+                          (char *)pixels + first_pixel,
                           GL_STREAM_DRAW);
 
       buffer_obj = packing->BufferObj;
-      pixels = NULL;
+      first_pixel = 0;
 
       _mesa_BindBuffer(pbo_target, 0);
    }
@@ -119,14 +129,21 @@ create_texture_for_pbo(struct gl_context *ctx, bool create_pbo,
 
    internal_format = _mesa_get_format_base_format(pbo_format);
 
+   /* The texture is addressed as a single very-tall image, so we
+    * need to pack the multiple image depths together taking the
+    * inter-image padding into account.
+    */
+   int image_height = packing->ImageHeight == 0 ? height : packing->ImageHeight;
+   int full_height = image_height * (depth - 1) + height;
+
    tex_image = _mesa_get_tex_image(ctx, tex_obj, tex_obj->Target, 0);
-   _mesa_init_teximage_fields(ctx, tex_image, width, height, 1,
+   _mesa_init_teximage_fields(ctx, tex_image, width, full_height, 1,
                               0, internal_format, pbo_format);
 
    read_only = pbo_target == GL_PIXEL_UNPACK_BUFFER;
    if (!ctx->Driver.SetTextureStorageForBufferObject(ctx, tex_obj,
                                                      buffer_obj,
-                                                     (intptr_t)pixels,
+                                                     first_pixel,
                                                      row_stride,
                                                      read_only)) {
       _mesa_DeleteTextures(1, tmp_tex);
@@ -147,7 +164,7 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims,
                            const struct gl_pixelstore_attrib *packing)
 {
    GLuint pbo = 0, pbo_tex = 0, fbos[2] = { 0, 0 };
-   int full_height, image_height;
+   int image_height;
    struct gl_texture_image *pbo_tex_image;
    GLenum status;
    bool success = false;
@@ -171,11 +188,10 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims,
     * property.
     */
    image_height = packing->ImageHeight == 0 ? height : packing->ImageHeight;
-   full_height = image_height * (depth - 1) + height;
 
    pbo_tex_image = create_texture_for_pbo(ctx, create_pbo,
                                           GL_PIXEL_UNPACK_BUFFER,
-                                          width, full_height,
+                                          dims, width, height, depth,
                                           format, type, pixels, packing,
                                           &pbo, &pbo_tex);
    if (!pbo_tex_image)
@@ -277,7 +293,7 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
                               const struct gl_pixelstore_attrib *packing)
 {
    GLuint pbo = 0, pbo_tex = 0, fbos[2] = { 0, 0 };
-   int full_height, image_height;
+   int image_height;
    struct gl_texture_image *pbo_tex_image;
    struct gl_renderbuffer *rb = NULL;
    GLenum dstBaseFormat = _mesa_unpack_format_to_base_format(format);
@@ -324,10 +340,9 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
     * property.
     */
    image_height = packing->ImageHeight == 0 ? height : packing->ImageHeight;
-   full_height = image_height * (depth - 1) + height;
 
    pbo_tex_image = create_texture_for_pbo(ctx, false, GL_PIXEL_PACK_BUFFER,
-                                          width, full_height * depth,
+                                          dims, width, height, depth,
                                           format, type, pixels, packing,
                                           &pbo, &pbo_tex);
    if (!pbo_tex_image)
-- 
cgit v1.2.3


From 75f92e28b40ced21fb92df6e00277733bbca7f07 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Wed, 29 Jul 2015 11:23:29 -0600
Subject: winsys/svga: add new vmw_query.c[h] files

Functions for creating, destroying, getting queries, etc.

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/winsys/svga/drm/Makefile.sources |   4 +-
 src/gallium/winsys/svga/drm/vmw_query.c      | 144 +++++++++++++++++++++++++++
 src/gallium/winsys/svga/drm/vmw_query.h      |  67 +++++++++++++
 3 files changed, 214 insertions(+), 1 deletion(-)
 create mode 100644 src/gallium/winsys/svga/drm/vmw_query.c
 create mode 100644 src/gallium/winsys/svga/drm/vmw_query.h

(limited to 'src')

diff --git a/src/gallium/winsys/svga/drm/Makefile.sources b/src/gallium/winsys/svga/drm/Makefile.sources
index ab2b9321f3e..f82b0097b5b 100644
--- a/src/gallium/winsys/svga/drm/Makefile.sources
+++ b/src/gallium/winsys/svga/drm/Makefile.sources
@@ -17,4 +17,6 @@ C_SOURCES := \
 	vmw_surface.c \
 	vmw_surface.h \
 	vmw_shader.c \
-	vmw_shader.h
+	vmw_shader.h \
+	vmw_query.c \
+	vmw_query.h
diff --git a/src/gallium/winsys/svga/drm/vmw_query.c b/src/gallium/winsys/svga/drm/vmw_query.c
new file mode 100644
index 00000000000..7baf2c1d7ae
--- /dev/null
+++ b/src/gallium/winsys/svga/drm/vmw_query.c
@@ -0,0 +1,144 @@
+/**********************************************************
+ * Copyright 2015 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipebuffer/pb_bufmgr.h"
+#include "util/u_memory.h"
+
+#include "vmw_screen.h"
+#include "vmw_buffer.h"
+#include "vmw_query.h"
+
+/** Create a query object plus a 4096-aligned result buffer obtained from
+ *  the screen's pools.gmr manager for queryResultLen; NULL on failure. */
+struct svga_winsys_gb_query *
+vmw_svga_winsys_query_create(struct svga_winsys_screen *sws,
+                             uint32 queryResultLen)
+{
+   struct vmw_winsys_screen *screen = vmw_winsys_screen(sws);
+   struct pb_manager *gmr_pool = screen->pools.gmr;
+   struct svga_winsys_gb_query *gb_query;
+   struct pb_buffer *storage;
+   struct pb_desc storage_desc = {0};
+
+   gb_query = CALLOC_STRUCT(svga_winsys_gb_query);
+   if (gb_query == NULL)
+      return NULL;
+
+   /* Back the query with a 4096-byte aligned buffer from the pool */
+   storage_desc.alignment = 4096;
+   storage = gmr_pool->create_buffer(gmr_pool, queryResultLen, &storage_desc);
+   gb_query->buf = vmw_svga_winsys_buffer_wrap(storage);
+
+   if (gb_query->buf != NULL)
+      return gb_query;
+
+   /* Wrapping produced no buffer: report it and drop the query object */
+   debug_printf("Failed to allocate memory for queries\n");
+   FREE(gb_query);
+   return NULL;
+}
+
+/** Destroy a query: hand its result buffer to
+ *  vmw_svga_winsys_buffer_destroy(), then free the query object itself. */
+void
+vmw_svga_winsys_query_destroy(struct svga_winsys_screen *sws,
+                              struct svga_winsys_gb_query *query)
+{
+   vmw_svga_winsys_buffer_destroy(sws, query->buf);
+   FREE(query);
+}
+
+/** Store queryState in the query's result buffer at byte offset `offset`.
+ *  Returns 0 on success, or -1 when the buffer cannot be mapped. */
+int
+vmw_svga_winsys_query_init(struct svga_winsys_screen *sws,
+                           struct svga_winsys_gb_query *query,
+                           unsigned offset,
+                           SVGA3dQueryState queryState)
+{
+   char *mapped;
+   SVGA3dQueryState *slot;
+
+   /* Map the result buffer for writing */
+   mapped = (char *) vmw_svga_winsys_buffer_map(sws, query->buf,
+                                                PIPE_TRANSFER_WRITE);
+   if (mapped == NULL) {
+      debug_printf("Failed to map query result memory for initialization\n");
+      return -1;
+   }
+
+   /* The slot for this query starts `offset` bytes into the mapping */
+   slot = (SVGA3dQueryState *) (mapped + offset);
+   *slot = queryState;
+
+   vmw_svga_winsys_buffer_unmap(sws, query->buf);
+   return 0;
+}
+
+/** Read back the query slot at byte offset `offset`: the state goes to
+ *  *queryState and the resultLen bytes that follow it go to result. */
+void
+vmw_svga_winsys_query_get_result(struct svga_winsys_screen *sws,
+                                 struct svga_winsys_gb_query *query,
+                                 unsigned offset,
+                                 SVGA3dQueryState *queryState,
+                                 void *result, uint32 resultLen)
+{
+   const SVGA3dQueryState *slot;
+   char *mapped;
+
+   /* Map the result buffer for reading */
+   mapped = (char *) vmw_svga_winsys_buffer_map(sws, query->buf,
+                                                PIPE_TRANSFER_READ);
+   if (mapped == NULL) {
+      /* No mapping: the only thing to hand back is a failed state */
+      debug_printf("Failed to lock query result memory\n");
+      if (queryState != NULL)
+         *queryState = SVGA3D_QUERYSTATE_FAILED;
+      return;
+   }
+
+   slot = (const SVGA3dQueryState *) (mapped + offset);
+   /* Both outputs are optional; NULL skips the corresponding copy */
+   if (queryState != NULL)
+      *queryState = *slot;
+
+   /* The result payload sits immediately after the state value */
+   if (result != NULL)
+      memcpy(result, slot + 1, resultLen);
+
+   vmw_svga_winsys_buffer_unmap(sws, query->buf);
+}
+
+/** Query-bind hook: reports PIPE_OK without touching its arguments. */
+enum pipe_error
+vmw_swc_query_bind(struct svga_winsys_context *swc,
+                   struct svga_winsys_gb_query *query,
+                   unsigned flags)
+{
+   /* no-op on Linux */
+   return PIPE_OK;
+}
+
diff --git a/src/gallium/winsys/svga/drm/vmw_query.h b/src/gallium/winsys/svga/drm/vmw_query.h
new file mode 100644
index 00000000000..a8b58e6182f
--- /dev/null
+++ b/src/gallium/winsys/svga/drm/vmw_query.h
@@ -0,0 +1,67 @@
+/**********************************************************
+ * Copyright 2015 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef VMW_DRM_QUERY_H
+#define VMW_DRM_QUERY_H
+
+#include "svga3d_reg.h"
+
+
+
+/** Guest-backed query: owns the winsys buffer that receives query results */
+struct svga_winsys_gb_query
+{
+   struct svga_winsys_buffer *buf;  /* set by query_create, freed by query_destroy */
+};
+
+/* Guest-backed query entry points; the definitions are in vmw_query.c. */
+struct svga_winsys_gb_query *
+vmw_svga_winsys_query_create(struct svga_winsys_screen *sws,
+                             uint32 queryResultLen);   /* NULL on failure */
+
+void
+vmw_svga_winsys_query_destroy(struct svga_winsys_screen *sws,
+                              struct svga_winsys_gb_query *query);
+
+int
+vmw_svga_winsys_query_init(struct svga_winsys_screen *sws,
+                           struct svga_winsys_gb_query *query,
+                           unsigned offset,
+                           SVGA3dQueryState queryState);   /* 0, or -1 if map fails */
+
+void
+vmw_svga_winsys_query_get_result(struct svga_winsys_screen *sws,
+                                 struct svga_winsys_gb_query *query,
+                                 unsigned offset,
+                                 SVGA3dQueryState *queryState,
+                                 void *result, uint32 resultLen);
+
+enum pipe_error
+vmw_swc_query_bind(struct svga_winsys_context *swc,
+                   struct svga_winsys_gb_query *query,
+                   unsigned flags);   /* always PIPE_OK */
+
+#endif /* VMW_DRM_QUERY_H */
+
-- 
cgit v1.2.3


From 3a9252670450b80e333fadeb7c85367ab15a273e Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 7 Aug 2015 14:56:03 -0600
Subject: svga: add new version 10 device header files

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 .../drivers/svga/include/VGPU10ShaderTokens.h      |  489 +++++++
 src/gallium/drivers/svga/include/svga3d_dx.h       | 1521 ++++++++++++++++++++
 2 files changed, 2010 insertions(+)
 create mode 100644 src/gallium/drivers/svga/include/VGPU10ShaderTokens.h
 create mode 100644 src/gallium/drivers/svga/include/svga3d_dx.h

(limited to 'src')

diff --git a/src/gallium/drivers/svga/include/VGPU10ShaderTokens.h b/src/gallium/drivers/svga/include/VGPU10ShaderTokens.h
new file mode 100644
index 00000000000..4440235890c
--- /dev/null
+++ b/src/gallium/drivers/svga/include/VGPU10ShaderTokens.h
@@ -0,0 +1,489 @@
+/**********************************************************
+ * Copyright 2007-2015 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * VGPU10ShaderTokens.h --
+ *
+ *    VGPU10 shader token definitions.
+ *
+ */
+
+#ifndef VGPU10SHADERTOKENS_H
+#define VGPU10SHADERTOKENS_H
+
+/* Shader limits */
+#define VGPU10_MAX_VS_INPUTS 16
+#define VGPU10_MAX_VS_OUTPUTS 16
+#define VGPU10_MAX_GS_INPUTS 16
+#define VGPU10_MAX_GS_OUTPUTS 32
+#define VGPU10_MAX_FS_INPUTS 32
+#define VGPU10_MAX_FS_OUTPUTS 8
+#define VGPU10_MAX_TEMPS 4096
+#define VGPU10_MAX_CONSTANT_BUFFERS 14
+#define VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT 4096
+#define VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT 4096
+#define VGPU10_MAX_SAMPLERS 16
+#define VGPU10_MAX_RESOURCES 128
+#define VGPU10_MIN_TEXEL_FETCH_OFFSET (-8)  /* -8..7 is the 4-bit two's complement range */
+#define VGPU10_MAX_TEXEL_FETCH_OFFSET 7
+
+typedef enum {   /* values for VGPU10ProgramToken.programType */
+   VGPU10_PIXEL_SHADER = 0,
+   VGPU10_VERTEX_SHADER = 1,
+   VGPU10_GEOMETRY_SHADER = 2
+} VGPU10_PROGRAM_TYPE;
+
+typedef union {   /* program token: bit-field view overlaid on a raw uint32 */
+   struct {
+      unsigned int minorVersion  : 4;
+      unsigned int majorVersion  : 4;
+      unsigned int               : 8;   /* unnamed padding */
+      unsigned int programType   : 16; /* VGPU10_PROGRAM_TYPE */
+   };
+   uint32 value;   /* whole token; the four widths above sum to 32 bits */
+} VGPU10ProgramToken;
+
+
+typedef enum {   /* values for VGPU10OpcodeToken0.opcodeType */
+   VGPU10_OPCODE_ADD                               = 0,
+   VGPU10_OPCODE_AND                               = 1,
+   VGPU10_OPCODE_BREAK                             = 2,
+   VGPU10_OPCODE_BREAKC                            = 3,
+   VGPU10_OPCODE_CALL                              = 4,
+   VGPU10_OPCODE_CALLC                             = 5,
+   VGPU10_OPCODE_CASE                              = 6,
+   VGPU10_OPCODE_CONTINUE                          = 7,
+   VGPU10_OPCODE_CONTINUEC                         = 8,
+   VGPU10_OPCODE_CUT                               = 9,
+   VGPU10_OPCODE_DEFAULT                           = 10,
+   VGPU10_OPCODE_DERIV_RTX                         = 11,
+   VGPU10_OPCODE_DERIV_RTY                         = 12,
+   VGPU10_OPCODE_DISCARD                           = 13,
+   VGPU10_OPCODE_DIV                               = 14,
+   VGPU10_OPCODE_DP2                               = 15,
+   VGPU10_OPCODE_DP3                               = 16,
+   VGPU10_OPCODE_DP4                               = 17,
+   VGPU10_OPCODE_ELSE                              = 18,
+   VGPU10_OPCODE_EMIT                              = 19,
+   VGPU10_OPCODE_EMITTHENCUT                       = 20,
+   VGPU10_OPCODE_ENDIF                             = 21,
+   VGPU10_OPCODE_ENDLOOP                           = 22,
+   VGPU10_OPCODE_ENDSWITCH                         = 23,
+   VGPU10_OPCODE_EQ                                = 24,
+   VGPU10_OPCODE_EXP                               = 25,
+   VGPU10_OPCODE_FRC                               = 26,
+   VGPU10_OPCODE_FTOI                              = 27,
+   VGPU10_OPCODE_FTOU                              = 28,
+   VGPU10_OPCODE_GE                                = 29,
+   VGPU10_OPCODE_IADD                              = 30,
+   VGPU10_OPCODE_IF                                = 31,
+   VGPU10_OPCODE_IEQ                               = 32,
+   VGPU10_OPCODE_IGE                               = 33,
+   VGPU10_OPCODE_ILT                               = 34,
+   VGPU10_OPCODE_IMAD                              = 35,
+   VGPU10_OPCODE_IMAX                              = 36,
+   VGPU10_OPCODE_IMIN                              = 37,
+   VGPU10_OPCODE_IMUL                              = 38,
+   VGPU10_OPCODE_INE                               = 39,
+   VGPU10_OPCODE_INEG                              = 40,
+   VGPU10_OPCODE_ISHL                              = 41,
+   VGPU10_OPCODE_ISHR                              = 42,
+   VGPU10_OPCODE_ITOF                              = 43,
+   VGPU10_OPCODE_LABEL                             = 44,
+   VGPU10_OPCODE_LD                                = 45,
+   VGPU10_OPCODE_LD_MS                             = 46,
+   VGPU10_OPCODE_LOG                               = 47,
+   VGPU10_OPCODE_LOOP                              = 48,
+   VGPU10_OPCODE_LT                                = 49,
+   VGPU10_OPCODE_MAD                               = 50,
+   VGPU10_OPCODE_MIN                               = 51,
+   VGPU10_OPCODE_MAX                               = 52,
+   VGPU10_OPCODE_CUSTOMDATA                        = 53,
+   VGPU10_OPCODE_MOV                               = 54,
+   VGPU10_OPCODE_MOVC                              = 55,
+   VGPU10_OPCODE_MUL                               = 56,
+   VGPU10_OPCODE_NE                                = 57,
+   VGPU10_OPCODE_NOP                               = 58,
+   VGPU10_OPCODE_NOT                               = 59,
+   VGPU10_OPCODE_OR                                = 60,
+   VGPU10_OPCODE_RESINFO                           = 61,
+   VGPU10_OPCODE_RET                               = 62,
+   VGPU10_OPCODE_RETC                              = 63,
+   VGPU10_OPCODE_ROUND_NE                          = 64,
+   VGPU10_OPCODE_ROUND_NI                          = 65,
+   VGPU10_OPCODE_ROUND_PI                          = 66,
+   VGPU10_OPCODE_ROUND_Z                           = 67,
+   VGPU10_OPCODE_RSQ                               = 68,
+   VGPU10_OPCODE_SAMPLE                            = 69,
+   VGPU10_OPCODE_SAMPLE_C                          = 70,
+   VGPU10_OPCODE_SAMPLE_C_LZ                       = 71,
+   VGPU10_OPCODE_SAMPLE_L                          = 72,
+   VGPU10_OPCODE_SAMPLE_D                          = 73,
+   VGPU10_OPCODE_SAMPLE_B                          = 74,
+   VGPU10_OPCODE_SQRT                              = 75,
+   VGPU10_OPCODE_SWITCH                            = 76,
+   VGPU10_OPCODE_SINCOS                            = 77,
+   VGPU10_OPCODE_UDIV                              = 78,
+   VGPU10_OPCODE_ULT                               = 79,
+   VGPU10_OPCODE_UGE                               = 80,
+   VGPU10_OPCODE_UMUL                              = 81,
+   VGPU10_OPCODE_UMAD                              = 82,
+   VGPU10_OPCODE_UMAX                              = 83,
+   VGPU10_OPCODE_UMIN                              = 84,
+   VGPU10_OPCODE_USHR                              = 85,
+   VGPU10_OPCODE_UTOF                              = 86,
+   VGPU10_OPCODE_XOR                               = 87,
+   VGPU10_OPCODE_DCL_RESOURCE                      = 88,
+   VGPU10_OPCODE_DCL_CONSTANT_BUFFER               = 89,
+   VGPU10_OPCODE_DCL_SAMPLER                       = 90,
+   VGPU10_OPCODE_DCL_INDEX_RANGE                   = 91,
+   VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY  = 92,
+   VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE            = 93,
+   VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT       = 94,
+   VGPU10_OPCODE_DCL_INPUT                         = 95,
+   VGPU10_OPCODE_DCL_INPUT_SGV                     = 96,
+   VGPU10_OPCODE_DCL_INPUT_SIV                     = 97,
+   VGPU10_OPCODE_DCL_INPUT_PS                      = 98,
+   VGPU10_OPCODE_DCL_INPUT_PS_SGV                  = 99,
+   VGPU10_OPCODE_DCL_INPUT_PS_SIV                  = 100,
+   VGPU10_OPCODE_DCL_OUTPUT                        = 101,
+   VGPU10_OPCODE_DCL_OUTPUT_SGV                    = 102,
+   VGPU10_OPCODE_DCL_OUTPUT_SIV                    = 103,
+   VGPU10_OPCODE_DCL_TEMPS                         = 104,
+   VGPU10_OPCODE_DCL_INDEXABLE_TEMP                = 105,
+   VGPU10_OPCODE_DCL_GLOBAL_FLAGS                  = 106,
+   VGPU10_OPCODE_IDIV                              = 107,
+   VGPU10_NUM_OPCODES                  /* Should be the last entry (implicitly 108). */
+} VGPU10_OPCODE_TYPE;
+
+typedef enum {   /* values for VGPU10OpcodeToken0.interpolationMode */
+   VGPU10_INTERPOLATION_UNDEFINED = 0,
+   VGPU10_INTERPOLATION_CONSTANT = 1,
+   VGPU10_INTERPOLATION_LINEAR = 2,
+   VGPU10_INTERPOLATION_LINEAR_CENTROID = 3,
+   VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE = 4,
+   VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID = 5,
+   VGPU10_INTERPOLATION_LINEAR_SAMPLE = 6,                  /* DX10.1 */
+   VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE = 7     /* DX10.1 */
+} VGPU10_INTERPOLATION_MODE;
+
+typedef enum {   /* values for VGPU10OpcodeToken0.resourceDimension */
+   VGPU10_RESOURCE_DIMENSION_UNKNOWN = 0,
+   VGPU10_RESOURCE_DIMENSION_BUFFER = 1,
+   VGPU10_RESOURCE_DIMENSION_TEXTURE1D = 2,
+   VGPU10_RESOURCE_DIMENSION_TEXTURE2D = 3,
+   VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS = 4,
+   VGPU10_RESOURCE_DIMENSION_TEXTURE3D = 5,
+   VGPU10_RESOURCE_DIMENSION_TEXTURECUBE = 6,
+   VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY = 7,
+   VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY = 8,
+   VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 9,
+   VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10
+} VGPU10_RESOURCE_DIMENSION;
+
+typedef enum {   /* values for VGPU10OpcodeToken0.samplerMode */
+   VGPU10_SAMPLER_MODE_DEFAULT = 0,
+   VGPU10_SAMPLER_MODE_COMPARISON = 1,
+   VGPU10_SAMPLER_MODE_MONO = 2
+} VGPU10_SAMPLER_MODE;
+
+typedef enum {   /* values for the 1-bit VGPU10OpcodeToken0.testBoolean */
+   VGPU10_INSTRUCTION_TEST_ZERO     = 0,
+   VGPU10_INSTRUCTION_TEST_NONZERO  = 1
+} VGPU10_INSTRUCTION_TEST_BOOLEAN;
+
+typedef enum {   /* values for the 1-bit VGPU10OpcodeToken0.accessPattern */
+   VGPU10_CB_IMMEDIATE_INDEXED   = 0,
+   VGPU10_CB_DYNAMIC_INDEXED     = 1
+} VGPU10_CB_ACCESS_PATTERN;
+
+typedef enum {   /* values for VGPU10OpcodeToken0.primitive */
+   VGPU10_PRIMITIVE_UNDEFINED    = 0,
+   VGPU10_PRIMITIVE_POINT        = 1,
+   VGPU10_PRIMITIVE_LINE         = 2,
+   VGPU10_PRIMITIVE_TRIANGLE     = 3,
+   VGPU10_PRIMITIVE_LINE_ADJ     = 6,   /* 4 and 5 are not defined here */
+   VGPU10_PRIMITIVE_TRIANGLE_ADJ = 7
+} VGPU10_PRIMITIVE;
+
+typedef enum {   /* values for VGPU10OpcodeToken0.primitiveTopology */
+   VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED          = 0,
+   VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST          = 1,
+   VGPU10_PRIMITIVE_TOPOLOGY_LINELIST           = 2,
+   VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP          = 3,
+   VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST       = 4,
+   VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP      = 5,
+   VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ       = 10,   /* 6..9 are not defined here */
+   VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ      = 11,
+   VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ   = 12,
+   VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ  = 13
+} VGPU10_PRIMITIVE_TOPOLOGY;
+
+typedef enum {   /* values for VGPU10OpcodeToken0.customDataClass */
+   VGPU10_CUSTOMDATA_COMMENT                       = 0,
+   VGPU10_CUSTOMDATA_DEBUGINFO                     = 1,
+   VGPU10_CUSTOMDATA_OPAQUE                        = 2,
+   VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER = 3
+} VGPU10_CUSTOMDATA_CLASS;
+
+typedef enum {   /* values for the 2-bit VGPU10OpcodeToken0.resinfoReturnType */
+   VGPU10_RESINFO_RETURN_FLOAT      = 0,
+   VGPU10_RESINFO_RETURN_RCPFLOAT   = 1,
+   VGPU10_RESINFO_RETURN_UINT       = 2
+} VGPU10_RESINFO_RETURN_TYPE;
+
+typedef union {   /* opcode token: overlapping bit-field views plus the raw value */
+   struct {
+      unsigned int opcodeType          : 11; /* VGPU10_OPCODE_TYPE */
+      unsigned int interpolationMode   : 4;  /* VGPU10_INTERPOLATION_MODE */
+      unsigned int                     : 3;
+      unsigned int testBoolean         : 1;  /* VGPU10_INSTRUCTION_TEST_BOOLEAN */
+      unsigned int                     : 5;
+      unsigned int instructionLength   : 7;
+      unsigned int extended            : 1;
+   };
+   struct {   /* every view from here on skips the 11 opcodeType bits first */
+      unsigned int                     : 11;
+      unsigned int resourceDimension   : 5;  /* VGPU10_RESOURCE_DIMENSION */
+   };
+   struct {
+      unsigned int                     : 11;
+      unsigned int samplerMode         : 4;  /* VGPU10_SAMPLER_MODE */
+   };
+   struct {
+      unsigned int                     : 11;
+      unsigned int accessPattern       : 1;  /* VGPU10_CB_ACCESS_PATTERN */
+   };
+   struct {
+      unsigned int                     : 11;
+      unsigned int primitive           : 6;  /* VGPU10_PRIMITIVE */
+   };
+   struct {
+      unsigned int                     : 11;
+      unsigned int primitiveTopology   : 6;  /* VGPU10_PRIMITIVE_TOPOLOGY */
+   };
+   struct {
+      unsigned int                     : 11;
+      unsigned int customDataClass     : 21; /* VGPU10_CUSTOMDATA_CLASS */
+   };
+   struct {
+      unsigned int                     : 11;
+      unsigned int resinfoReturnType   : 2;  /* VGPU10_RESINFO_RETURN_TYPE */
+      unsigned int saturate            : 1;
+   };
+   struct {
+      unsigned int                     : 11;
+      unsigned int refactoringAllowed  : 1;
+   };
+   uint32 value;   /* the whole token as one integer */
+} VGPU10OpcodeToken0;
+
+
+typedef enum {   /* values for VGPU10OpcodeToken1.opcodeType */
+   VGPU10_EXTENDED_OPCODE_EMPTY = 0,
+   VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS   /* implicitly 1 */
+} VGPU10_EXTENDED_OPCODE_TYPE;
+
+typedef union {   /* extended opcode token: bit-field view plus the raw value */
+   struct {
+      unsigned int opcodeType : 6;  /* VGPU10_EXTENDED_OPCODE_TYPE */
+      unsigned int            : 3;
+      unsigned int offsetU    : 4;  /* Two's complement. */
+      unsigned int offsetV    : 4;  /* Two's complement. */
+      unsigned int offsetW    : 4;  /* Two's complement. */
+      unsigned int            : 10;
+      unsigned int extended   : 1;
+   };
+   uint32 value;   /* whole token; the widths above sum to 32 bits */
+} VGPU10OpcodeToken1;
+
+
+typedef enum {   /* values for the 2-bit VGPU10OperandToken0.numComponents */
+   VGPU10_OPERAND_0_COMPONENT = 0,
+   VGPU10_OPERAND_1_COMPONENT = 1,
+   VGPU10_OPERAND_4_COMPONENT = 2,
+   VGPU10_OPERAND_N_COMPONENT = 3   /* Unused for now. */
+} VGPU10_OPERAND_NUM_COMPONENTS;
+
+typedef enum {   /* values for the 2-bit VGPU10OperandToken0.selectionMode */
+   VGPU10_OPERAND_4_COMPONENT_MASK_MODE = 0,
+   VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE = 1,
+   VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE = 2
+} VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE;
+
+#define VGPU10_OPERAND_4_COMPONENT_MASK_X    0x1   /* one bit per component */
+#define VGPU10_OPERAND_4_COMPONENT_MASK_Y    0x2
+#define VGPU10_OPERAND_4_COMPONENT_MASK_Z    0x4
+#define VGPU10_OPERAND_4_COMPONENT_MASK_W    0x8
+
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XY   (VGPU10_OPERAND_4_COMPONENT_MASK_X   | VGPU10_OPERAND_4_COMPONENT_MASK_Y)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XZ   (VGPU10_OPERAND_4_COMPONENT_MASK_X   | VGPU10_OPERAND_4_COMPONENT_MASK_Z)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XW   (VGPU10_OPERAND_4_COMPONENT_MASK_X   | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_YZ   (VGPU10_OPERAND_4_COMPONENT_MASK_Y   | VGPU10_OPERAND_4_COMPONENT_MASK_Z)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_YW   (VGPU10_OPERAND_4_COMPONENT_MASK_Y   | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_ZW   (VGPU10_OPERAND_4_COMPONENT_MASK_Z   | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XYZ  (VGPU10_OPERAND_4_COMPONENT_MASK_XY  | VGPU10_OPERAND_4_COMPONENT_MASK_Z)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XYW  (VGPU10_OPERAND_4_COMPONENT_MASK_XY  | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XZW  (VGPU10_OPERAND_4_COMPONENT_MASK_XZ  | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_YZW  (VGPU10_OPERAND_4_COMPONENT_MASK_YZ  | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XYZW (VGPU10_OPERAND_4_COMPONENT_MASK_XYZ | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_ALL  VGPU10_OPERAND_4_COMPONENT_MASK_XYZW   /* 0xf */
+
+#define VGPU10_REGISTER_INDEX_FROM_SEMANTIC  0xffffffff
+
+typedef enum {   /* values for VGPU10OperandToken0.swizzleX..W and .selectMask */
+   VGPU10_COMPONENT_X = 0,
+   VGPU10_COMPONENT_Y = 1,
+   VGPU10_COMPONENT_Z = 2,
+   VGPU10_COMPONENT_W = 3
+} VGPU10_COMPONENT_NAME;
+
+typedef enum {   /* values for VGPU10OperandToken0.operandType */
+   VGPU10_OPERAND_TYPE_TEMP = 0,
+   VGPU10_OPERAND_TYPE_INPUT = 1,
+   VGPU10_OPERAND_TYPE_OUTPUT = 2,
+   VGPU10_OPERAND_TYPE_INDEXABLE_TEMP = 3,
+   VGPU10_OPERAND_TYPE_IMMEDIATE32 = 4,
+   VGPU10_OPERAND_TYPE_IMMEDIATE64 = 5,
+   VGPU10_OPERAND_TYPE_SAMPLER = 6,
+   VGPU10_OPERAND_TYPE_RESOURCE = 7,
+   VGPU10_OPERAND_TYPE_CONSTANT_BUFFER = 8,
+   VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER = 9,
+   VGPU10_OPERAND_TYPE_LABEL = 10,
+   VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID = 11,
+   VGPU10_OPERAND_TYPE_OUTPUT_DEPTH = 12,
+   VGPU10_OPERAND_TYPE_NULL = 13,
+   VGPU10_OPERAND_TYPE_RASTERIZER = 14,            /* DX10.1 */
+   VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK = 15   /* DX10.1 */
+} VGPU10_OPERAND_TYPE;
+
+typedef enum {   /* values for the 2-bit VGPU10OperandToken0.indexDimension */
+   VGPU10_OPERAND_INDEX_0D = 0,
+   VGPU10_OPERAND_INDEX_1D = 1,
+   VGPU10_OPERAND_INDEX_2D = 2,
+   VGPU10_OPERAND_INDEX_3D = 3
+} VGPU10_OPERAND_INDEX_DIMENSION;
+
+typedef enum {   /* values for VGPU10OperandToken0.index0/index1Representation */
+   VGPU10_OPERAND_INDEX_IMMEDIATE32 = 0,
+   VGPU10_OPERAND_INDEX_IMMEDIATE64 = 1,
+   VGPU10_OPERAND_INDEX_RELATIVE = 2,
+   VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE = 3,
+   VGPU10_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE = 4
+} VGPU10_OPERAND_INDEX_REPRESENTATION;
+
+typedef union {   /* operand token: overlapping bit-field views plus the raw value */
+   struct {
+      unsigned int numComponents          : 2;  /* VGPU10_OPERAND_NUM_COMPONENTS */
+      unsigned int selectionMode          : 2;  /* VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE */
+      unsigned int mask                   : 4;  /* VGPU10_OPERAND_4_COMPONENT_MASK_* */
+      unsigned int                        : 4;
+      unsigned int operandType            : 8;  /* VGPU10_OPERAND_TYPE */
+      unsigned int indexDimension         : 2;  /* VGPU10_OPERAND_INDEX_DIMENSION */
+      unsigned int index0Representation   : 3;  /* VGPU10_OPERAND_INDEX_REPRESENTATION */
+      unsigned int index1Representation   : 3;  /* VGPU10_OPERAND_INDEX_REPRESENTATION */
+      unsigned int                        : 3;
+      unsigned int extended               : 1;
+   };
+   struct {   /* swizzle view: 4 skipped bits, then four 2-bit selectors */
+      unsigned int                        : 4;
+      unsigned int swizzleX               : 2;  /* VGPU10_COMPONENT_NAME */
+      unsigned int swizzleY               : 2;  /* VGPU10_COMPONENT_NAME */
+      unsigned int swizzleZ               : 2;  /* VGPU10_COMPONENT_NAME */
+      unsigned int swizzleW               : 2;  /* VGPU10_COMPONENT_NAME */
+   };
+   struct {   /* select-1 view: 4 skipped bits, then one 2-bit selector */
+      unsigned int                        : 4;
+      unsigned int selectMask             : 2;  /* VGPU10_COMPONENT_NAME */
+   };
+   uint32 value;   /* the whole token as one integer */
+} VGPU10OperandToken0;
+
+
+typedef enum {   /* values for VGPU10OperandToken1.extendedOperandType */
+   VGPU10_EXTENDED_OPERAND_EMPTY = 0,
+   VGPU10_EXTENDED_OPERAND_MODIFIER = 1
+} VGPU10_EXTENDED_OPERAND_TYPE;
+
+typedef enum {   /* values for VGPU10OperandToken1.operandModifier */
+   VGPU10_OPERAND_MODIFIER_NONE = 0,
+   VGPU10_OPERAND_MODIFIER_NEG = 1,
+   VGPU10_OPERAND_MODIFIER_ABS = 2,
+   VGPU10_OPERAND_MODIFIER_ABSNEG = 3   /* numerically NEG | ABS */
+} VGPU10_OPERAND_MODIFIER;
+
+typedef union {   /* extended operand token: bit-field view plus the raw value */
+   struct {
+      unsigned int extendedOperandType : 6;  /* VGPU10_EXTENDED_OPERAND_TYPE */
+      unsigned int operandModifier     : 8;  /* VGPU10_OPERAND_MODIFIER */
+      unsigned int                     : 17;
+      unsigned int extended            : 1;
+   };
+   uint32 value;   /* whole token; the widths above sum to 32 bits */
+} VGPU10OperandToken1;
+
+
+typedef enum {   /* values for VGPU10ResourceReturnTypeToken.component0..3; 0 is not defined */
+   VGPU10_RETURN_TYPE_UNORM = 1,
+   VGPU10_RETURN_TYPE_SNORM = 2,
+   VGPU10_RETURN_TYPE_SINT = 3,
+   VGPU10_RETURN_TYPE_UINT = 4,
+   VGPU10_RETURN_TYPE_FLOAT = 5,
+   VGPU10_RETURN_TYPE_MIXED = 6
+} VGPU10_RESOURCE_RETURN_TYPE;
+
+typedef union {   /* per-component return types; only 16 bits are named */
+   struct {
+      unsigned int component0 : 4;  /* VGPU10_RESOURCE_RETURN_TYPE */
+      unsigned int component1 : 4;  /* VGPU10_RESOURCE_RETURN_TYPE */
+      unsigned int component2 : 4;  /* VGPU10_RESOURCE_RETURN_TYPE */
+      unsigned int component3 : 4;  /* VGPU10_RESOURCE_RETURN_TYPE */
+   };
+   uint32 value;   /* the whole token as one integer */
+} VGPU10ResourceReturnTypeToken;
+
+
+typedef enum {   /* values for VGPU10NameToken.name */
+   VGPU10_NAME_UNDEFINED = 0,
+   VGPU10_NAME_POSITION = 1,
+   VGPU10_NAME_CLIP_DISTANCE = 2,
+   VGPU10_NAME_CULL_DISTANCE = 3,
+   VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX = 4,
+   VGPU10_NAME_VIEWPORT_ARRAY_INDEX = 5,
+   VGPU10_NAME_VERTEX_ID = 6,
+   VGPU10_NAME_PRIMITIVE_ID = 7,
+   VGPU10_NAME_INSTANCE_ID = 8,
+   VGPU10_NAME_IS_FRONT_FACE = 9,
+   VGPU10_NAME_SAMPLE_INDEX = 10
+} VGPU10_SYSTEM_NAME;
+
+typedef union {   /* system-name token; only the low-declared 16-bit field is named */
+   struct {
+      unsigned int name : 16; /* VGPU10_SYSTEM_NAME */
+   };
+   uint32 value;   /* the whole token as one integer */
+} VGPU10NameToken;
+
+#endif
diff --git a/src/gallium/drivers/svga/include/svga3d_dx.h b/src/gallium/drivers/svga/include/svga3d_dx.h
new file mode 100644
index 00000000000..fce2b04227e
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga3d_dx.h
@@ -0,0 +1,1521 @@
+/**********************************************************
+ * Copyright 2007-2015 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga3d_dx.h --
+ *
+ *       SVGA 3d hardware definitions for DX10 support.
+ */
+
+#ifndef _SVGA3D_DX_H_
+#define _SVGA3D_DX_H_
+
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_USERLEVEL
+#define INCLUDE_ALLOW_VMCORE
+#include "includeCheck.h"
+
+#include "svga3d_limits.h"
+
+#define SVGA3D_INPUT_MIN               0
+#define SVGA3D_INPUT_PER_VERTEX_DATA   0
+#define SVGA3D_INPUT_PER_INSTANCE_DATA 1
+#define SVGA3D_INPUT_MAX               2
+typedef uint32 SVGA3dInputClassification;
+
+#define SVGA3D_RESOURCE_TYPE_MIN      1
+#define SVGA3D_RESOURCE_BUFFER        1
+#define SVGA3D_RESOURCE_TEXTURE1D     2
+#define SVGA3D_RESOURCE_TEXTURE2D     3
+#define SVGA3D_RESOURCE_TEXTURE3D     4
+#define SVGA3D_RESOURCE_TEXTURECUBE   5
+#define SVGA3D_RESOURCE_TYPE_DX10_MAX 6
+#define SVGA3D_RESOURCE_BUFFEREX      6
+#define SVGA3D_RESOURCE_TYPE_MAX      7
+typedef uint32 SVGA3dResourceType;
+
+#define SVGA3D_DEPTH_WRITE_MASK_ZERO   0
+#define SVGA3D_DEPTH_WRITE_MASK_ALL    1
+typedef uint8 SVGA3dDepthWriteMask;
+
+#define SVGA3D_FILTER_MIP_LINEAR  (1 << 0)
+#define SVGA3D_FILTER_MAG_LINEAR  (1 << 2)
+#define SVGA3D_FILTER_MIN_LINEAR  (1 << 4)
+#define SVGA3D_FILTER_ANISOTROPIC (1 << 6)
+#define SVGA3D_FILTER_COMPARE     (1 << 7)
+typedef uint32 SVGA3dFilter;
+
+#define SVGA3D_CULL_INVALID 0
+#define SVGA3D_CULL_MIN     1
+#define SVGA3D_CULL_NONE    1
+#define SVGA3D_CULL_FRONT   2
+#define SVGA3D_CULL_BACK    3
+#define SVGA3D_CULL_MAX     4
+typedef uint8 SVGA3dCullMode;
+
+#define SVGA3D_COMPARISON_INVALID         0
+#define SVGA3D_COMPARISON_MIN             1
+#define SVGA3D_COMPARISON_NEVER           1
+#define SVGA3D_COMPARISON_LESS            2
+#define SVGA3D_COMPARISON_EQUAL           3
+#define SVGA3D_COMPARISON_LESS_EQUAL      4
+#define SVGA3D_COMPARISON_GREATER         5
+#define SVGA3D_COMPARISON_NOT_EQUAL       6
+#define SVGA3D_COMPARISON_GREATER_EQUAL   7
+#define SVGA3D_COMPARISON_ALWAYS          8
+#define SVGA3D_COMPARISON_MAX             9
+typedef uint8 SVGA3dComparisonFunc;
+
+#define SVGA3D_DX_MAX_VERTEXBUFFERS 32
+#define SVGA3D_DX_MAX_VERTEXINPUTREGISTERS 16
+#define SVGA3D_DX_MAX_SOTARGETS 4
+#define SVGA3D_DX_MAX_SRVIEWS 128
+#define SVGA3D_DX_MAX_CONSTBUFFERS 16
+#define SVGA3D_DX_MAX_SAMPLERS 16
+
+/* Id limits */
+static const uint32 SVGA3dBlendObjectCountPerContext = 4096;
+static const uint32 SVGA3dDepthStencilObjectCountPerContext = 4096;
+
+typedef uint32 SVGA3dSurfaceId;
+typedef uint32 SVGA3dShaderResourceViewId;
+typedef uint32 SVGA3dRenderTargetViewId;
+typedef uint32 SVGA3dDepthStencilViewId;
+
+typedef uint32 SVGA3dShaderId;
+typedef uint32 SVGA3dElementLayoutId;
+typedef uint32 SVGA3dSamplerId;
+typedef uint32 SVGA3dBlendStateId;
+typedef uint32 SVGA3dDepthStencilStateId;
+typedef uint32 SVGA3dRasterizerStateId;
+typedef uint32 SVGA3dQueryId;
+typedef uint32 SVGA3dStreamOutputId;
+
+typedef union {                /* four floats, reachable by name or by index */
+   struct {                    /* anonymous struct: members are accessed directly on the union */
+      float r;
+      float g;
+      float b;
+      float a;
+   };
+
+   float value[4];             /* array view overlaying r, g, b, a in declaration order */
+} SVGA3dRGBAFloat;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 cid;
+   SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGAOTableDXContextEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineContext {
+   uint32 cid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineContext;   /* SVGA_3D_CMD_DX_DEFINE_CONTEXT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyContext {
+   uint32 cid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyContext;   /* SVGA_3D_CMD_DX_DESTROY_CONTEXT */
+
+/*
+ * Bind a DX context.
+ *
+ * validContents should be set to 0 for new contexts,
+ * and 1 if this is an old context which is getting paged
+ * back on to the device.
+ *
+ * For new contexts, it is recommended that the driver
+ * issue commands to initialize all interesting state
+ * prior to rendering.
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBindContext {
+   uint32 cid;
+   SVGAMobId mobid;
+   uint32 validContents;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBindContext;   /* SVGA_3D_CMD_DX_BIND_CONTEXT */
+
+/*
+ * Readback a DX context.
+ * (Request that the device flush the contents back into guest memory.)
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXReadbackContext {
+   uint32 cid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXReadbackContext;   /* SVGA_3D_CMD_DX_READBACK_CONTEXT */
+
+/*
+ * Invalidate a guest-backed context.
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXInvalidateContext {
+   uint32 cid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXInvalidateContext;   /* SVGA_3D_CMD_DX_INVALIDATE_CONTEXT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dReplyFormatData {
+   uint32 formatSupport;
+   uint32 msaa2xQualityLevels:5;    /* first of five consecutive 5-bit fields */
+   uint32 msaa4xQualityLevels:5;
+   uint32 msaa8xQualityLevels:5;
+   uint32 msaa16xQualityLevels:5;
+   uint32 msaa32xQualityLevels:5;
+   uint32 pad:7;                    /* filler: 5 x 5 + 7 = 32 bits of bit-fields in total */
+}
+#include "vmware_pack_end.h"
+SVGA3dReplyFormatData;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetSingleConstantBuffer {
+   uint32 slot;
+   SVGA3dShaderType type;
+   SVGA3dSurfaceId sid;
+   uint32 offsetInBytes;
+   uint32 sizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetSingleConstantBuffer;
+/* SVGA_3D_CMD_DX_SET_SINGLE_CONSTANT_BUFFER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetShaderResources {
+   uint32 startView;
+   SVGA3dShaderType type;
+
+   /*
+    * Followed by a variable number of SVGA3dShaderResourceViewId's.
+    */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetShaderResources; /* SVGA_3D_CMD_DX_SET_SHADER_RESOURCES */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetShader {
+   SVGA3dShaderId shaderId;
+   SVGA3dShaderType type;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetShader; /* SVGA_3D_CMD_DX_SET_SHADER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetSamplers {
+   uint32 startSampler;
+   SVGA3dShaderType type;
+
+   /*
+    * Followed by a variable number of SVGA3dSamplerId's.
+    */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetSamplers; /* SVGA_3D_CMD_DX_SET_SAMPLERS */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDraw {
+   uint32 vertexCount;
+   uint32 startVertexLocation;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDraw; /* SVGA_3D_CMD_DX_DRAW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDrawIndexed {
+   uint32 indexCount;
+   uint32 startIndexLocation;
+   int32  baseVertexLocation;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDrawIndexed; /* SVGA_3D_CMD_DX_DRAW_INDEXED */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDrawInstanced {
+   uint32 vertexCountPerInstance;
+   uint32 instanceCount;
+   uint32 startVertexLocation;
+   uint32 startInstanceLocation;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDrawInstanced; /* SVGA_3D_CMD_DX_DRAW_INSTANCED */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDrawIndexedInstanced {
+   uint32 indexCountPerInstance;
+   uint32 instanceCount;
+   uint32 startIndexLocation;
+   int32  baseVertexLocation;
+   uint32 startInstanceLocation;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDrawIndexedInstanced; /* SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDrawAuto {
+   uint32 pad0;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDrawAuto; /* SVGA_3D_CMD_DX_DRAW_AUTO */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetInputLayout {
+   SVGA3dElementLayoutId elementLayoutId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetInputLayout; /* SVGA_3D_CMD_DX_SET_INPUT_LAYOUT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dVertexBuffer {
+   SVGA3dSurfaceId sid;
+   uint32 stride;
+   uint32 offset;
+}
+#include "vmware_pack_end.h"
+SVGA3dVertexBuffer;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetVertexBuffers {
+   uint32 startBuffer;
+   /* Followed by a variable number of SVGA3dVertexBuffer's. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetVertexBuffers; /* SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetIndexBuffer {
+   SVGA3dSurfaceId sid;
+   SVGA3dSurfaceFormat format;
+   uint32 offset;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetIndexBuffer; /* SVGA_3D_CMD_DX_SET_INDEX_BUFFER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetTopology {
+   SVGA3dPrimitiveType topology;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetTopology; /* SVGA_3D_CMD_DX_SET_TOPOLOGY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetRenderTargets {
+   SVGA3dDepthStencilViewId depthStencilViewId;
+   /* Followed by a variable number of SVGA3dRenderTargetViewId's. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetRenderTargets; /* SVGA_3D_CMD_DX_SET_RENDERTARGETS */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetBlendState {
+   SVGA3dBlendStateId blendId;
+   float blendFactor[4];
+   uint32 sampleMask;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetBlendState; /* SVGA_3D_CMD_DX_SET_BLEND_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetDepthStencilState {
+   SVGA3dDepthStencilStateId depthStencilId;
+   uint32 stencilRef;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetDepthStencilState; /* SVGA_3D_CMD_DX_SET_DEPTHSTENCIL_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetRasterizerState {
+   SVGA3dRasterizerStateId rasterizerId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetRasterizerState; /* SVGA_3D_CMD_DX_SET_RASTERIZER_STATE */
+
+#define SVGA3D_DXQUERY_FLAG_PREDICATEHINT (1 << 0)
+typedef uint32 SVGA3dDXQueryFlags;
+
+/*
+ * The SVGADXQueryDeviceState and SVGADXQueryDeviceBits are used by the device
+ * to track query state transitions, but are not intended to be used by the
+ * driver.
+ */
+#define SVGADX_QDSTATE_INVALID   ((uint8)-1) /* Query has no state */
+#define SVGADX_QDSTATE_MIN       0
+#define SVGADX_QDSTATE_IDLE      0   /* Query hasn't started yet */
+#define SVGADX_QDSTATE_ACTIVE    1   /* Query is actively gathering data */
+#define SVGADX_QDSTATE_PENDING   2   /* Query is waiting for results */
+#define SVGADX_QDSTATE_FINISHED  3   /* Query has completed */
+#define SVGADX_QDSTATE_MAX       4
+typedef uint8 SVGADXQueryDeviceState;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGA3dQueryTypeUint8 type;
+   uint16 pad0;
+   SVGADXQueryDeviceState state;
+   SVGA3dDXQueryFlags flags;
+   SVGAMobId mobid;
+   uint32 offset;
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXQueryEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineQuery {
+   SVGA3dQueryId queryId;
+   SVGA3dQueryType type;
+   SVGA3dDXQueryFlags flags;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineQuery; /* SVGA_3D_CMD_DX_DEFINE_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyQuery {
+   SVGA3dQueryId queryId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyQuery; /* SVGA_3D_CMD_DX_DESTROY_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBindQuery {
+   SVGA3dQueryId queryId;
+   SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBindQuery; /* SVGA_3D_CMD_DX_BIND_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetQueryOffset {
+   SVGA3dQueryId queryId;
+   uint32 mobOffset;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetQueryOffset; /* SVGA_3D_CMD_DX_SET_QUERY_OFFSET */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBeginQuery {
+   SVGA3dQueryId queryId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBeginQuery; /* SVGA_3D_CMD_DX_BEGIN_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXEndQuery {
+   SVGA3dQueryId queryId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXEndQuery; /* SVGA_3D_CMD_DX_END_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXReadbackQuery {
+   SVGA3dQueryId queryId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXReadbackQuery; /* SVGA_3D_CMD_DX_READBACK_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXMoveQuery {
+   SVGA3dQueryId queryId;
+   SVGAMobId mobid;
+   uint32 mobOffset;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXMoveQuery; /* SVGA_3D_CMD_DX_MOVE_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBindAllQuery {
+   uint32 cid;
+   SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBindAllQuery; /* SVGA_3D_CMD_DX_BIND_ALL_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXReadbackAllQuery {
+   uint32 cid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXReadbackAllQuery; /* SVGA_3D_CMD_DX_READBACK_ALL_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetPredication {
+   SVGA3dQueryId queryId;
+   uint32 predicateValue;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetPredication; /* SVGA_3D_CMD_DX_SET_PREDICATION */
+
+typedef
+#include "vmware_pack_begin.h"
+struct MKS3dDXSOState {
+   uint32 offset;       /* Starting offset */
+   uint32 intOffset;    /* Internal offset */
+   uint32 vertexCount;  /* vertices written */
+   uint32 sizeInBytes;  /* max bytes to write */
+}
+#include "vmware_pack_end.h"
+SVGA3dDXSOState;
+
+/* Set the offset field to this value to append SO values to the buffer */
+#define SVGA3D_DX_SO_OFFSET_APPEND ((uint32) ~0u)
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dSoTarget {
+   SVGA3dSurfaceId sid;
+   uint32 offset;
+   uint32 sizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dSoTarget;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetSOTargets {
+   uint32 pad0;
+   /* Followed by a variable number of SVGA3dSoTarget's. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetSOTargets; /* SVGA_3D_CMD_DX_SET_SOTARGETS */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dViewport
+{
+   float x;
+   float y;
+   float width;
+   float height;
+   float minDepth;
+   float maxDepth;
+}
+#include "vmware_pack_end.h"
+SVGA3dViewport;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetViewports {
+   uint32 pad0;
+   /* Followed by a variable number of SVGA3dViewport's. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetViewports; /* SVGA_3D_CMD_DX_SET_VIEWPORTS */
+
+#define SVGA3D_DX_MAX_VIEWPORTS  16
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetScissorRects {
+   uint32 pad0;
+   /* Followed by a variable number of SVGASignedRect's. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetScissorRects; /* SVGA_3D_CMD_DX_SET_SCISSORRECTS */
+
+#define SVGA3D_DX_MAX_SCISSORRECTS  16
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXClearRenderTargetView {
+   SVGA3dRenderTargetViewId renderTargetViewId;
+   SVGA3dRGBAFloat rgba;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXClearRenderTargetView; /* SVGA_3D_CMD_DX_CLEAR_RENDERTARGET_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXClearDepthStencilView {
+   uint16 flags;
+   uint16 stencil;
+   SVGA3dDepthStencilViewId depthStencilViewId;
+   float depth;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXClearDepthStencilView; /* SVGA_3D_CMD_DX_CLEAR_DEPTHSTENCIL_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXPredCopyRegion {
+   SVGA3dSurfaceId dstSid;
+   uint32 dstSubResource;
+   SVGA3dSurfaceId srcSid;
+   uint32 srcSubResource;
+   SVGA3dCopyBox box;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXPredCopyRegion;
+/* SVGA_3D_CMD_DX_PRED_COPY_REGION */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXPredCopy {
+   SVGA3dSurfaceId dstSid;
+   SVGA3dSurfaceId srcSid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXPredCopy; /* SVGA_3D_CMD_DX_PRED_COPY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBufferCopy {
+   SVGA3dSurfaceId dest;
+   SVGA3dSurfaceId src;
+   uint32 destX;
+   uint32 srcX;
+   uint32 width;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBufferCopy;
+/* SVGA_3D_CMD_DX_BUFFER_COPY */
+
+typedef uint32 SVGA3dDXStretchBltMode;
+#define SVGADX_STRETCHBLT_LINEAR         (1 << 0)
+#define SVGADX_STRETCHBLT_FORCE_SRC_SRGB (1 << 1)
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXStretchBlt {
+   SVGA3dSurfaceId srcSid;
+   uint32 srcSubResource;
+   SVGA3dSurfaceId dstSid;
+   uint32 destSubResource;
+   SVGA3dBox boxSrc;
+   SVGA3dBox boxDest;
+   SVGA3dDXStretchBltMode mode;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXStretchBlt; /* SVGA_3D_CMD_DX_STRETCHBLT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXGenMips {
+   SVGA3dShaderResourceViewId shaderResourceViewId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXGenMips; /* SVGA_3D_CMD_DX_GENMIPS */
+
+/*
+ * Defines a resource/DX surface.  Resources share the surfaceId namespace.
+ *
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDefineGBSurface_v2 {
+   uint32 sid;
+   SVGA3dSurfaceFlags surfaceFlags;
+   SVGA3dSurfaceFormat format;
+   uint32 numMipLevels;
+   uint32 multisampleCount;
+   SVGA3dTextureFilter autogenFilter;
+   SVGA3dSize size;
+   uint32 arraySize;
+   uint32 pad;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDefineGBSurface_v2;   /* SVGA_3D_CMD_DEFINE_GB_SURFACE_V2 */
+
+/*
+ * Update a sub-resource in a guest-backed resource.
+ * (Inform the device that the guest-contents have been updated.)
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXUpdateSubResource {
+   SVGA3dSurfaceId sid;
+   uint32 subResource;
+   SVGA3dBox box;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXUpdateSubResource;   /* SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE */
+
+/*
+ * Readback a subresource in a guest-backed resource.
+ * (Request the device to flush the dirty contents into the guest.)
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXReadbackSubResource {
+   SVGA3dSurfaceId sid;
+   uint32 subResource;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXReadbackSubResource;   /* SVGA_3D_CMD_DX_READBACK_SUBRESOURCE */
+
+/*
+ * Invalidate a subresource in a guest-backed resource.
+ * (Notify the device that the contents can be lost.)
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXInvalidateSubResource {
+   SVGA3dSurfaceId sid;
+   uint32 subResource;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXInvalidateSubResource;   /* SVGA_3D_CMD_DX_INVALIDATE_SUBRESOURCE */
+
+
+/*
+ * Raw byte-wise transfer from a buffer surface into the requested box
+ * of another surface.
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXTransferFromBuffer {
+   SVGA3dSurfaceId srcSid;
+   uint32 srcOffset;
+   uint32 srcPitch;
+   uint32 srcSlicePitch;
+   SVGA3dSurfaceId destSid;
+   uint32 destSubResource;
+   SVGA3dBox destBox;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXTransferFromBuffer;   /* SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER */
+
+
+/*
+ * Raw byte-wise transfer from a buffer surface into the requested box
+ * of another surface.  Supported if SVGA3D_DEVCAP_DXCONTEXT is set.
+ * The context is implied from the command buffer header.
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXPredTransferFromBuffer {
+   SVGA3dSurfaceId srcSid;
+   uint32 srcOffset;
+   uint32 srcPitch;
+   uint32 srcSlicePitch;
+   SVGA3dSurfaceId destSid;
+   uint32 destSubResource;
+   SVGA3dBox destBox;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXPredTransferFromBuffer;
+/* SVGA_3D_CMD_DX_PRED_TRANSFER_FROM_BUFFER */
+
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSurfaceCopyAndReadback {
+   SVGA3dSurfaceId srcSid;
+   SVGA3dSurfaceId destSid;
+   SVGA3dCopyBox box;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSurfaceCopyAndReadback;
+/* SVGA_3D_CMD_DX_SURFACE_COPY_AND_READBACK */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXHint {
+   uint32 hintId;
+
+   /*
+    * Followed by variable sized data depending on the hintId.
+    */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXHint;
+/* SVGA_3D_CMD_DX_HINT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBufferUpdate {
+   SVGA3dSurfaceId sid;
+   uint32 x;
+   uint32 width;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBufferUpdate;
+/* SVGA_3D_CMD_DX_BUFFER_UPDATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetConstantBufferOffset {
+   uint32 slot;
+   uint32 offsetInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetConstantBufferOffset;
+
+typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetVSConstantBufferOffset;
+/* SVGA_3D_CMD_DX_SET_VS_CONSTANT_BUFFER_OFFSET */
+
+typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetPSConstantBufferOffset;
+/* SVGA_3D_CMD_DX_SET_PS_CONSTANT_BUFFER_OFFSET */
+
+typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetGSConstantBufferOffset;
+/* SVGA_3D_CMD_DX_SET_GS_CONSTANT_BUFFER_OFFSET */
+
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   union {                         /* three overlaid layouts, each made of four uint32 fields */
+      struct {
+         uint32 firstElement;
+         uint32 numElements;
+         uint32 pad0;              /* pad0/pad1 bring this arm to four words like the others */
+         uint32 pad1;
+      } buffer;
+      struct {
+         uint32 mostDetailedMip;
+         uint32 firstArraySlice;
+         uint32 mipLevels;
+         uint32 arraySize;
+      } tex;
+      struct {
+         uint32 firstElement;
+         uint32 numElements;
+         uint32 flags;
+         uint32 pad0;              /* fourth word, keeps this arm the same size as 'tex' */
+      } bufferex;
+   };                              /* NOTE(review): arm presumably selected by the view's resourceDimension - confirm */
+}
+#include "vmware_pack_end.h"
+SVGA3dShaderResourceViewDesc;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGA3dSurfaceId sid;
+   SVGA3dSurfaceFormat format;
+   SVGA3dResourceType resourceDimension;
+   SVGA3dShaderResourceViewDesc desc;
+   uint32 pad;
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXSRViewEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineShaderResourceView {
+   SVGA3dShaderResourceViewId shaderResourceViewId;
+
+   SVGA3dSurfaceId sid;
+   SVGA3dSurfaceFormat format;
+   SVGA3dResourceType resourceDimension;
+
+   SVGA3dShaderResourceViewDesc desc;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineShaderResourceView;
+/* SVGA_3D_CMD_DX_DEFINE_SHADERRESOURCE_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyShaderResourceView {
+   SVGA3dShaderResourceViewId shaderResourceViewId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyShaderResourceView;
+/* SVGA_3D_CMD_DX_DESTROY_SHADERRESOURCE_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dRenderTargetViewDesc {
+   union {
+      struct {
+         uint32 firstElement;
+         uint32 numElements;
+      } buffer;
+      struct {
+         uint32 mipSlice;
+         uint32 firstArraySlice;
+         uint32 arraySize;
+      } tex;                    /* 1d, 2d, cube */
+      struct {
+         uint32 mipSlice;
+         uint32 firstW;
+         uint32 wSize;
+      } tex3D;
+   };
+}
+#include "vmware_pack_end.h"
+SVGA3dRenderTargetViewDesc;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGA3dSurfaceId sid;
+   SVGA3dSurfaceFormat format;
+   SVGA3dResourceType resourceDimension;
+   SVGA3dRenderTargetViewDesc desc;
+   uint32 pad[2];
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXRTViewEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineRenderTargetView {
+   SVGA3dRenderTargetViewId renderTargetViewId;
+
+   SVGA3dSurfaceId sid;
+   SVGA3dSurfaceFormat format;
+   SVGA3dResourceType resourceDimension;
+
+   SVGA3dRenderTargetViewDesc desc;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineRenderTargetView;
+/* SVGA_3D_CMD_DX_DEFINE_RENDERTARGET_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyRenderTargetView {
+   SVGA3dRenderTargetViewId renderTargetViewId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyRenderTargetView;
+/* SVGA_3D_CMD_DX_DESTROY_RENDERTARGET_VIEW */
+
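+/* Bit flags held in an SVGA3DCreateDSViewFlags value; */
+/* SVGA3D_DXDSVIEW_CREATE_FLAG_MASK is the OR of the two READ_ONLY bits. */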
+#define SVGA3D_DXDSVIEW_CREATE_READ_ONLY_DEPTH   0x01
+#define SVGA3D_DXDSVIEW_CREATE_READ_ONLY_STENCIL 0x02
+#define SVGA3D_DXDSVIEW_CREATE_FLAG_MASK         0x03
+typedef uint8 SVGA3DCreateDSViewFlags;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGA3dSurfaceId sid;
+   SVGA3dSurfaceFormat format;
+   SVGA3dResourceType resourceDimension;
+   uint32 mipSlice;
+   uint32 firstArraySlice;
+   uint32 arraySize;
+   SVGA3DCreateDSViewFlags flags;
+   uint8 pad0;
+   uint16 pad1;
+   uint32 pad2;
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXDSViewEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineDepthStencilView {
+   SVGA3dDepthStencilViewId depthStencilViewId;
+
+   SVGA3dSurfaceId sid;
+   SVGA3dSurfaceFormat format;
+   SVGA3dResourceType resourceDimension;
+   uint32 mipSlice;
+   uint32 firstArraySlice;
+   uint32 arraySize;
+   SVGA3DCreateDSViewFlags flags;
+   uint8 pad0;
+   uint16 pad1;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineDepthStencilView;
+/* SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyDepthStencilView {
+   SVGA3dDepthStencilViewId depthStencilViewId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyDepthStencilView;
+/* SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dInputElementDesc {
+   uint32 inputSlot;
+   uint32 alignedByteOffset;
+   SVGA3dSurfaceFormat format;
+   SVGA3dInputClassification inputSlotClass;
+   uint32 instanceDataStepRate;
+   uint32 inputRegister;
+}
+#include "vmware_pack_end.h"
+SVGA3dInputElementDesc;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   /*
+    * XXX: How many of these can there be?
+    */
+   uint32 elid;
+   uint32 numDescs;                   /* NOTE(review): presumably the number of desc[] slots in use - confirm */
+   SVGA3dInputElementDesc desc[32];   /* fixed capacity of 32 element descriptions */
+   uint32 pad[62];                    /* NOTE(review): if a desc is 24 bytes, 8 + 32*24 + 62*4 = 1024 - confirm */
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXElementLayoutEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineElementLayout {
+   SVGA3dElementLayoutId elementLayoutId;
+   /* Followed by a variable number of SVGA3dInputElementDesc's. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineElementLayout;
+/* SVGA_3D_CMD_DX_DEFINE_ELEMENTLAYOUT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyElementLayout {
+   SVGA3dElementLayoutId elementLayoutId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyElementLayout;
+/* SVGA_3D_CMD_DX_DESTROY_ELEMENTLAYOUT */
+
+
+#define SVGA3D_DX_MAX_RENDER_TARGETS 8
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dDXBlendStatePerRT {
+   uint8 blendEnable;
+   uint8 srcBlend;
+   uint8 destBlend;
+   uint8 blendOp;
+   uint8 srcBlendAlpha;
+   uint8 destBlendAlpha;
+   uint8 blendOpAlpha;
+   uint8 renderTargetWriteMask;
+   uint8 logicOpEnable;
+   uint8 logicOp;
+   uint16 pad0;                 /* follows ten uint8 fields; members use the header's usual 3-space indent */
+}
+#include "vmware_pack_end.h"
+SVGA3dDXBlendStatePerRT;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint8 alphaToCoverageEnable;
+   uint8 independentBlendEnable;
+   uint16 pad0;
+   SVGA3dDXBlendStatePerRT perRT[SVGA3D_DX_MAX_RENDER_TARGETS]; /* one slot per DX render target (limit defined above) */
+   uint32 pad1[7];              /* trailing filler words after the per-target array */
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXBlendStateEntry;
+
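+/* Payload of SVGA_3D_CMD_DX_DEFINE_BLEND_STATE: blendId, then the same */
+/* fields as SVGACOTableDXBlendStateEntry minus its trailing pad1[]. */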
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineBlendState {
+   SVGA3dBlendStateId blendId;
+   uint8 alphaToCoverageEnable;
+   uint8 independentBlendEnable;
+   uint16 pad0;
+   SVGA3dDXBlendStatePerRT perRT[SVGA3D_DX_MAX_RENDER_TARGETS]; /* one slot per DX render target (limit defined above) */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineBlendState; /* SVGA_3D_CMD_DX_DEFINE_BLEND_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyBlendState {
+   SVGA3dBlendStateId blendId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyBlendState; /* SVGA_3D_CMD_DX_DESTROY_BLEND_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint8 depthEnable;
+   SVGA3dDepthWriteMask depthWriteMask;
+   SVGA3dComparisonFunc depthFunc;
+   uint8 stencilEnable;
+   uint8 frontEnable;
+   uint8 backEnable;
+   uint8 stencilReadMask;
+   uint8 stencilWriteMask;
+
+   uint8 frontStencilFailOp;
+   uint8 frontStencilDepthFailOp;
+   uint8 frontStencilPassOp;
+   SVGA3dComparisonFunc frontStencilFunc;
+
+   uint8 backStencilFailOp;
+   uint8 backStencilDepthFailOp;
+   uint8 backStencilPassOp;
+   SVGA3dComparisonFunc backStencilFunc;
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXDepthStencilEntry;
+
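+/* Payload of SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_STATE: depthStencilId, then */
+/* the same fields, in the same order, as SVGACOTableDXDepthStencilEntry. */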
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineDepthStencilState {
+   SVGA3dDepthStencilStateId depthStencilId;
+
+   uint8 depthEnable;
+   SVGA3dDepthWriteMask depthWriteMask;
+   SVGA3dComparisonFunc depthFunc;
+   uint8 stencilEnable;
+   uint8 frontEnable;
+   uint8 backEnable;
+   uint8 stencilReadMask;
+   uint8 stencilWriteMask;
+
+   uint8 frontStencilFailOp;
+   uint8 frontStencilDepthFailOp;
+   uint8 frontStencilPassOp;
+   SVGA3dComparisonFunc frontStencilFunc;
+
+   uint8 backStencilFailOp;
+   uint8 backStencilDepthFailOp;
+   uint8 backStencilPassOp;
+   SVGA3dComparisonFunc backStencilFunc;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineDepthStencilState;
+/* SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyDepthStencilState {
+   SVGA3dDepthStencilStateId depthStencilId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyDepthStencilState;
+/* SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint8 fillMode;
+   SVGA3dCullMode cullMode;
+   uint8 frontCounterClockwise;
+   uint8 provokingVertexLast;
+   int32 depthBias;
+   float depthBiasClamp;
+   float slopeScaledDepthBias;
+   uint8 depthClipEnable;
+   uint8 scissorEnable;
+   uint8 multisampleEnable;
+   uint8 antialiasedLineEnable;
+   float lineWidth;
+   uint8 lineStippleEnable;
+   uint8 lineStippleFactor;
+   uint16 lineStipplePattern;
+   uint32 forcedSampleCount;
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXRasterizerStateEntry;
+
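+/* Payload of SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE: rasterizerId, then the */
+/* fields of SVGACOTableDXRasterizerStateEntry except forcedSampleCount. */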
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineRasterizerState {
+   SVGA3dRasterizerStateId rasterizerId;
+
+   uint8 fillMode;
+   SVGA3dCullMode cullMode;
+   uint8 frontCounterClockwise;
+   uint8 provokingVertexLast;
+   int32 depthBias;
+   float depthBiasClamp;
+   float slopeScaledDepthBias;
+   uint8 depthClipEnable;
+   uint8 scissorEnable;
+   uint8 multisampleEnable;
+   uint8 antialiasedLineEnable;
+   float lineWidth;
+   uint8 lineStippleEnable;
+   uint8 lineStippleFactor;
+   uint16 lineStipplePattern;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineRasterizerState;
+/* SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyRasterizerState {
+   SVGA3dRasterizerStateId rasterizerId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyRasterizerState;
+/* SVGA_3D_CMD_DX_DESTROY_RASTERIZER_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGA3dFilter filter;
+   uint8 addressU;
+   uint8 addressV;
+   uint8 addressW;
+   uint8 pad0;
+   float mipLODBias;
+   uint8 maxAnisotropy;
+   SVGA3dComparisonFunc comparisonFunc;
+   uint16 pad1;
+   SVGA3dRGBAFloat borderColor;
+   float minLOD;
+   float maxLOD;
+   uint32 pad2[6];
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXSamplerEntry;
+
+/* Payload of the define-sampler-state command: samplerId followed by the
+ * fields of SVGACOTableDXSamplerEntry, minus its trailing pad2[6]. */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineSamplerState {
+   SVGA3dSamplerId samplerId;
+   SVGA3dFilter filter;
+   uint8 addressU;
+   uint8 addressV;
+   uint8 addressW;
+   uint8 pad0;
+   float mipLODBias;
+   uint8 maxAnisotropy;
+   SVGA3dComparisonFunc comparisonFunc;
+   uint16 pad1;
+   SVGA3dRGBAFloat borderColor;
+   float minLOD;
+   float maxLOD;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineSamplerState; /* SVGA_3D_CMD_DX_DEFINE_SAMPLER_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroySamplerState {
+   SVGA3dSamplerId samplerId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroySamplerState; /* SVGA_3D_CMD_DX_DESTROY_SAMPLER_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineShader {
+   SVGA3dShaderId shaderId;
+   SVGA3dShaderType type;
+   uint32 sizeInBytes; /* Number of bytes of shader text. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineShader; /* SVGA_3D_CMD_DX_DEFINE_SHADER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGACOTableDXShaderEntry {
+   SVGA3dShaderType type;
+   uint32 sizeInBytes;
+   uint32 offsetInBytes;
+   SVGAMobId mobid;
+   uint32 pad[4];
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXShaderEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyShader {
+   SVGA3dShaderId shaderId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyShader; /* SVGA_3D_CMD_DX_DESTROY_SHADER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBindShader {
+   uint32 cid;
+   uint32 shid;
+   SVGAMobId mobid;
+   uint32 offsetInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBindShader;   /* SVGA_3D_CMD_DX_BIND_SHADER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBindAllShader {
+   uint32 cid;
+   SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBindAllShader;   /* SVGA_3D_CMD_DX_BIND_ALL_SHADER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXCondBindAllShader {
+   uint32 cid;
+   SVGAMobId testMobid;
+   SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXCondBindAllShader;   /* SVGA_3D_CMD_DX_COND_BIND_ALL_SHADER */
+
+/*
+ * The maximum number of streamout decl's in each streamout entry.
+ */
+#define SVGA3D_MAX_STREAMOUT_DECLS 64
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dStreamOutputDeclarationEntry {
+   uint32 outputSlot;
+   uint32 registerIndex;
+   uint8  registerMask;
+   uint8  pad0;
+   uint16 pad1;
+   uint32 stream;
+}
+#include "vmware_pack_end.h"
+SVGA3dStreamOutputDeclarationEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGAOTableStreamOutputEntry {
+   uint32 numOutputStreamEntries;
+   SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS];
+   uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS];
+   uint32 rasterizedStream;
+   uint32 pad[250];
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXStreamOutputEntry; /* NOTE(review): struct tag is SVGAOTableStreamOutputEntry, not this name */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineStreamOutput {
+   SVGA3dStreamOutputId soid;
+   uint32 numOutputStreamEntries;
+   SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS];
+   uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS];
+   uint32 rasterizedStream;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineStreamOutput; /* SVGA_3D_CMD_DX_DEFINE_STREAMOUTPUT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyStreamOutput {
+   SVGA3dStreamOutputId soid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyStreamOutput; /* SVGA_3D_CMD_DX_DESTROY_STREAMOUTPUT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetStreamOutput {
+   SVGA3dStreamOutputId soid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetStreamOutput; /* SVGA_3D_CMD_DX_SET_STREAMOUTPUT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint64 value;
+   uint32 mobId;
+   uint32 mobOffset;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXMobFence64;  /* SVGA_3D_CMD_DX_MOB_FENCE_64 */
+
+/*
+ * SVGA3dCmdDXSetCOTable --
+ *
+ * This command allows the guest to bind a mob to a context-object table.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetCOTable {
+   uint32 cid;
+   uint32 mobid;
+   SVGACOTableType type;
+   uint32 validSizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetCOTable; /* SVGA_3D_CMD_DX_SET_COTABLE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXReadbackCOTable {
+   uint32 cid;
+   SVGACOTableType type;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXReadbackCOTable; /* SVGA_3D_CMD_DX_READBACK_COTABLE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCOTableData {
+   uint32 mobid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCOTableData;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dBufferBinding {
+   uint32 bufferId;
+   uint32 stride;
+   uint32 offset;
+}
+#include "vmware_pack_end.h"
+SVGA3dBufferBinding;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dConstantBufferBinding {
+   uint32 sid;
+   uint32 offsetInBytes;
+   uint32 sizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dConstantBufferBinding;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGADXInputAssemblyMobFormat {
+   uint32 layoutId;
+   SVGA3dBufferBinding vertexBuffers[SVGA3D_DX_MAX_VERTEXBUFFERS];
+   uint32 indexBufferSid;
+   uint32 pad;
+   uint32 indexBufferOffset;
+   uint32 indexBufferFormat;
+   uint32 topology;
+}
+#include "vmware_pack_end.h"
+SVGADXInputAssemblyMobFormat;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGADXContextMobFormat {
+   SVGADXInputAssemblyMobFormat inputAssembly;
+
+   struct {
+      uint32 blendStateId;
+      uint32 blendFactor[4];
+      uint32 sampleMask;
+      uint32 depthStencilStateId;
+      uint32 stencilRef;
+      uint32 rasterizerStateId;
+      uint32 depthStencilViewId;
+      uint32 renderTargetViewIds[SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS];
+      uint32 unorderedAccessViewIds[SVGA3D_MAX_UAVIEWS];
+   } renderState;
+
+   struct {
+      uint32 targets[SVGA3D_DX_MAX_SOTARGETS];
+      uint32 soid;
+   } streamOut;
+   uint32 pad0[11];
+
+   uint8 numViewports;
+   uint8 numScissorRects;
+   uint16 pad1[1];
+
+   uint32 pad2[3];
+
+   SVGA3dViewport viewports[SVGA3D_DX_MAX_VIEWPORTS];
+   uint32 pad3[32];
+
+   SVGASignedRect scissorRects[SVGA3D_DX_MAX_SCISSORRECTS];
+   uint32 pad4[64];
+
+   struct {
+      uint32 queryID;
+      uint32 value;
+   } predication;
+   uint32 pad5[2];
+
+   struct {
+      uint32 shaderId;
+      SVGA3dConstantBufferBinding constantBuffers[SVGA3D_DX_MAX_CONSTBUFFERS];
+      uint32 shaderResources[SVGA3D_DX_MAX_SRVIEWS];
+      uint32 samplers[SVGA3D_DX_MAX_SAMPLERS];
+   } shaderState[SVGA3D_NUM_SHADERTYPE];
+   uint32 pad6[26];
+
+   SVGA3dQueryId queryID[SVGA3D_MAX_QUERY];
+
+   SVGA3dCOTableData cotables[SVGA_COTABLE_MAX];
+   uint32 pad7[380];
+}
+#include "vmware_pack_end.h"
+SVGADXContextMobFormat;
+
+#endif /* _SVGA3D_DX_H_ */
-- 
cgit v1.2.3


From c191b507cbbc4572c9a58cf019db08def651b265 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 6 Aug 2015 16:28:19 -0600
Subject: svga: update the svga3d device header files

Remove some obsolete svga_dump.c code for items which no longer exist.

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/svga/include/svga3d_caps.h     |    4 +-
 src/gallium/drivers/svga/include/svga3d_cmd.h      |  844 ++++++++---
 src/gallium/drivers/svga/include/svga3d_devcaps.h  |  225 ++-
 src/gallium/drivers/svga/include/svga3d_limits.h   |    8 +-
 src/gallium/drivers/svga/include/svga3d_reg.h      |    3 +-
 .../drivers/svga/include/svga3d_surfacedefs.h      | 1529 +++++++++++---------
 src/gallium/drivers/svga/include/svga3d_types.h    |  483 ++++++-
 src/gallium/drivers/svga/include/svga_escape.h     |    4 +-
 src/gallium/drivers/svga/include/svga_overlay.h    |   24 +-
 src/gallium/drivers/svga/include/svga_reg.h        |  102 +-
 src/gallium/drivers/svga/svgadump/svga_dump.c      |   23 -
 11 files changed, 2206 insertions(+), 1043 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/svga/include/svga3d_caps.h b/src/gallium/drivers/svga/include/svga3d_caps.h
index c6c8e3667a6..01c8ba79094 100644
--- a/src/gallium/drivers/svga/include/svga3d_caps.h
+++ b/src/gallium/drivers/svga/include/svga3d_caps.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2007-2014 VMware, Inc.  All rights reserved.
+ * Copyright 2007-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -111,4 +111,4 @@ SVGA3dCapsRecord;
 typedef uint32 SVGA3dCapPair[2];
 
 
-#endif // _SVGA3D_CAPS_H_
+#endif
diff --git a/src/gallium/drivers/svga/include/svga3d_cmd.h b/src/gallium/drivers/svga/include/svga3d_cmd.h
index 8953bf05f20..c843417e8de 100644
--- a/src/gallium/drivers/svga/include/svga3d_cmd.h
+++ b/src/gallium/drivers/svga/include/svga3d_cmd.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 1998-2014 VMware, Inc.  All rights reserved.
+ * Copyright 1998-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -53,130 +53,227 @@
  * and up.
  */
 
-#define SVGA_3D_CMD_LEGACY_BASE                                1000
-#define SVGA_3D_CMD_BASE                                       1040
-
-#define SVGA_3D_CMD_SURFACE_DEFINE                             1040
-#define SVGA_3D_CMD_SURFACE_DESTROY                            1041
-#define SVGA_3D_CMD_SURFACE_COPY                               1042
-#define SVGA_3D_CMD_SURFACE_STRETCHBLT                         1043
-#define SVGA_3D_CMD_SURFACE_DMA                                1044
-#define SVGA_3D_CMD_CONTEXT_DEFINE                             1045
-#define SVGA_3D_CMD_CONTEXT_DESTROY                            1046
-#define SVGA_3D_CMD_SETTRANSFORM                               1047
-#define SVGA_3D_CMD_SETZRANGE                                  1048
-#define SVGA_3D_CMD_SETRENDERSTATE                             1049
-#define SVGA_3D_CMD_SETRENDERTARGET                            1050
-#define SVGA_3D_CMD_SETTEXTURESTATE                            1051
-#define SVGA_3D_CMD_SETMATERIAL                                1052
-#define SVGA_3D_CMD_SETLIGHTDATA                               1053
-#define SVGA_3D_CMD_SETLIGHTENABLED                            1054
-#define SVGA_3D_CMD_SETVIEWPORT                                1055
-#define SVGA_3D_CMD_SETCLIPPLANE                               1056
-#define SVGA_3D_CMD_CLEAR                                      1057
-#define SVGA_3D_CMD_PRESENT                                    1058
-#define SVGA_3D_CMD_SHADER_DEFINE                              1059
-#define SVGA_3D_CMD_SHADER_DESTROY                             1060
-#define SVGA_3D_CMD_SET_SHADER                                 1061
-#define SVGA_3D_CMD_SET_SHADER_CONST                           1062
-#define SVGA_3D_CMD_DRAW_PRIMITIVES                            1063
-#define SVGA_3D_CMD_SETSCISSORRECT                             1064
-#define SVGA_3D_CMD_BEGIN_QUERY                                1065
-#define SVGA_3D_CMD_END_QUERY                                  1066
-#define SVGA_3D_CMD_WAIT_FOR_QUERY                             1067
-#define SVGA_3D_CMD_PRESENT_READBACK                           1068
-#define SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN                     1069
-#define SVGA_3D_CMD_SURFACE_DEFINE_V2                          1070
-#define SVGA_3D_CMD_GENERATE_MIPMAPS                           1071
-#define SVGA_3D_CMD_VIDEO_CREATE_DECODER                       1072
-#define SVGA_3D_CMD_VIDEO_DESTROY_DECODER                      1073
-#define SVGA_3D_CMD_VIDEO_CREATE_PROCESSOR                     1074
-#define SVGA_3D_CMD_VIDEO_DESTROY_PROCESSOR                    1075
-#define SVGA_3D_CMD_VIDEO_DECODE_START_FRAME                   1076
-#define SVGA_3D_CMD_VIDEO_DECODE_RENDER                        1077
-#define SVGA_3D_CMD_VIDEO_DECODE_END_FRAME                     1078
-#define SVGA_3D_CMD_VIDEO_PROCESS_FRAME                        1079
-#define SVGA_3D_CMD_ACTIVATE_SURFACE                           1080
-#define SVGA_3D_CMD_DEACTIVATE_SURFACE                         1081
-#define SVGA_3D_CMD_SCREEN_DMA                                 1082
-#define SVGA_3D_CMD_SET_UNITY_SURFACE_COOKIE                   1083
-#define SVGA_3D_CMD_OPEN_CONTEXT_SURFACE                       1084
-
-#define SVGA_3D_CMD_LOGICOPS_BITBLT                            1085
-#define SVGA_3D_CMD_LOGICOPS_TRANSBLT                          1086
-#define SVGA_3D_CMD_LOGICOPS_STRETCHBLT                        1087
-#define SVGA_3D_CMD_LOGICOPS_COLORFILL                         1088
-#define SVGA_3D_CMD_LOGICOPS_ALPHABLEND                        1089
-#define SVGA_3D_CMD_LOGICOPS_CLEARTYPEBLEND                    1090
-
-#define SVGA_3D_CMD_SET_OTABLE_BASE                            1091
-#define SVGA_3D_CMD_READBACK_OTABLE                            1092
-
-#define SVGA_3D_CMD_DEFINE_GB_MOB                              1093
-#define SVGA_3D_CMD_DESTROY_GB_MOB                             1094
-#define SVGA_3D_CMD_REDEFINE_GB_MOB                            1095
-#define SVGA_3D_CMD_UPDATE_GB_MOB_MAPPING                      1096
-
-#define SVGA_3D_CMD_DEFINE_GB_SURFACE                          1097
-#define SVGA_3D_CMD_DESTROY_GB_SURFACE                         1098
-#define SVGA_3D_CMD_BIND_GB_SURFACE                            1099
-#define SVGA_3D_CMD_COND_BIND_GB_SURFACE                       1100
-#define SVGA_3D_CMD_UPDATE_GB_IMAGE                            1101
-#define SVGA_3D_CMD_UPDATE_GB_SURFACE                          1102
-#define SVGA_3D_CMD_READBACK_GB_IMAGE                          1103
-#define SVGA_3D_CMD_READBACK_GB_SURFACE                        1104
-#define SVGA_3D_CMD_INVALIDATE_GB_IMAGE                        1105
-#define SVGA_3D_CMD_INVALIDATE_GB_SURFACE                      1106
-
-#define SVGA_3D_CMD_DEFINE_GB_CONTEXT                          1107
-#define SVGA_3D_CMD_DESTROY_GB_CONTEXT                         1108
-#define SVGA_3D_CMD_BIND_GB_CONTEXT                            1109
-#define SVGA_3D_CMD_READBACK_GB_CONTEXT                        1110
-#define SVGA_3D_CMD_INVALIDATE_GB_CONTEXT                      1111
-
-#define SVGA_3D_CMD_DEFINE_GB_SHADER                           1112
-#define SVGA_3D_CMD_DESTROY_GB_SHADER                          1113
-#define SVGA_3D_CMD_BIND_GB_SHADER                             1114
-
-#define SVGA_3D_CMD_BIND_SHADERCONSTS                          1115
-
-#define SVGA_3D_CMD_BEGIN_GB_QUERY                             1116
-#define SVGA_3D_CMD_END_GB_QUERY                               1117
-#define SVGA_3D_CMD_WAIT_FOR_GB_QUERY                          1118
-
-#define SVGA_3D_CMD_NOP                                        1119
-
-#define SVGA_3D_CMD_ENABLE_GART                                1120
-#define SVGA_3D_CMD_DISABLE_GART                               1121
-#define SVGA_3D_CMD_MAP_MOB_INTO_GART                          1122
-#define SVGA_3D_CMD_UNMAP_GART_RANGE                           1123
-
-#define SVGA_3D_CMD_DEFINE_GB_SCREENTARGET                     1124
-#define SVGA_3D_CMD_DESTROY_GB_SCREENTARGET                    1125
-#define SVGA_3D_CMD_BIND_GB_SCREENTARGET                       1126
-#define SVGA_3D_CMD_UPDATE_GB_SCREENTARGET                     1127
-
-#define SVGA_3D_CMD_READBACK_GB_IMAGE_PARTIAL                  1128
-#define SVGA_3D_CMD_INVALIDATE_GB_IMAGE_PARTIAL                1129
-
-#define SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE                 1130
-
-#define SVGA_3D_CMD_GB_SCREEN_DMA                              1131
-#define SVGA_3D_CMD_BIND_GB_SURFACE_WITH_PITCH                 1132
-#define SVGA_3D_CMD_GB_MOB_FENCE                               1133
-#define SVGA_3D_CMD_DEFINE_GB_SURFACE_V2                       1134
-#define SVGA_3D_CMD_DEFINE_GB_MOB64                            1135
-#define SVGA_3D_CMD_REDEFINE_GB_MOB64                          1136
-#define SVGA_3D_CMD_NOP_ERROR                                  1137
-
-#define SVGA_3D_CMD_RESERVED1                                  1138
-#define SVGA_3D_CMD_RESERVED2                                  1139
-#define SVGA_3D_CMD_RESERVED3                                  1140
-#define SVGA_3D_CMD_RESERVED4                                  1141
-#define SVGA_3D_CMD_RESERVED5                                  1142
-
-#define SVGA_3D_CMD_MAX                                        1203
-#define SVGA_3D_CMD_FUTURE_MAX                                 3000
+typedef enum {
+   SVGA_3D_CMD_LEGACY_BASE                                = 1000,
+   SVGA_3D_CMD_BASE                                       = 1040,  /* same value as SVGA_3D_CMD_SURFACE_DEFINE */
+
+   SVGA_3D_CMD_SURFACE_DEFINE                             = 1040,
+   SVGA_3D_CMD_SURFACE_DESTROY                            = 1041,
+   SVGA_3D_CMD_SURFACE_COPY                               = 1042,
+   SVGA_3D_CMD_SURFACE_STRETCHBLT                         = 1043,
+   SVGA_3D_CMD_SURFACE_DMA                                = 1044,
+   SVGA_3D_CMD_CONTEXT_DEFINE                             = 1045,
+   SVGA_3D_CMD_CONTEXT_DESTROY                            = 1046,
+   SVGA_3D_CMD_SETTRANSFORM                               = 1047,
+   SVGA_3D_CMD_SETZRANGE                                  = 1048,
+   SVGA_3D_CMD_SETRENDERSTATE                             = 1049,
+   SVGA_3D_CMD_SETRENDERTARGET                            = 1050,
+   SVGA_3D_CMD_SETTEXTURESTATE                            = 1051,
+   SVGA_3D_CMD_SETMATERIAL                                = 1052,
+   SVGA_3D_CMD_SETLIGHTDATA                               = 1053,
+   SVGA_3D_CMD_SETLIGHTENABLED                            = 1054,
+   SVGA_3D_CMD_SETVIEWPORT                                = 1055,
+   SVGA_3D_CMD_SETCLIPPLANE                               = 1056,
+   SVGA_3D_CMD_CLEAR                                      = 1057,
+   SVGA_3D_CMD_PRESENT                                    = 1058,
+   SVGA_3D_CMD_SHADER_DEFINE                              = 1059,
+   SVGA_3D_CMD_SHADER_DESTROY                             = 1060,
+   SVGA_3D_CMD_SET_SHADER                                 = 1061,
+   SVGA_3D_CMD_SET_SHADER_CONST                           = 1062,
+   SVGA_3D_CMD_DRAW_PRIMITIVES                            = 1063,
+   SVGA_3D_CMD_SETSCISSORRECT                             = 1064,
+   SVGA_3D_CMD_BEGIN_QUERY                                = 1065,
+   SVGA_3D_CMD_END_QUERY                                  = 1066,
+   SVGA_3D_CMD_WAIT_FOR_QUERY                             = 1067,
+   SVGA_3D_CMD_PRESENT_READBACK                           = 1068,
+   SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN                     = 1069,
+   SVGA_3D_CMD_SURFACE_DEFINE_V2                          = 1070,
+   SVGA_3D_CMD_GENERATE_MIPMAPS                           = 1071,
+   SVGA_3D_CMD_VIDEO_CREATE_DECODER                       = 1072,
+   SVGA_3D_CMD_VIDEO_DESTROY_DECODER                      = 1073,
+   SVGA_3D_CMD_VIDEO_CREATE_PROCESSOR                     = 1074,
+   SVGA_3D_CMD_VIDEO_DESTROY_PROCESSOR                    = 1075,
+   SVGA_3D_CMD_VIDEO_DECODE_START_FRAME                   = 1076,
+   SVGA_3D_CMD_VIDEO_DECODE_RENDER                        = 1077,
+   SVGA_3D_CMD_VIDEO_DECODE_END_FRAME                     = 1078,
+   SVGA_3D_CMD_VIDEO_PROCESS_FRAME                        = 1079,
+   SVGA_3D_CMD_ACTIVATE_SURFACE                           = 1080,
+   SVGA_3D_CMD_DEACTIVATE_SURFACE                         = 1081,
+   SVGA_3D_CMD_SCREEN_DMA                                 = 1082,
+   SVGA_3D_CMD_DEAD1                                      = 1083,  /* was SVGA_3D_CMD_SET_UNITY_SURFACE_COOKIE */
+   SVGA_3D_CMD_DEAD2                                      = 1084,  /* was SVGA_3D_CMD_OPEN_CONTEXT_SURFACE */
+
+   SVGA_3D_CMD_LOGICOPS_BITBLT                            = 1085,
+   SVGA_3D_CMD_LOGICOPS_TRANSBLT                          = 1086,
+   SVGA_3D_CMD_LOGICOPS_STRETCHBLT                        = 1087,
+   SVGA_3D_CMD_LOGICOPS_COLORFILL                         = 1088,
+   SVGA_3D_CMD_LOGICOPS_ALPHABLEND                        = 1089,
+   SVGA_3D_CMD_LOGICOPS_CLEARTYPEBLEND                    = 1090,
+
+   SVGA_3D_CMD_SET_OTABLE_BASE                            = 1091,
+   SVGA_3D_CMD_READBACK_OTABLE                            = 1092,
+
+   SVGA_3D_CMD_DEFINE_GB_MOB                              = 1093,
+   SVGA_3D_CMD_DESTROY_GB_MOB                             = 1094,
+   SVGA_3D_CMD_DEAD3                                      = 1095,  /* was SVGA_3D_CMD_REDEFINE_GB_MOB */
+   SVGA_3D_CMD_UPDATE_GB_MOB_MAPPING                      = 1096,
+
+   SVGA_3D_CMD_DEFINE_GB_SURFACE                          = 1097,
+   SVGA_3D_CMD_DESTROY_GB_SURFACE                         = 1098,
+   SVGA_3D_CMD_BIND_GB_SURFACE                            = 1099,
+   SVGA_3D_CMD_COND_BIND_GB_SURFACE                       = 1100,
+   SVGA_3D_CMD_UPDATE_GB_IMAGE                            = 1101,
+   SVGA_3D_CMD_UPDATE_GB_SURFACE                          = 1102,
+   SVGA_3D_CMD_READBACK_GB_IMAGE                          = 1103,
+   SVGA_3D_CMD_READBACK_GB_SURFACE                        = 1104,
+   SVGA_3D_CMD_INVALIDATE_GB_IMAGE                        = 1105,
+   SVGA_3D_CMD_INVALIDATE_GB_SURFACE                      = 1106,
+
+   SVGA_3D_CMD_DEFINE_GB_CONTEXT                          = 1107,
+   SVGA_3D_CMD_DESTROY_GB_CONTEXT                         = 1108,
+   SVGA_3D_CMD_BIND_GB_CONTEXT                            = 1109,
+   SVGA_3D_CMD_READBACK_GB_CONTEXT                        = 1110,
+   SVGA_3D_CMD_INVALIDATE_GB_CONTEXT                      = 1111,
+
+   SVGA_3D_CMD_DEFINE_GB_SHADER                           = 1112,
+   SVGA_3D_CMD_DESTROY_GB_SHADER                          = 1113,
+   SVGA_3D_CMD_BIND_GB_SHADER                             = 1114,
+
+   SVGA_3D_CMD_SET_OTABLE_BASE64                          = 1115,  /* id was SVGA_3D_CMD_BIND_SHADERCONSTS */
+
+   SVGA_3D_CMD_BEGIN_GB_QUERY                             = 1116,
+   SVGA_3D_CMD_END_GB_QUERY                               = 1117,
+   SVGA_3D_CMD_WAIT_FOR_GB_QUERY                          = 1118,
+
+   SVGA_3D_CMD_NOP                                        = 1119,
+
+   SVGA_3D_CMD_ENABLE_GART                                = 1120,
+   SVGA_3D_CMD_DISABLE_GART                               = 1121,
+   SVGA_3D_CMD_MAP_MOB_INTO_GART                          = 1122,
+   SVGA_3D_CMD_UNMAP_GART_RANGE                           = 1123,
+
+   SVGA_3D_CMD_DEFINE_GB_SCREENTARGET                     = 1124,
+   SVGA_3D_CMD_DESTROY_GB_SCREENTARGET                    = 1125,
+   SVGA_3D_CMD_BIND_GB_SCREENTARGET                       = 1126,
+   SVGA_3D_CMD_UPDATE_GB_SCREENTARGET                     = 1127,
+
+   SVGA_3D_CMD_READBACK_GB_IMAGE_PARTIAL                  = 1128,
+   SVGA_3D_CMD_INVALIDATE_GB_IMAGE_PARTIAL                = 1129,
+
+   SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE                 = 1130,
+
+   SVGA_3D_CMD_GB_SCREEN_DMA                              = 1131,
+   SVGA_3D_CMD_BIND_GB_SURFACE_WITH_PITCH                 = 1132,
+   SVGA_3D_CMD_GB_MOB_FENCE                               = 1133,
+   SVGA_3D_CMD_DEFINE_GB_SURFACE_V2                       = 1134,
+   SVGA_3D_CMD_DEFINE_GB_MOB64                            = 1135,
+   SVGA_3D_CMD_REDEFINE_GB_MOB64                          = 1136,
+   SVGA_3D_CMD_NOP_ERROR                                  = 1137,
+
+   SVGA_3D_CMD_SET_VERTEX_STREAMS                         = 1138,  /* ids 1138-1142 were SVGA_3D_CMD_RESERVED1..5 */
+   SVGA_3D_CMD_SET_VERTEX_DECLS                           = 1139,
+   SVGA_3D_CMD_SET_VERTEX_DIVISORS                        = 1140,
+   SVGA_3D_CMD_DRAW                                       = 1141,
+   SVGA_3D_CMD_DRAW_INDEXED                               = 1142,
+
+   /*
+    * DX10 Commands
+    */
+   SVGA_3D_CMD_DX_MIN                                     = 1143,  /* same value as SVGA_3D_CMD_DX_DEFINE_CONTEXT */
+   SVGA_3D_CMD_DX_DEFINE_CONTEXT                          = 1143,
+   SVGA_3D_CMD_DX_DESTROY_CONTEXT                         = 1144,
+   SVGA_3D_CMD_DX_BIND_CONTEXT                            = 1145,
+   SVGA_3D_CMD_DX_READBACK_CONTEXT                        = 1146,
+   SVGA_3D_CMD_DX_INVALIDATE_CONTEXT                      = 1147,
+   SVGA_3D_CMD_DX_SET_SINGLE_CONSTANT_BUFFER              = 1148,
+   SVGA_3D_CMD_DX_SET_SHADER_RESOURCES                    = 1149,
+   SVGA_3D_CMD_DX_SET_SHADER                              = 1150,
+   SVGA_3D_CMD_DX_SET_SAMPLERS                            = 1151,
+   SVGA_3D_CMD_DX_DRAW                                    = 1152,
+   SVGA_3D_CMD_DX_DRAW_INDEXED                            = 1153,
+   SVGA_3D_CMD_DX_DRAW_INSTANCED                          = 1154,
+   SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED                  = 1155,
+   SVGA_3D_CMD_DX_DRAW_AUTO                               = 1156,
+   SVGA_3D_CMD_DX_SET_INPUT_LAYOUT                        = 1157,
+   SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS                      = 1158,
+   SVGA_3D_CMD_DX_SET_INDEX_BUFFER                        = 1159,
+   SVGA_3D_CMD_DX_SET_TOPOLOGY                            = 1160,
+   SVGA_3D_CMD_DX_SET_RENDERTARGETS                       = 1161,
+   SVGA_3D_CMD_DX_SET_BLEND_STATE                         = 1162,
+   SVGA_3D_CMD_DX_SET_DEPTHSTENCIL_STATE                  = 1163,
+   SVGA_3D_CMD_DX_SET_RASTERIZER_STATE                    = 1164,
+   SVGA_3D_CMD_DX_DEFINE_QUERY                            = 1165,
+   SVGA_3D_CMD_DX_DESTROY_QUERY                           = 1166,
+   SVGA_3D_CMD_DX_BIND_QUERY                              = 1167,
+   SVGA_3D_CMD_DX_SET_QUERY_OFFSET                        = 1168,
+   SVGA_3D_CMD_DX_BEGIN_QUERY                             = 1169,
+   SVGA_3D_CMD_DX_END_QUERY                               = 1170,
+   SVGA_3D_CMD_DX_READBACK_QUERY                          = 1171,
+   SVGA_3D_CMD_DX_SET_PREDICATION                         = 1172,
+   SVGA_3D_CMD_DX_SET_SOTARGETS                           = 1173,
+   SVGA_3D_CMD_DX_SET_VIEWPORTS                           = 1174,
+   SVGA_3D_CMD_DX_SET_SCISSORRECTS                        = 1175,
+   SVGA_3D_CMD_DX_CLEAR_RENDERTARGET_VIEW                 = 1176,
+   SVGA_3D_CMD_DX_CLEAR_DEPTHSTENCIL_VIEW                 = 1177,
+   SVGA_3D_CMD_DX_PRED_COPY_REGION                        = 1178,
+   SVGA_3D_CMD_DX_PRED_COPY                               = 1179,
+   SVGA_3D_CMD_DX_STRETCHBLT                              = 1180,
+   SVGA_3D_CMD_DX_GENMIPS                                 = 1181,
+   SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE                      = 1182,
+   SVGA_3D_CMD_DX_READBACK_SUBRESOURCE                    = 1183,
+   SVGA_3D_CMD_DX_INVALIDATE_SUBRESOURCE                  = 1184,
+   SVGA_3D_CMD_DX_DEFINE_SHADERRESOURCE_VIEW              = 1185,
+   SVGA_3D_CMD_DX_DESTROY_SHADERRESOURCE_VIEW             = 1186,
+   SVGA_3D_CMD_DX_DEFINE_RENDERTARGET_VIEW                = 1187,
+   SVGA_3D_CMD_DX_DESTROY_RENDERTARGET_VIEW               = 1188,
+   SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW                = 1189,
+   SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_VIEW               = 1190,
+   SVGA_3D_CMD_DX_DEFINE_ELEMENTLAYOUT                    = 1191,
+   SVGA_3D_CMD_DX_DESTROY_ELEMENTLAYOUT                   = 1192,
+   SVGA_3D_CMD_DX_DEFINE_BLEND_STATE                      = 1193,
+   SVGA_3D_CMD_DX_DESTROY_BLEND_STATE                     = 1194,
+   SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_STATE               = 1195,
+   SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_STATE              = 1196,
+   SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE                 = 1197,
+   SVGA_3D_CMD_DX_DESTROY_RASTERIZER_STATE                = 1198,
+   SVGA_3D_CMD_DX_DEFINE_SAMPLER_STATE                    = 1199,
+   SVGA_3D_CMD_DX_DESTROY_SAMPLER_STATE                   = 1200,
+   SVGA_3D_CMD_DX_DEFINE_SHADER                           = 1201,
+   SVGA_3D_CMD_DX_DESTROY_SHADER                          = 1202,
+   SVGA_3D_CMD_DX_BIND_SHADER                             = 1203,
+   SVGA_3D_CMD_DX_DEFINE_STREAMOUTPUT                     = 1204,
+   SVGA_3D_CMD_DX_DESTROY_STREAMOUTPUT                    = 1205,
+   SVGA_3D_CMD_DX_SET_STREAMOUTPUT                        = 1206,
+   SVGA_3D_CMD_DX_SET_COTABLE                             = 1207,
+   SVGA_3D_CMD_DX_READBACK_COTABLE                        = 1208,
+   SVGA_3D_CMD_DX_BUFFER_COPY                             = 1209,
+   SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER                    = 1210,
+   SVGA_3D_CMD_DX_SURFACE_COPY_AND_READBACK               = 1211,
+   SVGA_3D_CMD_DX_MOVE_QUERY                              = 1212,
+   SVGA_3D_CMD_DX_BIND_ALL_QUERY                          = 1213,
+   SVGA_3D_CMD_DX_READBACK_ALL_QUERY                      = 1214,
+   SVGA_3D_CMD_DX_PRED_TRANSFER_FROM_BUFFER               = 1215,
+   SVGA_3D_CMD_DX_MOB_FENCE_64                            = 1216,
+   SVGA_3D_CMD_DX_BIND_ALL_SHADER                         = 1217,
+   SVGA_3D_CMD_DX_HINT                                    = 1218,
+   SVGA_3D_CMD_DX_BUFFER_UPDATE                           = 1219,
+   SVGA_3D_CMD_DX_SET_VS_CONSTANT_BUFFER_OFFSET           = 1220,
+   SVGA_3D_CMD_DX_SET_PS_CONSTANT_BUFFER_OFFSET           = 1221,
+   SVGA_3D_CMD_DX_SET_GS_CONSTANT_BUFFER_OFFSET           = 1222,
+
+   /*
+    * Reserve some IDs to be used for the DX11 shader types.
+    */
+   SVGA_3D_CMD_DX_RESERVED1                               = 1223,
+   SVGA_3D_CMD_DX_RESERVED2                               = 1224,
+   SVGA_3D_CMD_DX_RESERVED3                               = 1225,
+
+   SVGA_3D_CMD_DX_COND_BIND_ALL_SHADER                    = 1226,
+
+   SVGA_3D_CMD_DX_MAX                                     = 1227,  /* one past the highest DX id above (1226) */
+   SVGA_3D_CMD_MAX                                        = 1227,  /* same value as SVGA_3D_CMD_DX_MAX */
+   SVGA_3D_CMD_FUTURE_MAX                                 = 3000
+} SVGAFifo3dCmdId;
 
 /*
  * FIFO command format definitions:
@@ -194,54 +291,6 @@ struct {
 #include "vmware_pack_end.h"
 SVGA3dCmdHeader;
 
-typedef enum {
-   SVGA3D_SURFACE_CUBEMAP               = (1 << 0),
-
-   /*
-    * HINT flags are not enforced by the device but are useful for
-    * performance.
-    */
-   SVGA3D_SURFACE_HINT_STATIC           = (1 << 1),
-   SVGA3D_SURFACE_HINT_DYNAMIC          = (1 << 2),
-   SVGA3D_SURFACE_HINT_INDEXBUFFER      = (1 << 3),
-   SVGA3D_SURFACE_HINT_VERTEXBUFFER     = (1 << 4),
-   SVGA3D_SURFACE_HINT_TEXTURE          = (1 << 5),
-   SVGA3D_SURFACE_HINT_RENDERTARGET     = (1 << 6),
-   SVGA3D_SURFACE_HINT_DEPTHSTENCIL     = (1 << 7),
-   SVGA3D_SURFACE_HINT_WRITEONLY        = (1 << 8),
-   SVGA3D_SURFACE_MASKABLE_ANTIALIAS    = (1 << 9),
-   SVGA3D_SURFACE_AUTOGENMIPMAPS        = (1 << 10),
-   SVGA3D_SURFACE_DECODE_RENDERTARGET   = (1 << 11),
-
-   /*
-    * Is this surface using a base-level pitch for it's mob backing?
-    *
-    * This flag is not intended to be set by guest-drivers, but is instead
-    * set by the device when the surface is bound to a mob with a specified
-    * pitch.
-    */
-   SVGA3D_SURFACE_MOB_PITCH             = (1 << 12),
-
-   SVGA3D_SURFACE_INACTIVE              = (1 << 13),
-   SVGA3D_SURFACE_HINT_RT_LOCKABLE      = (1 << 14),
-   SVGA3D_SURFACE_VOLUME                = (1 << 15),
-
-   /*
-    * Required to be set on a surface to bind it to a screen target.
-    */
-   SVGA3D_SURFACE_SCREENTARGET          = (1 << 16),
-
-   SVGA3D_SURFACE_RESERVED1             = (1 << 17),
-   SVGA3D_SURFACE_1D                    = (1 << 18),
-   SVGA3D_SURFACE_ARRAY                 = (1 << 19),
-
-} SVGA3dSurfaceFlags;
-
-#define SVGA3D_SURFACE_HB_DISALLOWED_MASK (SVGA3D_SURFACE_SCREENTARGET | \
-                                           SVGA3D_SURFACE_MOB_PITCH    | \
-                                           SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \
-                                           SVGA3D_SURFACE_BIND_STREAM_OUTPUT)
-
 typedef
 #include "vmware_pack_begin.h"
 struct {
@@ -666,6 +715,128 @@ struct {
 #include "vmware_pack_end.h"
 SVGA3dCmdDrawPrimitives;      /* SVGA_3D_CMD_DRAWPRIMITIVES */
 
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 cid;
+
+   uint32 primitiveCount;        /* How many primitives to render */
+   uint32 startVertexLocation;   /* Which vertex do we start rendering at. */
+
+   uint8 primitiveType;          /* SVGA3dPrimitiveType */
+   uint8 padding[3];
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDraw;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 cid;
+
+   uint8 primitiveType;       /* SVGA3dPrimitiveType */
+
+   uint32 indexBufferSid;     /* Valid index buffer sid. */
+   uint32 indexBufferOffset;  /* Byte offset into the index buffer, almost */
+			      /* always 0 for DX9 guests, non-zero for OpenGL */
+                              /* guests.  We can't represent non-multiple of */
+                              /* stride offsets in D3D9Renderer... */
+   uint8 indexBufferStride;   /* Allowable values = 1, 2, or 4 */
+
+   int32 baseVertexLocation;  /* Bias applied to the index when selecting a */
+                              /* vertex from the streams, may be negative */
+
+   uint32 primitiveCount;     /* How many primitives to render */
+   uint32 pad0;
+   uint16 pad1;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDrawIndexed;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   /*
+    * Describe a vertex array's data type, and define how it is to be
+    * used by the fixed function pipeline or the vertex shader. It
+    * isn't useful to have two VertexDecls with the same
+    * VertexArrayIdentity in one draw call.
+    */
+   uint16 streamOffset;
+   uint8 stream;
+   uint8 type;          /* SVGA3dDeclType */
+   uint8 method;        /* SVGA3dDeclMethod */
+   uint8 usage;         /* SVGA3dDeclUsage */
+   uint8 usageIndex;
+   uint8 padding;
+
+}
+#include "vmware_pack_end.h"
+SVGA3dVertexElement;
+
+/*
+ * Should the vertex element respect the stream value?  The high bit of the
+ * stream should be set to indicate that the stream should be respected.  If
+ * the high bit is not set, the stream will be ignored and replaced by the index
+ * of the position of the currently considered vertex element.
+ *
+ * All guests should set this bit and correctly specify the stream going
+ * forward.
+ */
+#define SVGA3D_VERTEX_ELEMENT_RESPECT_STREAM (1 << 7)
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 cid;
+
+   uint32 numElements;
+
+   /*
+    * Followed by numElements SVGA3dVertexElement structures.
+    *
+    * If numElements < SVGA3D_MAX_VERTEX_ARRAYS, the remaining elements
+    * are cleared and will not be used by following draws.
+    */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdSetVertexDecls;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 sid;
+   uint32 stride;
+   uint32 offset;
+}
+#include "vmware_pack_end.h"
+SVGA3dVertexStream;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 cid;
+
+   uint32 numStreams;
+   /*
+    * Followed by numStreams SVGA3dVertexStream structures.
+    *
+    * If numStreams < SVGA3D_MAX_VERTEX_ARRAYS, the remaining streams
+    * are cleared and will not be used by following draws.
+    */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdSetVertexStreams;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 cid;
+   uint32 numDivisors;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdSetVertexDivisors;
+
 typedef
 #include "vmware_pack_begin.h"
 struct {
@@ -988,38 +1159,6 @@ struct SVGA3dCmdScreenDMA {
 #include "vmware_pack_end.h"
 SVGA3dCmdScreenDMA;        /* SVGA_3D_CMD_SCREEN_DMA */
 
-/*
- * Set Unity Surface Cookie
- *
- * Associates the supplied cookie with the surface id for use with
- * Unity.  This cookie is a hint from guest to host, there is no way
- * for the guest to readback the cookie and the host is free to drop
- * the cookie association at will.  The default value for the cookie
- * on all surfaces is 0.
- */
-
-typedef
-#include "vmware_pack_begin.h"
-struct SVGA3dCmdSetUnitySurfaceCookie {
-   uint32 sid;
-   uint64 cookie;
-}
-#include "vmware_pack_end.h"
-SVGA3dCmdSetUnitySurfaceCookie;   /* SVGA_3D_CMD_SET_UNITY_SURFACE_COOKIE */
-
-/*
- * Open a context-specific surface in a non-context-specific manner.
- */
-
-typedef
-#include "vmware_pack_begin.h"
-struct SVGA3dCmdOpenContextSurface {
-   uint32 sid;
-}
-#include "vmware_pack_end.h"
-SVGA3dCmdOpenContextSurface;   /* SVGA_3D_CMD_OPEN_CONTEXT_SURFACE */
-
-
 /*
  * Logic ops
  */
@@ -1139,8 +1278,8 @@ struct SVGA3dCmdLogicOpsClearTypeBlend {
    uint32 gamma;
    uint32 color;
    uint32 color2;
-   int alphaOffsetX;
-   int alphaOffsetY;
+   int32 alphaOffsetX;
+   int32 alphaOffsetY;
    /* Followed by variable number of SVGA3dBox structures */
 }
 #include "vmware_pack_end.h"
@@ -1151,9 +1290,77 @@ SVGA3dCmdLogicOpsClearTypeBlend;   /* SVGA_3D_CMD_LOGICOPS_CLEARTYPEBLEND */
  * Guest-backed objects definitions.
  */
 
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGAMobFormat ptDepth;
+   uint32 sizeInBytes;
+   PPN64 base;
+}
+#include "vmware_pack_end.h"
+SVGAOTableMobEntry;
+#define SVGA3D_OTABLE_MOB_ENTRY_SIZE (sizeof(SVGAOTableMobEntry))
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGA3dSurfaceFormat format;
+   SVGA3dSurfaceFlags surfaceFlags;
+   uint32 numMipLevels;
+   uint32 multisampleCount;
+   SVGA3dTextureFilter autogenFilter;
+   SVGA3dSize size;
+   SVGAMobId mobid;
+   uint32 arraySize;
+   uint32 mobPitch;
+   uint32 pad[5];
+}
+#include "vmware_pack_end.h"
+SVGAOTableSurfaceEntry;
+#define SVGA3D_OTABLE_SURFACE_ENTRY_SIZE (sizeof(SVGAOTableSurfaceEntry))
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 cid;
+   SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGAOTableContextEntry;
+#define SVGA3D_OTABLE_CONTEXT_ENTRY_SIZE (sizeof(SVGAOTableContextEntry))
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGA3dShaderType type;
+   uint32 sizeInBytes;
+   uint32 offsetInBytes;
+   SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGAOTableShaderEntry;
+#define SVGA3D_OTABLE_SHADER_ENTRY_SIZE (sizeof(SVGAOTableShaderEntry))
+
 #define SVGA_STFLAG_PRIMARY (1 << 0)
 typedef uint32 SVGAScreenTargetFlags;
 
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGA3dSurfaceImageId image;
+   uint32 width;
+   uint32 height;
+   int32 xRoot;
+   int32 yRoot;
+   SVGAScreenTargetFlags flags;
+   uint32 dpi;
+   uint32 pad[7];
+}
+#include "vmware_pack_end.h"
+SVGAOTableScreenTargetEntry;
+#define SVGA3D_OTABLE_SCREEN_TARGET_ENTRY_SIZE \
+	(sizeof(SVGAOTableScreenTargetEntry))
+
 typedef
 #include "vmware_pack_begin.h"
 struct {
@@ -1178,6 +1385,209 @@ struct {
 #include "vmware_pack_end.h"
 SVGA3dShaderConstBool;
 
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint16 streamOffset;
+   uint8 stream;
+   uint8 type;
+   uint8 methodUsage;
+   uint8 usageIndex;
+}
+#include "vmware_pack_end.h"
+SVGAGBVertexElement;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 sid;
+   uint16 stride;
+   uint32 offset;
+}
+#include "vmware_pack_end.h"
+SVGAGBVertexStream;
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGA3dRect viewport;
+   SVGA3dRect scissorRect;
+   SVGA3dZRange zRange;
+
+   SVGA3dSurfaceImageId renderTargets[SVGA3D_RT_MAX];
+   SVGAGBVertexElement decl1[4];
+
+   uint32 renderStates[SVGA3D_RS_MAX];
+   SVGAGBVertexElement decl2[18];
+   uint32 pad0[2];
+
+   struct {
+      SVGA3dFace face;
+      SVGA3dMaterial material;
+   } material;
+
+   float clipPlanes[SVGA3D_NUM_CLIPPLANES][4];
+   float matrices[SVGA3D_TRANSFORM_MAX][16];
+
+   SVGA3dBool lightEnabled[SVGA3D_NUM_LIGHTS];
+   SVGA3dLightData lightData[SVGA3D_NUM_LIGHTS];
+
+   /*
+    * Shaders currently bound
+    */
+   uint32 shaders[SVGA3D_NUM_SHADERTYPE_PREDX];
+   SVGAGBVertexElement decl3[10];
+   uint32 pad1[3];
+
+   uint32 occQueryActive;
+   uint32 occQueryValue;
+
+   /*
+    * Int/Bool Shader constants
+    */
+   SVGA3dShaderConstInt pShaderIValues[SVGA3D_CONSTINTREG_MAX];
+   SVGA3dShaderConstInt vShaderIValues[SVGA3D_CONSTINTREG_MAX];
+   uint16 pShaderBValues;
+   uint16 vShaderBValues;
+
+
+   SVGAGBVertexStream streams[SVGA3D_MAX_VERTEX_ARRAYS];
+   SVGA3dVertexDivisor divisors[SVGA3D_MAX_VERTEX_ARRAYS];
+   uint32 numVertexDecls;
+   uint32 numVertexStreams;
+   uint32 numVertexDivisors;
+   uint32 pad2[30];
+
+   /*
+    * Texture Stages
+    *
+    * SVGA3D_TS_INVALID through SVGA3D_TS_CONSTANT are in the
+    * textureStages array.
+    * SVGA3D_TS_COLOR_KEY is in tsColorKey.
+    */
+   uint32 tsColorKey[SVGA3D_NUM_TEXTURE_UNITS];
+   uint32 textureStages[SVGA3D_NUM_TEXTURE_UNITS][SVGA3D_TS_CONSTANT + 1];
+   uint32 tsColorKeyEnable[SVGA3D_NUM_TEXTURE_UNITS];
+
+   /*
+    * Float Shader constants.
+    */
+   SVGA3dShaderConstFloat pShaderFValues[SVGA3D_CONSTREG_MAX];
+   SVGA3dShaderConstFloat vShaderFValues[SVGA3D_CONSTREG_MAX];
+}
+#include "vmware_pack_end.h"
+SVGAGBContextData;
+#define SVGA3D_CONTEXT_DATA_SIZE (sizeof(SVGAGBContextData))
+
+/*
+ * SVGA3dCmdSetOTableBase --
+ *
+ * This command allows the guest to specify the base PPN of the
+ * specified object table.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGAOTableType type;
+   PPN baseAddress;
+   uint32 sizeInBytes;
+   uint32 validSizeInBytes;
+   SVGAMobFormat ptDepth;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdSetOTableBase;  /* SVGA_3D_CMD_SET_OTABLE_BASE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGAOTableType type;
+   PPN64 baseAddress;
+   uint32 sizeInBytes;
+   uint32 validSizeInBytes;
+   SVGAMobFormat ptDepth;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdSetOTableBase64;  /* SVGA_3D_CMD_SET_OTABLE_BASE64 */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGAOTableType type;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdReadbackOTable;  /* SVGA_3D_CMD_READBACK_OTABLE */
+
+/*
+ * Define a memory object (Mob) in the OTable.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDefineGBMob {
+   SVGAMobId mobid;
+   SVGAMobFormat ptDepth;
+   PPN base;
+   uint32 sizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDefineGBMob;   /* SVGA_3D_CMD_DEFINE_GB_MOB */
+
+
+/*
+ * Destroys an object in the OTable.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDestroyGBMob {
+   SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDestroyGBMob;   /* SVGA_3D_CMD_DESTROY_GB_MOB */
+
+
+/*
+ * Define a memory object (Mob) in the OTable with a PPN64 base.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDefineGBMob64 {
+   SVGAMobId mobid;
+   SVGAMobFormat ptDepth;
+   PPN64 base;
+   uint32 sizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDefineGBMob64;   /* SVGA_3D_CMD_DEFINE_GB_MOB64 */
+
+/*
+ * Redefine an object in the OTable with PPN64 base.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdRedefineGBMob64 {
+   SVGAMobId mobid;
+   SVGAMobFormat ptDepth;
+   PPN64 base;
+   uint32 sizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdRedefineGBMob64;   /* SVGA_3D_CMD_REDEFINE_GB_MOB64 */
+
+/*
+ * Notification that the page tables have been modified.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdUpdateGBMobMapping {
+   SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdUpdateGBMobMapping;   /* SVGA_3D_CMD_UPDATE_GB_MOB_MAPPING */
+
 /*
  * Define a guest-backed surface.
  */
@@ -1243,7 +1653,7 @@ SVGA3dCmdBindGBSurfaceWithPitch;   /* SVGA_3D_CMD_BIND_GB_SURFACE_WITH_PITCH */
 
 typedef
 #include "vmware_pack_begin.h"
-struct{
+struct SVGA3dCmdCondBindGBSurface {
    uint32 sid;
    SVGAMobId testMobid;
    SVGAMobId mobid;
@@ -1477,18 +1887,6 @@ struct SVGA3dCmdDestroyGBShader {
 #include "vmware_pack_end.h"
 SVGA3dCmdDestroyGBShader;   /* SVGA_3D_CMD_DESTROY_GB_SHADER */
 
-
-typedef
-#include "vmware_pack_begin.h"
-struct SVGA3dCmdBindGBShaderConsts {
-   uint32 cid;
-   SVGA3dShaderType shaderType;
-   SVGA3dShaderConstType shaderConstType;
-   uint32 sid;
-}
-#include "vmware_pack_end.h"
-SVGA3dCmdBindGBShaderConsts;   /* SVGA_3D_CMD_BIND_SHADERCONSTS */
-
 typedef
 #include "vmware_pack_begin.h"
 struct {
@@ -1553,7 +1951,7 @@ typedef
 #include "vmware_pack_begin.h"
 struct {
    SVGAMobId mobid;
-   uint32 fbOffset;
+   uint32 mustBeZero;
    uint32 initialized;
 }
 #include "vmware_pack_end.h"
@@ -1649,6 +2047,6 @@ struct {
    uint32 mobOffset;
 }
 #include "vmware_pack_end.h"
-SVGA3dCmdGBMobFence;  /* SVGA_3D_CMD_GB_MOB_FENCE*/
+SVGA3dCmdGBMobFence;  /* SVGA_3D_CMD_GB_MOB_FENCE */
 
-#endif // _SVGA3D_CMD_H_
+#endif /* _SVGA3D_CMD_H_ */
diff --git a/src/gallium/drivers/svga/include/svga3d_devcaps.h b/src/gallium/drivers/svga/include/svga3d_devcaps.h
index 915f3c7571a..ade210b4172 100644
--- a/src/gallium/drivers/svga/include/svga3d_devcaps.h
+++ b/src/gallium/drivers/svga/include/svga3d_devcaps.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 1998-2014 VMware, Inc.  All rights reserved.
+ * Copyright 1998-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -223,9 +223,230 @@ typedef enum {
     */
    SVGA3D_DEVCAP_TS_COLOR_KEY                      = 93, /* boolean */
 
+   /*
+    * Deprecated.
+    */
+   SVGA3D_DEVCAP_DEAD2                             = 94,
+
+   /*
+    * Does the device support the DX commands?
+    */
+   SVGA3D_DEVCAP_DX                                = 95,
+
+   /*
+    * What is the maximum size of a texture array?
+    *
+    * (Even if this cap is zero, cubemaps are still allowed.)
+    */
+   SVGA3D_DEVCAP_MAX_TEXTURE_ARRAY_SIZE            = 96,
+
+   /*
+    * What is the maximum number of vertex buffers that can
+    * be used in the DXContext inputAssembly?
+    */
+   SVGA3D_DEVCAP_DX_MAX_VERTEXBUFFERS              = 97,
+
+   /*
+    * What is the maximum number of constant buffers
+    * that can be expected to work correctly with a
+    * DX context?
+    */
+   SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS           = 98,
+
+   /*
+    * Does the device support provoking vertex control?
+    * If zero, the first vertex will always be the provoking vertex.
+    */
+   SVGA3D_DEVCAP_DX_PROVOKING_VERTEX               = 99,
+
+   SVGA3D_DEVCAP_DXFMT_X8R8G8B8                    = 100,
+   SVGA3D_DEVCAP_DXFMT_A8R8G8B8                    = 101,
+   SVGA3D_DEVCAP_DXFMT_R5G6B5                      = 102,
+   SVGA3D_DEVCAP_DXFMT_X1R5G5B5                    = 103,
+   SVGA3D_DEVCAP_DXFMT_A1R5G5B5                    = 104,
+   SVGA3D_DEVCAP_DXFMT_A4R4G4B4                    = 105,
+   SVGA3D_DEVCAP_DXFMT_Z_D32                       = 106,
+   SVGA3D_DEVCAP_DXFMT_Z_D16                       = 107,
+   SVGA3D_DEVCAP_DXFMT_Z_D24S8                     = 108,
+   SVGA3D_DEVCAP_DXFMT_Z_D15S1                     = 109,
+   SVGA3D_DEVCAP_DXFMT_LUMINANCE8                  = 110,
+   SVGA3D_DEVCAP_DXFMT_LUMINANCE4_ALPHA4           = 111,
+   SVGA3D_DEVCAP_DXFMT_LUMINANCE16                 = 112,
+   SVGA3D_DEVCAP_DXFMT_LUMINANCE8_ALPHA8           = 113,
+   SVGA3D_DEVCAP_DXFMT_DXT1                        = 114,
+   SVGA3D_DEVCAP_DXFMT_DXT2                        = 115,
+   SVGA3D_DEVCAP_DXFMT_DXT3                        = 116,
+   SVGA3D_DEVCAP_DXFMT_DXT4                        = 117,
+   SVGA3D_DEVCAP_DXFMT_DXT5                        = 118,
+   SVGA3D_DEVCAP_DXFMT_BUMPU8V8                    = 119,
+   SVGA3D_DEVCAP_DXFMT_BUMPL6V5U5                  = 120,
+   SVGA3D_DEVCAP_DXFMT_BUMPX8L8V8U8                = 121,
+   SVGA3D_DEVCAP_DXFMT_FORMAT_DEAD1                = 122,
+   SVGA3D_DEVCAP_DXFMT_ARGB_S10E5                  = 123,
+   SVGA3D_DEVCAP_DXFMT_ARGB_S23E8                  = 124,
+   SVGA3D_DEVCAP_DXFMT_A2R10G10B10                 = 125,
+   SVGA3D_DEVCAP_DXFMT_V8U8                        = 126,
+   SVGA3D_DEVCAP_DXFMT_Q8W8V8U8                    = 127,
+   SVGA3D_DEVCAP_DXFMT_CxV8U8                      = 128,
+   SVGA3D_DEVCAP_DXFMT_X8L8V8U8                    = 129,
+   SVGA3D_DEVCAP_DXFMT_A2W10V10U10                 = 130,
+   SVGA3D_DEVCAP_DXFMT_ALPHA8                      = 131,
+   SVGA3D_DEVCAP_DXFMT_R_S10E5                     = 132,
+   SVGA3D_DEVCAP_DXFMT_R_S23E8                     = 133,
+   SVGA3D_DEVCAP_DXFMT_RG_S10E5                    = 134,
+   SVGA3D_DEVCAP_DXFMT_RG_S23E8                    = 135,
+   SVGA3D_DEVCAP_DXFMT_BUFFER                      = 136,
+   SVGA3D_DEVCAP_DXFMT_Z_D24X8                     = 137,
+   SVGA3D_DEVCAP_DXFMT_V16U16                      = 138,
+   SVGA3D_DEVCAP_DXFMT_G16R16                      = 139,
+   SVGA3D_DEVCAP_DXFMT_A16B16G16R16                = 140,
+   SVGA3D_DEVCAP_DXFMT_UYVY                        = 141,
+   SVGA3D_DEVCAP_DXFMT_YUY2                        = 142,
+   SVGA3D_DEVCAP_DXFMT_NV12                        = 143,
+   SVGA3D_DEVCAP_DXFMT_AYUV                        = 144,
+   SVGA3D_DEVCAP_DXFMT_R32G32B32A32_TYPELESS       = 145,
+   SVGA3D_DEVCAP_DXFMT_R32G32B32A32_UINT           = 146,
+   SVGA3D_DEVCAP_DXFMT_R32G32B32A32_SINT           = 147,
+   SVGA3D_DEVCAP_DXFMT_R32G32B32_TYPELESS          = 148,
+   SVGA3D_DEVCAP_DXFMT_R32G32B32_FLOAT             = 149,
+   SVGA3D_DEVCAP_DXFMT_R32G32B32_UINT              = 150,
+   SVGA3D_DEVCAP_DXFMT_R32G32B32_SINT              = 151,
+   SVGA3D_DEVCAP_DXFMT_R16G16B16A16_TYPELESS       = 152,
+   SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UINT           = 153,
+   SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SNORM          = 154,
+   SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SINT           = 155,
+   SVGA3D_DEVCAP_DXFMT_R32G32_TYPELESS             = 156,
+   SVGA3D_DEVCAP_DXFMT_R32G32_UINT                 = 157,
+   SVGA3D_DEVCAP_DXFMT_R32G32_SINT                 = 158,
+   SVGA3D_DEVCAP_DXFMT_R32G8X24_TYPELESS           = 159,
+   SVGA3D_DEVCAP_DXFMT_D32_FLOAT_S8X24_UINT        = 160,
+   SVGA3D_DEVCAP_DXFMT_R32_FLOAT_X8X24_TYPELESS    = 161,
+   SVGA3D_DEVCAP_DXFMT_X32_TYPELESS_G8X24_UINT     = 162,
+   SVGA3D_DEVCAP_DXFMT_R10G10B10A2_TYPELESS        = 163,
+   SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UINT            = 164,
+   SVGA3D_DEVCAP_DXFMT_R11G11B10_FLOAT             = 165,
+   SVGA3D_DEVCAP_DXFMT_R8G8B8A8_TYPELESS           = 166,
+   SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM              = 167,
+   SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM_SRGB         = 168,
+   SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UINT               = 169,
+   SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SINT               = 170,
+   SVGA3D_DEVCAP_DXFMT_R16G16_TYPELESS             = 171,
+   SVGA3D_DEVCAP_DXFMT_R16G16_UINT                 = 172,
+   SVGA3D_DEVCAP_DXFMT_R16G16_SINT                 = 173,
+   SVGA3D_DEVCAP_DXFMT_R32_TYPELESS                = 174,
+   SVGA3D_DEVCAP_DXFMT_D32_FLOAT                   = 175,
+   SVGA3D_DEVCAP_DXFMT_R32_UINT                    = 176,
+   SVGA3D_DEVCAP_DXFMT_R32_SINT                    = 177,
+   SVGA3D_DEVCAP_DXFMT_R24G8_TYPELESS              = 178,
+   SVGA3D_DEVCAP_DXFMT_D24_UNORM_S8_UINT           = 179,
+   SVGA3D_DEVCAP_DXFMT_R24_UNORM_X8_TYPELESS       = 180,
+   SVGA3D_DEVCAP_DXFMT_X24_TYPELESS_G8_UINT        = 181,
+   SVGA3D_DEVCAP_DXFMT_R8G8_TYPELESS               = 182,
+   SVGA3D_DEVCAP_DXFMT_R8G8_UNORM                  = 183,
+   SVGA3D_DEVCAP_DXFMT_R8G8_UINT                   = 184,
+   SVGA3D_DEVCAP_DXFMT_R8G8_SINT                   = 185,
+   SVGA3D_DEVCAP_DXFMT_R16_TYPELESS                = 186,
+   SVGA3D_DEVCAP_DXFMT_R16_UNORM                   = 187,
+   SVGA3D_DEVCAP_DXFMT_R16_UINT                    = 188,
+   SVGA3D_DEVCAP_DXFMT_R16_SNORM                   = 189,
+   SVGA3D_DEVCAP_DXFMT_R16_SINT                    = 190,
+   SVGA3D_DEVCAP_DXFMT_R8_TYPELESS                 = 191,
+   SVGA3D_DEVCAP_DXFMT_R8_UNORM                    = 192,
+   SVGA3D_DEVCAP_DXFMT_R8_UINT                     = 193,
+   SVGA3D_DEVCAP_DXFMT_R8_SNORM                    = 194,
+   SVGA3D_DEVCAP_DXFMT_R8_SINT                     = 195,
+   SVGA3D_DEVCAP_DXFMT_P8                          = 196,
+   SVGA3D_DEVCAP_DXFMT_R9G9B9E5_SHAREDEXP          = 197,
+   SVGA3D_DEVCAP_DXFMT_R8G8_B8G8_UNORM             = 198,
+   SVGA3D_DEVCAP_DXFMT_G8R8_G8B8_UNORM             = 199,
+   SVGA3D_DEVCAP_DXFMT_BC1_TYPELESS                = 200,
+   SVGA3D_DEVCAP_DXFMT_BC1_UNORM_SRGB              = 201,
+   SVGA3D_DEVCAP_DXFMT_BC2_TYPELESS                = 202,
+   SVGA3D_DEVCAP_DXFMT_BC2_UNORM_SRGB              = 203,
+   SVGA3D_DEVCAP_DXFMT_BC3_TYPELESS                = 204,
+   SVGA3D_DEVCAP_DXFMT_BC3_UNORM_SRGB              = 205,
+   SVGA3D_DEVCAP_DXFMT_BC4_TYPELESS                = 206,
+   SVGA3D_DEVCAP_DXFMT_ATI1                        = 207,
+   SVGA3D_DEVCAP_DXFMT_BC4_SNORM                   = 208,
+   SVGA3D_DEVCAP_DXFMT_BC5_TYPELESS                = 209,
+   SVGA3D_DEVCAP_DXFMT_ATI2                        = 210,
+   SVGA3D_DEVCAP_DXFMT_BC5_SNORM                   = 211,
+   SVGA3D_DEVCAP_DXFMT_R10G10B10_XR_BIAS_A2_UNORM  = 212,
+   SVGA3D_DEVCAP_DXFMT_B8G8R8A8_TYPELESS           = 213,
+   SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM_SRGB         = 214,
+   SVGA3D_DEVCAP_DXFMT_B8G8R8X8_TYPELESS           = 215,
+   SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM_SRGB         = 216,
+   SVGA3D_DEVCAP_DXFMT_Z_DF16                      = 217,
+   SVGA3D_DEVCAP_DXFMT_Z_DF24                      = 218,
+   SVGA3D_DEVCAP_DXFMT_Z_D24S8_INT                 = 219,
+   SVGA3D_DEVCAP_DXFMT_YV12                        = 220,
+   SVGA3D_DEVCAP_DXFMT_R32G32B32A32_FLOAT          = 221,
+   SVGA3D_DEVCAP_DXFMT_R16G16B16A16_FLOAT          = 222,
+   SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UNORM          = 223,
+   SVGA3D_DEVCAP_DXFMT_R32G32_FLOAT                = 224,
+   SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UNORM           = 225,
+   SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SNORM              = 226,
+   SVGA3D_DEVCAP_DXFMT_R16G16_FLOAT                = 227,
+   SVGA3D_DEVCAP_DXFMT_R16G16_UNORM                = 228,
+   SVGA3D_DEVCAP_DXFMT_R16G16_SNORM                = 229,
+   SVGA3D_DEVCAP_DXFMT_R32_FLOAT                   = 230,
+   SVGA3D_DEVCAP_DXFMT_R8G8_SNORM                  = 231,
+   SVGA3D_DEVCAP_DXFMT_R16_FLOAT                   = 232,
+   SVGA3D_DEVCAP_DXFMT_D16_UNORM                   = 233,
+   SVGA3D_DEVCAP_DXFMT_A8_UNORM                    = 234,
+   SVGA3D_DEVCAP_DXFMT_BC1_UNORM                   = 235,
+   SVGA3D_DEVCAP_DXFMT_BC2_UNORM                   = 236,
+   SVGA3D_DEVCAP_DXFMT_BC3_UNORM                   = 237,
+   SVGA3D_DEVCAP_DXFMT_B5G6R5_UNORM                = 238,
+   SVGA3D_DEVCAP_DXFMT_B5G5R5A1_UNORM              = 239,
+   SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM              = 240,
+   SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM              = 241,
+   SVGA3D_DEVCAP_DXFMT_BC4_UNORM                   = 242,
+   SVGA3D_DEVCAP_DXFMT_BC5_UNORM                   = 243,
+
    SVGA3D_DEVCAP_MAX                       /* This must be the last index. */
 } SVGA3dDevCapIndex;
 
+/*
+ * Bit definitions for DXFMT devcaps
+ *
+ * SUPPORTED: Can the format be defined?
+ * SHADER_SAMPLE: Can the format be sampled from a shader?
+ * COLOR_RENDERTARGET: Can the format be a color render target?
+ * DEPTH_RENDERTARGET: Can the format be a depth render target?
+ * BLENDABLE: Is the format blendable?
+ * MIPS: Does the format support mip levels?
+ * ARRAY: Does the format support texture arrays?
+ * VOLUME: Does the format support having volume?
+ * DX_VERTEX_BUFFER: Presumably, can the format be used in a DX vertex buffer?
+ * MULTISAMPLE_2: Does the format support 2x multisample?
+ * MULTISAMPLE_4: Does the format support 4x multisample?
+ * MULTISAMPLE_8: Does the format support 8x multisample?
+ */
+#define SVGA3D_DXFMT_SUPPORTED                (1 <<  0)
+#define SVGA3D_DXFMT_SHADER_SAMPLE            (1 <<  1)
+#define SVGA3D_DXFMT_COLOR_RENDERTARGET       (1 <<  2)
+#define SVGA3D_DXFMT_DEPTH_RENDERTARGET       (1 <<  3)
+#define SVGA3D_DXFMT_BLENDABLE                (1 <<  4)
+#define SVGA3D_DXFMT_MIPS                     (1 <<  5)
+#define SVGA3D_DXFMT_ARRAY                    (1 <<  6)
+#define SVGA3D_DXFMT_VOLUME                   (1 <<  7)
+#define SVGA3D_DXFMT_DX_VERTEX_BUFFER         (1 <<  8)
+#define SVGADX_DXFMT_MULTISAMPLE_2            (1 <<  9)
+#define SVGADX_DXFMT_MULTISAMPLE_4            (1 << 10)
+#define SVGADX_DXFMT_MULTISAMPLE_8            (1 << 11)
+#define SVGADX_DXFMT_MAX                      (1 << 12)
+
+/*
+ * Convenience mask for any multisample capability.
+ *
+ * The multisample bits imply both load and render capability.
+ */
+#define SVGA3D_DXFMT_MULTISAMPLE ( \
+           SVGADX_DXFMT_MULTISAMPLE_2 | \
+           SVGADX_DXFMT_MULTISAMPLE_4 | \
+           SVGADX_DXFMT_MULTISAMPLE_8 )
+
 typedef union {
    Bool   b;
    uint32 u;
@@ -233,4 +454,4 @@ typedef union {
    float  f;
 } SVGA3dDevCapResult;
 
-#endif // _SVGA3D_DEVCAPS_H_
+#endif /* _SVGA3D_DEVCAPS_H_ */
diff --git a/src/gallium/drivers/svga/include/svga3d_limits.h b/src/gallium/drivers/svga/include/svga3d_limits.h
index 367e8cf7a50..a1c36877ad5 100644
--- a/src/gallium/drivers/svga/include/svga3d_limits.h
+++ b/src/gallium/drivers/svga/include/svga3d_limits.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2007-2014 VMware, Inc.  All rights reserved.
+ * Copyright 2007-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -41,6 +41,7 @@
 #define SVGA3D_NUM_CLIPPLANES                   6
 #define SVGA3D_MAX_RENDER_TARGETS               8
 #define SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS  (SVGA3D_MAX_RENDER_TARGETS)
+#define SVGA3D_MAX_UAVIEWS                      8
 #define SVGA3D_MAX_CONTEXT_IDS                  256
 #define SVGA3D_MAX_SURFACE_IDS                  (32 * 1024)
 
@@ -56,9 +57,6 @@
 
 #define SVGA3D_NUM_TEXTURE_UNITS                32
 #define SVGA3D_NUM_LIGHTS                       8
-#define SVGA3D_MAX_VIDEODECODERS                8
-#define SVGA3D_MAX_VIDEOPROCESSORS              8
-#define SVGA3D_MAX_VIDEODECODER_FRAMES          400
 
 /*
  * Maximum size in dwords of shader text the SVGA device will allow.
@@ -98,4 +96,4 @@
  */
 #define SVGA3D_MAX_DRAW_PRIMITIVE_RANGES 32
 
-#endif // _SVGA3D_LIMITS_H_
+#endif /* _SVGA3D_LIMITS_H_ */
diff --git a/src/gallium/drivers/svga/include/svga3d_reg.h b/src/gallium/drivers/svga/include/svga3d_reg.h
index 01705f31456..b44ce648f59 100644
--- a/src/gallium/drivers/svga/include/svga3d_reg.h
+++ b/src/gallium/drivers/svga/include/svga3d_reg.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 1998-2014 VMware, Inc.  All rights reserved.
+ * Copyright 1998-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -43,6 +43,7 @@
 #include "svga3d_types.h"
 #include "svga3d_limits.h"
 #include "svga3d_cmd.h"
+#include "svga3d_dx.h"
 #include "svga3d_devcaps.h"
 
 
diff --git a/src/gallium/drivers/svga/include/svga3d_surfacedefs.h b/src/gallium/drivers/svga/include/svga3d_surfacedefs.h
index ce5475b6f04..efa358b5449 100644
--- a/src/gallium/drivers/svga/include/svga3d_surfacedefs.h
+++ b/src/gallium/drivers/svga/include/svga3d_surfacedefs.h
@@ -1,27 +1,29 @@
-/**********************************************************
- * Copyright 1998-2014 VMware, Inc.  All rights reserved.
+/**************************************************************************
  *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
+ * Copyright © 1998-2015 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
  *
- **********************************************************/
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
 
 /*
  * svga3d_surfacedefs.h --
@@ -53,645 +55,851 @@
  */
 
 enum svga3d_block_desc {
-	SVGA3DBLOCKDESC_NONE        = 0,         /* No channels are active */
-	SVGA3DBLOCKDESC_BLUE        = 1 << 0,    /* Block with red channel
-						    data */
-	SVGA3DBLOCKDESC_U           = 1 << 0,    /* Block with bump U channel
-						    data */
-	SVGA3DBLOCKDESC_UV_VIDEO    = 1 << 7,    /* Block with alternating video
-						    U and V */
-	SVGA3DBLOCKDESC_GREEN       = 1 << 1,    /* Block with green channel
-						    data */
-	SVGA3DBLOCKDESC_V           = 1 << 1,    /* Block with bump V channel
-						    data */
-	SVGA3DBLOCKDESC_STENCIL     = 1 << 1,    /* Block with a stencil
-						    channel */
-	SVGA3DBLOCKDESC_RED         = 1 << 2,    /* Block with blue channel
-						    data */
-	SVGA3DBLOCKDESC_W           = 1 << 2,    /* Block with bump W channel
-						    data */
-	SVGA3DBLOCKDESC_LUMINANCE   = 1 << 2,    /* Block with luminance channel
-						    data */
-	SVGA3DBLOCKDESC_Y           = 1 << 2,    /* Block with video luminance
-						    data */
-	SVGA3DBLOCKDESC_DEPTH       = 1 << 2,    /* Block with depth channel */
-	SVGA3DBLOCKDESC_ALPHA       = 1 << 3,    /* Block with an alpha
-						    channel */
-	SVGA3DBLOCKDESC_Q           = 1 << 3,    /* Block with bump Q channel
-						    data */
-	SVGA3DBLOCKDESC_BUFFER      = 1 << 4,    /* Block stores 1 byte of
-						    data */
-	SVGA3DBLOCKDESC_COMPRESSED  = 1 << 5,    /* Block stores n bytes of
-						    data depending on the
-						    compression method used */
-	SVGA3DBLOCKDESC_IEEE_FP     = 1 << 6,    /* Block stores data in an IEEE
-						    floating point
-						    representation in
-						    all channels */
-	SVGA3DBLOCKDESC_PLANAR_YUV  = 1 << 8,    /* Three separate blocks store
-						    data. */
-	SVGA3DBLOCKDESC_U_VIDEO     = 1 << 9,    /* Block with U video data */
-	SVGA3DBLOCKDESC_V_VIDEO     = 1 << 10,   /* Block with V video data */
-	SVGA3DBLOCKDESC_EXP         = 1 << 11,   /* Shared exponent */
-	SVGA3DBLOCKDESC_SRGB        = 1 << 12,   /* Data is in sRGB format */
-	SVGA3DBLOCKDESC_2PLANAR_YUV = 1 << 13,   /* 2 planes of Y, UV,
-						    e.g., NV12. */
-	SVGA3DBLOCKDESC_3PLANAR_YUV = 1 << 14,   /* 3 planes of separate
-						    Y, U, V, e.g., YV12. */
-
-	SVGA3DBLOCKDESC_RG         = SVGA3DBLOCKDESC_RED |
-	SVGA3DBLOCKDESC_GREEN,
-	SVGA3DBLOCKDESC_RGB        = SVGA3DBLOCKDESC_RG |
-	SVGA3DBLOCKDESC_BLUE,
-	SVGA3DBLOCKDESC_RGB_SRGB   = SVGA3DBLOCKDESC_RGB |
-	SVGA3DBLOCKDESC_SRGB,
-	SVGA3DBLOCKDESC_RGBA       = SVGA3DBLOCKDESC_RGB |
-	SVGA3DBLOCKDESC_ALPHA,
-	SVGA3DBLOCKDESC_RGBA_SRGB  = SVGA3DBLOCKDESC_RGBA |
-	SVGA3DBLOCKDESC_SRGB,
-	SVGA3DBLOCKDESC_UV         = SVGA3DBLOCKDESC_U |
-	SVGA3DBLOCKDESC_V,
-	SVGA3DBLOCKDESC_UVL        = SVGA3DBLOCKDESC_UV |
-	SVGA3DBLOCKDESC_LUMINANCE,
-	SVGA3DBLOCKDESC_UVW        = SVGA3DBLOCKDESC_UV |
-	SVGA3DBLOCKDESC_W,
-	SVGA3DBLOCKDESC_UVWA       = SVGA3DBLOCKDESC_UVW |
-	SVGA3DBLOCKDESC_ALPHA,
-	SVGA3DBLOCKDESC_UVWQ       = SVGA3DBLOCKDESC_U |
-	SVGA3DBLOCKDESC_V |
-	SVGA3DBLOCKDESC_W |
-	SVGA3DBLOCKDESC_Q,
-	SVGA3DBLOCKDESC_LA         = SVGA3DBLOCKDESC_LUMINANCE |
-	SVGA3DBLOCKDESC_ALPHA,
-	SVGA3DBLOCKDESC_R_FP       = SVGA3DBLOCKDESC_RED |
-	SVGA3DBLOCKDESC_IEEE_FP,
-	SVGA3DBLOCKDESC_RG_FP      = SVGA3DBLOCKDESC_R_FP |
-	SVGA3DBLOCKDESC_GREEN,
-	SVGA3DBLOCKDESC_RGB_FP     = SVGA3DBLOCKDESC_RG_FP |
-	SVGA3DBLOCKDESC_BLUE,
-	SVGA3DBLOCKDESC_RGBA_FP    = SVGA3DBLOCKDESC_RGB_FP |
-	SVGA3DBLOCKDESC_ALPHA,
-	SVGA3DBLOCKDESC_DS         = SVGA3DBLOCKDESC_DEPTH |
-	SVGA3DBLOCKDESC_STENCIL,
-	SVGA3DBLOCKDESC_YUV        = SVGA3DBLOCKDESC_UV_VIDEO |
-	SVGA3DBLOCKDESC_Y,
-	SVGA3DBLOCKDESC_AYUV       = SVGA3DBLOCKDESC_ALPHA |
-	SVGA3DBLOCKDESC_Y |
-	SVGA3DBLOCKDESC_U_VIDEO |
-	SVGA3DBLOCKDESC_V_VIDEO,
-	SVGA3DBLOCKDESC_RGBE       = SVGA3DBLOCKDESC_RGB |
-	SVGA3DBLOCKDESC_EXP,
-	SVGA3DBLOCKDESC_COMPRESSED_SRGB = SVGA3DBLOCKDESC_COMPRESSED |
-	SVGA3DBLOCKDESC_SRGB,
-	SVGA3DBLOCKDESC_NV12       = SVGA3DBLOCKDESC_PLANAR_YUV |
-	SVGA3DBLOCKDESC_2PLANAR_YUV,
-	SVGA3DBLOCKDESC_YV12       = SVGA3DBLOCKDESC_PLANAR_YUV |
-	SVGA3DBLOCKDESC_3PLANAR_YUV,
-};
 
-/*
- * SVGA3dSurfaceDesc describes the actual pixel data.
- *
- * This structure provides the following information:
- *    1. Block description.
- *    2. Dimensions of a block in the surface.
- *    3. Size of block in bytes.
- *    4. Bit depth of the pixel data.
- *    5. Channel bit depths and masks (if applicable).
- */
-#define SVGA3D_CHANNEL_DEF(type)		\
-	struct {				\
-		union {				\
-			type blue;              \
-			type u;                 \
-			type uv_video;          \
-			type u_video;           \
-		};				\
-		union {				\
-			type green;             \
-			type v;                 \
-			type stencil;           \
-			type v_video;           \
-		};				\
-		union {				\
-			type red;               \
-			type w;                 \
-			type luminance;         \
-			type y;                 \
-			type depth;             \
-			type data;              \
-		};				\
-		union {				\
-			type alpha;             \
-			type q;                 \
-			type exp;               \
-		};				\
-	}
-
-struct svga3d_surface_desc {
-	enum svga3d_block_desc block_desc;
-	SVGA3dSize block_size;
-	uint32 bytes_per_block;
-	uint32 pitch_bytes_per_block;
-
-	struct {
-		uint32 total;
-		SVGA3D_CHANNEL_DEF(uint8);
-	} bit_depth;
-
-	struct {
-		SVGA3D_CHANNEL_DEF(uint8);
-	} bit_offset;
+   SVGA3DBLOCKDESC_NONE        = 0,         /* No channels are active */
+   SVGA3DBLOCKDESC_BLUE        = 1 << 0,    /* Block with blue channel data */
+   SVGA3DBLOCKDESC_U           = 1 << 0,    /* Block with bump U channel data */
+   SVGA3DBLOCKDESC_GREEN       = 1 << 1,    /* Block with green channel data */
+   SVGA3DBLOCKDESC_V           = 1 << 1,    /* Block with bump V channel data */
+   SVGA3DBLOCKDESC_RED         = 1 << 2,    /* Block with red channel data */
+   SVGA3DBLOCKDESC_W           = 1 << 2,    /* Block with bump W channel data */
+   SVGA3DBLOCKDESC_LUMINANCE   = 1 << 2,    /* Block with luminance channel data */
+   SVGA3DBLOCKDESC_Y           = 1 << 2,    /* Block with video luminance data */
+   SVGA3DBLOCKDESC_ALPHA       = 1 << 3,    /* Block with an alpha channel */
+   SVGA3DBLOCKDESC_Q           = 1 << 3,    /* Block with bump Q channel data */
+   SVGA3DBLOCKDESC_BUFFER      = 1 << 4,    /* Block stores 1 byte of data */
+   SVGA3DBLOCKDESC_COMPRESSED  = 1 << 5,    /* Block stores n bytes of data depending
+                                               on the compression method used */
+   SVGA3DBLOCKDESC_IEEE_FP     = 1 << 6,    /* Block stores data in an IEEE floating point
+                                               representation in all channels */
+   SVGA3DBLOCKDESC_UV_VIDEO    = 1 << 7,    /* Block with alternating video U and V */
+   SVGA3DBLOCKDESC_PLANAR_YUV  = 1 << 8,    /* Three separate blocks store data. */
+   SVGA3DBLOCKDESC_U_VIDEO     = 1 << 9,    /* Block with U video data */
+   SVGA3DBLOCKDESC_V_VIDEO     = 1 << 10,   /* Block with V video data */
+   SVGA3DBLOCKDESC_EXP         = 1 << 11,   /* Shared exponent */
+   SVGA3DBLOCKDESC_SRGB        = 1 << 12,   /* Data is in sRGB format */
+   SVGA3DBLOCKDESC_2PLANAR_YUV = 1 << 13,   /* 2 planes of Y, UV, e.g., NV12. */
+   SVGA3DBLOCKDESC_3PLANAR_YUV = 1 << 14,   /* 3 planes of separate Y, U, V, e.g., YV12. */
+   SVGA3DBLOCKDESC_DEPTH       = 1 << 15,   /* Block with depth channel */
+   SVGA3DBLOCKDESC_STENCIL     = 1 << 16,   /* Block with a stencil channel */
+
+   SVGA3DBLOCKDESC_RG         = SVGA3DBLOCKDESC_RED |
+                                SVGA3DBLOCKDESC_GREEN,
+   SVGA3DBLOCKDESC_RGB        = SVGA3DBLOCKDESC_RG |
+                                SVGA3DBLOCKDESC_BLUE,
+   SVGA3DBLOCKDESC_RGB_SRGB   = SVGA3DBLOCKDESC_RGB |
+                                SVGA3DBLOCKDESC_SRGB,
+   SVGA3DBLOCKDESC_RGBA       = SVGA3DBLOCKDESC_RGB |
+                                SVGA3DBLOCKDESC_ALPHA,
+   SVGA3DBLOCKDESC_RGBA_SRGB  = SVGA3DBLOCKDESC_RGBA |
+                                SVGA3DBLOCKDESC_SRGB,
+   SVGA3DBLOCKDESC_UV         = SVGA3DBLOCKDESC_U |
+                                SVGA3DBLOCKDESC_V,
+   SVGA3DBLOCKDESC_UVL        = SVGA3DBLOCKDESC_UV |
+                                SVGA3DBLOCKDESC_LUMINANCE,
+   SVGA3DBLOCKDESC_UVW        = SVGA3DBLOCKDESC_UV |
+                                SVGA3DBLOCKDESC_W,
+   SVGA3DBLOCKDESC_UVWA       = SVGA3DBLOCKDESC_UVW |
+                                SVGA3DBLOCKDESC_ALPHA,
+   SVGA3DBLOCKDESC_UVWQ       = SVGA3DBLOCKDESC_U |
+                                SVGA3DBLOCKDESC_V |
+                                SVGA3DBLOCKDESC_W |
+                                SVGA3DBLOCKDESC_Q,
+   SVGA3DBLOCKDESC_LA         = SVGA3DBLOCKDESC_LUMINANCE |
+                                SVGA3DBLOCKDESC_ALPHA,
+   SVGA3DBLOCKDESC_R_FP       = SVGA3DBLOCKDESC_RED |
+                                SVGA3DBLOCKDESC_IEEE_FP,
+   SVGA3DBLOCKDESC_RG_FP      = SVGA3DBLOCKDESC_R_FP |
+                                SVGA3DBLOCKDESC_GREEN,
+   SVGA3DBLOCKDESC_RGB_FP     = SVGA3DBLOCKDESC_RG_FP |
+                                SVGA3DBLOCKDESC_BLUE,
+   SVGA3DBLOCKDESC_RGBA_FP    = SVGA3DBLOCKDESC_RGB_FP |
+                                SVGA3DBLOCKDESC_ALPHA,
+   SVGA3DBLOCKDESC_DS         = SVGA3DBLOCKDESC_DEPTH |
+                                SVGA3DBLOCKDESC_STENCIL,
+   SVGA3DBLOCKDESC_YUV        = SVGA3DBLOCKDESC_UV_VIDEO |
+                                SVGA3DBLOCKDESC_Y,
+   SVGA3DBLOCKDESC_AYUV       = SVGA3DBLOCKDESC_ALPHA |
+                                SVGA3DBLOCKDESC_Y |
+                                SVGA3DBLOCKDESC_U_VIDEO |
+                                SVGA3DBLOCKDESC_V_VIDEO,
+   SVGA3DBLOCKDESC_RGBE       = SVGA3DBLOCKDESC_RGB |
+                                SVGA3DBLOCKDESC_EXP,
+   SVGA3DBLOCKDESC_COMPRESSED_SRGB = SVGA3DBLOCKDESC_COMPRESSED |
+                                     SVGA3DBLOCKDESC_SRGB,
+   SVGA3DBLOCKDESC_NV12       = SVGA3DBLOCKDESC_PLANAR_YUV |
+                                SVGA3DBLOCKDESC_2PLANAR_YUV,
+   SVGA3DBLOCKDESC_YV12       = SVGA3DBLOCKDESC_PLANAR_YUV |
+                                SVGA3DBLOCKDESC_3PLANAR_YUV,
 };
 
-static const struct svga3d_surface_desc svga3d_surface_descs[] = {
-	{SVGA3DBLOCKDESC_NONE,
-	 {1, 1, 1},  0, 0, {0, {{0}, {0}, {0}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_FORMAT_INVALID */
-
-	{SVGA3DBLOCKDESC_RGB,
-	 {1, 1, 1},  4, 4, {24, {{8}, {8}, {8}, {0} } },
-	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_X8R8G8B8 */
-
-	{SVGA3DBLOCKDESC_RGBA,
-	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_A8R8G8B8 */
-
-	{SVGA3DBLOCKDESC_RGB,
-	 {1, 1, 1},  2, 2, {16, {{5}, {6}, {5}, {0} } },
-	 {{{0}, {5}, {11}, {0} } } },    /* SVGA3D_R5G6B5 */
-
-	{SVGA3DBLOCKDESC_RGB,
-	 {1, 1, 1},  2, 2, {15, {{5}, {5}, {5}, {0} } },
-	 {{{0}, {5}, {10}, {0} } } },    /* SVGA3D_X1R5G5B5 */
-
-	{SVGA3DBLOCKDESC_RGBA,
-	 {1, 1, 1},  2, 2, {16, {{5}, {5}, {5}, {1} } },
-	 {{{0}, {5}, {10}, {15} } } },   /* SVGA3D_A1R5G5B5 */
-
-	{SVGA3DBLOCKDESC_RGBA,
-	 {1, 1, 1},  2, 2, {16, {{4}, {4}, {4}, {4} } },
-	 {{{0}, {4}, {8}, {12} } } },    /* SVGA3D_A4R4G4B4 */
-
-	{SVGA3DBLOCKDESC_DEPTH,
-	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_Z_D32 */
-
-	{SVGA3DBLOCKDESC_DEPTH,
-	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_Z_D16 */
-
-	{SVGA3DBLOCKDESC_DS,
-	 {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
-	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_Z_D24S8 */
-
-	{SVGA3DBLOCKDESC_DS,
-	 {1, 1, 1},  2, 2, {16, {{0}, {1}, {15}, {0} } },
-	 {{{0}, {15}, {0}, {0} } } },    /* SVGA3D_Z_D15S1 */
-
-	{SVGA3DBLOCKDESC_LUMINANCE,
-	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_LUMINANCE8 */
-
-	{SVGA3DBLOCKDESC_LA,
-	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {4}, {4} } },
-	 {{{0}, {0}, {0}, {4} } } },     /* SVGA3D_LUMINANCE4_ALPHA4 */
-
-	{SVGA3DBLOCKDESC_LUMINANCE,
-	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_LUMINANCE16 */
-
-	{SVGA3DBLOCKDESC_LA,
-	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {8}, {8} } },
-	 {{{0}, {0}, {0}, {8} } } },     /* SVGA3D_LUMINANCE8_ALPHA8 */
-
-	{SVGA3DBLOCKDESC_COMPRESSED,
-	 {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT1 */
-
-	{SVGA3DBLOCKDESC_COMPRESSED,
-	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT2 */
-
-	{SVGA3DBLOCKDESC_COMPRESSED,
-	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT3 */
-
-	{SVGA3DBLOCKDESC_COMPRESSED,
-	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT4 */
-
-	{SVGA3DBLOCKDESC_COMPRESSED,
-	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT5 */
-
-	{SVGA3DBLOCKDESC_UV,
-	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {8}, {8} } },
-	 {{{0}, {0}, {0}, {8} } } },     /* SVGA3D_BUMPU8V8 */
-
-	{SVGA3DBLOCKDESC_UVL,
-	 {1, 1, 1},  2, 2, {16, {{5}, {5}, {6}, {0} } },
-	 {{{11}, {6}, {0}, {0} } } },    /* SVGA3D_BUMPL6V5U5 */
-
-	{SVGA3DBLOCKDESC_UVL,
-	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {0} } },
-	 {{{16}, {8}, {0}, {0} } } },    /* SVGA3D_BUMPX8L8V8U8 */
-
-	{SVGA3DBLOCKDESC_UVL,
-	 {1, 1, 1},  3, 3, {24, {{8}, {8}, {8}, {0} } },
-	 {{{16}, {8}, {0}, {0} } } },    /* SVGA3D_BUMPL8V8U8 */
-
-	{SVGA3DBLOCKDESC_RGBA_FP,
-	 {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
-	 {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_ARGB_S10E5 */
-
-	{SVGA3DBLOCKDESC_RGBA_FP,
-	 {1, 1, 1},  16, 16, {128, {{32}, {32}, {32}, {32} } },
-	 {{{64}, {32}, {0}, {96} } } },  /* SVGA3D_ARGB_S23E8 */
-
-	{SVGA3DBLOCKDESC_RGBA,
-	 {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
-	 {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_A2R10G10B10 */
-
-	{SVGA3DBLOCKDESC_UV,
-	 {1, 1, 1},  2, 2, {16, {{8}, {8}, {0}, {0} } },
-	 {{{8}, {0}, {0}, {0} } } },     /* SVGA3D_V8U8 */
-
-	{SVGA3DBLOCKDESC_UVWQ,
-	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-	 {{{24}, {16}, {8}, {0} } } },   /* SVGA3D_Q8W8V8U8 */
-
-	{SVGA3DBLOCKDESC_UV,
-	 {1, 1, 1},  2, 2, {16, {{8}, {8}, {0}, {0} } },
-	 {{{8}, {0}, {0}, {0} } } },     /* SVGA3D_CxV8U8 */
-
-	{SVGA3DBLOCKDESC_UVL,
-	 {1, 1, 1},  4, 4, {24, {{8}, {8}, {8}, {0} } },
-	 {{{16}, {8}, {0}, {0} } } },    /* SVGA3D_X8L8V8U8 */
-
-	{SVGA3DBLOCKDESC_UVWA,
-	 {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
-	 {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_A2W10V10U10 */
-
-	{SVGA3DBLOCKDESC_ALPHA,
-	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {0}, {8} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_ALPHA8 */
-
-	{SVGA3DBLOCKDESC_R_FP,
-	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R_S10E5 */
-
-	{SVGA3DBLOCKDESC_R_FP,
-	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R_S23E8 */
-
-	{SVGA3DBLOCKDESC_RG_FP,
-	 {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
-	 {{{0}, {16}, {0}, {0} } } },    /* SVGA3D_RG_S10E5 */
-
-	{SVGA3DBLOCKDESC_RG_FP,
-	 {1, 1, 1},  8, 8, {64, {{0}, {32}, {32}, {0} } },
-	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_RG_S23E8 */
-
-	{SVGA3DBLOCKDESC_BUFFER,
-	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BUFFER */
-
-	{SVGA3DBLOCKDESC_DEPTH,
-	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {24}, {0} } },
-	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_Z_D24X8 */
-
-	{SVGA3DBLOCKDESC_UV,
-	 {1, 1, 1},  4, 4, {32, {{16}, {16}, {0}, {0} } },
-	 {{{16}, {0}, {0}, {0} } } },    /* SVGA3D_V16U16 */
-
-	{SVGA3DBLOCKDESC_RG,
-	 {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
-	 {{{0}, {0}, {16}, {0} } } },    /* SVGA3D_G16R16 */
-
-	{SVGA3DBLOCKDESC_RGBA,
-	 {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
-	 {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_A16B16G16R16 */
-
-	{SVGA3DBLOCKDESC_YUV,
-	 {1, 1, 1},  2, 2, {16, {{8}, {0}, {8}, {0} } },
-	 {{{0}, {0}, {8}, {0} } } },     /* SVGA3D_UYVY */
-
-	{SVGA3DBLOCKDESC_YUV,
-	 {1, 1, 1},  2, 2, {16, {{8}, {0}, {8}, {0} } },
-	 {{{8}, {0}, {0}, {0} } } },     /* SVGA3D_YUY2 */
-
-	{SVGA3DBLOCKDESC_NV12,
-	 {2, 2, 1},  6, 2, {48, {{0}, {0}, {48}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_NV12 */
-
-	{SVGA3DBLOCKDESC_AYUV,
-	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_AYUV */
-
-	{SVGA3DBLOCKDESC_RGBA,
-	 {1, 1, 1},  16, 16, {128, {{32}, {32}, {32}, {32} } },
-	 {{{64}, {32}, {0}, {96} } } },  /* SVGA3D_R32G32B32A32_TYPELESS */
-
-	{SVGA3DBLOCKDESC_RGBA,
-	 {1, 1, 1},  16, 16, {128, {{32}, {32}, {32}, {32} } },
-	 {{{64}, {32}, {0}, {96} } } },  /* SVGA3D_R32G32B32A32_UINT */
-
-	{SVGA3DBLOCKDESC_UVWQ,
-	 {1, 1, 1},  16, 16, {128, {{32}, {32}, {32}, {32} } },
-	 {{{64}, {32}, {0}, {96} } } },  /* SVGA3D_R32G32B32A32_SINT */
-
-	{SVGA3DBLOCKDESC_RGB,
-	 {1, 1, 1},  12, 12, {96, {{32}, {32}, {32}, {0} } },
-	 {{{64}, {32}, {0}, {0} } } },   /* SVGA3D_R32G32B32_TYPELESS */
-
-	{SVGA3DBLOCKDESC_RGB_FP,
-	 {1, 1, 1},  12, 12, {96, {{32}, {32}, {32}, {0} } },
-	 {{{64}, {32}, {0}, {0} } } },   /* SVGA3D_R32G32B32_FLOAT */
-
-	{SVGA3DBLOCKDESC_RGB,
-	 {1, 1, 1},  12, 12, {96, {{32}, {32}, {32}, {0} } },
-	 {{{64}, {32}, {0}, {0} } } },   /* SVGA3D_R32G32B32_UINT */
-
-	{SVGA3DBLOCKDESC_UVW,
-	 {1, 1, 1},  12, 12, {96, {{32}, {32}, {32}, {0} } },
-	 {{{64}, {32}, {0}, {0} } } },   /* SVGA3D_R32G32B32_SINT */
-
-	{SVGA3DBLOCKDESC_RGBA,
-	 {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
-	 {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_R16G16B16A16_TYPELESS */
-
-	{SVGA3DBLOCKDESC_RGBA,
-	 {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
-	 {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_R16G16B16A16_UINT */
-
-	{SVGA3DBLOCKDESC_UVWQ,
-	 {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
-	 {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_R16G16B16A16_SNORM */
-
-	{SVGA3DBLOCKDESC_UVWQ,
-	 {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
-	 {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_R16G16B16A16_SINT */
-
-	{SVGA3DBLOCKDESC_RG,
-	 {1, 1, 1},  8, 8, {64, {{0}, {32}, {32}, {0} } },
-	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_R32G32_TYPELESS */
-
-	{SVGA3DBLOCKDESC_RG,
-	 {1, 1, 1},  8, 8, {64, {{0}, {32}, {32}, {0} } },
-	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_R32G32_UINT */
 
-	{SVGA3DBLOCKDESC_UV,
-	 {1, 1, 1},  8, 8, {64, {{0}, {32}, {32}, {0} } },
-	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_R32G32_SINT */
+typedef struct SVGA3dChannelDef {   /* Four uint8 slots; each anonymous union lists the channel names that alias one slot. */
+  union {                           /* slot 0: blue / bump U / video U names */
+      uint8 blue;
+      uint8 u;
+      uint8 uv_video;
+      uint8 u_video;
+   };
+   union {                          /* slot 1: green / bump V / stencil / video V names */
+      uint8 green;
+      uint8 v;
+      uint8 stencil;
+      uint8 v_video;
+   };
+   union {                          /* slot 2: red / bump W / luminance / Y / depth / generic data names */
+      uint8 red;
+      uint8 w;
+      uint8 luminance;
+      uint8 y;
+      uint8 depth;
+      uint8 data;
+   };
+   union {                          /* slot 3: alpha / bump Q / exponent names */
+      uint8 alpha;
+      uint8 q;
+      uint8 exp;
+   };
+} SVGA3dChannelDef;                 /* used for both bitDepth and bitOffset in struct svga3d_surface_desc */
 
-	{SVGA3DBLOCKDESC_RG,
-	 {1, 1, 1},  8, 8, {64, {{0}, {8}, {32}, {0} } },
-	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_R32G8X24_TYPELESS */
-
-	{SVGA3DBLOCKDESC_DS,
-	 {1, 1, 1},  8, 8, {64, {{0}, {8}, {32}, {0} } },
-	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_D32_FLOAT_S8X24_UINT */
-
-	{SVGA3DBLOCKDESC_R_FP,
-	 {1, 1, 1},  8, 8, {64, {{0}, {0}, {32}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },    /* SVGA3D_R32_FLOAT_X8_X24_TYPELESS */
-
-	{SVGA3DBLOCKDESC_GREEN,
-	 {1, 1, 1},  8, 8, {64, {{0}, {8}, {0}, {0} } },
-	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_X32_TYPELESS_G8X24_UINT */
-
-	{SVGA3DBLOCKDESC_RGBA,
-	 {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
-	 {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_R10G10B10A2_TYPELESS */
-
-	{SVGA3DBLOCKDESC_RGBA,
-	 {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
-	 {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_R10G10B10A2_UINT */
-
-	{SVGA3DBLOCKDESC_RGB_FP,
-	 {1, 1, 1},  4, 4, {32, {{10}, {11}, {11}, {0} } },
-	 {{{0}, {10}, {21}, {0} } } },  /* SVGA3D_R11G11B10_FLOAT */
-
-	{SVGA3DBLOCKDESC_RGBA,
-	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-	 {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_TYPELESS */
-
-	{SVGA3DBLOCKDESC_RGBA,
-	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-	 {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_UNORM */
-
-	{SVGA3DBLOCKDESC_RGBA_SRGB,
-	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-	 {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_UNORM_SRGB */
-
-	{SVGA3DBLOCKDESC_RGBA,
-	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-	 {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_UINT */
-
-	{SVGA3DBLOCKDESC_RGBA,
-	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-	 {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_SINT */
-
-	{SVGA3DBLOCKDESC_RG,
-	 {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
-	 {{{0}, {16}, {0}, {0} } } },    /* SVGA3D_R16G16_TYPELESS */
-
-	{SVGA3DBLOCKDESC_RG_FP,
-	 {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
-	 {{{0}, {16}, {0}, {0} } } },    /* SVGA3D_R16G16_UINT */
-
-	{SVGA3DBLOCKDESC_UV,
-	 {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
-	 {{{0}, {16}, {0}, {0} } } },    /* SVGA3D_R16G16_SINT */
-
-	{SVGA3DBLOCKDESC_RED,
-	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R32_TYPELESS */
-
-	{SVGA3DBLOCKDESC_DEPTH,
-	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_D32_FLOAT */
-
-	{SVGA3DBLOCKDESC_RED,
-	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R32_UINT */
-
-	{SVGA3DBLOCKDESC_RED,
-	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R32_SINT */
-
-	{SVGA3DBLOCKDESC_RG,
-	 {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
-	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_R24G8_TYPELESS */
-
-	{SVGA3DBLOCKDESC_DS,
-	 {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
-	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_D24_UNORM_S8_UINT */
-
-	{SVGA3DBLOCKDESC_RED,
-	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {24}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R24_UNORM_X8_TYPELESS */
-
-	{SVGA3DBLOCKDESC_GREEN,
-	 {1, 1, 1},  4, 4, {32, {{0}, {8}, {0}, {0} } },
-	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_X24_TYPELESS_G8_UINT */
-
-	{SVGA3DBLOCKDESC_RG,
-	 {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
-	 {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_TYPELESS */
-
-	{SVGA3DBLOCKDESC_RG,
-	 {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
-	 {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_UNORM */
-
-	{SVGA3DBLOCKDESC_RG,
-	 {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
-	 {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_UINT */
-
-	{SVGA3DBLOCKDESC_UV,
-	 {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
-	 {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_SINT */
-
-	{SVGA3DBLOCKDESC_RED,
-	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_TYPELESS */
-
-	{SVGA3DBLOCKDESC_RED,
-	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_UNORM */
-
-	{SVGA3DBLOCKDESC_RED,
-	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_UINT */
-
-	{SVGA3DBLOCKDESC_U,
-	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_SNORM */
-
-	{SVGA3DBLOCKDESC_U,
-	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_SINT */
-
-	{SVGA3DBLOCKDESC_RED,
-	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_TYPELESS */
-
-	{SVGA3DBLOCKDESC_RED,
-	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_UNORM */
-
-	{SVGA3DBLOCKDESC_RED,
-	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_UINT */
-
-	{SVGA3DBLOCKDESC_U,
-	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_SNORM */
-
-	{SVGA3DBLOCKDESC_U,
-	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_SINT */
-
-	{SVGA3DBLOCKDESC_RED,
-	 {8, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R1_UNORM */
-
-	{SVGA3DBLOCKDESC_RGBE,
-	 {1, 1, 1},  4, 4, {32, {{9}, {9}, {9}, {5} } },
-	 {{{18}, {9}, {0}, {27} } } },   /* SVGA3D_R9G9B9E5_SHAREDEXP */
-
-	{SVGA3DBLOCKDESC_RG,
-	 {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
-	 {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_B8G8_UNORM */
-
-	{SVGA3DBLOCKDESC_RG,
-	 {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
-	 {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_G8R8_G8B8_UNORM */
-
-	{SVGA3DBLOCKDESC_COMPRESSED,
-	 {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC1_TYPELESS */
-
-	{SVGA3DBLOCKDESC_COMPRESSED_SRGB,
-	 {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC1_UNORM_SRGB */
-
-	{SVGA3DBLOCKDESC_COMPRESSED,
-	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC2_TYPELESS */
-
-	{SVGA3DBLOCKDESC_COMPRESSED_SRGB,
-	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC2_UNORM_SRGB */
-
-	{SVGA3DBLOCKDESC_COMPRESSED,
-	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC3_TYPELESS */
-
-	{SVGA3DBLOCKDESC_COMPRESSED_SRGB,
-	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC3_UNORM_SRGB */
-
-	{SVGA3DBLOCKDESC_COMPRESSED,
-	 {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC4_TYPELESS */
-
-	{SVGA3DBLOCKDESC_COMPRESSED,
-	 {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC4_UNORM */
-
-	{SVGA3DBLOCKDESC_COMPRESSED,
-	 {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC4_SNORM */
-
-	{SVGA3DBLOCKDESC_COMPRESSED,
-	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC5_TYPELESS */
-
-	{SVGA3DBLOCKDESC_COMPRESSED,
-	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC5_UNORM */
-
-	{SVGA3DBLOCKDESC_COMPRESSED,
-	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC5_SNORM */
-
-	{SVGA3DBLOCKDESC_RGBA,
-	 {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
-	 {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_R10G10B10_XR_BIAS_A2_UNORM */
-
-	{SVGA3DBLOCKDESC_RGBA,
-	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_B8G8R8A8_TYPELESS */
-
-	{SVGA3DBLOCKDESC_RGBA_SRGB,
-	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_B8G8R8A8_UNORM_SRGB */
-
-	{SVGA3DBLOCKDESC_RGB,
-	 {1, 1, 1},  4, 4, {24, {{8}, {8}, {8}, {0} } },
-	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_B8G8R8X8_TYPELESS */
-
-	{SVGA3DBLOCKDESC_RGB_SRGB,
-	 {1, 1, 1},  4, 4, {24, {{8}, {8}, {8}, {0} } },
-	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_B8G8R8X8_UNORM_SRGB */
-
-	{SVGA3DBLOCKDESC_DEPTH,
-	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_Z_DF16 */
+struct svga3d_surface_desc {          /* One row of svga3d_surface_descs[]: layout description for one surface format. */
+   SVGA3dSurfaceFormat format;        /* SVGA3D_* format this row describes */
+   enum svga3d_block_desc block_desc; /* SVGA3DBLOCKDESC_* flags for the block's contents */
 
-	{SVGA3DBLOCKDESC_DS,
-	 {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
-	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_Z_DF24 */
+   SVGA3dSize block_size;             /* dimensions of one block (three values per table row) */
+   uint32 bytes_per_block;            /* size of one block in bytes */
+   uint32 pitch_bytes_per_block;      /* NOTE(review): presumably the per-block byte count used for pitch math - confirm against users */
 
-	{SVGA3DBLOCKDESC_DS,
-	 {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
-	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_Z_D24S8_INT */
+   uint32 totalBitDepth;              /* total bit depth of the pixel data */
+   SVGA3dChannelDef bitDepth;         /* per-channel bit depths */
+   SVGA3dChannelDef bitOffset;        /* per-channel bit offsets */
+};
 
-	{SVGA3DBLOCKDESC_YV12,
-	 {2, 2, 1},  6, 2, {48, {{0}, {0}, {48}, {0} } },
-	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_YV12 */
+static const struct svga3d_surface_desc svga3d_surface_descs[] = {
+   {SVGA3D_FORMAT_INVALID, SVGA3DBLOCKDESC_NONE,
+      {1, 1, 1},  0, 0,
+      0, {{0}, {0}, {0}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_X8R8G8B8, SVGA3DBLOCKDESC_RGB,
+      {1, 1, 1},  4, 4,
+      24, {{8}, {8}, {8}, {0}},
+      {{0}, {8}, {16}, {24}}},
+
+   {SVGA3D_A8R8G8B8, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{0}, {8}, {16}, {24}}},
+
+   {SVGA3D_R5G6B5, SVGA3DBLOCKDESC_RGB,
+      {1, 1, 1},  2, 2,
+      16, {{5}, {6}, {5}, {0}},
+      {{0}, {5}, {11}, {0}}},
+
+   {SVGA3D_X1R5G5B5, SVGA3DBLOCKDESC_RGB,
+      {1, 1, 1},  2, 2,
+      15, {{5}, {5}, {5}, {0}},
+      {{0}, {5}, {10}, {0}}},
+
+   {SVGA3D_A1R5G5B5, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  2, 2,
+      16, {{5}, {5}, {5}, {1}},
+      {{0}, {5}, {10}, {15}}},
+
+   {SVGA3D_A4R4G4B4, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  2, 2,
+      16, {{4}, {4}, {4}, {4}},
+      {{0}, {4}, {8}, {12}}},
+
+   {SVGA3D_Z_D32, SVGA3DBLOCKDESC_DEPTH,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {0}, {32}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_Z_D16, SVGA3DBLOCKDESC_DEPTH,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_Z_D24S8, SVGA3DBLOCKDESC_DS,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {8}, {24}, {0}},
+      {{0}, {24}, {0}, {0}}},
+
+   {SVGA3D_Z_D15S1, SVGA3DBLOCKDESC_DS,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {1}, {15}, {0}},
+      {{0}, {15}, {0}, {0}}},
+
+   {SVGA3D_LUMINANCE8, SVGA3DBLOCKDESC_LUMINANCE,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {8}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_LUMINANCE4_ALPHA4, SVGA3DBLOCKDESC_LA,
+    {1  , 1, 1},  1, 1,
+      8, {{0}, {0}, {4}, {4}},
+      {{0}, {0}, {0}, {4}}},
+
+   {SVGA3D_LUMINANCE16, SVGA3DBLOCKDESC_LUMINANCE,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_LUMINANCE8_ALPHA8, SVGA3DBLOCKDESC_LA,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {8}, {8}},
+      {{0}, {0}, {0}, {8}}},
+
+   {SVGA3D_DXT1, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  8, 8,
+      64, {{0}, {0}, {64}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_DXT2, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_DXT3, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_DXT4, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_DXT5, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BUMPU8V8, SVGA3DBLOCKDESC_UV,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {8}, {8}},
+      {{0}, {0}, {0}, {8}}},
+
+   {SVGA3D_BUMPL6V5U5, SVGA3DBLOCKDESC_UVL,
+      {1, 1, 1},  2, 2,
+      16, {{5}, {5}, {6}, {0}},
+      {{11}, {6}, {0}, {0}}},
+
+   {SVGA3D_BUMPX8L8V8U8, SVGA3DBLOCKDESC_UVL,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {0}},
+      {{16}, {8}, {0}, {0}}},
+
+   {SVGA3D_FORMAT_DEAD1, SVGA3DBLOCKDESC_UVL,
+      {0, 0, 0},  0, 0,
+       0, {{0}, {0}, {0}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_ARGB_S10E5, SVGA3DBLOCKDESC_RGBA_FP,
+      {1, 1, 1},  8, 8,
+      64, {{16}, {16}, {16}, {16}},
+      {{32}, {16}, {0}, {48}}},
+
+   {SVGA3D_ARGB_S23E8, SVGA3DBLOCKDESC_RGBA_FP,
+      {1, 1, 1},  16, 16,
+      128, {{32}, {32}, {32}, {32}},
+      {{64}, {32}, {0}, {96}}},
+
+   {SVGA3D_A2R10G10B10, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{10}, {10}, {10}, {2}},
+      {{0}, {10}, {20}, {30}}},
+
+   {SVGA3D_V8U8, SVGA3DBLOCKDESC_UV,
+      {1, 1, 1},  2, 2,
+      16, {{8}, {8}, {0}, {0}},
+      {{8}, {0}, {0}, {0}}},
+
+   {SVGA3D_Q8W8V8U8, SVGA3DBLOCKDESC_UVWQ,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{24}, {16}, {8}, {0}}},
+
+   {SVGA3D_CxV8U8, SVGA3DBLOCKDESC_UV,
+      {1, 1, 1},  2, 2,
+      16, {{8}, {8}, {0}, {0}},
+      {{8}, {0}, {0}, {0}}},
+
+   {SVGA3D_X8L8V8U8, SVGA3DBLOCKDESC_UVL,
+      {1, 1, 1},  4, 4,
+      24, {{8}, {8}, {8}, {0}},
+      {{16}, {8}, {0}, {0}}},
+
+   {SVGA3D_A2W10V10U10, SVGA3DBLOCKDESC_UVWA,
+      {1, 1, 1},  4, 4,
+      32, {{10}, {10}, {10}, {2}},
+      {{0}, {10}, {20}, {30}}},
+
+   {SVGA3D_ALPHA8, SVGA3DBLOCKDESC_ALPHA,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {0}, {8}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R_S10E5, SVGA3DBLOCKDESC_R_FP,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R_S23E8, SVGA3DBLOCKDESC_R_FP,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {0}, {32}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_RG_S10E5, SVGA3DBLOCKDESC_RG_FP,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {16}, {16}, {0}},
+      {{0}, {16}, {0}, {0}}},
+
+   {SVGA3D_RG_S23E8, SVGA3DBLOCKDESC_RG_FP,
+      {1, 1, 1},  8, 8,
+      64, {{0}, {32}, {32}, {0}},
+      {{0}, {32}, {0}, {0}}},
+
+   {SVGA3D_BUFFER, SVGA3DBLOCKDESC_BUFFER,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {8}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_Z_D24X8, SVGA3DBLOCKDESC_DEPTH,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {0}, {24}, {0}},
+      {{0}, {24}, {0}, {0}}},
+
+   {SVGA3D_V16U16, SVGA3DBLOCKDESC_UV,
+      {1, 1, 1},  4, 4,
+      32, {{16}, {16}, {0}, {0}},
+      {{16}, {0}, {0}, {0}}},
+
+   {SVGA3D_G16R16, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {16}, {16}, {0}},
+      {{0}, {0}, {16}, {0}}},
+
+   {SVGA3D_A16B16G16R16, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  8, 8,
+      64, {{16}, {16}, {16}, {16}},
+      {{32}, {16}, {0}, {48}}},
+
+   {SVGA3D_UYVY, SVGA3DBLOCKDESC_YUV,
+      {1, 1, 1},  2, 2,
+      16, {{8}, {0}, {8}, {0}},
+      {{0}, {0}, {8}, {0}}},
+
+   {SVGA3D_YUY2, SVGA3DBLOCKDESC_YUV,
+      {1, 1, 1},  2, 2,
+      16, {{8}, {0}, {8}, {0}},
+      {{8}, {0}, {0}, {0}}},
+
+   {SVGA3D_NV12, SVGA3DBLOCKDESC_NV12,
+      {2, 2, 1},  6, 2,
+      48, {{0}, {0}, {48}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_AYUV, SVGA3DBLOCKDESC_AYUV,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{0}, {8}, {16}, {24}}},
+
+   {SVGA3D_R32G32B32A32_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  16, 16,
+      128, {{32}, {32}, {32}, {32}},
+      {{64}, {32}, {0}, {96}}},
+
+   {SVGA3D_R32G32B32A32_UINT, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  16, 16,
+      128, {{32}, {32}, {32}, {32}},
+      {{64}, {32}, {0}, {96}}},
+
+   {SVGA3D_R32G32B32A32_SINT, SVGA3DBLOCKDESC_UVWQ,
+      {1, 1, 1},  16, 16,
+      128, {{32}, {32}, {32}, {32}},
+      {{64}, {32}, {0}, {96}}},
+
+   {SVGA3D_R32G32B32_TYPELESS, SVGA3DBLOCKDESC_RGB,
+      {1, 1, 1},  12, 12,
+      96, {{32}, {32}, {32}, {0}},
+      {{64}, {32}, {0}, {0}}},
+
+   {SVGA3D_R32G32B32_FLOAT, SVGA3DBLOCKDESC_RGB_FP,
+      {1, 1, 1},  12, 12,
+      96, {{32}, {32}, {32}, {0}},
+      {{64}, {32}, {0}, {0}}},
+
+   {SVGA3D_R32G32B32_UINT, SVGA3DBLOCKDESC_RGB,
+      {1, 1, 1},  12, 12,
+      96, {{32}, {32}, {32}, {0}},
+      {{64}, {32}, {0}, {0}}},
+
+   {SVGA3D_R32G32B32_SINT, SVGA3DBLOCKDESC_UVW,
+      {1, 1, 1},  12, 12,
+      96, {{32}, {32}, {32}, {0}},
+      {{64}, {32}, {0}, {0}}},
+
+   {SVGA3D_R16G16B16A16_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  8, 8,
+      64, {{16}, {16}, {16}, {16}},
+      {{32}, {16}, {0}, {48}}},
+
+   {SVGA3D_R16G16B16A16_UINT, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  8, 8,
+      64, {{16}, {16}, {16}, {16}},
+      {{32}, {16}, {0}, {48}}},
+
+   {SVGA3D_R16G16B16A16_SNORM, SVGA3DBLOCKDESC_UVWQ,
+      {1, 1, 1},  8, 8,
+      64, {{16}, {16}, {16}, {16}},
+      {{32}, {16}, {0}, {48}}},
+
+   {SVGA3D_R16G16B16A16_SINT, SVGA3DBLOCKDESC_UVWQ,
+      {1, 1, 1},  8, 8,
+      64, {{16}, {16}, {16}, {16}},
+      {{32}, {16}, {0}, {48}}},
+
+   {SVGA3D_R32G32_TYPELESS, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  8, 8,
+      64, {{0}, {32}, {32}, {0}},
+      {{0}, {32}, {0}, {0}}},
+
+   {SVGA3D_R32G32_UINT, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  8, 8,
+      64, {{0}, {32}, {32}, {0}},
+      {{0}, {32}, {0}, {0}}},
+
+   {SVGA3D_R32G32_SINT, SVGA3DBLOCKDESC_UV,
+      {1, 1, 1},  8, 8,
+      64, {{0}, {32}, {32}, {0}},
+      {{0}, {32}, {0}, {0}}},
+
+   {SVGA3D_R32G8X24_TYPELESS, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  8, 8,
+      64, {{0}, {8}, {32}, {0}},
+      {{0}, {32}, {0}, {0}}},
+
+   {SVGA3D_D32_FLOAT_S8X24_UINT, SVGA3DBLOCKDESC_DS,
+      {1, 1, 1},  8, 8,
+      64, {{0}, {8}, {32}, {0}},
+      {{0}, {32}, {0}, {0}}},
+
+   {SVGA3D_R32_FLOAT_X8X24_TYPELESS, SVGA3DBLOCKDESC_R_FP,
+      {1, 1, 1},  8, 8,
+      64, {{0}, {0}, {32}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_X32_TYPELESS_G8X24_UINT, SVGA3DBLOCKDESC_GREEN,
+      {1, 1, 1},  8, 8,
+      64, {{0}, {8}, {0}, {0}},
+      {{0}, {32}, {0}, {0}}},
+
+   {SVGA3D_R10G10B10A2_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{10}, {10}, {10}, {2}},
+      {{0}, {10}, {20}, {30}}},
+
+   {SVGA3D_R10G10B10A2_UINT, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{10}, {10}, {10}, {2}},
+      {{0}, {10}, {20}, {30}}},
+
+   {SVGA3D_R11G11B10_FLOAT, SVGA3DBLOCKDESC_RGB_FP,
+      {1, 1, 1},  4, 4,
+      32, {{10}, {11}, {11}, {0}},
+      {{0}, {10}, {21}, {0}}},
+
+   {SVGA3D_R8G8B8A8_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{16}, {8}, {0}, {24}}},
+
+   {SVGA3D_R8G8B8A8_UNORM, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{16}, {8}, {0}, {24}}},
+
+   {SVGA3D_R8G8B8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_SRGB,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{16}, {8}, {0}, {24}}},
+
+   {SVGA3D_R8G8B8A8_UINT, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{16}, {8}, {0}, {24}}},
+
+   {SVGA3D_R8G8B8A8_SINT, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{16}, {8}, {0}, {24}}},
+
+   {SVGA3D_R16G16_TYPELESS, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {16}, {16}, {0}},
+      {{0}, {16}, {0}, {0}}},
+
+   {SVGA3D_R16G16_UINT, SVGA3DBLOCKDESC_RG_FP,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {16}, {16}, {0}},
+      {{0}, {16}, {0}, {0}}},
+
+   {SVGA3D_R16G16_SINT, SVGA3DBLOCKDESC_UV,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {16}, {16}, {0}},
+      {{0}, {16}, {0}, {0}}},
+
+   {SVGA3D_R32_TYPELESS, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {0}, {32}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_D32_FLOAT, SVGA3DBLOCKDESC_DEPTH,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {0}, {32}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R32_UINT, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {0}, {32}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R32_SINT, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {0}, {32}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R24G8_TYPELESS, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {8}, {24}, {0}},
+      {{0}, {24}, {0}, {0}}},
+
+   {SVGA3D_D24_UNORM_S8_UINT, SVGA3DBLOCKDESC_DS,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {8}, {24}, {0}},
+      {{0}, {24}, {0}, {0}}},
+
+   {SVGA3D_R24_UNORM_X8_TYPELESS, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {0}, {24}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_X24_TYPELESS_G8_UINT, SVGA3DBLOCKDESC_GREEN,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {8}, {0}, {0}},
+      {{0}, {24}, {0}, {0}}},
+
+   {SVGA3D_R8G8_TYPELESS, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {8}, {8}, {0}},
+      {{0}, {8}, {0}, {0}}},
+
+   {SVGA3D_R8G8_UNORM, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {8}, {8}, {0}},
+      {{0}, {8}, {0}, {0}}},
+
+   {SVGA3D_R8G8_UINT, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {8}, {8}, {0}},
+      {{0}, {8}, {0}, {0}}},
+
+   {SVGA3D_R8G8_SINT, SVGA3DBLOCKDESC_UV,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {8}, {8}, {0}},
+      {{0}, {8}, {0}, {0}}},
+
+   {SVGA3D_R16_TYPELESS, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R16_UNORM, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R16_UINT, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R16_SNORM, SVGA3DBLOCKDESC_U,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R16_SINT, SVGA3DBLOCKDESC_U,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R8_TYPELESS, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {8}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R8_UNORM, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {8}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R8_UINT, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {8}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R8_SNORM, SVGA3DBLOCKDESC_U,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {8}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R8_SINT, SVGA3DBLOCKDESC_U,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {8}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_P8, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {8}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R9G9B9E5_SHAREDEXP, SVGA3DBLOCKDESC_RGBE,
+      {1, 1, 1},  4, 4,
+      32, {{9}, {9}, {9}, {5}},
+      {{18}, {9}, {0}, {27}}},
+
+   {SVGA3D_R8G8_B8G8_UNORM, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {8}, {8}, {0}},
+      {{0}, {8}, {0}, {0}}},
+
+   {SVGA3D_G8R8_G8B8_UNORM, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {8}, {8}, {0}},
+      {{0}, {8}, {0}, {0}}},
+
+   {SVGA3D_BC1_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  8, 8,
+      64, {{0}, {0}, {64}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC1_UNORM_SRGB, SVGA3DBLOCKDESC_COMPRESSED_SRGB,
+      {4, 4, 1},  8, 8,
+      64, {{0}, {0}, {64}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC2_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC2_UNORM_SRGB, SVGA3DBLOCKDESC_COMPRESSED_SRGB,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC3_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC3_UNORM_SRGB, SVGA3DBLOCKDESC_COMPRESSED_SRGB,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC4_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  8, 8,
+      64, {{0}, {0}, {64}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_ATI1, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  8, 8,
+      64, {{0}, {0}, {64}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC4_SNORM, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  8, 8,
+      64, {{0}, {0}, {64}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC5_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_ATI2, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC5_SNORM, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R10G10B10_XR_BIAS_A2_UNORM, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{10}, {10}, {10}, {2}},
+      {{0}, {10}, {20}, {30}}},
+
+   {SVGA3D_B8G8R8A8_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{0}, {8}, {16}, {24}}},
+
+   {SVGA3D_B8G8R8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_SRGB,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{0}, {8}, {16}, {24}}},
+
+   {SVGA3D_B8G8R8X8_TYPELESS, SVGA3DBLOCKDESC_RGB,
+      {1, 1, 1},  4, 4,
+      24, {{8}, {8}, {8}, {0}},
+      {{0}, {8}, {16}, {24}}},
+
+   {SVGA3D_B8G8R8X8_UNORM_SRGB, SVGA3DBLOCKDESC_RGB_SRGB,
+      {1, 1, 1},  4, 4,
+      24, {{8}, {8}, {8}, {0}},
+      {{0}, {8}, {16}, {24}}},
+
+   {SVGA3D_Z_DF16, SVGA3DBLOCKDESC_DEPTH,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_Z_DF24, SVGA3DBLOCKDESC_DEPTH,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {8}, {24}, {0}},
+      {{0}, {24}, {0}, {0}}},
+
+   {SVGA3D_Z_D24S8_INT, SVGA3DBLOCKDESC_DS,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {8}, {24}, {0}},
+      {{0}, {24}, {0}, {0}}},
+
+   {SVGA3D_YV12, SVGA3DBLOCKDESC_YV12,
+      {2, 2, 1},  6, 2,
+      48, {{0}, {0}, {48}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R32G32B32A32_FLOAT, SVGA3DBLOCKDESC_RGBA_FP,
+      {1, 1, 1},  16, 16,
+      128, {{32}, {32}, {32}, {32}},
+      {{64}, {32}, {0}, {96}}},
+
+   {SVGA3D_R16G16B16A16_FLOAT, SVGA3DBLOCKDESC_RGBA_FP,
+      {1, 1, 1},  8, 8,
+      64, {{16}, {16}, {16}, {16}},
+      {{32}, {16}, {0}, {48}}},
+
+   {SVGA3D_R16G16B16A16_UNORM, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  8, 8,
+      64, {{16}, {16}, {16}, {16}},
+      {{32}, {16}, {0}, {48}}},
+
+   {SVGA3D_R32G32_FLOAT, SVGA3DBLOCKDESC_RG_FP,
+      {1, 1, 1},  8, 8,
+      64, {{0}, {32}, {32}, {0}},
+      {{0}, {32}, {0}, {0}}},
+
+   {SVGA3D_R10G10B10A2_UNORM, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{10}, {10}, {10}, {2}},
+      {{0}, {10}, {20}, {30}}},
+
+   {SVGA3D_R8G8B8A8_SNORM, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{24}, {16}, {8}, {0}}},
+
+   {SVGA3D_R16G16_FLOAT, SVGA3DBLOCKDESC_RG_FP,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {16}, {16}, {0}},
+      {{0}, {16}, {0}, {0}}},
+
+   {SVGA3D_R16G16_UNORM, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {16}, {16}, {0}},
+      {{0}, {0}, {16}, {0}}},
+
+   {SVGA3D_R16G16_SNORM, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  4, 4,
+      32, {{16}, {16}, {0}, {0}},
+      {{16}, {0}, {0}, {0}}},
+
+   {SVGA3D_R32_FLOAT, SVGA3DBLOCKDESC_R_FP,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {0}, {32}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R8G8_SNORM, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  2, 2,
+      16, {{8}, {8}, {0}, {0}},
+      {{8}, {0}, {0}, {0}}},
+
+   {SVGA3D_R16_FLOAT, SVGA3DBLOCKDESC_R_FP,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_D16_UNORM, SVGA3DBLOCKDESC_DEPTH,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_A8_UNORM, SVGA3DBLOCKDESC_ALPHA,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {0}, {8}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC1_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  8, 8,
+      64, {{0}, {0}, {64}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC2_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC3_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_B5G6R5_UNORM, SVGA3DBLOCKDESC_RGB,
+      {1, 1, 1},  2, 2,
+      16, {{5}, {6}, {5}, {0}},
+      {{0}, {5}, {11}, {0}}},
+
+   {SVGA3D_B5G5R5A1_UNORM, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  2, 2,
+      16, {{5}, {5}, {5}, {1}},
+      {{0}, {5}, {10}, {15}}},
+
+   {SVGA3D_B8G8R8A8_UNORM, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{0}, {8}, {16}, {24}}},
+
+   {SVGA3D_B8G8R8X8_UNORM, SVGA3DBLOCKDESC_RGB,
+      {1, 1, 1},  4, 4,
+      24, {{8}, {8}, {8}, {0}},
+      {{0}, {8}, {16}, {24}}},
+
+   {SVGA3D_BC4_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  8, 8,
+      64, {{0}, {0}, {64}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC5_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
 };
 
 
@@ -704,6 +912,16 @@ static inline uint32 clamped_umul32(uint32 a, uint32 b)
 	return (tmp > (uint64_t) ((uint32) -1)) ? (uint32) -1 : tmp;
 }
 
+static inline uint32 clamped_uadd32(uint32 a, uint32 b) /* a + b, or MAX_UINT32 when the sum wraps */
+{
+	uint32 c = a + b;
+	if (c < a || c < b) { /* sum below an operand means it wrapped (assumes uint32 is unsigned) */
+		return MAX_UINT32;
+	}
+	return c;
+}
+
+
 static inline const struct svga3d_surface_desc *
 svga3dsurface_get_desc(SVGA3dSurfaceFormat format)
 {
@@ -828,7 +1046,7 @@ static inline uint32
 svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
                                SVGA3dSize baseLevelSize,
                                uint32 numMipLevels,
-                               uint32 face,
+                               uint32 layer,
                                uint32 mip)
 
 {
@@ -853,7 +1071,7 @@ svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
       }
    }
 
-   offset = mipChainBytes * face + mipChainBytesToLevel;
+   offset = mipChainBytes * layer + mipChainBytesToLevel;
 
    return offset;
 }
@@ -863,7 +1081,7 @@ static inline uint32
 svga3dsurface_get_serialized_size(SVGA3dSurfaceFormat format,
 				  SVGA3dSize base_level_size,
 				  uint32 num_mip_levels,
-				  bool cubemap)
+                                  uint32 num_layers)
 {
 	const struct svga3d_surface_desc *desc = svga3dsurface_get_desc(format);
 	uint64_t total_size = 0;
@@ -876,8 +1094,7 @@ svga3dsurface_get_serialized_size(SVGA3dSurfaceFormat format,
 								  &size, 0);
 	}
 
-	if (cubemap)
-		total_size *= SVGA3D_MAX_SURFACE_FACES;
+	total_size *= num_layers;
 
 	return (total_size > (uint64_t) MAX_UINT32) ? MAX_UINT32 : 
                                                       (uint32) total_size;
diff --git a/src/gallium/drivers/svga/include/svga3d_types.h b/src/gallium/drivers/svga/include/svga3d_types.h
index fc4a6b95a2e..de711c388c6 100644
--- a/src/gallium/drivers/svga/include/svga3d_types.h
+++ b/src/gallium/drivers/svga/include/svga3d_types.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 1998-2014 VMware, Inc.  All rights reserved.
+ * Copyright 2007-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -43,10 +43,6 @@
  */
 
 #define SVGA3D_INVALID_ID         ((uint32)-1)
-#define SVGA3D_INVALID_CID        SVGA3D_INVALID_ID
-#define SVGA3D_INVALID_SID        SVGA3D_INVALID_ID
-#define SVGA3D_INVALID_SHID       SVGA3D_INVALID_ID
-
 
 typedef uint32 SVGA3dBool; /* 32-bit Bool definition */
 typedef uint32 SVGA3dColor; /* a, r, g, b */
@@ -116,13 +112,7 @@ SVGA3dPoint;
 
 /*
  * Surface formats.
- *
- * If you modify this list, be sure to keep GLUtil.c in sync. It
- * includes the internal format definition of each surface in
- * GLUtil_ConvertSurfaceFormat, and it contains a table of
- * human-readable names in GLUtil_GetFormatName.
  */
-
 typedef enum SVGA3dSurfaceFormat {
    SVGA3D_FORMAT_INVALID               = 0,
 
@@ -155,7 +145,7 @@ typedef enum SVGA3dSurfaceFormat {
    SVGA3D_BUMPU8V8                     = 20,
    SVGA3D_BUMPL6V5U5                   = 21,
    SVGA3D_BUMPX8L8V8U8                 = 22,
-   SVGA3D_BUMPL8V8U8                   = 23,
+   SVGA3D_FORMAT_DEAD1                 = 23,
 
    SVGA3D_ARGB_S10E5                   = 24,   /* 16-bit floating-point ARGB */
    SVGA3D_ARGB_S23E8                   = 25,   /* 32-bit floating-point ARGB */
@@ -271,7 +261,7 @@ typedef enum SVGA3dSurfaceFormat {
    SVGA3D_B8G8R8X8_TYPELESS            = 116,
    SVGA3D_B8G8R8X8_UNORM_SRGB          = 117,
 
-   /* Advanced D3D9 depth formats. */
+   /* Advanced depth formats. */
    SVGA3D_Z_DF16                       = 118,
    SVGA3D_Z_DF24                       = 119,
    SVGA3D_Z_D24S8_INT                  = 120,
@@ -306,13 +296,157 @@ typedef enum SVGA3dSurfaceFormat {
    SVGA3D_FORMAT_MAX
 } SVGA3dSurfaceFormat;
 
+typedef uint32 SVGA3dSurfaceFlags;
+#define SVGA3D_SURFACE_CUBEMAP                (1 << 0)
+
+/*
+ * HINT flags are not enforced by the device but are useful for
+ * performance.
+ */
+#define SVGA3D_SURFACE_HINT_STATIC            (1 << 1)
+#define SVGA3D_SURFACE_HINT_DYNAMIC           (1 << 2)
+#define SVGA3D_SURFACE_HINT_INDEXBUFFER       (1 << 3)
+#define SVGA3D_SURFACE_HINT_VERTEXBUFFER      (1 << 4)
+#define SVGA3D_SURFACE_HINT_TEXTURE           (1 << 5)
+#define SVGA3D_SURFACE_HINT_RENDERTARGET      (1 << 6)
+#define SVGA3D_SURFACE_HINT_DEPTHSTENCIL      (1 << 7)
+#define SVGA3D_SURFACE_HINT_WRITEONLY         (1 << 8)
+#define SVGA3D_SURFACE_MASKABLE_ANTIALIAS     (1 << 9)
+#define SVGA3D_SURFACE_AUTOGENMIPMAPS         (1 << 10)
+#define SVGA3D_SURFACE_DECODE_RENDERTARGET    (1 << 11)
+
+/*
+ * Is this surface using a base-level pitch for its mob backing?
+ *
+ * This flag is not intended to be set by guest-drivers, but is instead
+ * set by the device when the surface is bound to a mob with a specified
+ * pitch.
+ */
+#define SVGA3D_SURFACE_MOB_PITCH              (1 << 12)
+
+#define SVGA3D_SURFACE_INACTIVE               (1 << 13)
+#define SVGA3D_SURFACE_HINT_RT_LOCKABLE       (1 << 14)
+#define SVGA3D_SURFACE_VOLUME                 (1 << 15)
+
+/*
+ * Required to be set on a surface to bind it to a screen target.
+ */
+#define SVGA3D_SURFACE_SCREENTARGET           (1 << 16)
+
+/*
+ * Align images in the guest-backing mob to 16-bytes.
+ */
+#define SVGA3D_SURFACE_ALIGN16                (1 << 17)
+
+#define SVGA3D_SURFACE_1D                     (1 << 18)
+#define SVGA3D_SURFACE_ARRAY                  (1 << 19)
+
+/*
+ * Bind flags.
+ * These are enforced for any surface defined with DefineGBSurface_v2.
+ */
+#define SVGA3D_SURFACE_BIND_VERTEX_BUFFER     (1 << 20)
+#define SVGA3D_SURFACE_BIND_INDEX_BUFFER      (1 << 21)
+#define SVGA3D_SURFACE_BIND_CONSTANT_BUFFER   (1 << 22)
+#define SVGA3D_SURFACE_BIND_SHADER_RESOURCE   (1 << 23)
+#define SVGA3D_SURFACE_BIND_RENDER_TARGET     (1 << 24)
+#define SVGA3D_SURFACE_BIND_DEPTH_STENCIL     (1 << 25)
+#define SVGA3D_SURFACE_BIND_STREAM_OUTPUT     (1 << 26)
+
+/*
+ * The STAGING flags note that the surface will not be used directly by the
+ * drawing pipeline, i.e. that it will not be bound to any bind point.
+ * Staging surfaces may be used by copy operations to move data in and out
+ * of other surfaces.  No bind flags may be set on surfaces with this flag.
+ *
+ * The HINT_INDIRECT_UPDATE flag suggests that the surface will receive
+ * updates indirectly, i.e. the surface will not be updated directly, but
+ * will receive copies from staging surfaces.
+ */
+#define SVGA3D_SURFACE_STAGING_UPLOAD         (1 << 27)
+#define SVGA3D_SURFACE_STAGING_DOWNLOAD       (1 << 28)
+#define SVGA3D_SURFACE_HINT_INDIRECT_UPDATE   (1 << 29)
+
+/*
+ * Setting this flag allows this surface to be used with the
+ * SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER command.  It is only valid for
+ * buffer surfaces, and no bind flags are allowed to be set on surfaces
+ * with this flag.
+ */
+#define SVGA3D_SURFACE_TRANSFER_FROM_BUFFER   (1 << 30)
 
 /*
- * These are really the D3DFORMAT_OP defines from the wdk. We need
- * them so that we can query the host for what the supported surface
- * operations are (when we're using the D3D backend, in particular),
- * and so we can send those operations to the guest.
+ * Marker for the last defined bit in SVGA3dSurfaceFlags.
  */
+#define SVGA3D_SURFACE_FLAG_MAX               (1 << 31) /* NOTE(review): 1 << 31 overflows a 32-bit signed int; consider 1u << 31 */
+
+#define SVGA3D_SURFACE_HB_DISALLOWED_MASK        \
+        (  SVGA3D_SURFACE_MOB_PITCH    |         \
+           SVGA3D_SURFACE_SCREENTARGET |         \
+           SVGA3D_SURFACE_ALIGN16 |              \
+           SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \
+           SVGA3D_SURFACE_BIND_STREAM_OUTPUT |   \
+           SVGA3D_SURFACE_STAGING_UPLOAD |       \
+           SVGA3D_SURFACE_STAGING_DOWNLOAD |     \
+           SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \
+           SVGA3D_SURFACE_TRANSFER_FROM_BUFFER   \
+        )
+
+#define SVGA3D_SURFACE_2D_DISALLOWED_MASK           \
+        (  SVGA3D_SURFACE_CUBEMAP |                 \
+           SVGA3D_SURFACE_MASKABLE_ANTIALIAS |      \
+           SVGA3D_SURFACE_AUTOGENMIPMAPS |          \
+           SVGA3D_SURFACE_DECODE_RENDERTARGET |     \
+           SVGA3D_SURFACE_VOLUME |                  \
+           SVGA3D_SURFACE_1D |                      \
+           SVGA3D_SURFACE_BIND_VERTEX_BUFFER |      \
+           SVGA3D_SURFACE_BIND_INDEX_BUFFER |       \
+           SVGA3D_SURFACE_BIND_CONSTANT_BUFFER |    \
+           SVGA3D_SURFACE_BIND_DEPTH_STENCIL |      \
+           SVGA3D_SURFACE_BIND_STREAM_OUTPUT |      \
+           SVGA3D_SURFACE_TRANSFER_FROM_BUFFER      \
+        )
+
+#define SVGA3D_SURFACE_SCREENTARGET_DISALLOWED_MASK \
+        (  SVGA3D_SURFACE_CUBEMAP |                 \
+           SVGA3D_SURFACE_AUTOGENMIPMAPS |          \
+           SVGA3D_SURFACE_DECODE_RENDERTARGET |     \
+           SVGA3D_SURFACE_VOLUME |                  \
+           SVGA3D_SURFACE_1D |                      \
+           SVGA3D_SURFACE_BIND_VERTEX_BUFFER |      \
+           SVGA3D_SURFACE_BIND_INDEX_BUFFER |       \
+           SVGA3D_SURFACE_BIND_CONSTANT_BUFFER |    \
+           SVGA3D_SURFACE_BIND_DEPTH_STENCIL |      \
+           SVGA3D_SURFACE_BIND_STREAM_OUTPUT |      \
+           SVGA3D_SURFACE_INACTIVE |                \
+           SVGA3D_SURFACE_STAGING_UPLOAD |          \
+           SVGA3D_SURFACE_STAGING_DOWNLOAD |        \
+           SVGA3D_SURFACE_HINT_INDIRECT_UPDATE |    \
+           SVGA3D_SURFACE_TRANSFER_FROM_BUFFER      \
+        )
+
+#define SVGA3D_SURFACE_DX_ONLY_MASK             \
+        (  SVGA3D_SURFACE_BIND_STREAM_OUTPUT |  \
+           SVGA3D_SURFACE_STAGING_UPLOAD |      \
+           SVGA3D_SURFACE_STAGING_DOWNLOAD |    \
+           SVGA3D_SURFACE_TRANSFER_FROM_BUFFER  \
+        )
+
+#define SVGA3D_SURFACE_STAGING_MASK             \
+        (  SVGA3D_SURFACE_STAGING_UPLOAD |      \
+           SVGA3D_SURFACE_STAGING_DOWNLOAD      \
+        )
+
+#define SVGA3D_SURFACE_BIND_MASK                  \
+        (  SVGA3D_SURFACE_BIND_VERTEX_BUFFER   |  \
+           SVGA3D_SURFACE_BIND_INDEX_BUFFER    |  \
+           SVGA3D_SURFACE_BIND_CONSTANT_BUFFER |  \
+           SVGA3D_SURFACE_BIND_SHADER_RESOURCE |  \
+           SVGA3D_SURFACE_BIND_RENDER_TARGET   |  \
+           SVGA3D_SURFACE_BIND_DEPTH_STENCIL   |  \
+           SVGA3D_SURFACE_BIND_STREAM_OUTPUT      \
+        )
+
 typedef enum {
    SVGA3DFORMAT_OP_TEXTURE                               = 0x00000001,
    SVGA3DFORMAT_OP_VOLUMETEXTURE                         = 0x00000002,
@@ -656,25 +790,27 @@ union {
 SVGA3dLinePattern;
 
 typedef enum {
-   SVGA3D_BLENDOP_INVALID            = 0,
-   SVGA3D_BLENDOP_MIN                = 1,
-   SVGA3D_BLENDOP_ZERO               = 1,
-   SVGA3D_BLENDOP_ONE                = 2,
-   SVGA3D_BLENDOP_SRCCOLOR           = 3,
-   SVGA3D_BLENDOP_INVSRCCOLOR        = 4,
-   SVGA3D_BLENDOP_SRCALPHA           = 5,
-   SVGA3D_BLENDOP_INVSRCALPHA        = 6,
-   SVGA3D_BLENDOP_DESTALPHA          = 7,
-   SVGA3D_BLENDOP_INVDESTALPHA       = 8,
-   SVGA3D_BLENDOP_DESTCOLOR          = 9,
-   SVGA3D_BLENDOP_INVDESTCOLOR       = 10,
-   SVGA3D_BLENDOP_SRCALPHASAT        = 11,
-   SVGA3D_BLENDOP_BLENDFACTOR        = 12,
-   SVGA3D_BLENDOP_INVBLENDFACTOR     = 13,
-   SVGA3D_BLENDOP_SRC1COLOR          = 14,
-   SVGA3D_BLENDOP_INVSRC1COLOR       = 15,
-   SVGA3D_BLENDOP_SRC1ALPHA          = 16,
-   SVGA3D_BLENDOP_INVSRC1ALPHA       = 17,
+   SVGA3D_BLENDOP_INVALID             = 0,
+   SVGA3D_BLENDOP_MIN                 = 1,
+   SVGA3D_BLENDOP_ZERO                = 1,
+   SVGA3D_BLENDOP_ONE                 = 2,
+   SVGA3D_BLENDOP_SRCCOLOR            = 3,
+   SVGA3D_BLENDOP_INVSRCCOLOR         = 4,
+   SVGA3D_BLENDOP_SRCALPHA            = 5,
+   SVGA3D_BLENDOP_INVSRCALPHA         = 6,
+   SVGA3D_BLENDOP_DESTALPHA           = 7,
+   SVGA3D_BLENDOP_INVDESTALPHA        = 8,
+   SVGA3D_BLENDOP_DESTCOLOR           = 9,
+   SVGA3D_BLENDOP_INVDESTCOLOR        = 10,
+   SVGA3D_BLENDOP_SRCALPHASAT         = 11,
+   SVGA3D_BLENDOP_BLENDFACTOR         = 12,
+   SVGA3D_BLENDOP_INVBLENDFACTOR      = 13,
+   SVGA3D_BLENDOP_SRC1COLOR           = 14,
+   SVGA3D_BLENDOP_INVSRC1COLOR        = 15,
+   SVGA3D_BLENDOP_SRC1ALPHA           = 16,
+   SVGA3D_BLENDOP_INVSRC1ALPHA        = 17,
+   SVGA3D_BLENDOP_BLENDFACTORALPHA    = 18,
+   SVGA3D_BLENDOP_INVBLENDFACTORALPHA = 19,
    SVGA3D_BLENDOP_MAX
 } SVGA3dBlendOp;
 
@@ -689,6 +825,27 @@ typedef enum {
    SVGA3D_BLENDEQ_MAX
 } SVGA3dBlendEquation;
 
+typedef enum {
+   SVGA3D_DX11_LOGICOP_MIN           = 0, /* same value as CLEAR, the first op */
+   SVGA3D_DX11_LOGICOP_CLEAR         = 0,
+   SVGA3D_DX11_LOGICOP_SET           = 1,
+   SVGA3D_DX11_LOGICOP_COPY          = 2,
+   SVGA3D_DX11_LOGICOP_COPY_INVERTED = 3,
+   SVGA3D_DX11_LOGICOP_NOOP          = 4,
+   SVGA3D_DX11_LOGICOP_INVERT        = 5,
+   SVGA3D_DX11_LOGICOP_AND           = 6,
+   SVGA3D_DX11_LOGICOP_NAND          = 7,
+   SVGA3D_DX11_LOGICOP_OR            = 8,
+   SVGA3D_DX11_LOGICOP_NOR           = 9,
+   SVGA3D_DX11_LOGICOP_XOR           = 10,
+   SVGA3D_DX11_LOGICOP_EQUIV         = 11,
+   SVGA3D_DX11_LOGICOP_AND_REVERSE   = 12,
+   SVGA3D_DX11_LOGICOP_AND_INVERTED  = 13,
+   SVGA3D_DX11_LOGICOP_OR_REVERSE    = 14,
+   SVGA3D_DX11_LOGICOP_OR_INVERTED   = 15,
+   SVGA3D_DX11_LOGICOP_MAX /* implicit 16: one past OR_INVERTED */
+} SVGA3dDX11LogicOp;
+
 typedef enum {
    SVGA3D_FRONTWINDING_INVALID = 0,
    SVGA3D_FRONTWINDING_CW      = 1,
@@ -952,10 +1109,10 @@ typedef enum {
    SVGA3D_TEX_FILTER_NEAREST        = 1,
    SVGA3D_TEX_FILTER_LINEAR         = 2,
    SVGA3D_TEX_FILTER_ANISOTROPIC    = 3,
-   SVGA3D_TEX_FILTER_FLATCUBIC      = 4, // Deprecated, not implemented
-   SVGA3D_TEX_FILTER_GAUSSIANCUBIC  = 5, // Deprecated, not implemented
-   SVGA3D_TEX_FILTER_PYRAMIDALQUAD  = 6, // Not currently implemented
-   SVGA3D_TEX_FILTER_GAUSSIANQUAD   = 7, // Not currently implemented
+   SVGA3D_TEX_FILTER_FLATCUBIC      = 4, /* Deprecated, not implemented */
+   SVGA3D_TEX_FILTER_GAUSSIANCUBIC  = 5, /* Deprecated, not implemented */
+   SVGA3D_TEX_FILTER_PYRAMIDALQUAD  = 6, /* Not currently implemented */
+   SVGA3D_TEX_FILTER_GAUSSIANQUAD   = 7, /* Not currently implemented */
    SVGA3D_TEX_FILTER_MAX
 } SVGA3dTextureFilter;
 
@@ -1013,19 +1170,19 @@ typedef enum {
 
 typedef enum {
    SVGA3D_DECLUSAGE_POSITION     = 0,
-   SVGA3D_DECLUSAGE_BLENDWEIGHT,       //  1
-   SVGA3D_DECLUSAGE_BLENDINDICES,      //  2
-   SVGA3D_DECLUSAGE_NORMAL,            //  3
-   SVGA3D_DECLUSAGE_PSIZE,             //  4
-   SVGA3D_DECLUSAGE_TEXCOORD,          //  5
-   SVGA3D_DECLUSAGE_TANGENT,           //  6
-   SVGA3D_DECLUSAGE_BINORMAL,          //  7
-   SVGA3D_DECLUSAGE_TESSFACTOR,        //  8
-   SVGA3D_DECLUSAGE_POSITIONT,         //  9
-   SVGA3D_DECLUSAGE_COLOR,             // 10
-   SVGA3D_DECLUSAGE_FOG,               // 11
-   SVGA3D_DECLUSAGE_DEPTH,             // 12
-   SVGA3D_DECLUSAGE_SAMPLE,            // 13
+   SVGA3D_DECLUSAGE_BLENDWEIGHT,
+   SVGA3D_DECLUSAGE_BLENDINDICES,
+   SVGA3D_DECLUSAGE_NORMAL,
+   SVGA3D_DECLUSAGE_PSIZE,
+   SVGA3D_DECLUSAGE_TEXCOORD,
+   SVGA3D_DECLUSAGE_TANGENT,
+   SVGA3D_DECLUSAGE_BINORMAL,
+   SVGA3D_DECLUSAGE_TESSFACTOR,
+   SVGA3D_DECLUSAGE_POSITIONT,
+   SVGA3D_DECLUSAGE_COLOR,
+   SVGA3D_DECLUSAGE_FOG,
+   SVGA3D_DECLUSAGE_DEPTH,
+   SVGA3D_DECLUSAGE_SAMPLE,
    SVGA3D_DECLUSAGE_MAX
 } SVGA3dDeclUsage;
 
@@ -1033,10 +1190,11 @@ typedef enum {
    SVGA3D_DECLMETHOD_DEFAULT     = 0,
    SVGA3D_DECLMETHOD_PARTIALU,
    SVGA3D_DECLMETHOD_PARTIALV,
-   SVGA3D_DECLMETHOD_CROSSUV,          // Normal
+   SVGA3D_DECLMETHOD_CROSSUV,          /* Normal */
    SVGA3D_DECLMETHOD_UV,
-   SVGA3D_DECLMETHOD_LOOKUP,           // Lookup a displacement map
-   SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, // Lookup a pre-sampled displacement map
+   SVGA3D_DECLMETHOD_LOOKUP,           /* Lookup a displacement map */
+   SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, /* Lookup a pre-sampled displacement */
+                                       /* map */
 } SVGA3dDeclMethod;
 
 typedef enum {
@@ -1162,17 +1320,23 @@ typedef enum {
    SVGA3D_SHADERTYPE_MIN                        = 1,
    SVGA3D_SHADERTYPE_VS                         = 1,
    SVGA3D_SHADERTYPE_PS                         = 2,
-   SVGA3D_SHADERTYPE_MAX                        = 3,
    SVGA3D_SHADERTYPE_PREDX_MAX                  = 3,
    SVGA3D_SHADERTYPE_GS                         = 3,
-   SVGA3D_SHADERTYPE_DX_MAX                     = 4,
+   SVGA3D_SHADERTYPE_DX10_MAX                   = 4,
+   SVGA3D_SHADERTYPE_HS                         = 4,
+   SVGA3D_SHADERTYPE_DS                         = 5,
+   SVGA3D_SHADERTYPE_CS                         = 6,
+   SVGA3D_SHADERTYPE_MAX                        = 7
 } SVGA3dShaderType;
 
 #define SVGA3D_NUM_SHADERTYPE_PREDX \
    (SVGA3D_SHADERTYPE_PREDX_MAX - SVGA3D_SHADERTYPE_MIN)
 
-#define SVGA3D_NUM_SHADERTYPE_DX \
-   (SVGA3D_SHADERTYPE_DX_MAX - SVGA3D_SHADERTYPE_MIN)
+#define SVGA3D_NUM_SHADERTYPE_DX10 \
+   (SVGA3D_SHADERTYPE_DX10_MAX - SVGA3D_SHADERTYPE_MIN)
+
+#define SVGA3D_NUM_SHADERTYPE \
+   (SVGA3D_SHADERTYPE_MAX - SVGA3D_SHADERTYPE_MIN)
 
 typedef enum {
    SVGA3D_CONST_TYPE_MIN                        = 0,
@@ -1196,33 +1360,151 @@ typedef enum {
 } SVGA3dStretchBltMode;
 
 typedef enum {
-   SVGA3D_QUERYTYPE_INVALID                     = ((uint32)-1),
+   SVGA3D_QUERYTYPE_INVALID                     = ((uint8)-1),
    SVGA3D_QUERYTYPE_MIN                         = 0,
    SVGA3D_QUERYTYPE_OCCLUSION                   = 0,
-   SVGA3D_QUERYTYPE_EVENT                       = 1,
-   SVGA3D_QUERYTYPE_TIMESTAMP                   = 2,
-   SVGA3D_QUERYTYPE_TIMESTAMPDISJOINT           = 3,
-   SVGA3D_QUERYTYPE_PIPELINESTATS               = 4,
-   SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE          = 5,
-   SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS           = 6,
-   SVGA3D_QUERYTYPE_STREAMOVERFLOWPREDICATE     = 7,
-   SVGA3D_QUERYTYPE_OCCLUSION64                 = 8,
+   SVGA3D_QUERYTYPE_TIMESTAMP                   = 1,
+   SVGA3D_QUERYTYPE_TIMESTAMPDISJOINT           = 2,
+   SVGA3D_QUERYTYPE_PIPELINESTATS               = 3,
+   SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE          = 4,
+   SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS           = 5,
+   SVGA3D_QUERYTYPE_STREAMOVERFLOWPREDICATE     = 6,
+   SVGA3D_QUERYTYPE_OCCLUSION64                 = 7,
+   SVGA3D_QUERYTYPE_DX10_MAX                    = 8,
+   SVGA3D_QUERYTYPE_SOSTATS_STREAM0             = 8,
+   SVGA3D_QUERYTYPE_SOSTATS_STREAM1             = 9,
+   SVGA3D_QUERYTYPE_SOSTATS_STREAM2             = 10,
+   SVGA3D_QUERYTYPE_SOSTATS_STREAM3             = 11,
+   SVGA3D_QUERYTYPE_SOP_STREAM0                 = 12,
+   SVGA3D_QUERYTYPE_SOP_STREAM1                 = 13,
+   SVGA3D_QUERYTYPE_SOP_STREAM2                 = 14,
+   SVGA3D_QUERYTYPE_SOP_STREAM3                 = 15,
    SVGA3D_QUERYTYPE_MAX
 } SVGA3dQueryType;
 
+typedef uint8 SVGA3dQueryTypeUint8;
+
 #define SVGA3D_NUM_QUERYTYPE  (SVGA3D_QUERYTYPE_MAX - SVGA3D_QUERYTYPE_MIN)
 
 /*
  * This is the maximum number of queries per context that can be active
  * simultaneously between a beginQuery and endQuery.
  */
-#define SVGA3D_MAX_QUERY_PER_CONTEXT 64
+#define SVGA3D_MAX_QUERY 64
+
+/*
+ * Query result buffer formats
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 samplesRendered;
+}
+#include "vmware_pack_end.h"
+SVGADXOcclusionQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 passed;
+}
+#include "vmware_pack_end.h"
+SVGADXEventQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint64 timestamp;
+}
+#include "vmware_pack_end.h"
+SVGADXTimestampQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint64 realFrequency;
+   uint32 disjoint;
+}
+#include "vmware_pack_end.h"
+SVGADXTimestampDisjointQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint64 inputAssemblyVertices;
+   uint64 inputAssemblyPrimitives;
+   uint64 vertexShaderInvocations;
+   uint64 geometryShaderInvocations;
+   uint64 geometryShaderPrimitives;
+   uint64 clipperInvocations;
+   uint64 clipperPrimitives;
+   uint64 pixelShaderInvocations;
+   uint64 hullShaderInvocations;
+   uint64 domainShaderInvocations;
+   uint64 computeShaderInvocations;
+}
+#include "vmware_pack_end.h"
+SVGADXPipelineStatisticsQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 anySamplesRendered;
+}
+#include "vmware_pack_end.h"
+SVGADXOcclusionPredicateQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint64 numPrimitivesWritten;
+   uint64 numPrimitivesRequired;
+}
+#include "vmware_pack_end.h"
+SVGADXStreamOutStatisticsQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 overflowed;
+}
+#include "vmware_pack_end.h"
+SVGADXStreamOutPredicateQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint64 samplesRendered;
+}
+#include "vmware_pack_end.h"
+SVGADXOcclusion64QueryResult;
+
+/*
+ * SVGADXQueryResultUnion is not intended for use in the protocol, but is
+ * very helpful when working with queries generically.
+ */
+typedef
+#include "vmware_pack_begin.h"
+union SVGADXQueryResultUnion {
+   SVGADXOcclusionQueryResult occ;
+   SVGADXEventQueryResult event;
+   SVGADXTimestampQueryResult ts;
+   SVGADXTimestampDisjointQueryResult tsDisjoint;
+   SVGADXPipelineStatisticsQueryResult pipelineStats;
+   SVGADXOcclusionPredicateQueryResult occPred;
+   SVGADXStreamOutStatisticsQueryResult soStats;
+   SVGADXStreamOutPredicateQueryResult soPred;
+   SVGADXOcclusion64QueryResult occ64;
+}
+#include "vmware_pack_end.h"
+SVGADXQueryResultUnion;
+
 
 typedef enum {
-   SVGA3D_QUERYSTATE_PENDING     = 0,      /* Waiting on the host (set by guest) */
-   SVGA3D_QUERYSTATE_SUCCEEDED   = 1,      /* Completed successfully (set by host) */
-   SVGA3D_QUERYSTATE_FAILED      = 2,      /* Completed unsuccessfully (set by host) */
-   SVGA3D_QUERYSTATE_NEW         = 3,      /* Never submitted (For guest use only) */
+   SVGA3D_QUERYSTATE_PENDING     = 0,      /* Query is not finished yet */
+   SVGA3D_QUERYSTATE_SUCCEEDED   = 1,      /* Completed successfully */
+   SVGA3D_QUERYSTATE_FAILED      = 2,      /* Completed unsuccessfully */
+   SVGA3D_QUERYSTATE_NEW         = 3,      /* Never submitted (guest only) */
 } SVGA3dQueryState;
 
 typedef enum {
@@ -1249,9 +1531,9 @@ typedef
 struct {
    union {
       struct {
-         uint16  function;       // SVGA3dFogFunction
-         uint8   type;           // SVGA3dFogType
-         uint8   base;           // SVGA3dFogBase
+	 uint16  function;       /* SVGA3dFogFunction */
+	 uint8   type;           /* SVGA3dFogType */
+	 uint8   base;           /* SVGA3dFogBase */
       };
       uint32     uintValue;
    };
@@ -1287,8 +1569,47 @@ SVGA3dSize;
 /*
  * Guest-backed objects definitions.
  */
+typedef enum {
+   SVGA_OTABLE_MOB             = 0,
+   SVGA_OTABLE_MIN             = 0,
+   SVGA_OTABLE_SURFACE         = 1,
+   SVGA_OTABLE_CONTEXT         = 2,
+   SVGA_OTABLE_SHADER          = 3,
+   SVGA_OTABLE_SCREENTARGET    = 4,
+
+   SVGA_OTABLE_DX9_MAX         = 5,
 
-typedef uint32 SVGAMobId;
+   SVGA_OTABLE_DXCONTEXT       = 5,
+   SVGA_OTABLE_MAX             = 6
+} SVGAOTableType;
+
+/*
+ * Deprecated.
+ */
+#define SVGA_OTABLE_COUNT 4
+
+typedef enum {
+   SVGA_COTABLE_MIN             = 0,
+   SVGA_COTABLE_RTVIEW          = 0,
+   SVGA_COTABLE_DSVIEW          = 1,
+   SVGA_COTABLE_SRVIEW          = 2,
+   SVGA_COTABLE_ELEMENTLAYOUT   = 3,
+   SVGA_COTABLE_BLENDSTATE      = 4,
+   SVGA_COTABLE_DEPTHSTENCIL    = 5,
+   SVGA_COTABLE_RASTERIZERSTATE = 6,
+   SVGA_COTABLE_SAMPLER         = 7,
+   SVGA_COTABLE_STREAMOUTPUT    = 8,
+   SVGA_COTABLE_DXQUERY         = 9,
+   SVGA_COTABLE_DXSHADER        = 10,
+   SVGA_COTABLE_DX10_MAX        = 11,
+   SVGA_COTABLE_UAVIEW          = 11,
+   SVGA_COTABLE_MAX
+} SVGACOTableType;
+
+/*
+ * The largest size (number of entries) allowed in a COTable.
+ */
+#define SVGA_COTABLE_MAX_IDS (MAX_UINT16 - 2)
 
 typedef enum SVGAMobFormat {
    SVGA3D_MOBFMT_INVALID     = SVGA3D_INVALID_ID,
@@ -1300,7 +1621,11 @@ typedef enum SVGAMobFormat {
    SVGA3D_MOBFMT_PTDEPTH64_0 = 4,
    SVGA3D_MOBFMT_PTDEPTH64_1 = 5,
    SVGA3D_MOBFMT_PTDEPTH64_2 = 6,
+   SVGA3D_MOBFMT_PREDX_MAX   = 7,
+   SVGA3D_MOBFMT_EMPTY       = 7,
    SVGA3D_MOBFMT_MAX,
 } SVGAMobFormat;
 
-#endif // _SVGA3D_TYPES_H_
+#define SVGA3D_MOB_EMPTY_BASE 1
+
+#endif /* _SVGA3D_TYPES_H_ */
diff --git a/src/gallium/drivers/svga/include/svga_escape.h b/src/gallium/drivers/svga/include/svga_escape.h
index 9d44c470430..884b1d1fb85 100644
--- a/src/gallium/drivers/svga/include/svga_escape.h
+++ b/src/gallium/drivers/svga/include/svga_escape.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2007-2014 VMware, Inc.  All rights reserved.
+ * Copyright 2007-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -75,7 +75,7 @@
  */
 
 #define SVGA_ESCAPE_VMWARE_HINT               0x00030000
-#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN    0x00030001  // Deprecated
+#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN    0x00030001  /* Deprecated */
 
 typedef
 struct {
diff --git a/src/gallium/drivers/svga/include/svga_overlay.h b/src/gallium/drivers/svga/include/svga_overlay.h
index ccbf7912e6d..161c3de7b80 100644
--- a/src/gallium/drivers/svga/include/svga_overlay.h
+++ b/src/gallium/drivers/svga/include/svga_overlay.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2007-2014 VMware, Inc.  All rights reserved.
+ * Copyright 2007-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -38,9 +38,9 @@
  * Video formats we support
  */
 
-#define VMWARE_FOURCC_YV12 0x32315659 // 'Y' 'V' '1' '2'
-#define VMWARE_FOURCC_YUY2 0x32595559 // 'Y' 'U' 'Y' '2'
-#define VMWARE_FOURCC_UYVY 0x59565955 // 'U' 'Y' 'V' 'Y'
+#define VMWARE_FOURCC_YV12 0x32315659 /* 'Y' 'V' '1' '2' */
+#define VMWARE_FOURCC_YUY2 0x32595559 /* 'Y' 'U' 'Y' '2' */
+#define VMWARE_FOURCC_UYVY 0x59565955 /* 'U' 'Y' 'V' 'Y' */
 
 typedef enum {
    SVGA_OVERLAY_FORMAT_INVALID = 0,
@@ -68,7 +68,7 @@ struct SVGAEscapeVideoSetRegs {
       uint32 streamId;
    } header;
 
-   // May include zero or more items.
+   /* May include zero or more items. */
    struct {
       uint32 registerId;
       uint32 value;
@@ -134,12 +134,12 @@ struct {
  */
 
 static inline Bool
-VMwareVideoGetAttributes(const SVGAOverlayFormat format,    // IN
-                         uint32 *width,                     // IN / OUT
-                         uint32 *height,                    // IN / OUT
-                         uint32 *size,                      // OUT
-                         uint32 *pitches,                   // OUT (optional)
-                         uint32 *offsets)                   // OUT (optional)
+VMwareVideoGetAttributes(const SVGAOverlayFormat format,    /* IN */
+                         uint32 *width,                     /* IN / OUT */
+                         uint32 *height,                    /* IN / OUT */
+                         uint32 *size,                      /* OUT */
+                         uint32 *pitches,                   /* OUT (optional) */
+                         uint32 *offsets)                   /* OUT (optional) */
 {
     int tmp;
 
@@ -196,4 +196,4 @@ VMwareVideoGetAttributes(const SVGAOverlayFormat format,    // IN
     return TRUE;
 }
 
-#endif // _SVGA_OVERLAY_H_
+#endif /* _SVGA_OVERLAY_H_ */
diff --git a/src/gallium/drivers/svga/include/svga_reg.h b/src/gallium/drivers/svga/include/svga_reg.h
index e75b442f994..2661eef034f 100644
--- a/src/gallium/drivers/svga/include/svga_reg.h
+++ b/src/gallium/drivers/svga/include/svga_reg.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 1998-2014 VMware, Inc.  All rights reserved.
+ * Copyright 1998-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -43,6 +43,8 @@ typedef enum {
    SVGA_REG_ENABLE_HIDE = (1 << 1),
 } SvgaRegEnable;
 
+typedef uint32 SVGAMobId;
+
 /*
  * Arbitrary and meaningless limits. Please ignore these when writing
  * new drivers.
@@ -490,7 +492,7 @@ typedef struct SVGAGMRImageFormat {
       struct {
          uint32 bitsPerPixel : 8;
          uint32 colorDepth   : 8;
-         uint32 reserved     : 16;  // Must be zero
+	 uint32 reserved     : 16;  /* Must be zero */
       };
 
       uint32 value;
@@ -533,7 +535,7 @@ typedef struct SVGAColorBGRX {
          uint32 b : 8;
          uint32 g : 8;
          uint32 r : 8;
-         uint32 x : 8;  // Unused
+	     uint32 x : 8;  /* Unused */
       };
 
       uint32 value;
@@ -605,24 +607,35 @@ struct {
  * SVGA_CAP_COMMAND_BUFFERS --
  *    Enable register based command buffer submission.
  *
+ * SVGA_CAP_DEAD1 --
+ *    This cap was incorrectly used by old drivers and should not be
+ *    reused.
+ *
+ * SVGA_CAP_CMD_BUFFERS_2 --
+ *    Enable support for the prepend command buffer submission
+ *    registers.  SVGA_REG_CMD_PREPEND_LOW and
+ *    SVGA_REG_CMD_PREPEND_HIGH.
+ *
  * SVGA_CAP_GBOBJECTS --
  *    Enable guest-backed objects and surfaces.
  *
+ * SVGA_CAP_CMD_BUFFERS_3 --
+ *    Enable support for command buffers in a mob.
  */
 
 #define SVGA_CAP_NONE               0x00000000
 #define SVGA_CAP_RECT_COPY          0x00000002
 #define SVGA_CAP_CURSOR             0x00000020
-#define SVGA_CAP_CURSOR_BYPASS      0x00000040   // Legacy (Use Cursor Bypass 3 instead)
-#define SVGA_CAP_CURSOR_BYPASS_2    0x00000080   // Legacy (Use Cursor Bypass 3 instead)
+#define SVGA_CAP_CURSOR_BYPASS      0x00000040
+#define SVGA_CAP_CURSOR_BYPASS_2    0x00000080
 #define SVGA_CAP_8BIT_EMULATION     0x00000100
 #define SVGA_CAP_ALPHA_CURSOR       0x00000200
 #define SVGA_CAP_3D                 0x00004000
 #define SVGA_CAP_EXTENDED_FIFO      0x00008000
-#define SVGA_CAP_MULTIMON           0x00010000   // Legacy multi-monitor support
+#define SVGA_CAP_MULTIMON           0x00010000
 #define SVGA_CAP_PITCHLOCK          0x00020000
 #define SVGA_CAP_IRQMASK            0x00040000
-#define SVGA_CAP_DISPLAY_TOPOLOGY   0x00080000   // Legacy multi-monitor support
+#define SVGA_CAP_DISPLAY_TOPOLOGY   0x00080000
 #define SVGA_CAP_GMR                0x00100000
 #define SVGA_CAP_TRACES             0x00200000
 #define SVGA_CAP_GMR2               0x00400000
@@ -631,6 +644,9 @@ struct {
 #define SVGA_CAP_DEAD1              0x02000000
 #define SVGA_CAP_CMD_BUFFERS_2      0x04000000
 #define SVGA_CAP_GBOBJECTS          0x08000000
+#define SVGA_CAP_CMD_BUFFERS_3      0x10000000
+
+#define SVGA_CAP_CMD_RESERVED       0x80000000
 
 
 /*
@@ -698,7 +714,7 @@ enum {
 
    SVGA_FIFO_CAPABILITIES = 4,
    SVGA_FIFO_FLAGS,
-   // Valid with SVGA_FIFO_CAP_FENCE:
+   /* Valid with SVGA_FIFO_CAP_FENCE: */
    SVGA_FIFO_FENCE,
 
    /*
@@ -710,20 +726,20 @@ enum {
     * These in block 3a, the VMX currently considers mandatory for the
     * extended FIFO.
     */
-   
-   // Valid if exists (i.e. if extended FIFO enabled):
+
+   /* Valid if exists (i.e. if extended FIFO enabled): */
    SVGA_FIFO_3D_HWVERSION,       /* See SVGA3dHardwareVersion in svga3d_reg.h */
-   // Valid with SVGA_FIFO_CAP_PITCHLOCK:
+   /* Valid with SVGA_FIFO_CAP_PITCHLOCK: */
    SVGA_FIFO_PITCHLOCK,
 
-   // Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3:
+   /* Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3: */
    SVGA_FIFO_CURSOR_ON,          /* Cursor bypass 3 show/hide register */
    SVGA_FIFO_CURSOR_X,           /* Cursor bypass 3 x register */
    SVGA_FIFO_CURSOR_Y,           /* Cursor bypass 3 y register */
    SVGA_FIFO_CURSOR_COUNT,       /* Incremented when any of the other 3 change */
    SVGA_FIFO_CURSOR_LAST_UPDATED,/* Last time the host updated the cursor */
 
-   // Valid with SVGA_FIFO_CAP_RESERVE:
+   /* Valid with SVGA_FIFO_CAP_RESERVE: */
    SVGA_FIFO_RESERVED,           /* Bytes past NEXT_CMD with real contents */
 
    /*
@@ -789,7 +805,7 @@ enum {
     * sets SVGA_FIFO_MIN high enough to leave room for them.
     */
 
-   // Valid if register exists:
+   /* Valid if register exists: */
    SVGA_FIFO_GUEST_3D_HWVERSION, /* Guest driver's 3D version */
    SVGA_FIFO_FENCE_GOAL,         /* Matching target for SVGA_IRQFLAG_FENCE_GOAL */
    SVGA_FIFO_BUSY,               /* See "FIFO Synchronization Registers" */
@@ -1046,7 +1062,7 @@ enum {
 
 #define SVGA_FIFO_FLAG_NONE                 0
 #define SVGA_FIFO_FLAG_ACCELFRONT       (1<<0)
-#define SVGA_FIFO_FLAG_RESERVED        (1<<31) // Internal use only
+#define SVGA_FIFO_FLAG_RESERVED        (1<<31) /* Internal use only */
 
 /*
  * FIFO reservation sentinel value
@@ -1079,22 +1095,23 @@ enum {
    SVGA_VIDEO_DATA_OFFSET,
    SVGA_VIDEO_FORMAT,
    SVGA_VIDEO_COLORKEY,
-   SVGA_VIDEO_SIZE,          // Deprecated
+   SVGA_VIDEO_SIZE,          /* Deprecated */
    SVGA_VIDEO_WIDTH,
    SVGA_VIDEO_HEIGHT,
    SVGA_VIDEO_SRC_X,
    SVGA_VIDEO_SRC_Y,
    SVGA_VIDEO_SRC_WIDTH,
    SVGA_VIDEO_SRC_HEIGHT,
-   SVGA_VIDEO_DST_X,         // Signed int32
-   SVGA_VIDEO_DST_Y,         // Signed int32
+   SVGA_VIDEO_DST_X,         /* Signed int32 */
+   SVGA_VIDEO_DST_Y,         /* Signed int32 */
    SVGA_VIDEO_DST_WIDTH,
    SVGA_VIDEO_DST_HEIGHT,
    SVGA_VIDEO_PITCH_1,
    SVGA_VIDEO_PITCH_2,
    SVGA_VIDEO_PITCH_3,
-   SVGA_VIDEO_DATA_GMRID,    // Optional, defaults to SVGA_GMR_FRAMEBUFFER
-   SVGA_VIDEO_DST_SCREEN_ID, // Optional, defaults to virtual coords (SVGA_ID_INVALID)
+   SVGA_VIDEO_DATA_GMRID,    /* Optional, defaults to SVGA_GMR_FRAMEBUFFER */
+   SVGA_VIDEO_DST_SCREEN_ID, /* Optional, defaults to virtual coords */
+                             /* (SVGA_ID_INVALID) */
    SVGA_VIDEO_NUM_REGS
 };
 
@@ -1180,10 +1197,10 @@ typedef struct SVGADisplayTopology {
  *    value of zero means no cloning should happen.
  */
 
-#define SVGA_SCREEN_MUST_BE_SET     (1 << 0) // Must be set or results undefined
-#define SVGA_SCREEN_HAS_ROOT SVGA_SCREEN_MUST_BE_SET // Deprecated
-#define SVGA_SCREEN_IS_PRIMARY      (1 << 1) // Guest considers this screen to be 'primary'
-#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2)  // Guest is running a fullscreen app here
+#define SVGA_SCREEN_MUST_BE_SET     (1 << 0)
+#define SVGA_SCREEN_HAS_ROOT SVGA_SCREEN_MUST_BE_SET /* Deprecated */
+#define SVGA_SCREEN_IS_PRIMARY      (1 << 1)
+#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2)
 
 /*
  * Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2.  When the screen is
@@ -1207,7 +1224,7 @@ typedef struct SVGADisplayTopology {
 
 typedef
 struct {
-   uint32 structSize;   // sizeof(SVGAScreenObject)
+   uint32 structSize;   /* sizeof(SVGAScreenObject) */
    uint32 id;
    uint32 flags;
    struct {
@@ -1224,6 +1241,13 @@ struct {
     * with SVGA_FIFO_CAP_SCREEN_OBJECT.
     */
    SVGAGuestImage backingStore;
+
+   /*
+    * The cloneCount field is treated as a hint from the guest that
+    * the user wants this display to be cloned, cloneCount times.
+    *
+    * A value of zero means no cloning should happen.
+    */
    uint32 cloneCount;
 } SVGAScreenObject;
 
@@ -1238,7 +1262,7 @@ struct {
  *  Note the holes in the command ID numbers: These commands have been
  *  deprecated, and the old IDs must not be reused.
  *
- *  Command IDs from 1000 to 1999 are reserved for use by the SVGA3D
+ *  Command IDs from 1000 to 2999 are reserved for use by the SVGA3D
  *  protocol.
  *
  *  Each command's parameters are described by the comments and
@@ -1267,6 +1291,8 @@ typedef enum {
    SVGA_CMD_REMAP_GMR2            = 42,
    SVGA_CMD_DEAD                  = 43,
    SVGA_CMD_DEAD_2                = 44,
+   SVGA_CMD_NOP                   = 45,
+   SVGA_CMD_NOP_ERROR             = 46,
    SVGA_CMD_MAX
 } SVGAFifoCmdId;
 
@@ -1372,13 +1398,13 @@ struct {
 
 typedef
 struct {
-   uint32 id;             // Reserved, must be zero.
+   uint32 id;             /* Reserved, must be zero. */
    uint32 hotspotX;
    uint32 hotspotY;
    uint32 width;
    uint32 height;
-   uint32 andMaskDepth;   // Value must be 1 or equal to BITS_PER_PIXEL
-   uint32 xorMaskDepth;   // Value must be 1 or equal to BITS_PER_PIXEL
+   uint32 andMaskDepth;   /* Value must be 1 or equal to BITS_PER_PIXEL */
+   uint32 xorMaskDepth;   /* Value must be 1 or equal to BITS_PER_PIXEL */
    /*
     * Followed by scanline data for AND mask, then XOR mask.
     * Each scanline is padded to a 32-bit boundary.
@@ -1401,7 +1427,7 @@ struct {
 
 typedef
 struct {
-   uint32 id;             // Reserved, must be zero.
+   uint32 id;             /* Reserved, must be zero. */
    uint32 hotspotX;
    uint32 hotspotY;
    uint32 width;
@@ -1449,12 +1475,12 @@ struct {
 
 typedef
 struct {
-   uint32 color;     // In the same format as the GFB
+   uint32 color;     /* In the same format as the GFB */
    uint32 x;
    uint32 y;
    uint32 width;
    uint32 height;
-   uint32 rop;       // Must be SVGA_ROP_COPY
+   uint32 rop;       /* Must be SVGA_ROP_COPY */
 } SVGAFifoCmdFrontRopFill;
 
 
@@ -1526,7 +1552,7 @@ struct {
 
 typedef
 struct {
-   SVGAScreenObject screen;   // Variable-length according to version
+   SVGAScreenObject screen;   /* Variable-length according to version */
 } SVGAFifoCmdDefineScreen;
 
 
@@ -1807,8 +1833,8 @@ typedef
 struct {
    uint32 gmrId;
    SVGARemapGMR2Flags flags;
-   uint32 offsetPages; // offset in pages to begin remap
-   uint32 numPages; // number of pages to remap
+   uint32 offsetPages; /* offset in pages to begin remap */
+   uint32 numPages; /* number of pages to remap */
    /*
     * Followed by additional data depending on SVGARemapGMR2Flags.
     *
@@ -1823,7 +1849,7 @@ struct {
 /*
  * Size of SVGA device memory such as frame buffer and FIFO.
  */
-#define SVGA_VRAM_MIN_SIZE             (4 * 640 * 480) // bytes
+#define SVGA_VRAM_MIN_SIZE             (4 * 640 * 480) /* bytes */
 #define SVGA_VRAM_MIN_SIZE_3D       (16 * 1024 * 1024)
 #define SVGA_VRAM_MAX_SIZE         (128 * 1024 * 1024)
 #define SVGA_MEMORY_SIZE_MAX      (1024 * 1024 * 1024)
@@ -1832,7 +1858,7 @@ struct {
 #define SVGA_GRAPHICS_MEMORY_KB_MAX       (2 * 1024 * 1024)
 #define SVGA_GRAPHICS_MEMORY_KB_DEFAULT   (256 * 1024)
 
-#define SVGA_VRAM_SIZE_W2K          (64 * 1024 * 1024) // 64 MB
+#define SVGA_VRAM_SIZE_W2K          (64 * 1024 * 1024) /* 64 MB */
 
 /*
  * To simplify autoDetect display configuration, support a minimum of
@@ -1848,7 +1874,7 @@ struct {
 #define SVGA_VRAM_SIZE               (4 * 1024 * 1024)
 #define SVGA_VRAM_SIZE_3D           (64 * 1024 * 1024)
 #define SVGA_FIFO_SIZE                    (256 * 1024)
-#define SVGA_FIFO_SIZE_3D                 (516 * 1024) // Bump to 516KB to workaround WDDM driver issue (see bug# 744318)
+#define SVGA_FIFO_SIZE_3D                 (516 * 1024)
 #define SVGA_MEMORY_SIZE_DEFAULT   (160 * 1024 * 1024)
 #define SVGA_AUTODETECT_DEFAULT                  FALSE
 #else
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c
index 0874d2321fd..252e0d6c854 100644
--- a/src/gallium/drivers/svga/svgadump/svga_dump.c
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.c
@@ -1369,12 +1369,6 @@ dump_SVGA3dCmdDefineSurface(const SVGA3dCmdDefineSurface *cmd)
    case SVGA3D_BUMPL6V5U5:
       _debug_printf("\t\t.format = SVGA3D_BUMPL6V5U5\n");
       break;
-   case SVGA3D_BUMPX8L8V8U8:
-      _debug_printf("\t\t.format = SVGA3D_BUMPX8L8V8U8\n");
-      break;
-   case SVGA3D_BUMPL8V8U8:
-      _debug_printf("\t\t.format = SVGA3D_BUMPL8V8U8\n");
-      break;
    case SVGA3D_ARGB_S10E5:
       _debug_printf("\t\t.format = SVGA3D_ARGB_S10E5\n");
       break;
@@ -1527,15 +1521,6 @@ dump_SVGA3dCmdDestroyGBShader(const SVGA3dCmdDestroyGBShader *cmd)
    _debug_printf("\t\t.shid = %u\n", cmd->shid);
 }
 
-static void
-dump_SVGA3dCmdBindGBShaderConsts(const SVGA3dCmdBindGBShaderConsts *cmd)
-{
-   _debug_printf("\t\t.cid = %u\n", cmd->cid);
-   _debug_printf("\t\t.shaderType = %u\n", cmd->shaderType);
-   _debug_printf("\t\t.shaderConstType = %u\n", cmd->shaderConstType);
-   _debug_printf("\t\t.sid = %u\n", cmd->sid);
-}
-
 static void
 dump_SVGA3dCmdBindGBSurface(const SVGA3dCmdBindGBSurface *cmd)
 {
@@ -1929,14 +1914,6 @@ svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size)
          body = (const uint8_t *)&cmd[1];
       }
       break;
-   case SVGA_3D_CMD_BIND_SHADERCONSTS:
-      _debug_printf("\tSVGA_3D_CMD_BIND_SHADERCONSTS\n");
-      {
-         const SVGA3dCmdBindGBShaderConsts *cmd = (const SVGA3dCmdBindGBShaderConsts *) body;
-         dump_SVGA3dCmdBindGBShaderConsts(cmd);
-         body = (const uint8_t *)&cmd[1];
-      }
-      break;
    case SVGA_3D_CMD_BIND_GB_SURFACE:
       _debug_printf("\tSVGA_3D_CMD_BIND_GB_SURFACE\n");
       {
-- 
cgit v1.2.3


From e2a1d21cb665b7e7bc2c9684cf533003cdc817b3 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 6 Aug 2015 16:44:35 -0600
Subject: svga/winsys: Add support for VGPU10

This involves a few driver modifications to keep things building.
The driver may not actually run properly at this point.

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/svga/svga_cmd.c                |   8 +-
 src/gallium/drivers/svga/svga_screen_cache.c       |   3 +-
 src/gallium/drivers/svga/svga_winsys.h             | 115 +++++++++++-
 .../winsys/svga/drm/pb_buffer_simple_fenced.c      |   4 +-
 src/gallium/winsys/svga/drm/svga_drm_public.h      |   2 +-
 src/gallium/winsys/svga/drm/vmw_buffer.c           |   2 +-
 src/gallium/winsys/svga/drm/vmw_buffer.h           |   2 +-
 src/gallium/winsys/svga/drm/vmw_context.c          | 206 +++++++++++++++++----
 src/gallium/winsys/svga/drm/vmw_context.h          |   2 +-
 src/gallium/winsys/svga/drm/vmw_fence.c            |   2 +-
 src/gallium/winsys/svga/drm/vmw_fence.h            |   2 +-
 src/gallium/winsys/svga/drm/vmw_screen.c           |   6 +-
 src/gallium/winsys/svga/drm/vmw_screen.h           |  25 ++-
 src/gallium/winsys/svga/drm/vmw_screen_dri.c       |   4 +-
 src/gallium/winsys/svga/drm/vmw_screen_ioctl.c     | 119 ++++++++----
 src/gallium/winsys/svga/drm/vmw_screen_pools.c     |   2 +-
 src/gallium/winsys/svga/drm/vmw_screen_svga.c      |  54 ++++--
 src/gallium/winsys/svga/drm/vmw_shader.c           |   5 +-
 src/gallium/winsys/svga/drm/vmw_shader.h           |   2 +-
 src/gallium/winsys/svga/drm/vmw_surface.c          |   2 +-
 src/gallium/winsys/svga/drm/vmw_surface.h          |   2 +-
 src/gallium/winsys/svga/drm/vmwgfx_drm.h           |  38 +++-
 22 files changed, 473 insertions(+), 134 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c
index b271832171d..b0cbc1354ca 100644
--- a/src/gallium/drivers/svga/svga_cmd.c
+++ b/src/gallium/drivers/svga/svga_cmd.c
@@ -1634,7 +1634,7 @@ SVGA3D_DefineGBShader(struct svga_winsys_context *swc,
    if (!cmd)
       return PIPE_ERROR_OUT_OF_MEMORY;
 
-   swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader);
+   swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader, 0);
    cmd->type = type;
    cmd->sizeInBytes = sizeInBytes;
 
@@ -1658,7 +1658,7 @@ SVGA3D_BindGBShader(struct svga_winsys_context *swc,
       return PIPE_ERROR_OUT_OF_MEMORY;
 
    swc->shader_relocation(swc, &cmd->shid, &cmd->mobid,
-			  &cmd->offsetInBytes, gbshader);
+			  &cmd->offsetInBytes, gbshader, 0);
 
    swc->commit(swc);
 
@@ -1683,7 +1683,7 @@ SVGA3D_SetGBShader(struct svga_winsys_context *swc,
    swc->context_relocation(swc, &cmd->cid);
    cmd->type = type;
    if (gbshader)
-      swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader);
+      swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader, 0);
    else
       cmd->shid = SVGA_ID_INVALID;
    swc->commit(swc);
@@ -1705,7 +1705,7 @@ SVGA3D_DestroyGBShader(struct svga_winsys_context *swc,
    if (!cmd)
       return PIPE_ERROR_OUT_OF_MEMORY;
 
-   swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader);
+   swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader, 0);
 
    swc->commit(swc);
 
diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c
index 3c765394a88..f99a0b305a7 100644
--- a/src/gallium/drivers/svga/svga_screen_cache.c
+++ b/src/gallium/drivers/svga/svga_screen_cache.c
@@ -453,7 +453,8 @@ svga_screen_surface_create(struct svga_screen *svgascreen,
                                    0 : SVGA_SURFACE_USAGE_SHARED,
                                    key->size,
                                    key->numFaces,
-                                   key->numMipLevels);
+                                   key->numMipLevels,
+                                   0);
       if (handle)
          SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
                   "  CREATE sid %p sz %dx%dx%d\n",
diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h
index 19d074fd66d..c750603989f 100644
--- a/src/gallium/drivers/svga/svga_winsys.h
+++ b/src/gallium/drivers/svga/svga_winsys.h
@@ -79,15 +79,18 @@ struct winsys_handle;
 #define SVGA_FENCE_FLAG_EXEC      (1 << 0)
 #define SVGA_FENCE_FLAG_QUERY     (1 << 1)
 
-#define SVGA_SURFACE_USAGE_SHARED (1 << 0)
+#define SVGA_SURFACE_USAGE_SHARED  (1 << 0)
+#define SVGA_SURFACE_USAGE_SCANOUT (1 << 1)
+
+#define SVGA_QUERY_FLAG_SET        (1 << 0)
+#define SVGA_QUERY_FLAG_REF        (1 << 1)
 
 /** Opaque surface handle */
 struct svga_winsys_surface;
 
-
 /** Opaque guest-backed objects */
 struct svga_winsys_gb_shader;
-
+struct svga_winsys_gb_query;
 
 
 /**
@@ -143,7 +146,8 @@ struct svga_winsys_context
 	                uint32 *shid,
 			uint32 *mobid,
 			uint32 *offset,
-	                struct svga_winsys_gb_shader *shader);
+	                struct svga_winsys_gb_shader *shader,
+                        unsigned flags);
 
    /**
     * Emit a relocation for a guest-backed context.
@@ -173,6 +177,26 @@ struct svga_winsys_context
 		     uint32 offset,
 		     unsigned flags);
 
+   /**
+    * Emit a relocation for a guest-backed query object.
+    *
+    * NOTE: Order of this call does matter. It should be the same order
+    * as relocations appear in the command buffer.
+    */
+   void
+   (*query_relocation)(struct svga_winsys_context *swc,
+	               SVGAMobId *id,
+	               struct svga_winsys_gb_query *query);
+
+   /**
+    * Bind queries to context.
+    * \param flags  exactly one of SVGA_QUERY_FLAG_SET/REF
+    */
+   enum pipe_error
+   (*query_bind)(struct svga_winsys_context *sws,
+                 struct svga_winsys_gb_query *query,
+                 unsigned flags);
+
    void
    (*commit)(struct svga_winsys_context *swc);
    
@@ -219,6 +243,36 @@ struct svga_winsys_context
                     struct svga_winsys_surface *surface,
                     boolean *rebind);
 
+   /**
+    * Create and define a DX GB shader that resides in the device COTable.
+    * Caller of this function will issue the DXDefineShader command.
+    */
+   struct svga_winsys_gb_shader *
+   (*shader_create)(struct svga_winsys_context *swc,
+                    uint32 shaderId,
+                    SVGA3dShaderType shaderType,
+                    const uint32 *bytecode,
+                    uint32 bytecodeLen);
+
+   /**
+    * Destroy a DX GB shader.
+    * This function will issue the DXDestroyShader command.
+    */
+   void
+   (*shader_destroy)(struct svga_winsys_context *swc,
+                     struct svga_winsys_gb_shader *shader);
+
+   /**
+    * Rebind a DX GB resource to a context.
+    * This is called to reference a DX GB resource in the command stream in
+    * order to page in the associated resource in case the memory has been
+    * paged out, and to fence it if necessary after command submission.
+    */
+   enum pipe_error
+   (*resource_rebind)(struct svga_winsys_context *swc,
+                      struct svga_winsys_surface *surface,
+                      struct svga_winsys_gb_shader *shader,
+                      unsigned flags);
 };
 
 
@@ -260,7 +314,7 @@ struct svga_winsys_screen
     * \param format Format Device surface format
     * \param usage Winsys usage: bitmask of SVGA_SURFACE_USAGE_x flags
     * \param size Surface size given in device format
-    * \param numFaces Number of faces of the surface (1 or 6)
+    * \param numLayers Number of layers of the surface (or cube faces)
     * \param numMipLevels Number of mipmap levels for each face
     *
     * Returns the surface ID (sid). Surfaces are generic
@@ -274,7 +328,7 @@ struct svga_winsys_screen
     * - Each face has a list of mipmap levels
     *
     * - Each mipmap image may have multiple volume
-    *   slices, if the image is three dimensional.
+    *   slices for 3D image, or multiple 2D slices for texture array.
     *
     * - Each slice is a 2D array of 'blocks'
     *
@@ -296,8 +350,9 @@ struct svga_winsys_screen
                      SVGA3dSurfaceFormat format,
                      unsigned usage,
                      SVGA3dSize size,
-                     uint32 numFaces,
-                     uint32 numMipLevels);
+                     uint32 numLayers,
+                     uint32 numMipLevels,
+                     unsigned sampleCount);
 
    /**
     * Creates a surface from a winsys handle.
@@ -343,7 +398,7 @@ struct svga_winsys_screen
    (*surface_can_create)(struct svga_winsys_screen *sws,
                          SVGA3dSurfaceFormat format,
                          SVGA3dSize size,
-                         uint32 numFaces,
+                         uint32 numLayers,
                          uint32 numMipLevels);
 
    /**
@@ -420,7 +475,7 @@ struct svga_winsys_screen
     */
    struct svga_winsys_gb_shader *
    (*shader_create)(struct svga_winsys_screen *sws,
-		    SVGA3dShaderType type,
+		    SVGA3dShaderType shaderType,
 		    const uint32 *bytecode,
 		    uint32 bytecodeLen);
 
@@ -432,6 +487,46 @@ struct svga_winsys_screen
    (*shader_destroy)(struct svga_winsys_screen *sws,
 		     struct svga_winsys_gb_shader *shader);
 
+   /**
+    * Create and define a GB query.
+    */
+   struct svga_winsys_gb_query *
+   (*query_create)(struct svga_winsys_screen *sws, uint32 len);
+
+   /**
+    * Destroy a GB query.
+    */
+   void
+   (*query_destroy)(struct svga_winsys_screen *sws,
+		    struct svga_winsys_gb_query *query);
+
+   /**
+    * Initialize the query state of the query that resides in the slot
+    * specified in offset
+    * \return zero on success.
+    */
+   int
+   (*query_init)(struct svga_winsys_screen *sws,
+                       struct svga_winsys_gb_query *query,
+                       unsigned offset,
+                       SVGA3dQueryState queryState);
+
+   /**
+    * Inquire for the query state and result of the query that resides
+    * in the slot specified in offset
+    */
+   void
+   (*query_get_result)(struct svga_winsys_screen *sws,
+                       struct svga_winsys_gb_query *query,
+                       unsigned offset,
+                       SVGA3dQueryState *queryState,
+                       void *result, uint32 resultLen);
+
+   /** Have VGPU v10 hardware? */
+   boolean have_vgpu10;
+
+   /** To rebind resources at the beginning of a new command buffer */
+   boolean need_to_rebind_resources;
 };
 
 
diff --git a/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c b/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c
index 5ef95f3d6a9..c1b9eb95c52 100644
--- a/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c
+++ b/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright 2007-2010 VMware, Inc.
+ * Copyright 2007-2015 VMware, Inc.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -339,6 +339,7 @@ fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr,
          /* TODO: remove consequents buffers with the same fence? */
 
          assert(!destroyed);
+         (void) destroyed;
 
          fenced_buf->flags &= ~PB_USAGE_GPU_READ_WRITE;
 
@@ -660,6 +661,7 @@ fenced_buffer_fence(struct pb_buffer *buf,
          boolean destroyed;
          destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf);
          assert(!destroyed);
+         (void) destroyed;
       }
       if (fence) {
          ops->fence_reference(ops, &fenced_buf->fence, fence);
diff --git a/src/gallium/winsys/svga/drm/svga_drm_public.h b/src/gallium/winsys/svga/drm/svga_drm_public.h
index e98c89da1e1..fa2dcefb6f2 100644
--- a/src/gallium/winsys/svga/drm/svga_drm_public.h
+++ b/src/gallium/winsys/svga/drm/svga_drm_public.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2010 VMware, Inc.  All rights reserved.
+ * Copyright 2010-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
diff --git a/src/gallium/winsys/svga/drm/vmw_buffer.c b/src/gallium/winsys/svga/drm/vmw_buffer.c
index 7eab3d050e4..c082dcc34e9 100644
--- a/src/gallium/winsys/svga/drm/vmw_buffer.c
+++ b/src/gallium/winsys/svga/drm/vmw_buffer.c
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
diff --git a/src/gallium/winsys/svga/drm/vmw_buffer.h b/src/gallium/winsys/svga/drm/vmw_buffer.h
index b9cbb25541f..6e1151e5f36 100644
--- a/src/gallium/winsys/svga/drm/vmw_buffer.h
+++ b/src/gallium/winsys/svga/drm/vmw_buffer.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
diff --git a/src/gallium/winsys/svga/drm/vmw_context.c b/src/gallium/winsys/svga/drm/vmw_context.c
index 31bedde7c41..394795eae14 100644
--- a/src/gallium/winsys/svga/drm/vmw_context.c
+++ b/src/gallium/winsys/svga/drm/vmw_context.c
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -41,6 +41,7 @@
 #include "vmw_surface.h"
 #include "vmw_fence.h"
 #include "vmw_shader.h"
+#include "vmw_query.h"
 
 #define VMW_COMMAND_SIZE (64*1024)
 #define VMW_SURFACE_RELOCS (1024)
@@ -391,24 +392,27 @@ vmw_swc_mob_relocation(struct svga_winsys_context *swc,
 {
    struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
    struct vmw_buffer_relocation *reloc;
+   struct pb_buffer *pb_buffer = vmw_pb_buffer(buffer);
 
-   assert(vswc->region.staged < vswc->region.reserved);
+   if (id) {
+      assert(vswc->region.staged < vswc->region.reserved);
 
-   reloc = &vswc->region.relocs[vswc->region.used + vswc->region.staged];
-   reloc->mob.id = id;
-   reloc->mob.offset_into_mob = offset_into_mob;
+      reloc = &vswc->region.relocs[vswc->region.used + vswc->region.staged];
+      reloc->mob.id = id;
+      reloc->mob.offset_into_mob = offset_into_mob;
 
-   /*
-    * pb_validate holds a refcount to the buffer, so no need to
-    * refcount it again in the relocation.
-    */
-   reloc->buffer = vmw_pb_buffer(buffer);
-   reloc->offset = offset;
-   reloc->is_mob = TRUE;
-   ++vswc->region.staged;
+      /*
+       * pb_validate holds a refcount to the buffer, so no need to
+       * refcount it again in the relocation.
+       */
+      reloc->buffer = pb_buffer;
+      reloc->offset = offset;
+      reloc->is_mob = TRUE;
+      ++vswc->region.staged;
+   }
 
-   if (vmw_swc_add_validate_buffer(vswc, reloc->buffer, flags)) {
-      vswc->seen_mobs += reloc->buffer->size;
+   if (vmw_swc_add_validate_buffer(vswc, pb_buffer, flags)) {
+      vswc->seen_mobs += pb_buffer->size;
       /* divide by 5, tested for best performance */
       if (vswc->seen_mobs >= vswc->vws->ioctl.max_mob_memory / VMW_MAX_MOB_MEM_FACTOR)
          vswc->preemptive_flush = TRUE;
@@ -481,7 +485,8 @@ vmw_swc_surface_only_relocation(struct svga_winsys_context *swc,
       p_atomic_inc(&vsurf->validated);
    }
 
-   *where = vsurf->sid;
+   if (where)
+      *where = vsurf->sid;
 }
 
 static void
@@ -495,7 +500,7 @@ vmw_swc_surface_relocation(struct svga_winsys_context *swc,
 
    assert(swc->have_gb_objects || mobid == NULL);
 
-   if(!surface) {
+   if (!surface) {
       *where = SVGA3D_INVALID_ID;
       if (mobid)
          *mobid = SVGA3D_INVALID_ID;
@@ -525,51 +530,68 @@ vmw_swc_shader_relocation(struct svga_winsys_context *swc,
 			  uint32 *shid,
 			  uint32 *mobid,
 			  uint32 *offset,
-			  struct svga_winsys_gb_shader *shader)
+			  struct svga_winsys_gb_shader *shader,
+                          unsigned flags)
 {
    struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+   struct vmw_winsys_screen *vws = vswc->vws;
    struct vmw_svga_winsys_shader *vshader;
    struct vmw_ctx_validate_item *ishader;
+
    if(!shader) {
       *shid = SVGA3D_INVALID_ID;
       return;
    }
 
-   assert(vswc->shader.staged < vswc->shader.reserved);
    vshader = vmw_svga_winsys_shader(shader);
-   ishader = util_hash_table_get(vswc->hash, vshader);
 
-   if (ishader == NULL) {
-      ishader = &vswc->shader.items[vswc->shader.used + vswc->shader.staged];
-      vmw_svga_winsys_shader_reference(&ishader->vshader, vshader);
-      ishader->referenced = FALSE;
-      /*
-       * Note that a failure here may just fall back to unhashed behavior
-       * and potentially cause unnecessary flushing, so ignore the
-       * return code.
-       */
-      (void) util_hash_table_set(vswc->hash, vshader, ishader);
-      ++vswc->shader.staged;
-   }
+   if (!vws->base.have_vgpu10) {
+      assert(vswc->shader.staged < vswc->shader.reserved);
+      ishader = util_hash_table_get(vswc->hash, vshader);
+
+      if (ishader == NULL) {
+         ishader = &vswc->shader.items[vswc->shader.used + vswc->shader.staged];
+         vmw_svga_winsys_shader_reference(&ishader->vshader, vshader);
+         ishader->referenced = FALSE;
+         /*
+          * Note that a failure here may just fall back to unhashed behavior
+          * and potentially cause unnecessary flushing, so ignore the
+          * return code.
+          */
+         (void) util_hash_table_set(vswc->hash, vshader, ishader);
+         ++vswc->shader.staged;
+      }
 
-   if (!ishader->referenced) {
-      ishader->referenced = TRUE;
-      p_atomic_inc(&vshader->validated);
+      if (!ishader->referenced) {
+         ishader->referenced = TRUE;
+         p_atomic_inc(&vshader->validated);
+      }
    }
 
-   *shid = vshader->shid;
+   if (shid)
+      *shid = vshader->shid;
 
-   if (mobid != NULL && vshader->buf)
+   if (vshader->buf)
       vmw_swc_mob_relocation(swc, mobid, offset, vshader->buf,
 			     0, SVGA_RELOC_READ);
 }
 
+static void
+vmw_swc_query_relocation(struct svga_winsys_context *swc,
+                         SVGAMobId *id,
+                         struct svga_winsys_gb_query *query)
+{
+   /* Queries are backed by one big MOB */
+   vmw_swc_mob_relocation(swc, id, NULL, query->buf, 0,
+                          SVGA_RELOC_READ | SVGA_RELOC_WRITE);
+}
+
 static void
 vmw_swc_commit(struct svga_winsys_context *swc)
 {
    struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
 
-   assert(vswc->command.reserved);
+   assert(vswc->command.reserved >= 0);
    assert(vswc->command.used + vswc->command.reserved <= vswc->command.size);
    vswc->command.used += vswc->command.reserved;
    vswc->command.reserved = 0;
@@ -633,6 +655,96 @@ static int vmw_ptr_compare(void *key1, void *key2)
    return (key1 == key2) ? 0 : 1;
 }
 
+
+/**
+ * vmw_svga_winsys_vgpu10_shader_create - The winsys shader_create callback
+ *
+ * @swc: The winsys context.
+ * @shaderId: Previously allocated shader id.
+ * @shaderType: The shader type.
+ * @bytecode: The shader bytecode
+ * @bytecodeLen: The length of the bytecode.
+ *
+ * Creates an svga_winsys_gb_shader structure and allocates a buffer for the
+ * shader code and copies the shader code into the buffer. Shader
+ * resource creation is not done.
+ */
+static struct svga_winsys_gb_shader *
+vmw_svga_winsys_vgpu10_shader_create(struct svga_winsys_context *swc,
+                                     uint32 shaderId,
+                                     SVGA3dShaderType shaderType,
+                                     const uint32 *bytecode,
+                                     uint32 bytecodeLen)
+{
+   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+   struct vmw_svga_winsys_shader *shader;
+   struct svga_winsys_gb_shader *gb_shader =
+      vmw_svga_winsys_shader_create(&vswc->vws->base, shaderType, bytecode,
+                                    bytecodeLen);
+   if (!gb_shader)
+      return NULL;
+   /* Record the caller-supplied shader id on the newly created object. */
+   shader = vmw_svga_winsys_shader(gb_shader);
+   shader->shid = shaderId;
+
+   return gb_shader;
+}
+
+/**
+ * vmw_svga_winsys_vgpu10_shader_destroy - The winsys shader_destroy callback.
+ *
+ * @swc: The winsys context.
+ * @shader: A shader structure previously allocated by shader_create.
+ *
+ * Frees the shader structure and the buffer holding the shader code.
+ */
+static void
+vmw_svga_winsys_vgpu10_shader_destroy(struct svga_winsys_context *swc,
+                                      struct svga_winsys_gb_shader *shader)
+{
+   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+
+   vmw_svga_winsys_shader_destroy(&vswc->vws->base, shader);
+}
+
+/**
+ * vmw_svga_winsys_resource_rebind - The winsys resource_rebind callback
+ *
+ * @swc: The winsys context.
+ * @surface: The surface to be referenced.
+ * @shader: The shader to be referenced.
+ * @flags: Relocation flags.
+ *
+ * This callback is needed because shader backing buffers are sub-allocated, and
+ * hence the kernel fencing is not sufficient. The buffers need to be put on
+ * the context's validation list and fenced after command submission to avoid
+ * reuse of busy shader buffers. In addition, surfaces need to be put on the
+ * validation list in order for the driver to regard them as referenced
+ * by the command stream.
+ */
+static enum pipe_error
+vmw_svga_winsys_resource_rebind(struct svga_winsys_context *swc,
+                                struct svga_winsys_surface *surface,
+                                struct svga_winsys_gb_shader *shader,
+                                unsigned flags)
+{
+   /**
+    * Need to reserve one validation item for either the surface or
+    * the shader.
+    */
+   if (!vmw_swc_reserve(swc, 0, 1))
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   if (surface)
+      vmw_swc_surface_relocation(swc, NULL, NULL, surface, flags);
+   else if (shader)
+      vmw_swc_shader_relocation(swc, NULL, NULL, NULL, shader, flags);
+
+   vmw_swc_commit(swc);
+
+   return PIPE_OK;
+}
+
 struct svga_winsys_context *
 vmw_svga_winsys_context_create(struct svga_winsys_screen *sws)
 {
@@ -648,6 +760,8 @@ vmw_svga_winsys_context_create(struct svga_winsys_screen *sws)
    vswc->base.surface_relocation = vmw_swc_surface_relocation;
    vswc->base.region_relocation = vmw_swc_region_relocation;
    vswc->base.mob_relocation = vmw_swc_mob_relocation;
+   vswc->base.query_relocation = vmw_swc_query_relocation;
+   vswc->base.query_bind = vmw_swc_query_bind;
    vswc->base.context_relocation = vmw_swc_context_relocation;
    vswc->base.shader_relocation = vmw_swc_shader_relocation;
    vswc->base.commit = vmw_swc_commit;
@@ -655,7 +769,19 @@ vmw_svga_winsys_context_create(struct svga_winsys_screen *sws)
    vswc->base.surface_map = vmw_svga_winsys_surface_map;
    vswc->base.surface_unmap = vmw_svga_winsys_surface_unmap;
 
-   vswc->base.cid = vmw_ioctl_context_create(vws);
+  vswc->base.shader_create = vmw_svga_winsys_vgpu10_shader_create;
+  vswc->base.shader_destroy = vmw_svga_winsys_vgpu10_shader_destroy;
+
+  vswc->base.resource_rebind = vmw_svga_winsys_resource_rebind;
+
+   if (sws->have_vgpu10)
+      vswc->base.cid = vmw_ioctl_extended_context_create(vws, sws->have_vgpu10);
+   else
+      vswc->base.cid = vmw_ioctl_context_create(vws);
+
+   if (vswc->base.cid == -1)
+      goto out_no_context;
+
    vswc->base.have_gb_objects = sws->have_gb_objects;
 
    vswc->vws = vws;
@@ -682,6 +808,8 @@ vmw_svga_winsys_context_create(struct svga_winsys_screen *sws)
 out_no_hash:
    pb_validate_destroy(vswc->validate);
 out_no_validate:
+   vmw_ioctl_context_destroy(vws, vswc->base.cid);
+out_no_context:
    FREE(vswc);
    return NULL;
 }
diff --git a/src/gallium/winsys/svga/drm/vmw_context.h b/src/gallium/winsys/svga/drm/vmw_context.h
index 2c2fb415782..b71af6f04a0 100644
--- a/src/gallium/winsys/svga/drm/vmw_context.h
+++ b/src/gallium/winsys/svga/drm/vmw_context.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
diff --git a/src/gallium/winsys/svga/drm/vmw_fence.c b/src/gallium/winsys/svga/drm/vmw_fence.c
index 17822ce27fd..bcf473a932c 100644
--- a/src/gallium/winsys/svga/drm/vmw_fence.c
+++ b/src/gallium/winsys/svga/drm/vmw_fence.c
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009-2011 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
diff --git a/src/gallium/winsys/svga/drm/vmw_fence.h b/src/gallium/winsys/svga/drm/vmw_fence.h
index 56f1a0ab05d..f6381feaa2b 100644
--- a/src/gallium/winsys/svga/drm/vmw_fence.h
+++ b/src/gallium/winsys/svga/drm/vmw_fence.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
diff --git a/src/gallium/winsys/svga/drm/vmw_screen.c b/src/gallium/winsys/svga/drm/vmw_screen.c
index 0c343cc7bdd..7fcb6d27714 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen.c
+++ b/src/gallium/winsys/svga/drm/vmw_screen.c
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -57,7 +57,7 @@ static unsigned vmw_dev_hash(void *key)
  */
 
 struct vmw_winsys_screen *
-vmw_winsys_create( int fd, boolean use_old_scanout_flag )
+vmw_winsys_create( int fd )
 {
    struct vmw_winsys_screen *vws;
    struct stat stat_buf;
@@ -84,8 +84,8 @@ vmw_winsys_create( int fd, boolean use_old_scanout_flag )
    vws->device = stat_buf.st_rdev;
    vws->open_count = 1;
    vws->ioctl.drm_fd = dup(fd);
-   vws->use_old_scanout_flag = use_old_scanout_flag;
    vws->base.have_gb_dma = TRUE;
+   vws->base.need_to_rebind_resources = FALSE;
 
    if (!vmw_ioctl_init(vws))
       goto out_no_ioctl;
diff --git a/src/gallium/winsys/svga/drm/vmw_screen.h b/src/gallium/winsys/svga/drm/vmw_screen.h
index ce98db9b397..79d0949e96a 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen.h
+++ b/src/gallium/winsys/svga/drm/vmw_screen.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -65,8 +65,6 @@ struct vmw_winsys_screen
 {
    struct svga_winsys_screen base;
 
-   boolean use_old_scanout_flag;
-
    struct {
       int drm_fd;
       uint32_t hwversion;
@@ -76,6 +74,8 @@ struct vmw_winsys_screen
       uint64_t max_surface_memory;
       uint64_t max_texture_size;
       boolean have_drm_2_6;
+      boolean have_drm_2_9;
+      uint32_t drm_execbuf_version;
    } ioctl;
 
    struct {
@@ -115,6 +115,10 @@ vmw_region_size(struct vmw_region *region);
 uint32
 vmw_ioctl_context_create(struct vmw_winsys_screen *vws);
 
+uint32
+vmw_ioctl_extended_context_create(struct vmw_winsys_screen *vws,
+                                  boolean vgpu10);
+
 void
 vmw_ioctl_context_destroy(struct vmw_winsys_screen *vws,
                           uint32 cid);
@@ -126,7 +130,8 @@ vmw_ioctl_surface_create(struct vmw_winsys_screen *vws,
                          unsigned usage,
                          SVGA3dSize size,
                          uint32 numFaces,
-                         uint32 numMipLevels);
+                         uint32 numMipLevels,
+                         unsigned sampleCount);
 uint32
 vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws,
 			    SVGA3dSurfaceFlags flags,
@@ -135,6 +140,7 @@ vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws,
 			    SVGA3dSize size,
 			    uint32 numFaces,
 			    uint32 numMipLevels,
+                            unsigned sampleCount,
                             uint32 buffer_handle,
 			    struct vmw_region **p_region);
 
@@ -213,7 +219,7 @@ boolean vmw_winsys_screen_init_svga(struct vmw_winsys_screen *vws);
 void vmw_ioctl_cleanup(struct vmw_winsys_screen *vws);
 void vmw_pools_cleanup(struct vmw_winsys_screen *vws);
 
-struct vmw_winsys_screen *vmw_winsys_create(int fd, boolean use_old_scanout_flag);
+struct vmw_winsys_screen *vmw_winsys_create(int fd);
 void vmw_winsys_destroy(struct vmw_winsys_screen *sws);
 void vmw_winsys_screen_set_throttling(struct pipe_screen *screen,
 				      uint32_t throttle_us);
@@ -227,4 +233,13 @@ vmw_fences_signal(struct pb_fence_ops *fence_ops,
                   uint32_t emitted,
                   boolean has_emitted);
 
+struct svga_winsys_gb_shader *
+vmw_svga_winsys_shader_create(struct svga_winsys_screen *sws,
+			      SVGA3dShaderType type,
+			      const uint32 *bytecode,
+			      uint32 bytecodeLen);
+void
+vmw_svga_winsys_shader_destroy(struct svga_winsys_screen *sws,
+			       struct svga_winsys_gb_shader *shader);
+
 #endif /* VMW_SCREEN_H_ */
diff --git a/src/gallium/winsys/svga/drm/vmw_screen_dri.c b/src/gallium/winsys/svga/drm/vmw_screen_dri.c
index e70e0fec4a3..01bb0e2d753 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen_dri.c
+++ b/src/gallium/winsys/svga/drm/vmw_screen_dri.c
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -111,7 +111,7 @@ svga_drm_winsys_screen_create(int fd)
 			       &drm_compat, "vmwgfx drm driver"))
       return NULL;
 
-   vws = vmw_winsys_create( fd, FALSE );
+   vws = vmw_winsys_create(fd);
    if (!vws)
       goto out_no_vws;
 
diff --git a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
index e2f0da58bf9..c86d95a14fe 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
+++ b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -63,13 +63,6 @@ struct vmw_region
    uint32_t size;
 };
 
-/* XXX: This isn't a real hardware flag, but just a hack for kernel to
- * know about primary surfaces. In newer versions of the kernel
- * interface the driver uses a special field.
- */
-#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9)
-
-
 uint32_t
 vmw_region_size(struct vmw_region *region)
 {
@@ -91,10 +84,30 @@ vmw_ioctl_context_create(struct vmw_winsys_screen *vws)
       return -1;
 
    vmw_printf("Context id is %d\n", c_arg.cid);
-
    return c_arg.cid;
 }
 
+uint32
+vmw_ioctl_extended_context_create(struct vmw_winsys_screen *vws,
+                                  boolean vgpu10)
+{
+   union drm_vmw_extended_context_arg c_arg;
+   int ret;
+   /* Create a vgpu10 or legacy context; returns its id, or -1 on failure. */
+   VMW_FUNC;
+   memset(&c_arg, 0, sizeof(c_arg));
+   c_arg.req = (vgpu10 ? drm_vmw_context_vgpu10 : drm_vmw_context_legacy);
+   ret = drmCommandWriteRead(vws->ioctl.drm_fd,
+                             DRM_VMW_CREATE_EXTENDED_CONTEXT,
+                             &c_arg, sizeof(c_arg));
+
+   if (ret)
+      return -1;
+   /* The reply half of the union (rep) carries the new context id. */
+   vmw_printf("Context id is %d\n", c_arg.rep.cid);
+   return c_arg.rep.cid;
+}
+
 void
 vmw_ioctl_context_destroy(struct vmw_winsys_screen *vws, uint32 cid)
 {
@@ -116,7 +129,8 @@ vmw_ioctl_surface_create(struct vmw_winsys_screen *vws,
                          SVGA3dSurfaceFormat format,
                          unsigned usage,
                          SVGA3dSize size,
-                         uint32_t numFaces, uint32_t numMipLevels)
+                         uint32_t numFaces, uint32_t numMipLevels,
+                         unsigned sampleCount)
 {
    union drm_vmw_surface_create_arg s_arg;
    struct drm_vmw_surface_create_req *req = &s_arg.req;
@@ -131,17 +145,8 @@ vmw_ioctl_surface_create(struct vmw_winsys_screen *vws,
    vmw_printf("%s flags %d format %d\n", __FUNCTION__, flags, format);
 
    memset(&s_arg, 0, sizeof(s_arg));
-   if (vws->use_old_scanout_flag &&
-       (flags & SVGA3D_SURFACE_HINT_SCANOUT)) {
-      req->flags = (uint32_t) flags;
-      req->scanout = false;
-   } else if (flags & SVGA3D_SURFACE_HINT_SCANOUT) {
-      req->flags = (uint32_t) (flags & ~SVGA3D_SURFACE_HINT_SCANOUT);
-      req->scanout = true;
-   } else {
-      req->flags = (uint32_t) flags;
-      req->scanout = false;
-   }
+   req->flags = (uint32_t) flags;
+   req->scanout = !!(usage & SVGA_SURFACE_USAGE_SCANOUT);
    req->format = (uint32_t) format;
    req->shareable = !!(usage & SVGA_SURFACE_USAGE_SHARED);
 
@@ -188,6 +193,7 @@ vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws,
 			    SVGA3dSize size,
 			    uint32_t numFaces,
 			    uint32_t numMipLevels,
+                            unsigned sampleCount,
                             uint32_t buffer_handle,
 			    struct vmw_region **p_region)
 {
@@ -206,25 +212,29 @@ vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws,
    }
 
    memset(&s_arg, 0, sizeof(s_arg));
-   if (flags & SVGA3D_SURFACE_HINT_SCANOUT) {
-      req->svga3d_flags = (uint32_t) (flags & ~SVGA3D_SURFACE_HINT_SCANOUT);
-      req->drm_surface_flags = drm_vmw_surface_flag_scanout;
-   } else {
-      req->svga3d_flags = (uint32_t) flags;
-   }
+   req->svga3d_flags = (uint32_t) flags;
+   if (usage & SVGA_SURFACE_USAGE_SCANOUT)
+      req->drm_surface_flags |= drm_vmw_surface_flag_scanout;
    req->format = (uint32_t) format;
    if (usage & SVGA_SURFACE_USAGE_SHARED)
       req->drm_surface_flags |= drm_vmw_surface_flag_shareable;
    req->drm_surface_flags |= drm_vmw_surface_flag_create_buffer; 
-
-   assert(numFaces * numMipLevels < DRM_VMW_MAX_SURFACE_FACES*
-	  DRM_VMW_MAX_MIP_LEVELS);
    req->base_size.width = size.width;
    req->base_size.height = size.height;
    req->base_size.depth = size.depth;
    req->mip_levels = numMipLevels;
    req->multisample_count = 0;
    req->autogen_filter = SVGA3D_TEX_FILTER_NONE;
+
+   if (vws->base.have_vgpu10) {
+      req->array_size = numFaces;
+      req->multisample_count = sampleCount;
+   } else {
+      assert(numFaces * numMipLevels < DRM_VMW_MAX_SURFACE_FACES*
+	     DRM_VMW_MAX_MIP_LEVELS);
+      req->array_size = 0;
+   }
+
    if (buffer_handle)
       req->buffer_handle = buffer_handle;
    else
@@ -403,6 +413,7 @@ vmw_ioctl_command(struct vmw_winsys_screen *vws, int32_t cid,
    struct drm_vmw_execbuf_arg arg;
    struct drm_vmw_fence_rep rep;
    int ret;
+   int argsize;
 
 #ifdef DEBUG
    {
@@ -433,13 +444,21 @@ vmw_ioctl_command(struct vmw_winsys_screen *vws, int32_t cid,
    arg.commands = (unsigned long)commands;
    arg.command_size = size;
    arg.throttle_us = throttle_us;
-   arg.version = DRM_VMW_EXECBUF_VERSION;
-
+   arg.version = vws->ioctl.drm_execbuf_version;
+   arg.context_handle = (vws->base.have_vgpu10 ? cid : SVGA3D_INVALID_ID);
+
+   /* In DRM_VMW_EXECBUF_VERSION 1, the drm_vmw_execbuf_arg structure ends with
+    * the flags field. The structure size sent to drmCommandWrite must match
+    * the drm_execbuf_version. Otherwise, an invalid value will be returned.
+    */
+   argsize = vws->ioctl.drm_execbuf_version > 1 ? sizeof(arg) :
+                offsetof(struct drm_vmw_execbuf_arg, context_handle);
    do {
-       ret = drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_EXECBUF, &arg, sizeof(arg));
+       ret = drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_EXECBUF, &arg, argsize);
    } while(ret == -ERESTART);
    if (ret) {
       vmw_error("%s error %s.\n", __FUNCTION__, strerror(-ret));
+      abort();
    }
 
    if (rep.error) {
@@ -832,6 +851,7 @@ vmw_ioctl_init(struct vmw_winsys_screen *vws)
    int ret;
    uint32_t *cap_buffer;
    drmVersionPtr version;
+   boolean drm_gb_capable;
    boolean have_drm_2_5;
 
    VMW_FUNC;
@@ -844,6 +864,12 @@ vmw_ioctl_init(struct vmw_winsys_screen *vws)
       (version->version_major == 2 && version->version_minor > 4);
    vws->ioctl.have_drm_2_6 = version->version_major > 2 ||
       (version->version_major == 2 && version->version_minor > 5);
+   vws->ioctl.have_drm_2_9 = version->version_major > 2 ||
+      (version->version_major == 2 && version->version_minor > 8);
+
+   vws->ioctl.drm_execbuf_version = vws->ioctl.have_drm_2_9 ? 2 : 1;
+
+   drm_gb_capable = have_drm_2_5;
 
    memset(&gp_arg, 0, sizeof(gp_arg));
    gp_arg.param = DRM_VMW_PARAM_3D;
@@ -875,9 +901,10 @@ vmw_ioctl_init(struct vmw_winsys_screen *vws)
       vws->base.have_gb_objects =
          !!(gp_arg.value & (uint64_t) SVGA_CAP_GBOBJECTS);
    
-   if (vws->base.have_gb_objects && !have_drm_2_5)
+   if (vws->base.have_gb_objects && !drm_gb_capable)
       goto out_no_3d;
 
+   vws->base.have_vgpu10 = FALSE;
    if (vws->base.have_gb_objects) {
       memset(&gp_arg, 0, sizeof(gp_arg));
       gp_arg.param = DRM_VMW_PARAM_3D_CAPS_SIZE;
@@ -918,6 +945,27 @@ vmw_ioctl_init(struct vmw_winsys_screen *vws)
 
       /* Never early flush surfaces, mobs do accounting. */
       vws->ioctl.max_surface_memory = -1;
+
+      if (vws->ioctl.have_drm_2_9) {
+
+         memset(&gp_arg, 0, sizeof(gp_arg));
+         gp_arg.param = DRM_VMW_PARAM_VGPU10;
+         ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM,
+                                   &gp_arg, sizeof(gp_arg));
+         if (ret == 0 && gp_arg.value != 0) {
+            const char *vgpu10_val;
+
+            debug_printf("Have VGPU10 interface and hardware.\n");
+            vws->base.have_vgpu10 = TRUE;
+            vgpu10_val = getenv("SVGA_VGPU10");
+            if (vgpu10_val && strcmp(vgpu10_val, "0") == 0) {
+               debug_printf("Disabling VGPU10 interface.\n");
+               vws->base.have_vgpu10 = FALSE;
+            } else {
+               debug_printf("Enabling VGPU10 interface.\n");
+            }
+         }
+      }
    } else {
       vws->ioctl.num_cap_3d = SVGA3D_DEVCAP_MAX;
 
@@ -938,6 +986,9 @@ vmw_ioctl_init(struct vmw_winsys_screen *vws)
       size = SVGA_FIFO_3D_CAPS_SIZE * sizeof(uint32_t);
    }
 
+   debug_printf("VGPU10 interface is %s.\n",
+                vws->base.have_vgpu10 ? "on" : "off");
+
    cap_buffer = calloc(1, size);
    if (!cap_buffer) {
       debug_printf("Failed alloc fifo 3D caps buffer.\n");
diff --git a/src/gallium/winsys/svga/drm/vmw_screen_pools.c b/src/gallium/winsys/svga/drm/vmw_screen_pools.c
index 1815bfa6707..48c95e5069e 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen_pools.c
+++ b/src/gallium/winsys/svga/drm/vmw_screen_pools.c
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
diff --git a/src/gallium/winsys/svga/drm/vmw_screen_svga.c b/src/gallium/winsys/svga/drm/vmw_screen_svga.c
index 32f16cd447a..a18dd827c64 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen_svga.c
+++ b/src/gallium/winsys/svga/drm/vmw_screen_svga.c
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -48,6 +48,7 @@
 #include "vmw_buffer.h"
 #include "vmw_fence.h"
 #include "vmw_shader.h"
+#include "vmw_query.h"
 #include "svga3d_surfacedefs.h"
 
 /**
@@ -137,8 +138,9 @@ vmw_svga_winsys_surface_create(struct svga_winsys_screen *sws,
                                SVGA3dSurfaceFormat format,
                                unsigned usage,
                                SVGA3dSize size,
-                               uint32 numFaces,
-                               uint32 numMipLevels)
+                               uint32 numLayers,
+                               uint32 numMipLevels,
+                               unsigned sampleCount)
 {
    struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
    struct vmw_svga_winsys_surface *surface;
@@ -146,7 +148,6 @@ vmw_svga_winsys_surface_create(struct svga_winsys_screen *sws,
    struct pb_manager *provider;
    uint32_t buffer_size;
 
-
    memset(&desc, 0, sizeof(desc));
    surface = CALLOC_STRUCT(vmw_svga_winsys_surface);
    if(!surface)
@@ -163,7 +164,11 @@ vmw_svga_winsys_surface_create(struct svga_winsys_screen *sws,
     * Used for the backing buffer GB surfaces, and to approximate
     * when to flush on non-GB hosts.
     */
-   buffer_size = svga3dsurface_get_serialized_size(format, size, numMipLevels, (numFaces == 6));
+   buffer_size = svga3dsurface_get_serialized_size(format, size, numMipLevels, 
+                                                   numLayers);
+   if (flags & SVGA3D_SURFACE_BIND_STREAM_OUTPUT)
+      buffer_size += sizeof(SVGA3dDXSOState);
+
    if (buffer_size > vws->ioctl.max_texture_size) {
       goto no_sid;
    }
@@ -189,8 +194,9 @@ vmw_svga_winsys_surface_create(struct svga_winsys_screen *sws,
       }
 
       surface->sid = vmw_ioctl_gb_surface_create(vws, flags, format, usage,
-                                                 size, numFaces,
-                                                 numMipLevels, ptr.gmrId,
+                                                 size, numLayers,
+                                                 numMipLevels, sampleCount,
+                                                 ptr.gmrId,
                                                  surface->buf ? NULL :
 						 &desc.region);
 
@@ -205,9 +211,9 @@ vmw_svga_winsys_surface_create(struct svga_winsys_screen *sws,
          vmw_svga_winsys_buffer_destroy(sws, surface->buf);
          surface->buf = NULL;
          surface->sid = vmw_ioctl_gb_surface_create(vws, flags, format, usage,
-                                                    size, numFaces,
-                                                    numMipLevels, 0,
-                                                    &desc.region);
+                                                    size, numLayers,
+                                                    numMipLevels, sampleCount,
+                                                    0, &desc.region);
          if (surface->sid == SVGA3D_INVALID_ID)
             goto no_sid;
       }
@@ -233,7 +239,8 @@ vmw_svga_winsys_surface_create(struct svga_winsys_screen *sws,
       }
    } else {
       surface->sid = vmw_ioctl_surface_create(vws, flags, format, usage,
-                                              size, numFaces, numMipLevels);
+                                              size, numLayers, numMipLevels,
+                                              sampleCount);
       if(surface->sid == SVGA3D_INVALID_ID)
          goto no_sid;
 
@@ -257,7 +264,7 @@ static boolean
 vmw_svga_winsys_surface_can_create(struct svga_winsys_screen *sws,
                                SVGA3dSurfaceFormat format,
                                SVGA3dSize size,
-                               uint32 numFaces,
+                               uint32 numLayers,
                                uint32 numMipLevels)
 {
    struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
@@ -265,7 +272,7 @@ vmw_svga_winsys_surface_can_create(struct svga_winsys_screen *sws,
 
    buffer_size = svga3dsurface_get_serialized_size(format, size, 
                                                    numMipLevels, 
-                                                   (numFaces == 6));
+                                                   numLayers);
    if (buffer_size > vws->ioctl.max_texture_size) {
 	return FALSE;
    }
@@ -323,14 +330,16 @@ vmw_svga_winsys_get_cap(struct svga_winsys_screen *sws,
 {   
    struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
 
-   if (index > vws->ioctl.num_cap_3d || !vws->ioctl.cap_3d[index].has_cap)      
+   if (index > vws->ioctl.num_cap_3d ||
+       index >= SVGA3D_DEVCAP_MAX ||
+       !vws->ioctl.cap_3d[index].has_cap)
       return FALSE;
 
    *result = vws->ioctl.cap_3d[index].result;
    return TRUE;
 }
 
-static struct svga_winsys_gb_shader *
+struct svga_winsys_gb_shader *
 vmw_svga_winsys_shader_create(struct svga_winsys_screen *sws,
 			      SVGA3dShaderType type,
 			      const uint32 *bytecode,
@@ -360,9 +369,11 @@ vmw_svga_winsys_shader_create(struct svga_winsys_screen *sws,
    memcpy(code, bytecode, bytecodeLen);
    vmw_svga_winsys_buffer_unmap(sws, shader->buf);
 
-   shader->shid = vmw_ioctl_shader_create(vws, type, bytecodeLen);
-   if(shader->shid == SVGA3D_INVALID_ID)
-      goto out_no_shid;
+   if (!sws->have_vgpu10) {
+      shader->shid = vmw_ioctl_shader_create(vws, type, bytecodeLen);
+      if (shader->shid == SVGA3D_INVALID_ID)
+         goto out_no_shid;
+   }
 
    return svga_winsys_shader(shader);
 
@@ -374,7 +385,7 @@ out_no_shader:
    return NULL;
 }
 
-static void
+void
 vmw_svga_winsys_shader_destroy(struct svga_winsys_screen *sws,
 			       struct svga_winsys_gb_shader *shader)
 {
@@ -405,6 +416,11 @@ vmw_winsys_screen_init_svga(struct vmw_winsys_screen *vws)
    vws->base.shader_destroy = vmw_svga_winsys_shader_destroy;
    vws->base.fence_finish = vmw_svga_winsys_fence_finish;
 
+   vws->base.query_create = vmw_svga_winsys_query_create;
+   vws->base.query_init = vmw_svga_winsys_query_init;
+   vws->base.query_destroy = vmw_svga_winsys_query_destroy;
+   vws->base.query_get_result = vmw_svga_winsys_query_get_result;
+
    return TRUE;
 }
 
diff --git a/src/gallium/winsys/svga/drm/vmw_shader.c b/src/gallium/winsys/svga/drm/vmw_shader.c
index e82486ab608..56ffdd16f79 100644
--- a/src/gallium/winsys/svga/drm/vmw_shader.c
+++ b/src/gallium/winsys/svga/drm/vmw_shader.c
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009-2012 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -50,7 +50,8 @@ vmw_svga_winsys_shader_reference(struct vmw_svga_winsys_shader **pdst,
    if (pipe_reference(dst_ref, src_ref)) {
       struct svga_winsys_screen *sws = &dst->screen->base;
 
-      vmw_ioctl_shader_destroy(dst->screen, dst->shid);
+      if (!sws->have_vgpu10)
+         vmw_ioctl_shader_destroy(dst->screen, dst->shid);
 #ifdef DEBUG
       /* to detect dangling pointers */
       assert(p_atomic_read(&dst->validated) == 0);
diff --git a/src/gallium/winsys/svga/drm/vmw_shader.h b/src/gallium/winsys/svga/drm/vmw_shader.h
index 28f99717391..c9a3638bcc6 100644
--- a/src/gallium/winsys/svga/drm/vmw_shader.h
+++ b/src/gallium/winsys/svga/drm/vmw_shader.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009-2012 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
diff --git a/src/gallium/winsys/svga/drm/vmw_surface.c b/src/gallium/winsys/svga/drm/vmw_surface.c
index cf648b4dd93..6c0ad3bbf19 100644
--- a/src/gallium/winsys/svga/drm/vmw_surface.c
+++ b/src/gallium/winsys/svga/drm/vmw_surface.c
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
diff --git a/src/gallium/winsys/svga/drm/vmw_surface.h b/src/gallium/winsys/svga/drm/vmw_surface.h
index 1291f380aa2..f8b582d2cef 100644
--- a/src/gallium/winsys/svga/drm/vmw_surface.h
+++ b/src/gallium/winsys/svga/drm/vmw_surface.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
diff --git a/src/gallium/winsys/svga/drm/vmwgfx_drm.h b/src/gallium/winsys/svga/drm/vmwgfx_drm.h
index 73ad20537ee..807ec901ad5 100644
--- a/src/gallium/winsys/svga/drm/vmwgfx_drm.h
+++ b/src/gallium/winsys/svga/drm/vmwgfx_drm.h
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA
+ * Copyright © 2009-2015 VMware, Inc., Palo Alto, CA., USA
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -64,6 +64,7 @@
 #define DRM_VMW_GB_SURFACE_CREATE    23
 #define DRM_VMW_GB_SURFACE_REF       24
 #define DRM_VMW_SYNCCPU              25
+#define DRM_VMW_CREATE_EXTENDED_CONTEXT 26
 
 /*************************************************************************/
 /**
@@ -88,6 +89,8 @@
 #define DRM_VMW_PARAM_3D_CAPS_SIZE     8
 #define DRM_VMW_PARAM_MAX_MOB_MEMORY   9
 #define DRM_VMW_PARAM_MAX_MOB_SIZE     10
+#define DRM_VMW_PARAM_SCREEN_TARGET    11
+#define DRM_VMW_PARAM_VGPU10           12
 
 /**
  * enum drm_vmw_handle_type - handle type for ref ioctls
@@ -296,7 +299,7 @@ union drm_vmw_surface_reference_arg {
  * Argument to the DRM_VMW_EXECBUF Ioctl.
  */
 
-#define DRM_VMW_EXECBUF_VERSION 1
+#define DRM_VMW_EXECBUF_VERSION 2
 
 struct drm_vmw_execbuf_arg {
 	uint64_t commands;
@@ -305,6 +308,8 @@ struct drm_vmw_execbuf_arg {
 	uint64_t fence_rep;
 	uint32_t version;
 	uint32_t flags;
+	uint32_t context_handle;
+	uint32_t pad64;
 };
 
 /**
@@ -826,7 +831,6 @@ struct drm_vmw_update_layout_arg {
 enum drm_vmw_shader_type {
 	drm_vmw_shader_type_vs = 0,
 	drm_vmw_shader_type_ps,
-	drm_vmw_shader_type_gs
 };
 
 
@@ -908,6 +912,8 @@ enum drm_vmw_surface_flags {
  * @buffer_handle     Buffer handle of backup buffer. SVGA3D_INVALID_ID
  *                    if none.
  * @base_size         Size of the base mip level for all faces.
+ * @array_size        Must be zero for non-vgpu10 hardware, and if non-zero
+ *                    svga3d_flags must have proper bind flags setup.
  *
  * Input argument to the  DRM_VMW_GB_SURFACE_CREATE Ioctl.
  * Part of output argument for the DRM_VMW_GB_SURFACE_REF Ioctl.
@@ -920,7 +926,7 @@ struct drm_vmw_gb_surface_create_req {
 	uint32_t multisample_count;
 	uint32_t autogen_filter;
 	uint32_t buffer_handle;
-	uint32_t pad64;
+	uint32_t array_size;
 	struct drm_vmw_size base_size;
 };
 
@@ -1060,4 +1066,28 @@ struct drm_vmw_synccpu_arg {
 	uint32_t pad64;
 };
 
+/*************************************************************************/
+/**
+ * DRM_VMW_CREATE_EXTENDED_CONTEXT - Create a host context.
+ *
+ * Allocates a device unique context id, and queues a create context command
+ * for the host. Does not wait for host completion.
+ */
+enum drm_vmw_extended_context {
+	drm_vmw_context_legacy,
+	drm_vmw_context_vgpu10
+};
+
+/**
+ * union drm_vmw_extended_context_arg
+ *
+ * @req: Context type.
+ * @rep: Context identifier.
+ *
+ * Argument to the DRM_VMW_CREATE_EXTENDED_CONTEXT Ioctl.
+ */
+union drm_vmw_extended_context_arg {
+	enum drm_vmw_extended_context req;
+	struct drm_vmw_context_arg rep;
+};
 #endif
-- 
cgit v1.2.3


From 133a47107cc03a69b4b88c1fef331d507f521fa7 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 7 Aug 2015 14:54:24 -0600
Subject: gallium/st: add pipe_context::get_timestamp()

The VMware svga driver doesn't directly support pipe_screen::get_timestamp()
but we can do a work-around.  However, we need a gallium context to do so.
This patch adds a new pipe_context::get_timestamp() function that will only
be called if the pipe_screen::get_timestamp() function is NULL.

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/include/pipe/p_context.h    |  7 +++++++
 src/mesa/state_tracker/st_cb_queryobj.c | 13 +++++++++++--
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index 9d8f5bdc8d2..6f9fe767404 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -591,6 +591,13 @@ struct pipe_context {
                                unsigned sample_index,
                                float *out_value);
 
+   /**
+    * Query a timestamp in nanoseconds.  This is completely equivalent to
+    * pipe_screen::get_timestamp() but takes a context handle for drivers
+    * that require a context.
+    */
+   uint64_t (*get_timestamp)(struct pipe_context *);
+
    /**
     * Flush the resource cache, so that the resource can be used
     * by an external client. Possible usage:
diff --git a/src/mesa/state_tracker/st_cb_queryobj.c b/src/mesa/state_tracker/st_cb_queryobj.c
index 71222e80b6b..aafae16b2df 100644
--- a/src/mesa/state_tracker/st_cb_queryobj.c
+++ b/src/mesa/state_tracker/st_cb_queryobj.c
@@ -289,9 +289,18 @@ st_CheckQuery(struct gl_context *ctx, struct gl_query_object *q)
 static uint64_t
 st_GetTimestamp(struct gl_context *ctx)
 {
-   struct pipe_screen *screen = st_context(ctx)->pipe->screen;
+   struct pipe_context *pipe = st_context(ctx)->pipe;
+   struct pipe_screen *screen = pipe->screen;
 
-   return screen->get_timestamp(screen);
+   /* Prefer the per-screen function */
+   if (screen->get_timestamp) {
+      return screen->get_timestamp(screen);
+   }
+   else {
+      /* Fall back to the per-context function */
+      assert(pipe->get_timestamp);
+      return pipe->get_timestamp(pipe);
+   }
 }
 
 
-- 
cgit v1.2.3


From 1c5468e9c0b2f6c832051de0070c8ef49a0c9fd2 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 7 Aug 2015 16:11:14 -0600
Subject: svga: remove unused SVGA3D_* command functions

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/svga/svga_cmd.c | 134 ------------------------------------
 src/gallium/drivers/svga/svga_cmd.h |  22 ------
 2 files changed, 156 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c
index b0cbc1354ca..1f64980a8ef 100644
--- a/src/gallium/drivers/svga/svga_cmd.c
+++ b/src/gallium/drivers/svga/svga_cmd.c
@@ -1614,36 +1614,6 @@ SVGA3D_WaitForQuery(struct svga_winsys_context *swc,
 }
 
 
-enum pipe_error
-SVGA3D_DefineGBShader(struct svga_winsys_context *swc,
-                      struct svga_winsys_gb_shader *gbshader,
-                      SVGA3dShaderType type,
-                      uint32 sizeInBytes)
-{
-   SVGA3dCmdDefineGBShader *cmd;
-
-   assert(sizeInBytes % 4 == 0);
-   assert(type == SVGA3D_SHADERTYPE_VS ||
-          type == SVGA3D_SHADERTYPE_PS);
-
-   cmd = SVGA3D_FIFOReserve(swc,
-                            SVGA_3D_CMD_DEFINE_GB_SHADER,
-                            sizeof *cmd,
-                            1); /* one relocation */
-
-   if (!cmd)
-      return PIPE_ERROR_OUT_OF_MEMORY;
-
-   swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader, 0);
-   cmd->type = type;
-   cmd->sizeInBytes = sizeInBytes;
-
-   swc->commit(swc);
-   
-   return PIPE_OK;
-}
-
-
 enum pipe_error
 SVGA3D_BindGBShader(struct svga_winsys_context *swc,
                     struct svga_winsys_gb_shader *gbshader)
@@ -1692,27 +1662,6 @@ SVGA3D_SetGBShader(struct svga_winsys_context *swc,
 }
 
 
-enum pipe_error
-SVGA3D_DestroyGBShader(struct svga_winsys_context *swc,
-                       struct svga_winsys_gb_shader *gbshader)
-{
-   SVGA3dCmdDestroyGBShader *cmd = 
-      SVGA3D_FIFOReserve(swc,
-                         SVGA_3D_CMD_DESTROY_GB_SHADER,
-                         sizeof *cmd,
-                         1); /* one relocation */
-
-   if (!cmd)
-      return PIPE_ERROR_OUT_OF_MEMORY;
-
-   swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader, 0);
-
-   swc->commit(swc);
-
-   return PIPE_OK;
-}
-
-
 /**
  * \param flags  mask of SVGA_RELOC_READ / _WRITE
  */
@@ -1738,89 +1687,6 @@ SVGA3D_BindGBSurface(struct svga_winsys_context *swc,
 }
 
 
-enum pipe_error
-SVGA3D_DefineGBContext(struct svga_winsys_context *swc)
-{
-   SVGA3dCmdDefineGBContext *cmd = 
-      SVGA3D_FIFOReserve(swc,
-                         SVGA_3D_CMD_DEFINE_GB_CONTEXT,
-                         sizeof *cmd,
-                         1);  /* one relocation */
-
-   if (!cmd)
-      return PIPE_ERROR_OUT_OF_MEMORY;
-
-   swc->context_relocation(swc, &cmd->cid);
-
-   swc->commit(swc);
-
-   return PIPE_OK;
-}
-
-
-enum pipe_error
-SVGA3D_DestroyGBContext(struct svga_winsys_context *swc)
-{
-   SVGA3dCmdDestroyGBContext *cmd = 
-      SVGA3D_FIFOReserve(swc,
-                         SVGA_3D_CMD_DESTROY_GB_CONTEXT,
-                         sizeof *cmd,
-                         1);  /* one relocation */
-
-   if (!cmd)
-      return PIPE_ERROR_OUT_OF_MEMORY;
-
-   swc->context_relocation(swc, &cmd->cid);
-
-   swc->commit(swc);
-
-   return PIPE_OK;
-}
-
-
-enum pipe_error
-SVGA3D_BindGBContext(struct svga_winsys_context *swc)
-{
-   SVGA3dCmdBindGBContext *cmd = 
-      SVGA3D_FIFOReserve(swc,
-                         SVGA_3D_CMD_BIND_GB_CONTEXT,
-                         sizeof *cmd,
-                         2);  /* two relocations */
-
-   if (!cmd)
-      return PIPE_ERROR_OUT_OF_MEMORY;
-
-   swc->context_relocation(swc, &cmd->cid);
-   swc->context_relocation(swc, &cmd->mobid);
-   cmd->validContents = 0;  /* XXX pass as a parameter? */
-
-   swc->commit(swc);
-
-   return PIPE_OK;
-}
-
-
-enum pipe_error
-SVGA3D_InvalidateGBContext(struct svga_winsys_context *swc)
-{
-   SVGA3dCmdInvalidateGBContext *cmd =
-      SVGA3D_FIFOReserve(swc,
-                         SVGA_3D_CMD_INVALIDATE_GB_CONTEXT,
-                         sizeof *cmd,
-                         1);  /* one relocation */
-
-   if (!cmd)
-      return PIPE_ERROR_OUT_OF_MEMORY;
-
-   swc->context_relocation(swc, &cmd->cid);
-
-   swc->commit(swc);
-
-   return PIPE_OK;
-}
-
-
-
 /**
  * Update an image in a guest-backed surface.
  * (Inform the device that the guest-contents have been updated.)
diff --git a/src/gallium/drivers/svga/svga_cmd.h b/src/gallium/drivers/svga/svga_cmd.h
index 6f658bf3abf..2348aa65c1d 100644
--- a/src/gallium/drivers/svga/svga_cmd.h
+++ b/src/gallium/drivers/svga/svga_cmd.h
@@ -228,12 +228,6 @@ SVGA3D_SetShader(struct svga_winsys_context *swc,
  * Guest-backed surface functions
  */
 
-enum pipe_error
-SVGA3D_DefineGBShader(struct svga_winsys_context *swc,
-                      struct svga_winsys_gb_shader *gbshader,
-                      SVGA3dShaderType type,
-                      uint32 sizeInBytes);
-
 enum pipe_error
 SVGA3D_BindGBShader(struct svga_winsys_context *swc,
                     struct svga_winsys_gb_shader *gbshader);
@@ -243,26 +237,10 @@ SVGA3D_SetGBShader(struct svga_winsys_context *swc,
                    SVGA3dShaderType type,
                    struct svga_winsys_gb_shader *gbshader);
 
-enum pipe_error
-SVGA3D_DestroyGBShader(struct svga_winsys_context *swc,
-                       struct svga_winsys_gb_shader *gbshader);
-
 enum pipe_error
 SVGA3D_BindGBSurface(struct svga_winsys_context *swc,
                      struct svga_winsys_surface *surface);
 
-enum pipe_error
-SVGA3D_DefineGBContext(struct svga_winsys_context *swc);
-
-enum pipe_error
-SVGA3D_DestroyGBContext(struct svga_winsys_context *swc);
-
-enum pipe_error
-SVGA3D_BindGBContext(struct svga_winsys_context *swc);
-
-enum pipe_error
-SVGA3D_InvalidateGBContext(struct svga_winsys_context *swc);
-
 enum pipe_error
 SVGA3D_UpdateGBImage(struct svga_winsys_context *swc,
                      struct svga_winsys_surface *surface,
-- 
cgit v1.2.3


From 35bb29d4994efadd1719a147731afa34e78a0be1 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 7 Aug 2015 14:56:51 -0600
Subject: svga: add new svga_tgsi_vgpu10.c file

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/svga/svga_tgsi_vgpu10.c | 6778 +++++++++++++++++++++++++++
 1 file changed, 6778 insertions(+)
 create mode 100644 src/gallium/drivers/svga/svga_tgsi_vgpu10.c

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
new file mode 100644
index 00000000000..60928d7a790
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -0,0 +1,6778 @@
+/**********************************************************
+ * Copyright 1998-2013 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file svga_tgsi_vgpu10.c
+ *
+ * TGSI -> VGPU10 shader translation.
+ *
+ * \author Mingcheng Chen
+ * \author Brian Paul
+ */
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_defines.h"
+#include "tgsi/tgsi_build.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_two_side.h"
+#include "tgsi/tgsi_aa_point.h"
+#include "tgsi/tgsi_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_bitmask.h"
+#include "util/u_debug.h"
+#include "util/u_pstipple.h"
+
+#include "svga_context.h"
+#include "svga_debug.h"
+#include "svga_link.h"
+#include "svga_shader.h"
+#include "svga_tgsi.h"
+
+#include "VGPU10ShaderTokens.h"
+
+
+#define INVALID_INDEX 99999
+#define MAX_INTERNAL_TEMPS 3
+#define MAX_SYSTEM_VALUES 4
+#define MAX_IMMEDIATE_COUNT \
+        (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
+#define MAX_TEMP_ARRAYS 64  /* Enough? */
+
+
+/**
+ * Clipping is complicated.  There are four different cases which we
+ * handle during VS/GS shader translation:
+ */
+enum clipping_mode
+{
+   CLIP_NONE,     /**< No clipping enabled */
+   CLIP_LEGACY,   /**< The shader has no clipping declarations or code but
+                   * one or more user-defined clip planes are enabled.  We
+                   * generate extra code to emit clip distances.
+                   */
+   CLIP_DISTANCE, /**< The shader already declares clip distance output
+                   * registers and has code to write to them.
+                   */
+   CLIP_VERTEX    /**< The shader declares a clip vertex output register and
+                   * has code that writes to the register.  We convert the
+                   * clipvertex position into one or more clip distances.
+                   */
+};
+
+
+struct svga_shader_emitter_v10
+{
+   /* The token output buffer: size in bytes, start, and write cursor */
+   unsigned size;
+   char *buf;
+   char *ptr;
+
+   /* Information about the shader and state (does not change) */
+   struct svga_compile_key key;
+   struct tgsi_shader_info info;
+   unsigned unit;
+
+   unsigned inst_start_token;
+   boolean discard_instruction; /**< throw away current instruction? */
+
+   union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
+   unsigned num_immediates;      /**< Number of immediates emitted */
+   unsigned common_immediate_pos[8];  /**< literals for common immediates */
+   unsigned num_common_immediates;
+   boolean immediates_emitted;
+
+   unsigned num_outputs;      /**< include any extra outputs */
+                              /**  The first extra output is reserved for
+                               *   non-adjusted vertex position for
+                               *   stream output purpose
+                               */
+
+   /* Temporary Registers */
+   unsigned num_shader_temps; /**< num of temps used by original shader */
+   unsigned internal_temp_count;  /**< currently allocated internal temps */
+   struct {
+      unsigned start, size;
+   } temp_arrays[MAX_TEMP_ARRAYS];
+   unsigned num_temp_arrays;
+
+   /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
+   struct {
+      unsigned arrayId, index;
+   } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */
+
+   /** Number of constants used by original shader for each constant buffer.
+    * The size should probably always match with that of svga_state.constbufs.
+    */
+   unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];
+
+   /* Samplers */
+   unsigned num_samplers;
+
+   /* Address regs (really implemented with temps) */
+   unsigned num_address_regs;
+   unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];
+
+   /* Output register usage masks */
+   ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];
+
+   /* To map TGSI system value index to VGPU shader input indexes */
+   ubyte system_value_indexes[MAX_SYSTEM_VALUES];
+
+   struct {
+      /* vertex position scale/translation */
+      unsigned out_index;  /**< the real position output reg */
+      unsigned tmp_index;  /**< the fake/temp position output reg */
+      unsigned so_index;   /**< the non-adjusted position output reg */
+      unsigned prescale_scale_index, prescale_trans_index;
+      boolean  need_prescale;
+   } vposition;
+
+   /* For vertex shaders only */
+   struct {
+      /* viewport constant */
+      unsigned viewport_index;
+
+      /* temp index of adjusted vertex attributes */
+      unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
+   } vs;
+
+   /* For fragment shaders only */
+   struct {
+      /* alpha test */
+      unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
+      unsigned color_tmp_index;  /**< fake/temp color output reg */
+      unsigned alpha_ref_index;  /**< immediate constant for alpha ref */
+
+      /* front-face */
+      unsigned face_input_index; /**< real fragment shader face reg (bool) */
+      unsigned face_tmp_index;   /**< temp face reg converted to -1 / +1 */
+
+      unsigned pstipple_sampler_unit;
+
+      unsigned fragcoord_input_index;  /**< real fragment position input reg */
+      unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */
+   } fs;
+
+   /* For geometry shaders only */
+   struct {
+      VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */
+      VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
+      unsigned input_size;       /**< size of input arrays */
+      unsigned prim_id_index;    /**< primitive id register index */
+      unsigned max_out_vertices; /**< maximum number of output vertices */
+   } gs;
+
+   /* For vertex or geometry shaders */
+   enum clipping_mode clip_mode;
+   unsigned clip_dist_out_index; /**< clip distance output register index */
+   unsigned clip_dist_tmp_index; /**< clip distance temporary register */
+   unsigned clip_dist_so_index;  /**< clip distance shadow copy */
+
+   /** Indexes of the clip vertex output register and its temporary */
+   unsigned clip_vertex_out_index; /**< clip vertex output register index */
+   unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */
+
+   /* user clip plane constant slot indexes */
+   unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
+
+   boolean uses_flat_interp;
+
+   /* For all shaders: const reg index for RECT coord scaling */
+   unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];
+
+   /* For all shaders: const reg index for texture buffer size */
+   unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];
+
+   /* VS/GS/FS Linkage info */
+   struct shader_linkage linkage;
+
+   bool register_overflow;  /**< Set if we exceed a VGPU10 register limit */
+};
+
+
+static boolean
+emit_post_helpers(struct svga_shader_emitter_v10 *emit);
+
+static boolean
+emit_vertex(struct svga_shader_emitter_v10 *emit,
+            const struct tgsi_full_instruction *inst);
+/* Static fallback output buffer: expand() points the emitter here on failure */
+static char err_buf[128];
+/** Double the output buffer; on failure switch emit to err_buf, return FALSE. */
+static boolean
+expand(struct svga_shader_emitter_v10 *emit)
+{
+   char *new_buf;
+   unsigned newsize = emit->size * 2;
+
+   if (emit->buf != err_buf)
+      new_buf = REALLOC(emit->buf, emit->size, newsize);
+   else
+      new_buf = NULL;   /* already on the static fallback; never REALLOC it */
+   /* NOTE(review): if REALLOC keeps the old block on failure, it leaks below */
+   if (new_buf == NULL) {
+      emit->ptr = err_buf;   /* restart writing at the start of err_buf */
+      emit->buf = err_buf;
+      emit->size = sizeof(err_buf);
+      return FALSE;
+   }
+
+   emit->size = newsize;
+   emit->ptr = new_buf + (emit->ptr - emit->buf);   /* same offset, new base */
+   emit->buf = new_buf;
+   return TRUE;
+}
+
+/**
+ * Create and initialize a new svga_shader_emitter_v10; NULL if allocation fails.
+ */
+static struct svga_shader_emitter_v10 *
+alloc_emitter(void)
+{
+   struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));
+
+   if (!emit)
+      return NULL;
+
+   /* expand() doubles size before allocating: the first buffer is 1024 bytes */
+   emit->size = 512;
+   if (!expand(emit)) {
+      FREE(emit);   /* emit->buf is err_buf here, so only the struct is freed */
+      return NULL;
+   }
+   return emit;
+}
+
+/** Free an emitter object; emit->buf may be NULL or the static err_buf. */
+static void
+free_emitter(struct svga_shader_emitter_v10 *emit)
+{
+   assert(emit);
+   /* buf is NULL if translation succeeded; err_buf is static, never heap */
+   if (emit->buf != err_buf)
+      FREE(emit->buf);
+   FREE(emit);
+}
+/** Ensure room for nr_dwords more dwords; returns FALSE if expand() fails. */
+static INLINE boolean
+reserve(struct svga_shader_emitter_v10 *emit,
+        unsigned nr_dwords)
+{
+   while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
+      if (!expand(emit))
+         return FALSE;
+   }
+   /* note: the >= above grows the buffer even when the data would fit exactly */
+   return TRUE;
+}
+/** Append one dword to the output buffer; FALSE (nothing written) if no room. */
+static boolean
+emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
+{
+   if (!reserve(emit, 1))
+      return FALSE;
+
+   *(uint32 *)emit->ptr = dword;
+   emit->ptr += sizeof dword;   /* advance the write pointer past the token */
+   return TRUE;
+}
+/** Append nr dwords to the output buffer; FALSE (nothing written) if no room. */
+static boolean
+emit_dwords(struct svga_shader_emitter_v10 *emit,
+            const uint32 *dwords,
+            unsigned nr)
+{
+   if (!reserve(emit, nr))
+      return FALSE;
+
+   memcpy(emit->ptr, dwords, nr * sizeof *dwords);
+   emit->ptr += nr * sizeof *dwords;   /* advance past the copied tokens */
+   return TRUE;
+}
+
+/** Return how many unsigned-sized tokens have been written to emit->buf */
+static unsigned
+emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
+{
+   return (emit->ptr - emit->buf) / sizeof(unsigned);
+}
+
+
+/**
+ * Check for register overflow.  If we overflow we'll set the
+ * emit->register_overflow flag (and log the first overflow).  Can be
+ * called for register declarations or for src/dst instruction operands.
+ * \param operandType  register type.  One of VGPU10_OPERAND_TYPE_x
+ *                     or VGPU10_OPCODE_DCL_x
+ * \param index  the register index
+ */
+static void
+check_register_index(struct svga_shader_emitter_v10 *emit,
+                     unsigned operandType, unsigned index)
+{
+   bool overflow_before = emit->register_overflow;   /* to log only once */
+
+   switch (operandType) {
+   case VGPU10_OPERAND_TYPE_TEMP:
+   case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
+   case VGPU10_OPCODE_DCL_TEMPS:
+      if (index >= VGPU10_MAX_TEMPS) {
+         emit->register_overflow = TRUE;
+      }
+      break;
+   case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
+   case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
+      if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
+         emit->register_overflow = TRUE;
+      }
+      break;
+   case VGPU10_OPERAND_TYPE_INPUT:
+   case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
+   case VGPU10_OPCODE_DCL_INPUT:
+   case VGPU10_OPCODE_DCL_INPUT_SGV:
+   case VGPU10_OPCODE_DCL_INPUT_SIV:
+   case VGPU10_OPCODE_DCL_INPUT_PS:
+   case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
+   case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
+      if ((emit->unit == PIPE_SHADER_VERTEX &&   /* limit depends on stage */
+           index >= VGPU10_MAX_VS_INPUTS) ||
+          (emit->unit == PIPE_SHADER_GEOMETRY &&
+           index >= VGPU10_MAX_GS_INPUTS) ||
+          (emit->unit == PIPE_SHADER_FRAGMENT &&
+           index >= VGPU10_MAX_FS_INPUTS)) {
+         emit->register_overflow = TRUE;
+      }
+      break;
+   case VGPU10_OPERAND_TYPE_OUTPUT:
+   case VGPU10_OPCODE_DCL_OUTPUT:
+   case VGPU10_OPCODE_DCL_OUTPUT_SGV:
+   case VGPU10_OPCODE_DCL_OUTPUT_SIV:
+      if ((emit->unit == PIPE_SHADER_VERTEX &&   /* limit depends on stage */
+           index >= VGPU10_MAX_VS_OUTPUTS) ||
+          (emit->unit == PIPE_SHADER_GEOMETRY &&
+           index >= VGPU10_MAX_GS_OUTPUTS) ||
+          (emit->unit == PIPE_SHADER_FRAGMENT &&
+           index >= VGPU10_MAX_FS_OUTPUTS)) {
+         emit->register_overflow = TRUE;
+      }
+      break;
+   case VGPU10_OPERAND_TYPE_SAMPLER:
+   case VGPU10_OPCODE_DCL_SAMPLER:
+      if (index >= VGPU10_MAX_SAMPLERS) {
+         emit->register_overflow = TRUE;
+      }
+      break;
+   case VGPU10_OPERAND_TYPE_RESOURCE:
+   case VGPU10_OPCODE_DCL_RESOURCE:
+      if (index >= VGPU10_MAX_RESOURCES) {
+         emit->register_overflow = TRUE;
+      }
+      break;
+   case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
+      if (index >= MAX_IMMEDIATE_COUNT) {
+         emit->register_overflow = TRUE;
+      }
+      break;
+   default:
+      assert(0);   /* unhandled register type: no limit is checked for it */
+      ; /* nothing */
+   }
+
+   if (emit->register_overflow && !overflow_before) {   /* flag just got set */
+      debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
+                   operandType, index);
+   }
+}
+
+
+/**
+ * Set emit->clip_mode.  Precedence: clip distances, clipvertex, user planes.
+ */
+static void
+determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
+{
+   if (emit->info.num_written_clipdistance > 0) {
+      emit->clip_mode = CLIP_DISTANCE;
+   }
+   else if (emit->info.writes_clipvertex) {
+      emit->clip_mode = CLIP_VERTEX;
+   }
+   else if (emit->key.clip_plane_enable) {   /* any plane bit set in the key */
+      emit->clip_mode = CLIP_LEGACY;
+   }
+   else {
+      emit->clip_mode = CLIP_NONE;
+   }
+}
+
+
+/**
+ * For clip distance register declarations and clip distance register
+ * writes we need to mask the declaration usage or instruction writemask
+ * (respectively) against the set of the really-enabled clipping planes.
+ *
+ * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
+ * has a VS that writes to all 8 clip distance registers, but the plane enable
+ * flags are a subset of that.
+ *
+ * This function is used to apply the plane enable flags to the register
+ * declaration or instruction writemask.
+ * \return  writemask with the bits of disabled clip planes cleared
+ * \param writemask  the declaration usage mask or instruction writemask
+ * \param clip_reg_index  which clip plane register is being declared/written.
+ *                        The legal values are 0 and 1 (four clip planes per
+ *                        register, for a total of 8 clip planes)
+ */
+static unsigned
+apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
+                      unsigned writemask, unsigned clip_reg_index)
+{
+   unsigned shift;
+
+   assert(clip_reg_index < 2);
+
+   /* four clip planes per clip register: */
+   shift = clip_reg_index * 4;
+   writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);
+
+   return writemask;
+}
+
+
+/**
+ * Translate a gallium PIPE_SHADER_x type into a VGPU10_PROGRAM_TYPE.
+ */
+static VGPU10_PROGRAM_TYPE
+translate_shader_type(unsigned type)
+{
+   switch (type) {
+   case PIPE_SHADER_VERTEX:
+      return VGPU10_VERTEX_SHADER;
+   case PIPE_SHADER_GEOMETRY:
+      return VGPU10_GEOMETRY_SHADER;
+   case PIPE_SHADER_FRAGMENT:
+      return VGPU10_PIXEL_SHADER;
+   default:
+      assert(!"Unexpected shader type");
+      return VGPU10_VERTEX_SHADER;   /* fallback when asserts are compiled out */
+   }
+}
+
+
+/**
+ * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x (NOP if not handled here).
+ * Note: we only need to translate the opcodes for "simple" instructions,
+ * as seen below.  All other opcodes are handled/translated specially.
+ */
+static VGPU10_OPCODE_TYPE
+translate_opcode(unsigned opcode)
+{
+   switch (opcode) {
+   case TGSI_OPCODE_MOV:
+      return VGPU10_OPCODE_MOV;
+   case TGSI_OPCODE_MUL:
+      return VGPU10_OPCODE_MUL;
+   case TGSI_OPCODE_ADD:
+      return VGPU10_OPCODE_ADD;
+   case TGSI_OPCODE_DP3:
+      return VGPU10_OPCODE_DP3;
+   case TGSI_OPCODE_DP4:
+      return VGPU10_OPCODE_DP4;
+   case TGSI_OPCODE_MIN:
+      return VGPU10_OPCODE_MIN;
+   case TGSI_OPCODE_MAX:
+      return VGPU10_OPCODE_MAX;
+   case TGSI_OPCODE_MAD:
+      return VGPU10_OPCODE_MAD;
+   case TGSI_OPCODE_SQRT:
+      return VGPU10_OPCODE_SQRT;
+   case TGSI_OPCODE_FRC:
+      return VGPU10_OPCODE_FRC;
+   case TGSI_OPCODE_FLR:
+      return VGPU10_OPCODE_ROUND_NI;
+   case TGSI_OPCODE_FSEQ:
+      return VGPU10_OPCODE_EQ;
+   case TGSI_OPCODE_FSGE:
+      return VGPU10_OPCODE_GE;
+   case TGSI_OPCODE_FSNE:
+      return VGPU10_OPCODE_NE;
+   case TGSI_OPCODE_DDX:
+      return VGPU10_OPCODE_DERIV_RTX;
+   case TGSI_OPCODE_DDY:
+      return VGPU10_OPCODE_DERIV_RTY;
+   case TGSI_OPCODE_RET:
+      return VGPU10_OPCODE_RET;
+   case TGSI_OPCODE_DIV:
+      return VGPU10_OPCODE_DIV;
+   case TGSI_OPCODE_IDIV:
+      return VGPU10_OPCODE_IDIV;
+   case TGSI_OPCODE_DP2:
+      return VGPU10_OPCODE_DP2;
+   case TGSI_OPCODE_BRK:
+      return VGPU10_OPCODE_BREAK;
+   case TGSI_OPCODE_IF:
+      return VGPU10_OPCODE_IF;
+   case TGSI_OPCODE_ELSE:
+      return VGPU10_OPCODE_ELSE;
+   case TGSI_OPCODE_ENDIF:
+      return VGPU10_OPCODE_ENDIF;
+   case TGSI_OPCODE_CEIL:
+      return VGPU10_OPCODE_ROUND_PI;
+   case TGSI_OPCODE_I2F:
+      return VGPU10_OPCODE_ITOF;
+   case TGSI_OPCODE_NOT:
+      return VGPU10_OPCODE_NOT;
+   case TGSI_OPCODE_TRUNC:
+      return VGPU10_OPCODE_ROUND_Z;
+   case TGSI_OPCODE_SHL:
+      return VGPU10_OPCODE_ISHL;
+   case TGSI_OPCODE_AND:
+      return VGPU10_OPCODE_AND;
+   case TGSI_OPCODE_OR:
+      return VGPU10_OPCODE_OR;
+   case TGSI_OPCODE_XOR:
+      return VGPU10_OPCODE_XOR;
+   case TGSI_OPCODE_CONT:
+      return VGPU10_OPCODE_CONTINUE;
+   case TGSI_OPCODE_EMIT:
+      return VGPU10_OPCODE_EMIT;
+   case TGSI_OPCODE_ENDPRIM:
+      return VGPU10_OPCODE_CUT;
+   case TGSI_OPCODE_BGNLOOP:
+      return VGPU10_OPCODE_LOOP;
+   case TGSI_OPCODE_ENDLOOP:
+      return VGPU10_OPCODE_ENDLOOP;
+   case TGSI_OPCODE_ENDSUB:
+      return VGPU10_OPCODE_RET;   /* same VGPU10 opcode as TGSI RET and END */
+   case TGSI_OPCODE_NOP:
+      return VGPU10_OPCODE_NOP;
+   case TGSI_OPCODE_BREAKC:
+      return VGPU10_OPCODE_BREAKC;
+   case TGSI_OPCODE_END:
+      return VGPU10_OPCODE_RET;   /* same VGPU10 opcode as TGSI RET and ENDSUB */
+   case TGSI_OPCODE_F2I:
+      return VGPU10_OPCODE_FTOI;
+   case TGSI_OPCODE_IMAX:
+      return VGPU10_OPCODE_IMAX;
+   case TGSI_OPCODE_IMIN:
+      return VGPU10_OPCODE_IMIN;
+   case TGSI_OPCODE_UDIV:
+   case TGSI_OPCODE_UMOD:
+   case TGSI_OPCODE_MOD:
+      return VGPU10_OPCODE_UDIV;   /* UDIV, UMOD and MOD share this one opcode */
+   case TGSI_OPCODE_IMUL_HI:
+      return VGPU10_OPCODE_IMUL;
+   case TGSI_OPCODE_INEG:
+      return VGPU10_OPCODE_INEG;
+   case TGSI_OPCODE_ISHR:
+      return VGPU10_OPCODE_ISHR;
+   case TGSI_OPCODE_ISGE:
+      return VGPU10_OPCODE_IGE;
+   case TGSI_OPCODE_ISLT:
+      return VGPU10_OPCODE_ILT;
+   case TGSI_OPCODE_F2U:
+      return VGPU10_OPCODE_FTOU;
+   case TGSI_OPCODE_UADD:
+      return VGPU10_OPCODE_IADD;
+   case TGSI_OPCODE_U2F:
+      return VGPU10_OPCODE_UTOF;
+   case TGSI_OPCODE_UCMP:
+      return VGPU10_OPCODE_MOVC;
+   case TGSI_OPCODE_UMAD:
+      return VGPU10_OPCODE_UMAD;
+   case TGSI_OPCODE_UMAX:
+      return VGPU10_OPCODE_UMAX;
+   case TGSI_OPCODE_UMIN:
+      return VGPU10_OPCODE_UMIN;
+   case TGSI_OPCODE_UMUL:
+   case TGSI_OPCODE_UMUL_HI:
+      return VGPU10_OPCODE_UMUL;   /* UMUL and UMUL_HI share this one opcode */
+   case TGSI_OPCODE_USEQ:
+      return VGPU10_OPCODE_IEQ;
+   case TGSI_OPCODE_USGE:
+      return VGPU10_OPCODE_UGE;
+   case TGSI_OPCODE_USHR:
+      return VGPU10_OPCODE_USHR;
+   case TGSI_OPCODE_USLT:
+      return VGPU10_OPCODE_ULT;
+   case TGSI_OPCODE_USNE:
+      return VGPU10_OPCODE_INE;
+   case TGSI_OPCODE_SWITCH:
+      return VGPU10_OPCODE_SWITCH;
+   case TGSI_OPCODE_CASE:
+      return VGPU10_OPCODE_CASE;
+   case TGSI_OPCODE_DEFAULT:
+      return VGPU10_OPCODE_DEFAULT;
+   case TGSI_OPCODE_ENDSWITCH:
+      return VGPU10_OPCODE_ENDSWITCH;
+   case TGSI_OPCODE_FSLT:
+      return VGPU10_OPCODE_LT;
+   case TGSI_OPCODE_ROUND:
+      return VGPU10_OPCODE_ROUND_NE;
+   default:
+      assert(!"Unexpected TGSI opcode in translate_opcode()");
+      return VGPU10_OPCODE_NOP;   /* fallback when asserts are compiled out */
+   }
+}
+
+
+/**
+ * Translate a TGSI register file type into a VGPU10 operand type.
+ * \param array  is the TGSI_FILE_TEMPORARY register an array?
+ */
+static VGPU10_OPERAND_TYPE
+translate_register_file(enum tgsi_file_type file, boolean array)
+{
+   switch (file) {
+   case TGSI_FILE_CONSTANT:
+      return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
+   case TGSI_FILE_INPUT:
+      return VGPU10_OPERAND_TYPE_INPUT;
+   case TGSI_FILE_OUTPUT:
+      return VGPU10_OPERAND_TYPE_OUTPUT;
+   case TGSI_FILE_TEMPORARY:
+      return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
+                   : VGPU10_OPERAND_TYPE_TEMP;
+   case TGSI_FILE_IMMEDIATE:
+      /* all immediates are currently 32-bit values, so
+       * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time.
+       */
+      return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
+   case TGSI_FILE_SAMPLER:
+      return VGPU10_OPERAND_TYPE_SAMPLER;
+   case TGSI_FILE_SYSTEM_VALUE:
+      return VGPU10_OPERAND_TYPE_INPUT;   /* same operand type as FILE_INPUT */
+
+   /* XXX TODO more cases to finish */
+
+   default:
+      assert(!"Bad tgsi register file!");
+      return VGPU10_OPERAND_TYPE_NULL;   /* fallback when asserts are off */
+   }
+}
+
+
+/**
+ * Emit a null dst register: one operand token, with no index tokens after it.
+ */
+static void
+emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OperandToken0 operand;
+
+   operand.value = 0;   /* clear every field before setting the ones we need */
+   operand.operandType = VGPU10_OPERAND_TYPE_NULL;
+   operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
+
+   emit_dword(emit, operand.value);
+}
+
+
+/**
+ * If the given register is a temporary, return the array ID recorded for it
+ * in emit->temp_map[index].  Else return zero.
+ */
+static unsigned
+get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
+                  unsigned file, unsigned index)
+{
+   if (file == TGSI_FILE_TEMPORARY) {
+      return emit->temp_map[index].arrayId;
+   }
+   else {
+      return 0;   /* callers test "> 0" to detect an array temporary */
+   }
+}
+
+
+/**
+ * If the given register is a temporary, convert the index from a TGSI
+ * TEMPORARY index to a VGPU10 temp index.  Other files pass through unchanged.
+ */
+static unsigned
+remap_temp_index(const struct svga_shader_emitter_v10 *emit,
+                 unsigned file, unsigned index)
+{
+   if (file == TGSI_FILE_TEMPORARY) {
+      return emit->temp_map[index].index;   /* lookup in the temp remap table */
+   }
+   else {
+      return index;
+   }
+}
+
+
+/**
+ * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
+ * Note: the operandType field must already be initialized.  Returns the copy.
+ */
+static VGPU10OperandToken0
+setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
+                        VGPU10OperandToken0 operand0,
+                        unsigned file,
+                        boolean indirect, boolean index2D,
+                        unsigned tempArrayID)
+{
+   unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_0D;
+
+   /*
+    * Compute index dimensions
+    */
+   if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
+       operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
+      /* in-line immediates and the primitive ID take no register index */
+      indexDim = VGPU10_OPERAND_INDEX_0D;
+      assert(operand0.selectionMode == 0);
+   }
+   else {
+      if (index2D ||
+          tempArrayID > 0 ||
+          operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
+         indexDim = VGPU10_OPERAND_INDEX_2D;
+      }
+      else {
+         indexDim = VGPU10_OPERAND_INDEX_1D;
+      }
+   }
+
+   /*
+    * Compute index representations (immediate, relative, etc).
+    */
+   if (tempArrayID > 0) {
+      assert(file == TGSI_FILE_TEMPORARY);
+      /* First index is the array ID, second index is the array element */
+      index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+      if (indirect) {
+         index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
+      }
+      else {
+         index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+      }
+   }
+   else if (indirect) {
+      if (file == TGSI_FILE_CONSTANT) {
+         /* index[0] indicates which constant buffer while index[1] indicates
+          * the position in the constant buffer.
+          */
+         index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+         index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
+      }
+      else {
+         /* All other register files are 1-dimensional; index1Rep keeps its
+          * initial value */
+         index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
+      }
+   }
+   else {
+      index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+      index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   }
+
+   operand0.indexDimension = indexDim;
+   operand0.index0Representation = index0Rep;
+   operand0.index1Representation = index1Rep;
+
+   return operand0;
+}
+
+
+/**
+ * Emit the operand for expressing an address register for indirect indexing.
+ * Note that the address register is really just a temp register.
+ * \param addr_reg_index  which address register to use
+ */
+static void
+emit_indirect_register(struct svga_shader_emitter_v10 *emit,
+                       unsigned addr_reg_index)
+{
+   unsigned tmp_reg_index;
+   VGPU10OperandToken0 operand0;
+
+   assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
+
+   tmp_reg_index = emit->address_reg_index[addr_reg_index];
+
+   /* operand0 is a simple temporary register, selecting one component */
+   operand0.value = 0;
+   operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
+   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
+   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
+   operand0.swizzleX = 0;
+   operand0.swizzleY = 1;
+   operand0.swizzleZ = 2;
+   operand0.swizzleW = 3;
+
+   emit_dword(emit, operand0.value);   /* operand token, then its temp index */
+   emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
+}
+
+
+/**
+ * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
+ * \param emit  the emitter context
+ * \param reg  the TGSI dst register to translate
+ */
+static void
+emit_dst_register(struct svga_shader_emitter_v10 *emit,
+                  const struct tgsi_full_dst_register *reg)
+{
+   unsigned file = reg->Register.File;
+   unsigned index = reg->Register.Index;
+   /* NOTE(review): read for any file, but only meaningful for FILE_OUTPUT */
+   const unsigned sem_name = emit->info.output_semantic_name[index];
+   const unsigned sem_index = emit->info.output_semantic_index[index];
+   unsigned writemask = reg->Register.WriteMask;
+   const unsigned indirect = reg->Register.Indirect;
+   const unsigned tempArrayId = get_temp_array_id(emit, file, index);
+   const unsigned index2d = reg->Register.Dimension;
+   VGPU10OperandToken0 operand0;
+
+   if (file == TGSI_FILE_OUTPUT) {
+      if (emit->unit == PIPE_SHADER_VERTEX ||
+          emit->unit == PIPE_SHADER_GEOMETRY) {
+         if (index == emit->vposition.out_index &&
+             emit->vposition.tmp_index != INVALID_INDEX) {
+            /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
+             * vertex position result in a temporary so that we can modify
+             * it in the post_helper() code.
+             */
+            file = TGSI_FILE_TEMPORARY;
+            index = emit->vposition.tmp_index;
+         }
+         else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
+                  emit->clip_dist_tmp_index != INVALID_INDEX) {
+            /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
+             * We store the clip distance in a temporary first, then
+             * we'll copy it to the shadow copy and to CLIPDIST with the
+             * enabled planes mask in emit_clip_distance_instructions().
+             */
+            file = TGSI_FILE_TEMPORARY;
+            index = emit->clip_dist_tmp_index + sem_index;
+         }
+         else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
+                  emit->clip_vertex_tmp_index != INVALID_INDEX) {
+            /* replace the CLIPVERTEX output register with a temporary */
+            assert(emit->clip_mode == CLIP_VERTEX);
+            assert(sem_index == 0);
+            file = TGSI_FILE_TEMPORARY;
+            index = emit->clip_vertex_tmp_index;
+         }
+      }
+      else if (emit->unit == PIPE_SHADER_FRAGMENT) {
+         if (sem_name == TGSI_SEMANTIC_POSITION) {
+            /* Fragment depth output register: a single token, no index */
+            operand0.value = 0;
+            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
+            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
+            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
+            emit_dword(emit, operand0.value);
+            return;
+         }
+         else if (index == emit->fs.color_out_index[0] &&
+             emit->fs.color_tmp_index != INVALID_INDEX) {
+            /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
+             * fragment color result in a temporary so that we can read
+             * it in the post_helper() code.
+             */
+            file = TGSI_FILE_TEMPORARY;
+            index = emit->fs.color_tmp_index;
+         }
+         else {
+            /* Typically, for fragment shaders, the output register index
+             * matches the color semantic index.  But not when we write to
+             * the fragment depth register.  In that case, OUT[0] will be
+             * fragdepth and OUT[1] will be the 0th color output.  We need
+             * to use the semantic index for color outputs.
+             */
+            assert(sem_name == TGSI_SEMANTIC_COLOR);
+            index = emit->info.output_semantic_index[index];
+         }
+      }
+   }
+
+   /* init operand tokens to all zero */
+   operand0.value = 0;
+
+   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+
+   /* the operand has a writemask */
+   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
+
+   /* Which of the four dest components to write to. Note that we can use a
+    * simple assignment here since TGSI writemasks match VGPU10 writemasks.
+    */
+   STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
+   operand0.mask = writemask;
+
+   /* translate TGSI register file type to VGPU10 operand type */
+   operand0.operandType = translate_register_file(file, tempArrayId > 0);
+
+   check_register_index(emit, operand0.operandType, index);
+
+   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
+                                      index2d, tempArrayId);
+
+   /* Emit tokens: operand, optional array ID, register index, address reg */
+   emit_dword(emit, operand0.value);
+   if (tempArrayId > 0) {
+      emit_dword(emit, tempArrayId);
+   }
+
+   emit_dword(emit, remap_temp_index(emit, file, index));
+
+   if (indirect) {
+      emit_indirect_register(emit, reg->Indirect.Index);
+   }
+}
+
+
+/**
+ * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
+ */
+static void
+emit_src_register(struct svga_shader_emitter_v10 *emit,
+                  const struct tgsi_full_src_register *reg)
+{
+   unsigned file = reg->Register.File;
+   unsigned index = reg->Register.Index;
+   const unsigned indirect = reg->Register.Indirect;
+   const unsigned tempArrayId = get_temp_array_id(emit, file, index);
+   const unsigned index2d = reg->Register.Dimension;
+   const unsigned swizzleX = reg->Register.SwizzleX;
+   const unsigned swizzleY = reg->Register.SwizzleY;
+   const unsigned swizzleZ = reg->Register.SwizzleZ;
+   const unsigned swizzleW = reg->Register.SwizzleW;
+   const unsigned absolute = reg->Register.Absolute;
+   const unsigned negate = reg->Register.Negate;
+   bool is_prim_id = FALSE;
+
+   VGPU10OperandToken0 operand0;
+   VGPU10OperandToken1 operand1;
+
+   if (emit->unit == PIPE_SHADER_FRAGMENT &&
+      file == TGSI_FILE_INPUT) {
+      if (index == emit->fs.face_input_index) {
+         /* Replace INPUT[FACE] with TEMP[FACE] */
+         file = TGSI_FILE_TEMPORARY;
+         index = emit->fs.face_tmp_index;
+      }
+      else if (index == emit->fs.fragcoord_input_index) {
+         /* Replace INPUT[POSITION] with TEMP[POSITION] */
+         file = TGSI_FILE_TEMPORARY;
+         index = emit->fs.fragcoord_tmp_index;
+      }
+      else {
+         /* We remap fragment shader inputs so that FS input indexes
+          * match up with VS/GS output indexes.
+          */
+         index = emit->linkage.input_map[index];
+      }
+   }
+   else if (emit->unit == PIPE_SHADER_GEOMETRY &&
+            file == TGSI_FILE_INPUT) {
+      is_prim_id = (index == emit->gs.prim_id_index);   /* test before remap */
+      index = emit->linkage.input_map[index];
+   }
+   else if (emit->unit == PIPE_SHADER_VERTEX) {
+      if (file == TGSI_FILE_INPUT) {
+         /* if input is adjusted... */
+         if ((emit->key.vs.adjust_attrib_w_1 |
+              emit->key.vs.adjust_attrib_itof |
+              emit->key.vs.adjust_attrib_utof |
+              emit->key.vs.attrib_is_bgra |
+              emit->key.vs.attrib_puint_to_snorm |
+              emit->key.vs.attrib_puint_to_uscaled |
+              emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
+            file = TGSI_FILE_TEMPORARY;   /* read the adjusted copy instead */
+            index = emit->vs.adjusted_input[index];
+         }
+      }
+      else if (file == TGSI_FILE_SYSTEM_VALUE) {
+         assert(index < Elements(emit->system_value_indexes));
+         index = emit->system_value_indexes[index];
+      }
+   }
+
+   operand0.value = operand1.value = 0;
+
+   if (is_prim_id) {
+      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
+      operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
+   }
+   else {
+      operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+      operand0.operandType = translate_register_file(file, tempArrayId > 0);
+   }
+
+   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
+                                      index2d, tempArrayId);
+
+   if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
+       operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
+      /* there's no swizzle for in-line immediates */
+      if (swizzleX == swizzleY &&
+          swizzleX == swizzleZ &&
+          swizzleX == swizzleW) {
+         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
+      }
+      else {
+         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
+      }
+
+      operand0.swizzleX = swizzleX;
+      operand0.swizzleY = swizzleY;
+      operand0.swizzleZ = swizzleZ;
+      operand0.swizzleW = swizzleW;
+
+      if (absolute || negate) {
+         operand0.extended = 1;   /* a second (modifier) token will follow */
+         operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
+         if (absolute && !negate)
+            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
+         if (!absolute && negate)
+            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
+         if (absolute && negate)
+            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
+      }
+   }
+
+   /* Emit the operand tokens */
+   emit_dword(emit, operand0.value);
+   if (operand0.extended)
+      emit_dword(emit, operand1.value);
+
+   if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
+      /* Emit the four float/int in-line immediate values */
+      unsigned *c;
+      assert(index < Elements(emit->immediates));
+      assert(file == TGSI_FILE_IMMEDIATE);
+      assert(swizzleX < 4);
+      assert(swizzleY < 4);
+      assert(swizzleZ < 4);
+      assert(swizzleW < 4);
+      c = (unsigned *) emit->immediates[index];
+      emit_dword(emit, c[swizzleX]);   /* the swizzle is applied here */
+      emit_dword(emit, c[swizzleY]);
+      emit_dword(emit, c[swizzleZ]);
+      emit_dword(emit, c[swizzleW]);
+   }
+   else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
+      /* Emit the register index(es) */
+      if (index2d ||
+          operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
+         emit_dword(emit, reg->Dimension.Index);
+      }
+
+      if (tempArrayId > 0) {
+         emit_dword(emit, tempArrayId);
+      }
+
+      emit_dword(emit, remap_temp_index(emit, file, index));
+
+      if (indirect) {
+         emit_indirect_register(emit, reg->Indirect.Index);
+      }
+   }
+}
+
+
+/**
+ * Emit a resource operand (for use with a SAMPLE instruction).
+ */
+static void
+emit_resource_register(struct svga_shader_emitter_v10 *emit,
+                       unsigned resource_number)
+{
+   VGPU10OperandToken0 operand0;
+
+   check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);
+
+   /* init */
+   operand0.value = 0;
+
+   operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
+   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
+   operand0.swizzleX = VGPU10_COMPONENT_X;   /* identity XYZW swizzle */
+   operand0.swizzleY = VGPU10_COMPONENT_Y;
+   operand0.swizzleZ = VGPU10_COMPONENT_Z;
+   operand0.swizzleW = VGPU10_COMPONENT_W;
+
+   emit_dword(emit, operand0.value);   /* operand token, then resource index */
+   emit_dword(emit, resource_number);
+}
+
+
+/**
+ * Emit a sampler operand (for use with a SAMPLE instruction).
+ */
+static void
+emit_sampler_register(struct svga_shader_emitter_v10 *emit,
+                      unsigned sampler_number)
+{
+   VGPU10OperandToken0 operand0;
+
+   check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
+
+   /* init */
+   operand0.value = 0;
+
+   operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
+
+   emit_dword(emit, operand0.value);   /* operand token, then sampler index */
+   emit_dword(emit, sampler_number);
+}
+
+
+/**
+ * Emit an operand which reads the IS_FRONT_FACING register.
+ */
+static void
+emit_face_register(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OperandToken0 operand0;
+   unsigned index = emit->linkage.input_map[emit->fs.face_input_index];
+
+   /* init */
+   operand0.value = 0;
+
+   operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
+   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
+   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+
+   operand0.swizzleX = VGPU10_COMPONENT_X;   /* every swizzle slot selects X */
+   operand0.swizzleY = VGPU10_COMPONENT_X;
+   operand0.swizzleZ = VGPU10_COMPONENT_X;
+   operand0.swizzleW = VGPU10_COMPONENT_X;
+
+   emit_dword(emit, operand0.value);   /* operand token, then remapped index */
+   emit_dword(emit, index);
+}
+
+
+/**
+ * Emit the token for a VGPU10 opcode.
+ * \param saturate   clamp result to [0,1]?
+ */
+static void
+emit_opcode(struct svga_shader_emitter_v10 *emit,
+            unsigned vgpu10_opcode, boolean saturate)
+{
+   VGPU10OpcodeToken0 token0;
+
+   token0.value = 0;  /* init all fields to zero */
+   token0.opcodeType = vgpu10_opcode;
+   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
+   token0.saturate = saturate;
+
+   emit_dword(emit, token0.value);   /* a single token; operands follow later */
+}
+
+
+/**
+ * Emit the token for a VGPU10 resinfo instruction.
+ * \param modifier   return type modifier, _uint or _rcpFloat.
+ *                   TODO: We may want to remove this parameter if it will
+ *                   only ever be used as _uint.
+ */
+static void
+emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
+                    VGPU10_RESINFO_RETURN_TYPE modifier)
+{
+   VGPU10OpcodeToken0 token0;
+
+   token0.value = 0;  /* init all fields to zero */
+   token0.opcodeType = VGPU10_OPCODE_RESINFO;
+   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
+   token0.resinfoReturnType = modifier;
+
+   emit_dword(emit, token0.value);   /* a single token; operands follow later */
+}
+
+
+/**
+ * Emit opcode tokens for a texture sample instruction.  Texture instructions
+ * can be rather complicated (texel offsets, etc) so we have this specialized
+ * function.  A second (extended) token is emitted only for nonzero offsets.
+ */
+static void
+emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
+                   unsigned vgpu10_opcode, boolean saturate,
+                   const int offsets[3])
+{
+   VGPU10OpcodeToken0 token0;
+   VGPU10OpcodeToken1 token1;   /* only initialized when offsets are nonzero */
+
+   token0.value = 0;  /* init all fields to zero */
+   token0.opcodeType = vgpu10_opcode;
+   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
+   token0.saturate = saturate;
+
+   if (offsets[0] || offsets[1] || offsets[2]) {
+      assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
+      assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
+      assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
+      assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
+      assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
+      assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
+
+      token0.extended = 1;
+      token1.value = 0;
+      token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
+      token1.offsetU = offsets[0];
+      token1.offsetV = offsets[1];
+      token1.offsetW = offsets[2];
+   }
+
+   emit_dword(emit, token0.value);
+   if (token0.extended) {   /* token1 is valid exactly when this bit is set */
+      emit_dword(emit, token1.value);
+   }
+}
+
+
+/**
+ * Emit the opcode token for a DISCARD instruction.
+ * \param nonzero  if set, discard the fragment when the X component is
+ *                 not 0; otherwise discard it when the X component is 0.
+ */
+static void
+emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
+{
+   VGPU10OpcodeToken0 token;
+
+   token.value = 0;
+   if (nonzero) {
+      token.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
+   }
+   token.opcodeType = VGPU10_OPCODE_DISCARD;
+   emit_dword(emit, token.value);
+}
+
+
+/**
+ * Call before emitting a VGPU10 instruction; end_emit_instruction() ends it.
+ */
+static void
+begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
+{
+   /* A nonzero value means the previous instruction was never ended */
+   assert(emit->inst_start_token == 0);
+   /* Record the token index of this instruction's VGPU10OpcodeToken0.  An
+    * index is used, not a pointer, since the buffer may be realloc'ed.
+    */
+   emit->inst_start_token = emit_get_num_tokens(emit);
+}
+
+
+/**
+ * Must be called after the last token of a VGPU10 instruction is emitted.
+ * Patches the instruction's length into its opcode token, or drops the
+ * whole instruction if emit->discard_instruction is set.
+ */
+static void
+end_emit_instruction(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OpcodeToken0 *first = (VGPU10OpcodeToken0 *) emit->buf;
+
+   assert(emit->inst_start_token > 0);
+
+   if (!emit->discard_instruction) {
+      /* Length is the number of tokens emitted since the instruction's
+       * start; store it in the instruction's opcode token.
+       */
+      const unsigned inst_length =
+         emit_get_num_tokens(emit) - emit->inst_start_token;
+      assert(inst_length > 0);
+      first[emit->inst_start_token].instructionLength = inst_length;
+   }
+   else {
+      /* Rewind the write pointer to the start of this instruction so
+       * that everything emitted for it is thrown away.
+       */
+      emit->ptr = (char *) (first + emit->inst_start_token);
+   }
+
+   /* Reset per-instruction state (0 = no instruction in progress) */
+   emit->discard_instruction = FALSE;
+   emit->inst_start_token = 0;
+}
+
+
+/** Return the index of a free temporary register.  Internal temps are
+ * numbered after the shader's own temps; see free_temp_indexes().
+ */
+static unsigned
+get_temp_index(struct svga_shader_emitter_v10 *emit)
+{
+   assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
+   return emit->num_shader_temps + emit->internal_temp_count++;
+}
+
+
+/**
+ * Release all temporaries handed out by get_temp_index() in one step.
+ */
+static void
+free_temp_indexes(struct svga_shader_emitter_v10 *emit)
+{
+   emit->internal_temp_count = 0;  /* next get_temp_index() starts over */
+}
+
+
+/**
+ * Create a tgsi_full_src_register with identity swizzle, other fields zero.
+ */
+static struct tgsi_full_src_register
+make_src_reg(unsigned file, unsigned index)
+{
+   struct tgsi_full_src_register src;
+
+   memset(&src, 0, sizeof src);
+   src.Register.SwizzleW = TGSI_SWIZZLE_W;
+   src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
+   src.Register.SwizzleY = TGSI_SWIZZLE_Y;
+   src.Register.SwizzleX = TGSI_SWIZZLE_X;
+   src.Register.Index = index;
+   src.Register.File = file;
+   return src;
+}
+
+
+/**
+ * Create a tgsi_full_src_register (identity swizzle) for TEMP[index].
+ */
+static struct tgsi_full_src_register
+make_src_temp_reg(unsigned index)
+{
+   return make_src_reg(TGSI_FILE_TEMPORARY, index);
+}
+
+
+/**
+ * Create a tgsi_full_src_register (identity swizzle) for CONST[index].
+ */
+static struct tgsi_full_src_register
+make_src_const_reg(unsigned index)
+{
+   return make_src_reg(TGSI_FILE_CONSTANT, index);
+}
+
+
+/**
+ * Create a tgsi_full_src_register (identity swizzle) for IMM[index].
+ */
+static struct tgsi_full_src_register
+make_src_immediate_reg(unsigned index)
+{
+   return make_src_reg(TGSI_FILE_IMMEDIATE, index);
+}
+
+
+/**
+ * Create a tgsi_full_dst_register that writes all of XYZW; the rest is zero.
+ */
+static struct tgsi_full_dst_register
+make_dst_reg(unsigned file, unsigned index)
+{
+   struct tgsi_full_dst_register dst;
+
+   memset(&dst, 0, sizeof dst);
+   dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
+   dst.Register.Index = index;
+   dst.Register.File = file;
+   return dst;
+}
+
+
+/**
+ * Create a tgsi_full_dst_register (XYZW writemask) for TEMP[index].
+ */
+static struct tgsi_full_dst_register
+make_dst_temp_reg(unsigned index)
+{
+   return make_dst_reg(TGSI_FILE_TEMPORARY, index);
+}
+
+
+/**
+ * Create a tgsi_full_dst_register (XYZW writemask) for OUT[index].
+ */
+static struct tgsi_full_dst_register
+make_dst_output_reg(unsigned index)
+{
+   return make_dst_reg(TGSI_FILE_OUTPUT, index);
+}
+
+
+/**
+ * Return a copy of the src register with its Negate flag toggled.
+ */
+static struct tgsi_full_src_register
+negate_src(const struct tgsi_full_src_register *reg)
+{
+   struct tgsi_full_src_register flipped = *reg;
+   flipped.Register.Negate = flipped.Register.Negate ? 0 : 1;
+   return flipped;
+}
+
+/**
+ * Return a copy of the src register with its Absolute flag set.
+ */
+static struct tgsi_full_src_register
+absolute_src(const struct tgsi_full_src_register *reg)
+{
+   struct tgsi_full_src_register abs_reg = *reg;
+   abs_reg.Register.Absolute = 1;
+   return abs_reg;
+}
+
+
+/** Return the src register's swizzle for component 'term' (X, Y, Z or W) */
+static INLINE unsigned
+get_swizzle(const struct tgsi_full_src_register *reg, unsigned term)
+{
+   if (term == TGSI_SWIZZLE_X)
+      return reg->Register.SwizzleX;
+   if (term == TGSI_SWIZZLE_Y)
+      return reg->Register.SwizzleY;
+   if (term == TGSI_SWIZZLE_Z)
+      return reg->Register.SwizzleZ;
+   if (term == TGSI_SWIZZLE_W)
+      return reg->Register.SwizzleW;
+
+   /* Invalid term: flag the caller error, then fall back to X in case
+    * execution continues past the assert. */
+   assert(!"Bad swizzle");
+   return TGSI_SWIZZLE_X;
+}
+
+
+/**
+ * Return a copy of the src register with the given swizzle applied on
+ * top of the register's existing swizzle.
+ */
+static struct tgsi_full_src_register
+swizzle_src(const struct tgsi_full_src_register *reg,
+            unsigned swizzleX, unsigned swizzleY,
+            unsigned swizzleZ, unsigned swizzleW)
+{
+   struct tgsi_full_src_register result = *reg;
+   result.Register.SwizzleW = get_swizzle(reg, swizzleW);
+   result.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
+   result.Register.SwizzleY = get_swizzle(reg, swizzleY);
+   result.Register.SwizzleX = get_swizzle(reg, swizzleX);
+   return result;
+}
+
+
+/**
+ * Return a copy of the src register in which all four swizzle terms
+ * are the register's existing swizzle for component 'swizzle'.
+ */
+static struct tgsi_full_src_register
+scalar_src(const struct tgsi_full_src_register *reg, unsigned swizzle)
+{
+   const unsigned term = get_swizzle(reg, swizzle);
+   struct tgsi_full_src_register result = *reg;
+   result.Register.SwizzleX = term;
+   result.Register.SwizzleY = term;
+   result.Register.SwizzleZ = term;
+   result.Register.SwizzleW = term;
+   return result;
+}
+
+
+/**
+ * Return a copy of the dst register with its writemask replaced.
+ * \param mask  bitmask of TGSI_WRITEMASK_[XYZW]
+ */
+static struct tgsi_full_dst_register
+writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
+{
+   struct tgsi_full_dst_register dst = *reg;
+   dst.Register.WriteMask = mask;
+   return dst;
+}
+
+
+/**
+ * Return TRUE if all four swizzle terms match (XXXX, YYYY, ZZZZ or WWWW).
+ */
+static boolean
+same_swizzle_terms(const struct tgsi_full_src_register *reg)
+{
+   const unsigned x = reg->Register.SwizzleX;
+   return (x == reg->Register.SwizzleY && x == reg->Register.SwizzleZ &&
+           x == reg->Register.SwizzleW);
+}
+
+
+/**
+ * Return the position (0..3) of the first element of 'vec' whose Int
+ * member equals x.Int, or -1 if no element matches.
+ */
+static int
+find_imm_in_vec4(const union tgsi_immediate_data vec[4],
+                 union tgsi_immediate_data x)
+{
+   int pos = 0;
+
+   while (pos < 4 && vec[pos].Int != x.Int)
+      pos++;
+   return pos < 4 ? pos : -1;
+}
+
+
+/**
+ * Helper used by make_immediate_reg(), make_immediate_reg_4().
+ * Return the index of the first immediate vec4, at or after startIndex,
+ * that has x in any component.  Asserts and returns -1 if there is none.
+ */
+static int
+find_immediate(struct svga_shader_emitter_v10 *emit,
+               union tgsi_immediate_data x, unsigned startIndex)
+{
+   unsigned pos = startIndex;
+
+   assert(emit->immediates_emitted);
+
+   /* Scan each vec4 from startIndex up to num_immediates for x */
+   while (pos < emit->num_immediates) {
+      if (find_imm_in_vec4(emit->immediates[pos], x) >= 0) {
+         return pos;
+      }
+      pos++;
+   }
+
+   /* Should never try to use an immediate value that wasn't pre-declared */
+   assert(!"find_immediate() failed!");
+   return -1;
+}
+
+
+/**
+ * Return a tgsi_full_src_register for an immediate/literal
+ * union tgsi_immediate_data[4] value.
+ * Note: the values must have been previously declared/allocated in
+ * emit_pre_helpers().  And, all of x,y,z,w must be located in the same
+ * vec4 immediate.  If no such immediate is found, IMM[0].xxxx is returned.
+ */
+static struct tgsi_full_src_register
+make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
+                     const union tgsi_immediate_data imm[4])
+{
+   struct tgsi_full_src_register reg;
+   unsigned i;
+
+   for (i = 0; i < emit->num_common_immediates; i++) {
+      /* search for first component value */
+      int immpos = find_immediate(emit, imm[0], i);
+      int x, y, z, w;
+
+      assert(immpos >= 0);
+      if (immpos < 0)
+         break;  /* asserts compiled out: don't read immediates[-1] */
+
+      /* find remaining components within the immediate vector */
+      x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
+      y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
+      z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
+      w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);
+
+      if (x >= 0 && y >= 0 && z >= 0 && w >= 0) {
+         /* found them all */
+         memset(&reg, 0, sizeof(reg));
+         reg.Register.File = TGSI_FILE_IMMEDIATE;
+         reg.Register.Index = immpos;
+         reg.Register.SwizzleX = x;
+         reg.Register.SwizzleY = y;
+         reg.Register.SwizzleZ = z;
+         reg.Register.SwizzleW = w;
+         return reg;
+      }
+   }
+
+   assert(!"Failed to find immediate register!");
+   /* Just return IMM[0].xxxx */
+   memset(&reg, 0, sizeof(reg));
+   reg.Register.File = TGSI_FILE_IMMEDIATE;
+   return reg;
+}
+
+
+/**
+ * Return a tgsi_full_src_register for an immediate/literal value of the
+ * form {value, value, value, value}; IMM[0].xxxx if it was never declared.
+ * \sa make_immediate_reg_4() regarding allowed values.
+ */
+static struct tgsi_full_src_register
+make_immediate_reg(struct svga_shader_emitter_v10 *emit,
+                   union tgsi_immediate_data value)
+{
+   struct tgsi_full_src_register reg;
+   const int immpos = find_immediate(emit, value, 0);
+
+   assert(immpos >= 0);
+
+   memset(&reg, 0, sizeof(reg));
+   reg.Register.File = TGSI_FILE_IMMEDIATE;
+   if (immpos >= 0) {  /* guard: asserts may be compiled out */
+      const int comp = find_imm_in_vec4(emit->immediates[immpos], value);
+      reg.Register.Index = immpos;
+      reg.Register.SwizzleX = reg.Register.SwizzleY =
+      reg.Register.SwizzleZ = reg.Register.SwizzleW = comp;
+   }
+   return reg;
+}
+
+
+/**
+ * Return a tgsi_full_src_register for the float immediate {x, y, z, w}.
+ * \sa make_immediate_reg_4() regarding allowed values.
+ */
+static struct tgsi_full_src_register
+make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
+                          float x, float y, float z, float w)
+{
+   union tgsi_immediate_data vec[4];
+   vec[3].Float = w;
+   vec[2].Float = z;
+   vec[1].Float = y;
+   vec[0].Float = x;
+   return make_immediate_reg_4(emit, vec);
+}
+
+
+/**
+ * Return a tgsi_full_src_register for a float immediate with all four
+ * components equal to 'value'.
+ * \sa make_immediate_reg_4() regarding allowed values.
+ */
+static struct tgsi_full_src_register
+make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
+{
+   union tgsi_immediate_data scalar;
+   scalar.Float = value;
+   return make_immediate_reg(emit, scalar);
+}
+
+
+/**
+ * Return a tgsi_full_src_register for the int immediate {x, y, z, w}.
+ */
+static struct tgsi_full_src_register
+make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
+                        int x, int y, int z, int w)
+{
+   union tgsi_immediate_data vec[4];
+   vec[3].Int = w;
+   vec[2].Int = z;
+   vec[1].Int = y;
+   vec[0].Int = x;
+   return make_immediate_reg_4(emit, vec);
+}
+
+
+/**
+ * Return a tgsi_full_src_register for an int immediate with all four
+ * components equal to 'value'.
+ * \sa make_immediate_reg_4() regarding allowed values.
+ */
+static struct tgsi_full_src_register
+make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
+{
+   union tgsi_immediate_data scalar;
+   scalar.Int = value;
+   return make_immediate_reg(emit, scalar);
+}
+
+
+/**
+ * Append a union tgsi_immediate_data[4] immediate to emit->immediates.
+ * \return  the index/position of the immediate.
+ */
+static unsigned
+alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
+                  const union tgsi_immediate_data imm[4])
+{
+   const unsigned slot = emit->num_immediates++;
+   unsigned comp;
+
+   assert(!emit->immediates_emitted);
+   assert(slot < Elements(emit->immediates));
+   for (comp = 0; comp < 4; comp++)
+      emit->immediates[slot][comp] = imm[comp];
+   return slot;
+}
+
+
+/**
+ * Allocate space for the float[4] immediate {x, y, z, w}.
+ * \return  the index/position of the immediate.
+ */
+static unsigned
+alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
+                       float x, float y, float z, float w)
+{
+   union tgsi_immediate_data vec[4];
+   vec[3].Float = w;
+   vec[2].Float = z;
+   vec[1].Float = y;
+   vec[0].Float = x;
+   return alloc_immediate_4(emit, vec);
+}
+
+
+/**
+ * Allocate space for the int[4] immediate {x, y, z, w}.
+ * \return  the index/position of the immediate.
+ */
+static unsigned
+alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
+                     int x, int y, int z, int w)
+{
+   union tgsi_immediate_data vec[4];
+   vec[3].Int = w;
+   vec[2].Int = z;
+   vec[1].Int = y;
+   vec[0].Int = x;
+   return alloc_immediate_4(emit, vec);
+}
+
+
+/**
+ * Allocate a shader input to store system value 'index'; return its index.
+ */
+static unsigned
+alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
+{
+   const unsigned n = emit->info.num_inputs + index;  /* past the inputs */
+   assert(index < Elements(emit->system_value_indexes));
+   emit->system_value_indexes[index] = n;  /* remember the mapping */
+   return n;
+}
+
+
+/**
+ * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
+ * \return TRUE always
+ */
+static boolean
+emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
+                      const struct tgsi_full_immediate *imm)
+{
+   /* Nothing is emitted here; the values are only saved and are written
+    * out later by emit_vgpu10_immediates_block(). */
+   alloc_immediate_4(emit, imm->u);
+   return TRUE;
+}
+
+
+/**
+ * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
+ * containing all the immediate values previously allocated
+ * with alloc_immediate_4().  Sets emit->immediates_emitted; returns TRUE.
+ */
+static boolean
+emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
+{
+   const unsigned num_dwords = 4 * emit->num_immediates;
+   VGPU10OpcodeToken0 header;
+
+   assert(!emit->immediates_emitted);
+
+   header.value = 0;
+   header.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
+   header.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
+
+   /* Note: no begin/end_emit_instruction() calls */
+   emit_dword(emit, header.value);
+   emit_dword(emit, 2 + num_dwords);
+   emit_dwords(emit, (unsigned *) emit->immediates, num_dwords);
+
+   emit->immediates_emitted = TRUE;
+   return TRUE;
+}
+
+
+/**
+ * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
+ * interpolation mode, honoring flat shading and centroid sampling.
+ * \return a VGPU10_INTERPOLATION_x value
+ */
+static unsigned
+translate_interpolation(const struct svga_shader_emitter_v10 *emit,
+                        unsigned interp, unsigned interpolate_loc)
+{
+   const boolean centroid = interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID;
+
+   if (interp == TGSI_INTERPOLATE_COLOR) {
+      interp = emit->key.fs.flatshade ? TGSI_INTERPOLATE_CONSTANT
+                                      : TGSI_INTERPOLATE_PERSPECTIVE;
+   }
+
+   if (interp == TGSI_INTERPOLATE_CONSTANT)
+      return VGPU10_INTERPOLATION_CONSTANT;
+
+   if (interp == TGSI_INTERPOLATE_LINEAR)
+      return centroid ? VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID
+                      : VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
+
+   if (interp == TGSI_INTERPOLATE_PERSPECTIVE)
+      return centroid ? VGPU10_INTERPOLATION_LINEAR_CENTROID
+                      : VGPU10_INTERPOLATION_LINEAR;
+
+   assert(!"Unexpected interpolation mode");
+   return VGPU10_INTERPOLATION_CONSTANT;
+}
+
+
+/**
+ * Translate a TGSI property to VGPU10.
+ * No instructions are emitted here; the GS primitive properties are only
+ * recorded in emit->gs.  The output primitive topology might be changed
+ * later; the property instructions are emitted with the pre-helper code.
+ */
+static boolean
+emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
+                     const struct tgsi_full_property *prop)
+{
+   static const VGPU10_PRIMITIVE gs_input_prim[] = {
+      VGPU10_PRIMITIVE_POINT,           /* PIPE_PRIM_POINTS */
+      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINES */
+      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_LOOP */
+      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_STRIP */
+      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLES */
+      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_STRIP */
+      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_FAN */
+      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUADS */
+      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUAD_STRIP */
+      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_POLYGON */
+      VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINES_ADJACENCY */
+      VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
+      VGPU10_PRIMITIVE_TRIANGLE_ADJ,    /* PIPE_PRIM_TRIANGLES_ADJACENCY */
+      VGPU10_PRIMITIVE_TRIANGLE_ADJ     /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
+   };
+
+   static const VGPU10_PRIMITIVE_TOPOLOGY gs_output_topology[] = {
+      VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST,     /* PIPE_PRIM_POINTS */
+      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINES */
+      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINE_LOOP */
+      VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP,     /* PIPE_PRIM_LINE_STRIP */
+      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST,  /* PIPE_PRIM_TRIANGLES */
+      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */
+      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */
+      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUADS */
+      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUAD_STRIP */
+      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_POLYGON */
+      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINES_ADJACENCY */
+      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
+      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
+      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
+   };
+
+   static const unsigned verts_per_prim[] = {
+      0,       /* VGPU10_PRIMITIVE_UNDEFINED */
+      1,       /* VGPU10_PRIMITIVE_POINT */
+      2,       /* VGPU10_PRIMITIVE_LINE */
+      3,       /* VGPU10_PRIMITIVE_TRIANGLE */
+      0,
+      0,
+      4,       /* VGPU10_PRIMITIVE_LINE_ADJ */
+      6        /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
+   };
+
+   switch (prop->Property.PropertyName) {
+   case TGSI_PROPERTY_GS_INPUT_PRIM:
+      assert(prop->u[0].Data < Elements(gs_input_prim));
+      emit->gs.prim_type = gs_input_prim[prop->u[0].Data];
+      assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
+      emit->gs.input_size = verts_per_prim[emit->gs.prim_type];
+      break;
+
+   case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+      assert(prop->u[0].Data < Elements(gs_output_topology));
+      emit->gs.prim_topology = gs_output_topology[prop->u[0].Data];
+      assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
+      break;
+
+   case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
+      emit->gs.max_out_vertices = prop->u[0].Data;
+      break;
+
+   default:
+      break;   /* all other properties are ignored */
+   }
+
+   return TRUE;
+}
+
+/** Emit one property instruction: the opcode token, then 'data' if nData. */
+static void
+emit_property_instruction(struct svga_shader_emitter_v10 *emit,
+                          VGPU10OpcodeToken0 opcode0, unsigned nData,
+                          unsigned data)
+{
+   begin_emit_instruction(emit);
+   emit_dword(emit, opcode0.value);
+   if (nData)
+      emit_dword(emit, data);
+   end_emit_instruction(emit);
+}
+
+
+/**
+ * Emit the three GS declarations gathered by emit_vgpu10_property().
+ */
+static void
+emit_property_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OpcodeToken0 token;
+
+   assert(emit->unit == PIPE_SHADER_GEOMETRY);
+
+   /* 1. input primitive type (no data token) */
+   token.value = 0;
+   token.primitive = emit->gs.prim_type;
+   token.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
+   emit_property_instruction(emit, token, 0, 0);
+
+   /* 2. output primitive topology (no data token) */
+   token.value = 0;
+   token.primitiveTopology = emit->gs.prim_topology;
+   token.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
+   emit_property_instruction(emit, token, 0, 0);
+
+   /* 3. max output vertex count, passed as the one data token */
+   token.value = 0;
+   token.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
+   emit_property_instruction(emit, token, 1, emit->gs.max_out_vertices);
+}
+
+
+/**
+ * Emit a vgpu10 declaration "instruction": the opcode token, the operand
+ * token, the index token(s) the operand's indexDimension calls for and,
+ * if it is nonzero, the name token.
+ * \param index  the register index
+ * \param size   array size of the operand. In most cases, it is 1,
+ *               but for inputs to geometry shader, the array size varies
+ *               depending on the primitive type.
+ */
+static void
+emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
+                      VGPU10OpcodeToken0 opcode0,
+                      VGPU10OperandToken0 operand0,
+                      VGPU10NameToken name_token,
+                      unsigned index, unsigned size)
+{
+   const unsigned dim = operand0.indexDimension;
+
+   assert(opcode0.opcodeType);
+   assert(operand0.mask);
+
+   begin_emit_instruction(emit);
+   emit_dword(emit, opcode0.value);
+   emit_dword(emit, operand0.value);
+
+   if (dim == VGPU10_OPERAND_INDEX_1D) {
+      /* One index token: the register being declared */
+      emit_dword(emit, index);
+   }
+   else if (dim >= VGPU10_OPERAND_INDEX_2D) {
+      /* Two index tokens: the array size, then the register index */
+      emit_dword(emit, size);
+      emit_dword(emit, index);
+   }
+
+   if (name_token.value != 0) {
+      emit_dword(emit, name_token.value);
+   }
+   end_emit_instruction(emit);
+}
+
+
+/**
+ * Build and emit the declaration for a shader input.
+ * \param opcodeType  opcode type, one of VGPU10_OPCODE_DCL_INPUTx
+ * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
+ * \param dim         index dimension
+ * \param index       the input register index
+ * \param size        array size of the operand. In most cases, it is 1,
+ *                    but for inputs to geometry shader, the array size varies
+ *                    depending on the primitive type.
+ * \param name        one of VGPU10_NAME_x
+ * \param numComp     number of components
+ * \param selMode     component selection mode
+ * \param usageMask   bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
+ * \param interpMode  interpolation mode
+ */
+static void
+emit_input_declaration(struct svga_shader_emitter_v10 *emit,
+                       unsigned opcodeType, unsigned operandType,
+                       unsigned dim, unsigned index, unsigned size,
+                       unsigned name, unsigned numComp,
+                       unsigned selMode, unsigned usageMask,
+                       unsigned interpMode)
+{
+   VGPU10OpcodeToken0 opcode0;
+   VGPU10OperandToken0 operand0;
+   VGPU10NameToken name_token;
+
+   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+   assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
+          opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
+          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
+          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
+   assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
+          operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
+   assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
+   assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
+   assert(dim <= VGPU10_OPERAND_INDEX_3D);
+   assert(name == VGPU10_NAME_UNDEFINED ||
+          name == VGPU10_NAME_POSITION ||
+          name == VGPU10_NAME_INSTANCE_ID ||
+          name == VGPU10_NAME_VERTEX_ID ||
+          name == VGPU10_NAME_PRIMITIVE_ID ||
+          name == VGPU10_NAME_IS_FRONT_FACE);
+   assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
+          interpMode == VGPU10_INTERPOLATION_CONSTANT ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);
+
+   check_register_index(emit, opcodeType, index);
+
+   opcode0.value = 0;
+   opcode0.interpolationMode = interpMode;
+   opcode0.opcodeType = opcodeType;
+
+   operand0.value = 0;
+   operand0.indexDimension = dim;
+   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   if (dim == VGPU10_OPERAND_INDEX_2D)
+      operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   operand0.mask = usageMask;
+   operand0.selectionMode = selMode;
+   operand0.numComponents = numComp;
+   operand0.operandType = operandType;
+
+   name_token.value = 0;
+   name_token.name = name;
+   emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
+}
+
+
+/**
+ * Build and emit the declaration for a shader output register.
+ * \param type  one of VGPU10_OPCODE_DCL_OUTPUTx
+ * \param index  the output register index
+ * \param name  one of VGPU10_NAME_x
+ * \param usageMask  bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
+ */
+static void
+emit_output_declaration(struct svga_shader_emitter_v10 *emit,
+                        unsigned type, unsigned index,
+                        unsigned name, unsigned usageMask)
+{
+   VGPU10OpcodeToken0 opcode0;
+   VGPU10OperandToken0 operand0;
+   VGPU10NameToken name_token;
+
+   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+   assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
+          type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
+          type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
+   assert(name == VGPU10_NAME_UNDEFINED ||
+          name == VGPU10_NAME_POSITION ||
+          name == VGPU10_NAME_PRIMITIVE_ID ||
+          name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
+          name == VGPU10_NAME_CLIP_DISTANCE);
+
+   check_register_index(emit, type, index);
+
+   opcode0.value = 0;
+   opcode0.opcodeType = type;
+
+   operand0.value = 0;
+   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
+   operand0.mask = usageMask;
+   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
+   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+   operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
+   name_token.value = 0;
+   name_token.name = name;
+   emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
+}
+
+
+/**
+ * Emit the declaration for the fragment depth output (fragment shaders only).
+ */
+static void
+emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OpcodeToken0 opcode0;
+   VGPU10OperandToken0 operand0;
+   VGPU10NameToken name_token;
+
+   assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+   opcode0.value = 0;
+   opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
+   operand0.value = 0;
+   operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
+   operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
+   operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
+   name_token.value = 0;   /* zero: no name token gets emitted */
+   emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
+}
+
+
+/**
+ * Emit the input declaration for a system value.  Only INSTANCEID and
+ * VERTEXID are handled; any other semantic is silently ignored.
+ */
+static void
+emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
+                              unsigned semantic_name, unsigned index)
+{
+   unsigned vgpu10_name;
+
+   /* Pick the VGPU10 name that corresponds to the TGSI semantic */
+   switch (semantic_name) {
+   case TGSI_SEMANTIC_INSTANCEID:
+      vgpu10_name = VGPU10_NAME_INSTANCE_ID;
+      break;
+   case TGSI_SEMANTIC_VERTEXID:
+      vgpu10_name = VGPU10_NAME_VERTEX_ID;
+      break;
+   default:
+      return; /* XXX */
+   }
+
+   /* Both system values are declared the same way: a 1D-indexed SIV input
+    * using only the X component, in a newly allocated input register.
+    */
+   index = alloc_system_value_index(emit, index);
+   emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
+                          VGPU10_OPERAND_TYPE_INPUT,
+                          VGPU10_OPERAND_INDEX_1D,
+                          index, 1,
+                          vgpu10_name,
+                          VGPU10_OPERAND_4_COMPONENT,
+                          VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+                          VGPU10_OPERAND_4_COMPONENT_MASK_X,
+                          VGPU10_INTERPOLATION_UNDEFINED);
+}
+
+/**
+ * Translate a TGSI declaration to VGPU10.
+ *
+ * Most register files are not declared here.  Instead we record how many
+ * registers of each file the shader uses so that the declarations can be
+ * emitted later.  System values are the exception: they are declared
+ * immediately via emit_system_value_declaration().
+ *
+ * \param emit  the shader emitter whose bookkeeping fields are updated
+ * \param decl  the TGSI declaration to process
+ * \return TRUE if the declaration was handled, FALSE for register files
+ *         which are not supported (immediate, resource, unknown)
+ */
+static boolean
+emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
+                        const struct tgsi_full_declaration *decl)
+{
+   switch (decl->Declaration.File) {
+   case TGSI_FILE_INPUT:
+      /* do nothing - see emit_input_declarations() */
+      return TRUE;
+
+   case TGSI_FILE_OUTPUT:
+      /* Only remember the usage mask; see emit_output_declarations() */
+      assert(decl->Range.First == decl->Range.Last);
+      emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
+      return TRUE;
+
+   case TGSI_FILE_TEMPORARY:
+      /* Don't declare the temps here.  Just keep track of how many
+       * and emit the declaration later.
+       */
+      if (decl->Declaration.Array) {
+         /* Indexed temporary array.  Save the start index of the array
+          * and the size of the array.
+          *
+          * The array ID is clamped to the last valid temp_arrays[] slot
+          * so that a bad ID can never index past the end of the array
+          * when assertions are compiled out.
+          */
+         const unsigned max_array_id = ARRAY_SIZE(emit->temp_arrays) - 1;
+         const unsigned arrayID = MIN2(decl->Array.ArrayID, max_array_id);
+         unsigned i;
+
+         /* An out-of-range ID is still a bug worth catching in debug builds */
+         assert(decl->Array.ArrayID <= max_array_id);
+
+         /* Save this array so we can emit the declaration for it later */
+         emit->temp_arrays[arrayID].start = decl->Range.First;
+         emit->temp_arrays[arrayID].size =
+            decl->Range.Last - decl->Range.First + 1;
+
+         emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
+         assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
+         emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
+
+         /* Fill in the temp_map entries for this array */
+         for (i = decl->Range.First; i <= decl->Range.Last; i++) {
+            emit->temp_map[i].arrayId = arrayID;
+            emit->temp_map[i].index = i - decl->Range.First;
+         }
+      }
+
+      /* for all temps, indexed or not, keep track of highest index */
+      emit->num_shader_temps = MAX2(emit->num_shader_temps,
+                                    decl->Range.Last + 1);
+      return TRUE;
+
+   case TGSI_FILE_CONSTANT:
+      /* Don't declare constants here.  Just keep track and emit later. */
+      {
+         unsigned constbuf = 0, num_consts;
+         if (decl->Declaration.Dimension) {
+            constbuf = decl->Dim.Index2D;
+         }
+         /* We throw an assertion here when, in fact, the shader should never
+          * have linked due to constbuf index out of bounds, so we shouldn't
+          * have reached here.
+          */
+         assert(constbuf < Elements(emit->num_shader_consts));
+
+         num_consts = MAX2(emit->num_shader_consts[constbuf],
+                           decl->Range.Last + 1);
+
+         if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
+            debug_printf("Warning: constant buffer is declared to size [%u]"
+                         " but [%u] is the limit.\n",
+                         num_consts,
+                         VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
+         }
+         /* The linker doesn't enforce the max UBO size so we clamp here */
+         emit->num_shader_consts[constbuf] =
+            MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
+      }
+      return TRUE;
+
+   case TGSI_FILE_IMMEDIATE:
+      assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
+      return FALSE;
+
+   case TGSI_FILE_SYSTEM_VALUE:
+      emit_system_value_declaration(emit, decl->Semantic.Name,
+                                    decl->Range.First);
+      return TRUE;
+
+   case TGSI_FILE_SAMPLER:
+      /* Don't declare samplers here.  Just keep track and emit later. */
+      emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
+      return TRUE;
+
+   case TGSI_FILE_RESOURCE:
+      /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
+      /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
+      assert(!"TGSI_FILE_RESOURCE not handled yet");
+      return FALSE;
+
+   case TGSI_FILE_ADDRESS:
+      /* Just count the address registers; temps are allocated for them
+       * in emit_temporaries_declaration().
+       */
+      emit->num_address_regs = MAX2(emit->num_address_regs,
+                                    decl->Range.Last + 1);
+      return TRUE;
+
+   case TGSI_FILE_SAMPLER_VIEW:
+      /* Not used at this time, but maybe in the future.
+       * See emit_resource_declarations().
+       */
+      return TRUE;
+
+   default:
+      assert(!"Unexpected type of declaration");
+      return FALSE;
+   }
+}
+
+
+
+/**
+ * Emit all input declarations.
+ *
+ * The form of the declarations depends on emit->unit:
+ *  - fragment shader: one declaration per linked input
+ *    (emit->linkage.num_inputs).  Position, front-face and primitive ID
+ *    use the PS system-value opcode; all other inputs are declared with
+ *    an interpolation mode from translate_interpolation().
+ *  - geometry shader: inputs are declared 2D-indexed with
+ *    emit->gs.input_size as the size, except the primitive ID which is
+ *    declared 0D.
+ *  - otherwise (asserted to be a vertex shader): plain 1D inputs whose
+ *    register index is the TGSI input index.
+ *
+ * Inputs whose usage mask is zero are skipped.  Always returns TRUE.
+ */
+static boolean
+emit_input_declarations(struct svga_shader_emitter_v10 *emit)
+{
+   unsigned i;
+
+   if (emit->unit == PIPE_SHADER_FRAGMENT) {
+
+      for (i = 0; i < emit->linkage.num_inputs; i++) {
+         unsigned semantic_name = emit->info.input_semantic_name[i];
+         unsigned usage_mask = emit->info.input_usage_mask[i];
+         unsigned index = emit->linkage.input_map[i];
+         unsigned type, interpolationMode, name;
+
+         if (usage_mask == 0)
+            continue;  /* register is not actually used */
+
+         if (semantic_name == TGSI_SEMANTIC_POSITION) {
+            /* fragment position input */
+            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
+            interpolationMode = VGPU10_INTERPOLATION_LINEAR;
+            name = VGPU10_NAME_POSITION;
+            if (usage_mask & TGSI_WRITEMASK_W) {
+               /* we need to replace use of 'w' with '1/w'; remember which
+                * input is the fragment coordinate so that can be done
+                */
+               emit->fs.fragcoord_input_index = i;
+            }
+         }
+         else if (semantic_name == TGSI_SEMANTIC_FACE) {
+            /* fragment front-facing input; remember which input it is */
+            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
+            interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
+            name = VGPU10_NAME_IS_FRONT_FACE;
+            emit->fs.face_input_index = i;
+         }
+         else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
+            /* primitive ID */
+            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
+            interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
+            name = VGPU10_NAME_PRIMITIVE_ID;
+         }
+         else {
+            /* general fragment input */
+            type = VGPU10_OPCODE_DCL_INPUT_PS;
+            interpolationMode =
+               translate_interpolation(emit,
+                                       emit->info.input_interpolate[i],
+                                       emit->info.input_interpolate_loc[i]);
+
+            /* keep track of whether flat (constant) interpolation is
+             * used by any input
+             */
+            emit->uses_flat_interp = emit->uses_flat_interp ||
+               (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
+
+            name = VGPU10_NAME_UNDEFINED;
+         }
+
+         emit_input_declaration(emit, type,
+                                VGPU10_OPERAND_TYPE_INPUT,
+                                VGPU10_OPERAND_INDEX_1D, index, 1,
+                                name,
+                                VGPU10_OPERAND_4_COMPONENT,
+                                VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+                                VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+                                interpolationMode);
+      }
+   }
+   else if (emit->unit == PIPE_SHADER_GEOMETRY) {
+
+      for (i = 0; i < emit->info.num_inputs; i++) {
+         unsigned semantic_name = emit->info.input_semantic_name[i];
+         unsigned usage_mask = emit->info.input_usage_mask[i];
+         unsigned index = emit->linkage.input_map[i];
+         unsigned opcodeType, operandType;
+         unsigned numComp, selMode;
+         unsigned name;
+         unsigned dim;
+
+         if (usage_mask == 0)
+            continue;  /* register is not actually used */
+
+         opcodeType = VGPU10_OPCODE_DCL_INPUT;
+         operandType = VGPU10_OPERAND_TYPE_INPUT;
+         numComp = VGPU10_OPERAND_4_COMPONENT;
+         selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
+         name = VGPU10_NAME_UNDEFINED;
+
+         /* all geometry shader inputs are two dimensional except gl_PrimitiveID */
+         dim = VGPU10_OPERAND_INDEX_2D;
+
+         if (semantic_name == TGSI_SEMANTIC_PRIMID) {
+            /* Primitive ID */
+            operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
+            dim = VGPU10_OPERAND_INDEX_0D;
+            numComp = VGPU10_OPERAND_0_COMPONENT;
+            selMode = 0;
+
+            /* Also save the register index so we can check for the
+             * primitive ID when emitting a src register.  The operand
+             * type and index dimension must be modified when emitting
+             * the primitive ID as a src register.
+             */
+            emit->gs.prim_id_index = i;
+         }
+         else if (semantic_name == TGSI_SEMANTIC_POSITION) {
+            /* vertex position input */
+            opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
+            name = VGPU10_NAME_POSITION;
+         }
+
+         emit_input_declaration(emit, opcodeType, operandType,
+                                dim, index,
+                                emit->gs.input_size,
+                                name,
+                                numComp, selMode,
+                                VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+                                VGPU10_INTERPOLATION_UNDEFINED);
+      }
+   }
+   else {
+      assert(emit->unit == PIPE_SHADER_VERTEX);
+
+      for (i = 0; i < emit->info.num_inputs; i++) {
+         unsigned usage_mask = emit->info.input_usage_mask[i];
+         unsigned index = i;
+
+         if (usage_mask == 0)
+            continue;  /* register is not actually used */
+
+         emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
+                                VGPU10_OPERAND_TYPE_INPUT,
+                                VGPU10_OPERAND_INDEX_1D, index, 1,
+                                VGPU10_NAME_UNDEFINED,
+                                VGPU10_OPERAND_4_COMPONENT,
+                                VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+                                VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+                                VGPU10_INTERPOLATION_UNDEFINED);
+      }
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Emit all output declarations.
+ *
+ * For fragment shaders, color outputs are declared using their semantic
+ * index as the register index (plus extra outputs when color 0 is
+ * broadcast to several color buffers), and the position semantic is
+ * handled by emit_fragdepth_output_declaration().  For the other shader
+ * types (VS or GS) each output is declared at its TGSI index, with a
+ * system-value name where the semantic requires one.
+ *
+ * After the per-output loop, the extra outputs for the non-adjusted
+ * vertex position and the clip distance shadow copy are declared when
+ * their indexes are valid.  Always returns TRUE.
+ */
+static boolean
+emit_output_declarations(struct svga_shader_emitter_v10 *emit)
+{
+   unsigned i;
+
+   for (i = 0; i < emit->info.num_outputs; i++) {
+      /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
+      const unsigned semantic_name = emit->info.output_semantic_name[i];
+      const unsigned semantic_index = emit->info.output_semantic_index[i];
+      unsigned index = i;
+
+      if (emit->unit == PIPE_SHADER_FRAGMENT) {
+         if (semantic_name == TGSI_SEMANTIC_COLOR) {
+            assert(semantic_index < Elements(emit->fs.color_out_index));
+
+            emit->fs.color_out_index[semantic_index] = index;
+
+            /* The semantic index is the shader's color output/buffer index */
+            emit_output_declaration(emit,
+                                    VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
+                                    VGPU10_NAME_UNDEFINED,
+                                    VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+
+            if (semantic_index == 0) {
+               if (emit->key.fs.write_color0_to_n_cbufs > 1) {
+                  /* Emit declarations for the additional color outputs
+                   * for broadcasting.
+                   */
+                  unsigned j;
+                  for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
+                     /* Allocate a new output index, placed after the
+                      * shader's own outputs
+                      */
+                     unsigned idx = emit->info.num_outputs + j - 1;
+                     emit->fs.color_out_index[j] = idx;
+                     emit_output_declaration(emit,
+                                        VGPU10_OPCODE_DCL_OUTPUT, idx,
+                                        VGPU10_NAME_UNDEFINED,
+                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+                     emit->info.output_semantic_index[idx] = j;
+                  }
+               }
+            }
+            else {
+               assert(!emit->key.fs.write_color0_to_n_cbufs);
+            }
+         }
+         else if (semantic_name == TGSI_SEMANTIC_POSITION) {
+            /* Fragment depth output */
+            emit_fragdepth_output_declaration(emit);
+         }
+         else {
+            assert(!"Bad output semantic name");
+         }
+      }
+      else {
+         /* VS or GS */
+         unsigned name, type;
+         unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
+
+         switch (semantic_name) {
+         case TGSI_SEMANTIC_POSITION:
+            assert(emit->unit != PIPE_SHADER_FRAGMENT);
+            type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
+            name = VGPU10_NAME_POSITION;
+            /* Save the index of the vertex position output register */
+            emit->vposition.out_index = index;
+            break;
+         case TGSI_SEMANTIC_CLIPDIST:
+            type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
+            name = VGPU10_NAME_CLIP_DISTANCE;
+            /* save the starting index of the clip distance output register */
+            if (semantic_index == 0)
+               emit->clip_dist_out_index = index;
+            writemask = emit->output_usage_mask[index];
+            writemask = apply_clip_plane_mask(emit, writemask, semantic_index);
+            if (writemask == 0x0) {
+               continue; /* discard this do-nothing declaration */
+            }
+            break;
+         case TGSI_SEMANTIC_PRIMID:
+            assert(emit->unit == PIPE_SHADER_GEOMETRY);
+            type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
+            name = VGPU10_NAME_PRIMITIVE_ID;
+            break;
+         case TGSI_SEMANTIC_LAYER:
+            assert(emit->unit == PIPE_SHADER_GEOMETRY);
+            type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
+            name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
+            break;
+         case TGSI_SEMANTIC_CLIPVERTEX:
+            type = VGPU10_OPCODE_DCL_OUTPUT;
+            name = VGPU10_NAME_UNDEFINED;
+            emit->clip_vertex_out_index = index;
+            break;
+         default:
+            /* generic output */
+            type = VGPU10_OPCODE_DCL_OUTPUT;
+            name = VGPU10_NAME_UNDEFINED;
+         }
+
+         emit_output_declaration(emit, type, index, name, writemask);
+      }
+   }
+
+   if (emit->vposition.so_index != INVALID_INDEX &&
+       emit->vposition.out_index != INVALID_INDEX) {
+
+      assert(emit->unit != PIPE_SHADER_FRAGMENT);
+
+      /* Emit the declaration for the non-adjusted vertex position
+       * for stream output purposes
+       */
+      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
+                              emit->vposition.so_index,
+                              VGPU10_NAME_UNDEFINED,
+                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+   }
+
+   if (emit->clip_dist_so_index != INVALID_INDEX &&
+       emit->clip_dist_out_index != INVALID_INDEX) {
+
+      assert(emit->unit != PIPE_SHADER_FRAGMENT);
+
+      /* Emit the declaration for the clip distance shadow copy which
+       * will be used for stream output purposes and for the clip
+       * distance varying variable
+       */
+      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
+                              emit->clip_dist_so_index,
+                              VGPU10_NAME_UNDEFINED,
+                              emit->output_usage_mask[emit->clip_dist_out_index]);
+
+      if (emit->info.num_written_clipdistance > 4) {
+         /* Each clip distance register handles 4 planes, so more than
+          * four written distances need a second shadow register.
+          */
+         emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
+                                 emit->clip_dist_so_index + 1,
+                                 VGPU10_NAME_UNDEFINED,
+                                 emit->output_usage_mask[emit->clip_dist_out_index+1]);
+      }
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Emit the declaration for the temporary registers.
+ *
+ * Besides the temps used by the TGSI shader itself, this reserves extra
+ * temps for internal use: MAX_INTERNAL_TEMPS, per-stage temps selected by
+ * the shader key and emitter state, and one temp per address register.
+ * The index of each reserved temp is stored in the emitter.
+ *
+ * Non-array temps are then compacted through temp_map[] and declared with
+ * one DCL_TEMPS instruction; each non-empty temp array gets its own
+ * DCL_INDEXABLE_TEMP instruction.  Always returns TRUE.
+ */
+static boolean
+emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
+{
+   unsigned total_temps, reg, i;
+
+   total_temps = emit->num_shader_temps;
+
+   /* Allocate extra temps for specially-implemented instructions,
+    * such as LIT.
+    */
+   total_temps += MAX_INTERNAL_TEMPS;
+
+   if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
+      if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
+          emit->key.clip_plane_enable ||
+          emit->vposition.so_index != INVALID_INDEX) {
+         emit->vposition.tmp_index = total_temps;
+         total_temps += 1;
+      }
+
+      if (emit->unit == PIPE_SHADER_VERTEX) {
+         unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
+                                 emit->key.vs.adjust_attrib_itof |
+                                 emit->key.vs.adjust_attrib_utof |
+                                 emit->key.vs.attrib_is_bgra |
+                                 emit->key.vs.attrib_puint_to_snorm |
+                                 emit->key.vs.attrib_puint_to_uscaled |
+                                 emit->key.vs.attrib_puint_to_sscaled);
+         while (attrib_mask) {
+            unsigned index = u_bit_scan(&attrib_mask);
+            emit->vs.adjusted_input[index] = total_temps++;
+         }
+      }
+
+      if (emit->clip_mode == CLIP_DISTANCE) {
+         /* We need to write the clip distance to a temporary register
+          * first. Then it will be copied to the shadow copy for
+          * the clip distance varying variable and stream output purposes.
+          * It will also be copied to the actual CLIPDIST register
+          * according to the enabled clip planes.
+          */
+         emit->clip_dist_tmp_index = total_temps++;
+         if (emit->info.num_written_clipdistance > 4)
+            total_temps++; /* second clip register */
+      }
+      else if (emit->clip_mode == CLIP_VERTEX) {
+         /* We need to convert the TGSI CLIPVERTEX output to one or more
+          * clip distances.  Allocate a temp reg for the clipvertex here.
+          */
+         assert(emit->info.writes_clipvertex > 0);
+         emit->clip_vertex_tmp_index = total_temps;
+         total_temps++;
+      }
+   }
+   else if (emit->unit == PIPE_SHADER_FRAGMENT) {
+      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
+          emit->key.fs.write_color0_to_n_cbufs > 1) {
+         /* Allocate a temp to hold the output color */
+         emit->fs.color_tmp_index = total_temps;
+         total_temps += 1;
+      }
+
+      if (emit->fs.face_input_index != INVALID_INDEX) {
+         /* Allocate a temp for the +/-1 face register */
+         emit->fs.face_tmp_index = total_temps;
+         total_temps += 1;
+      }
+
+      if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
+         /* Allocate a temp for modified fragment position register */
+         emit->fs.fragcoord_tmp_index = total_temps;
+         total_temps += 1;
+      }
+   }
+
+   for (i = 0; i < emit->num_address_regs; i++) {
+      emit->address_reg_index[i] = total_temps++;
+   }
+
+   /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
+    * temp indexes.  Basically, we compact all the non-array temp register
+    * indexes into a consecutive series.
+    *
+    * Before, we may have some TGSI declarations like:
+    *   DCL TEMP[0..1], LOCAL
+    *   DCL TEMP[2..4], ARRAY(1), LOCAL
+    *   DCL TEMP[5..7], ARRAY(2), LOCAL
+    *   plus, some extra temps, like TEMP[8], TEMP[9] for misc things
+    *
+    * After, we'll have a map like this:
+    *   temp_map[0] = { array 0, index 0 }
+    *   temp_map[1] = { array 0, index 1 }
+    *   temp_map[2] = { array 1, index 0 }
+    *   temp_map[3] = { array 1, index 1 }
+    *   temp_map[4] = { array 1, index 2 }
+    *   temp_map[5] = { array 2, index 0 }
+    *   temp_map[6] = { array 2, index 1 }
+    *   temp_map[7] = { array 2, index 2 }
+    *   temp_map[8] = { array 0, index 2 }
+    *   temp_map[9] = { array 0, index 3 }
+    *
+    * We'll declare two arrays of 3 elements, plus a set of four non-indexed
+    * temps numbered 0..3
+    *
+    * Any time we emit a temporary register index, we'll have to use the
+    * temp_map[] table to convert the TGSI index to the VGPU10 index.
+    *
+    * Finally, we recompute the total_temps value here: after the loop it
+    * counts only the non-array temps.
+    */
+   reg = 0;
+   for (i = 0; i < total_temps; i++) {
+      if (emit->temp_map[i].arrayId == 0) {
+         emit->temp_map[i].index = reg++;
+      }
+   }
+   total_temps = reg;
+
+   if (0) { /* disabled debug dump of the first 30 temp_map entries */
+      debug_printf("total_temps %u\n", total_temps);
+      for (i = 0; i < 30; i++) {
+         debug_printf("temp %u ->  array %u  index %u\n",
+                      i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
+      }
+   }
+
+   /* Emit declaration of ordinary temp registers */
+   if (total_temps > 0) {
+      VGPU10OpcodeToken0 opcode0;
+
+      opcode0.value = 0;
+      opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;
+
+      begin_emit_instruction(emit);
+      emit_dword(emit, opcode0.value);
+      emit_dword(emit, total_temps);
+      end_emit_instruction(emit);
+   }
+
+   /* Emit declarations for indexable temp arrays.  Skip 0th entry since
+    * it's unused (array ID 0 marks the non-array temps in temp_map[]).
+    */
+   for (i = 1; i < emit->num_temp_arrays; i++) {
+      unsigned num_temps = emit->temp_arrays[i].size;
+
+      if (num_temps > 0) {
+         VGPU10OpcodeToken0 opcode0;
+
+         opcode0.value = 0;
+         opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;
+
+         begin_emit_instruction(emit);
+         emit_dword(emit, opcode0.value);
+         emit_dword(emit, i); /* which array */
+         emit_dword(emit, num_temps);
+         emit_dword(emit, 4); /* num components */
+         end_emit_instruction(emit);
+
+         total_temps += num_temps;
+      }
+   }
+
+   /* Check that the grand total of all regular and indexed temps is
+    * under the limit.
+    */
+   check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit the constant buffer declarations.
+ *
+ * Constant buffer 0 holds the shader's own constants followed by the
+ * "extra" constants allocated here: vertex prescale, viewport, user clip
+ * planes, texcoord scale factors and texture buffer sizes.  The slot
+ * given to each extra constant is recorded in the emitter.  The remaining
+ * constant buffers (UBOs) are declared with the sizes recorded in
+ * emit->num_shader_consts[].  Always returns TRUE.
+ */
+static boolean
+emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OpcodeToken0 opcode0;
+   VGPU10OperandToken0 operand0;
+   unsigned next_const, buf, i;
+
+   /* The same opcode/operand tokens are used for every constant buffer */
+   opcode0.value = 0;
+   opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
+   opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
+   /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
+
+   operand0.value = 0;
+   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
+   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
+   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
+   operand0.swizzleX = 0;
+   operand0.swizzleY = 1;
+   operand0.swizzleZ = 2;
+   operand0.swizzleW = 3;
+
+   /* The extra constants are appended after the shader's own constants
+    * in constant buffer 0.  The order of the allocations below determines
+    * the slot numbers and must not change.
+    */
+   next_const = emit->num_shader_consts[0];
+
+   /* Vertex position scale/translation */
+   if (emit->vposition.need_prescale) {
+      emit->vposition.prescale_scale_index = next_const++;
+      emit->vposition.prescale_trans_index = next_const++;
+   }
+
+   /* Viewport constant, vertex shaders only */
+   if (emit->unit == PIPE_SHADER_VERTEX && emit->key.vs.undo_viewport) {
+      emit->vs.viewport_index = next_const++;
+   }
+
+   /* user-defined clip planes */
+   if (emit->key.clip_plane_enable) {
+      const unsigned num_planes = util_bitcount(emit->key.clip_plane_enable);
+      assert(emit->unit == PIPE_SHADER_VERTEX ||
+             emit->unit == PIPE_SHADER_GEOMETRY);
+      for (i = 0; i < num_planes; i++) {
+         emit->clip_plane_const[i] = next_const++;
+      }
+   }
+
+   /* Texcoord scale factors for RECT textures */
+   for (i = 0; i < emit->num_samplers; i++) {
+      if (emit->key.tex[i].unnormalized) {
+         emit->texcoord_scale_index[i] = next_const++;
+      }
+   }
+
+   /* Texture buffer sizes */
+   for (i = 0; i < emit->num_samplers; i++) {
+      if (emit->key.tex[i].texture_target == PIPE_BUFFER) {
+         emit->texture_buffer_size_index[i] = next_const++;
+      }
+   }
+
+   /* Declare every non-empty constant buffer.  Buffer 0 uses the total
+    * computed above; the remaining buffers (UBOs) use their own sizes.
+    */
+   for (buf = 0; buf < Elements(emit->num_shader_consts); buf++) {
+      const unsigned num_elements =
+         (buf == 0) ? next_const : emit->num_shader_consts[buf];
+
+      if (num_elements > 0) {
+         begin_emit_instruction(emit);
+         emit_dword(emit, opcode0.value);
+         emit_dword(emit, operand0.value);
+         emit_dword(emit, buf);  /* which const buffer slot */
+         emit_dword(emit, num_elements);
+         end_emit_instruction(emit);
+      }
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Emit one DCL_SAMPLER instruction for each of the emit->num_samplers
+ * samplers, using the default sampler mode.  Always returns TRUE.
+ */
+static boolean
+emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OpcodeToken0 opcode_token;
+   VGPU10OperandToken0 operand_token;
+   unsigned sampler;
+
+   /* The opcode and operand tokens are identical for every sampler; only
+    * the sampler index which follows them differs.
+    */
+   opcode_token.value = 0;
+   opcode_token.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
+   opcode_token.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
+
+   operand_token.value = 0;
+   operand_token.numComponents = VGPU10_OPERAND_0_COMPONENT;
+   operand_token.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
+   operand_token.indexDimension = VGPU10_OPERAND_INDEX_1D;
+   operand_token.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+   for (sampler = 0; sampler < emit->num_samplers; sampler++) {
+      begin_emit_instruction(emit);
+      emit_dword(emit, opcode_token.value);
+      emit_dword(emit, operand_token.value);
+      emit_dword(emit, sampler);
+      end_emit_instruction(emit);
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Translate PIPE_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
+ *
+ * \param target  the PIPE_BUFFER / PIPE_TEXTURE_x target
+ * \param msaa    selects the multisample variant for 2D (and RECT) and
+ *                2D array targets; ignored for all other targets
+ * \return the VGPU10 resource dimension.  An unexpected target triggers
+ *         an assertion and falls back to TEXTURE2D.
+ */
+static unsigned
+pipe_texture_to_resource_dimension(unsigned target, bool msaa)
+{
+   unsigned dimension;
+
+   switch (target) {
+   case PIPE_BUFFER:
+      dimension = VGPU10_RESOURCE_DIMENSION_BUFFER;
+      break;
+   case PIPE_TEXTURE_1D:
+      dimension = VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
+      break;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
+      if (msaa) {
+         dimension = VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
+      }
+      else {
+         dimension = VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
+      }
+      break;
+   case PIPE_TEXTURE_3D:
+      dimension = VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
+      break;
+   case PIPE_TEXTURE_CUBE:
+      dimension = VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
+      break;
+   case PIPE_TEXTURE_1D_ARRAY:
+      dimension = VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY;
+      break;
+   case PIPE_TEXTURE_2D_ARRAY:
+      if (msaa) {
+         dimension = VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY;
+      }
+      else {
+         dimension = VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY;
+      }
+      break;
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      dimension = VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY;
+      break;
+   default:
+      assert(!"Unexpected resource type");
+      dimension = VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
+      break;
+   }
+
+   return dimension;
+}
+
+
+/**
+ * Report whether a tgsi_return_type denotes an integer type.
+ *
+ * \return TRUE for SINT and UINT, FALSE for FLOAT, UNORM and SNORM.
+ *         Any other value triggers an assertion and yields FALSE.
+ */
+static boolean
+is_integer_type(enum tgsi_return_type type)
+{
+   if (type == TGSI_RETURN_TYPE_SINT || type == TGSI_RETURN_TYPE_UINT) {
+      return TRUE;
+   }
+
+   if (type != TGSI_RETURN_TYPE_FLOAT &&
+       type != TGSI_RETURN_TYPE_UNORM &&
+       type != TGSI_RETURN_TYPE_SNORM) {
+      /* TGSI_RETURN_TYPE_COUNT or some other unexpected value */
+      assert(!"is_integer_type: Unknown tgsi_return_type");
+   }
+
+   return FALSE;
+}
+
+
+/**
+ * Emit declarations for resources.
+ *
+ * One DCL_RESOURCE instruction is emitted per sampler.  The resource
+ * dimension and the return type come from the shader key
+ * (emit->key.tex[]).  Always returns TRUE.
+ *
+ * XXX When we're sure that all TGSI shaders will be generated with
+ * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
+ * rework this code.
+ */
+static boolean
+emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OperandToken0 operand0;
+   unsigned unit;
+
+   /* The operand token does not depend on the sampler; build it once */
+   operand0.value = 0;
+   operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
+   operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
+   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+   /* Emit resource decl for each sampler */
+   for (unit = 0; unit < emit->num_samplers; unit++) {
+      VGPU10OpcodeToken0 opcode0;
+      VGPU10ResourceReturnTypeToken return_type;
+      VGPU10_RESOURCE_RETURN_TYPE rt;
+
+      opcode0.value = 0;
+      opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
+      opcode0.resourceDimension =
+         pipe_texture_to_resource_dimension(emit->key.tex[unit].texture_target,
+                                            emit->key.tex[unit].texture_msaa);
+
+      /* Convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x.  The static
+       * assertions check that each VGPU10 value is the TGSI value plus
+       * one, so the conversion is a simple increment.
+       */
+      STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
+      STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
+      STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
+      STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
+      STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
+      assert(emit->key.tex[unit].return_type <= TGSI_RETURN_TYPE_FLOAT);
+      rt = emit->key.tex[unit].return_type + 1;
+
+      /* All four components use the same return type */
+      return_type.value = 0;
+      return_type.component0 = rt;
+      return_type.component1 = rt;
+      return_type.component2 = rt;
+      return_type.component3 = rt;
+
+      begin_emit_instruction(emit);
+      emit_dword(emit, opcode0.value);
+      emit_dword(emit, operand0.value);
+      emit_dword(emit, unit);
+      emit_dword(emit, return_type.value);
+      end_emit_instruction(emit);
+   }
+
+   return TRUE;
+}
+
+/**
+ * Helper for emit_instruction_op1/2/3(): emit a complete instruction
+ * consisting of the opcode, one destination register and num_srcs
+ * source registers, in that order.
+ */
+static void
+emit_instruction_with_sources(struct svga_shader_emitter_v10 *emit,
+                              unsigned opcode,
+                              const struct tgsi_full_dst_register *dst,
+                              const struct tgsi_full_src_register *const *srcs,
+                              unsigned num_srcs,
+                              boolean saturate)
+{
+   unsigned k;
+
+   begin_emit_instruction(emit);
+   emit_opcode(emit, opcode, saturate);
+   emit_dst_register(emit, dst);
+   for (k = 0; k < num_srcs; k++) {
+      emit_src_register(emit, srcs[k]);
+   }
+   end_emit_instruction(emit);
+}
+
+/**
+ * Emit an instruction with one destination and one source register.
+ */
+static void
+emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
+                     unsigned opcode,
+                     const struct tgsi_full_dst_register *dst,
+                     const struct tgsi_full_src_register *src,
+                     boolean saturate)
+{
+   const struct tgsi_full_src_register *const srcs[1] = { src };
+
+   emit_instruction_with_sources(emit, opcode, dst, srcs, 1, saturate);
+}
+
+/**
+ * Emit an instruction with one destination and two source registers.
+ */
+static void
+emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
+                     unsigned opcode,
+                     const struct tgsi_full_dst_register *dst,
+                     const struct tgsi_full_src_register *src1,
+                     const struct tgsi_full_src_register *src2,
+                     boolean saturate)
+{
+   const struct tgsi_full_src_register *const srcs[2] = { src1, src2 };
+
+   emit_instruction_with_sources(emit, opcode, dst, srcs, 2, saturate);
+}
+
+/**
+ * Emit an instruction with one destination and three source registers.
+ */
+static void
+emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
+                     unsigned opcode,
+                     const struct tgsi_full_dst_register *dst,
+                     const struct tgsi_full_src_register *src1,
+                     const struct tgsi_full_src_register *src2,
+                     const struct tgsi_full_src_register *src3,
+                     boolean saturate)
+{
+   const struct tgsi_full_src_register *const srcs[3] = { src1, src2, src3 };
+
+   emit_instruction_with_sources(emit, opcode, dst, srcs, 3, saturate);
+}
+
+/**
+ * Emit the actual clip distance instructions to be used for clipping
+ * by copying the clip distance from the temporary registers to the
+ * CLIPDIST registers written with the enabled planes mask.
+ * Also copy the clip distance from the temporary to the clip distance
+ * shadow copy register which will be referenced by the input shader.
+ *
+ * Each clip distance register holds four distances, so one register is
+ * processed when 1..4 distances are written and two when more are
+ * written.
+ */
+static void
+emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   struct tgsi_full_src_register tmp_clip_dist_src;
+   struct tgsi_full_dst_register clip_dist_dst;
+
+   unsigned i;
+   unsigned clip_plane_enable = emit->key.clip_plane_enable;
+   unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
+   const unsigned num_written_clipdist = emit->info.num_written_clipdistance;
+   unsigned num_clip_regs;
+
+   assert(emit->clip_dist_out_index != INVALID_INDEX);
+   assert(emit->clip_dist_tmp_index != INVALID_INDEX);
+
+   /* Number of clip distance registers to copy: four distances per
+    * register, at most two registers.  Computing the count up front
+    * avoids decrementing an unsigned counter below zero, which would
+    * wrap around and cause a second, undeclared register to be copied
+    * when only 1..3 distances are written.
+    */
+   num_clip_regs = (num_written_clipdist + 3) / 4;
+   if (num_clip_regs > 2) {
+      num_clip_regs = 2;
+   }
+
+   /**
+    * Temporarily reset the temporary clip dist register index so
+    * that the copy to the real clip dist register will not
+    * attempt to copy to the temporary register again
+    */
+   emit->clip_dist_tmp_index = INVALID_INDEX;
+
+   for (i = 0; i < num_clip_regs; i++) {
+
+      tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
+
+      /**
+       * copy to the shadow copy for use by varying variable and
+       * stream output. All clip distances
+       * will be written regardless of the enabled clipping planes.
+       */
+      clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
+                                   emit->clip_dist_so_index + i);
+
+      /* MOV clip_dist_so, tmp_clip_dist */
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
+                           &tmp_clip_dist_src, FALSE);
+
+      /**
+       * copy the clip distances of the enabled clipping planes
+       * to the CLIPDIST registers for clipping
+       */
+      if (clip_plane_enable & 0xf) {
+         clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
+                                      emit->clip_dist_out_index + i);
+         clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
+
+         /* MOV CLIPDIST, tmp_clip_dist */
+         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
+                              &tmp_clip_dist_src, FALSE);
+      }
+      /* four clip planes per clip register */
+      clip_plane_enable >>= 4;
+   }
+   /**
+    * set the temporary clip dist register index back to the
+    * temporary index for the next vertex
+    */
+   emit->clip_dist_tmp_index = clip_dist_tmp_index;
+}
+
+/**
+ * Declare the clip distance output registers needed for user-defined clip
+ * planes or for the TGSI_CLIPVERTEX output.
+ *
+ * Nothing is declared unless the clip mode is CLIP_LEGACY or CLIP_VERTEX
+ * and at least one clip plane is enabled.  Otherwise one or two outputs
+ * are declared starting at the current emit->num_outputs, and that
+ * starting index is saved in emit->clip_dist_out_index.
+ */
+static void
+emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
+{
+   const unsigned plane_count = util_bitcount(emit->key.clip_plane_enable);
+   const unsigned first_reg = emit->num_outputs;
+   unsigned enabled_bits;
+   unsigned reg;
+
+   assert(emit->unit == PIPE_SHADER_VERTEX ||
+          emit->unit == PIPE_SHADER_GEOMETRY);
+   assert(plane_count <= 8);
+
+   if (emit->clip_mode != CLIP_LEGACY &&
+       emit->clip_mode != CLIP_VERTEX) {
+      return;
+   }
+
+   if (plane_count == 0) {
+      return;
+   }
+
+   /* The number of components in each register's mask reflects the number
+    * of clip planes.  With 5 clip planes, for example, the declarations
+    * look like:
+    *    dcl_output_siv o2.xyzw, clip_distance
+    *    dcl_output_siv o3.x, clip_distance
+    */
+   emit->clip_dist_out_index = first_reg; /* remember the first clip reg */
+
+   /* one bit per plane, packed from bit 0 upward */
+   enabled_bits = (1 << plane_count) - 1;
+
+   /* four planes per register: low nibble -> first reg, high -> second */
+   for (reg = 0; reg < 2; reg++) {
+      const unsigned nibble = enabled_bits >> (4 * reg);
+
+      if (nibble & 0xf) {
+         unsigned cmask = nibble & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
+         emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV,
+                                 first_reg + reg,
+                                 VGPU10_NAME_CLIP_DISTANCE, cmask);
+         emit->num_outputs++;
+      }
+   }
+}
+
+
+/**
+ * Emit the clip distance writes for legacy/automatic clip planes.
+ *
+ * One DP4 is emitted per enabled clip plane: the dot product of the vertex
+ * position held in TEMP[vpos_tmp_index] and the plane's coefficients
+ * (constant emit->clip_plane_const[i]), written to one component of the
+ * clip distance outputs.  Not used when the shader already declares an
+ * explicit CLIPVERTEX or CLIPDISTANCE output.
+ *
+ * \param emit            the shader emitter (clip mode must be CLIP_LEGACY)
+ * \param vpos_tmp_index  index of the temp register holding the position
+ */
+static void
+emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
+                             unsigned vpos_tmp_index)
+{
+   const unsigned plane_count = util_bitcount(emit->key.clip_plane_enable);
+   unsigned plane;
+
+   assert(emit->clip_mode == CLIP_LEGACY);
+   assert(plane_count <= 8);
+
+   assert(emit->unit == PIPE_SHADER_VERTEX ||
+          emit->unit == PIPE_SHADER_GEOMETRY);
+
+   for (plane = 0; plane < plane_count; plane++) {
+      /* four distances per output register: plane / 4 picks the register,
+       * plane % 4 picks the component within it
+       */
+      const unsigned out_index = emit->clip_dist_out_index + plane / 4;
+      const unsigned out_mask =
+         VGPU10_OPERAND_4_COMPONENT_MASK_X << (plane % 4);
+
+      struct tgsi_full_dst_register out_reg =
+         make_dst_reg(TGSI_FILE_OUTPUT, out_index);
+      struct tgsi_full_dst_register out_comp =
+         writemask_dst(&out_reg, out_mask);
+
+      struct tgsi_full_src_register coeffs =
+         make_src_const_reg(emit->clip_plane_const[plane]);
+      struct tgsi_full_src_register vpos =
+         make_src_temp_reg(vpos_tmp_index);
+
+      /* DP4 clip_dist.comp, plane, vpos */
+      emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &out_comp,
+                           &coeffs, &vpos, FALSE);
+   }
+}
+
+
+/**
+ * Emit the instructions that derive the clip distances from the clip
+ * vertex temporary, then copy that temporary to the clip vertex output.
+ *
+ * One DP4 is emitted per enabled clip plane: the dot product of the clip
+ * vertex (TEMP[emit->clip_vertex_tmp_index]) and the plane's coefficients
+ * (constant emit->clip_plane_const[i]).
+ */
+static void
+emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   const unsigned plane_count = util_bitcount(emit->key.clip_plane_enable);
+   const unsigned saved_tmp_index = emit->clip_vertex_tmp_index;
+   struct tgsi_full_src_register clipvert;
+   struct tgsi_full_dst_register clipvert_out;
+   unsigned plane;
+
+   assert(emit->unit == PIPE_SHADER_VERTEX ||
+          emit->unit == PIPE_SHADER_GEOMETRY);
+
+   assert(emit->clip_mode == CLIP_VERTEX);
+
+   clipvert = make_src_temp_reg(saved_tmp_index);
+
+   for (plane = 0; plane < plane_count; plane++) {
+      /* four distances per output register: plane / 4 picks the register,
+       * plane % 4 picks the component within it
+       */
+      const unsigned out_index = emit->clip_dist_out_index + plane / 4;
+      const unsigned out_mask =
+         VGPU10_OPERAND_4_COMPONENT_MASK_X << (plane % 4);
+
+      struct tgsi_full_dst_register out_reg =
+         make_dst_reg(TGSI_FILE_OUTPUT, out_index);
+      struct tgsi_full_dst_register out_comp =
+         writemask_dst(&out_reg, out_mask);
+
+      struct tgsi_full_src_register coeffs =
+         make_src_const_reg(emit->clip_plane_const[plane]);
+
+      /* DP4 clip_dist.comp, plane, clip_vertex_tmp */
+      emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &out_comp,
+                           &coeffs, &clipvert, FALSE);
+   }
+
+   /* Now copy the temporary clip vertex to the real clip vertex output. */
+
+   assert(emit->clip_vertex_out_index != INVALID_INDEX);
+
+   /**
+    * Temporarily invalidate the temporary clip vertex index so that
+    * the copy to the clip vertex register will not attempt
+    * to copy to the temporary register again
+    */
+   emit->clip_vertex_tmp_index = INVALID_INDEX;
+
+   /* MOV clip_vertex, clip_vertex_tmp */
+   clipvert_out = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                        &clipvert_out, &clipvert, FALSE);
+
+   /**
+    * Restore the temporary clip vertex index for the next vertex.
+    */
+   emit->clip_vertex_tmp_index = saved_tmp_index;
+}
+
+/**
+ * Emit code to convert RGBA to BGRA: a single MOV whose source swizzle
+ * is .zyxw, i.e. the X and Z components trade places.
+ *
+ * \param emit  the shader emitter
+ * \param dst   destination register
+ * \param src   source register holding the RGBA value
+ */
+static void
+emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
+              const struct tgsi_full_dst_register *dst,
+              const struct tgsi_full_src_register *src)
+{
+   struct tgsi_full_src_register swapped =
+      swizzle_src(src,
+                  TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y,
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
+
+   /* MOV dst, src.zyxw */
+   begin_emit_instruction(emit);
+   emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
+   emit_dst_register(emit, dst);
+   emit_src_register(emit, &swapped);
+   end_emit_instruction(emit);
+}
+
+
+/**
+ * Convert from 10_10_10_2 normalized to 10_10_10_2_snorm.
+ *
+ * Emits:  dst = src * scale + ((src >= 0.5) & bias)
+ * where scale is (2, 2, 2, 3) and bias is (-2, -2, -2, -1.66666).
+ *
+ * \param emit  the shader emitter
+ * \param dst   destination register
+ * \param src   source register holding the normalized value
+ */
+static void
+emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
+                    const struct tgsi_full_dst_register *dst,
+                    const struct tgsi_full_src_register *src)
+{
+   struct tgsi_full_src_register threshold =
+      make_immediate_reg_float(emit, 0.5f);
+   struct tgsi_full_src_register scale =
+      make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
+   struct tgsi_full_src_register neg_bias =
+      make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
+
+   unsigned scaled_index = get_temp_index(emit);
+   struct tgsi_full_dst_register scaled_dst = make_dst_temp_reg(scaled_index);
+   struct tgsi_full_src_register scaled_src = make_src_temp_reg(scaled_index);
+
+   unsigned adjust_index = get_temp_index(emit);
+   struct tgsi_full_dst_register adjust_dst = make_dst_temp_reg(adjust_index);
+   struct tgsi_full_src_register adjust_src = make_src_temp_reg(adjust_index);
+
+   /* MUL scaled, src, scale */
+   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &scaled_dst,
+                        src, &scale, FALSE);
+
+   /* GE adjust, src, 0.5   (adjust = src >= 0.5) */
+   emit_instruction_op2(emit, VGPU10_OPCODE_GE, &adjust_dst,
+                        src, &threshold, FALSE);
+
+   /* AND adjust, adjust, neg_bias
+    * NOTE(review): presumably GE yields an all-ones/zero mask so this
+    * selects either the negative bias or 0 - confirm against VGPU10 docs.
+    */
+   emit_instruction_op2(emit, VGPU10_OPCODE_AND, &adjust_dst,
+                        &adjust_src, &neg_bias, FALSE);
+
+   /* ADD dst, scaled, adjust */
+   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
+                        &scaled_src, &adjust_src, FALSE);
+
+   free_temp_indexes(emit);
+}
+
+
+/**
+ * Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled by multiplying the
+ * source with (1023, 1023, 1023, 3).
+ *
+ * \param emit  the shader emitter
+ * \param dst   destination register
+ * \param src   source register holding the unorm value
+ */
+static void
+emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
+                      const struct tgsi_full_dst_register *dst,
+                      const struct tgsi_full_src_register *src)
+{
+   struct tgsi_full_src_register max_values =
+      make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
+
+   /* MUL dst, src, (1023, 1023, 1023, 3) */
+   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst,
+                        src, &max_values, FALSE);
+}
+
+
+/**
+ * Convert from R32_UINT to 10_10_10_2_sscaled.
+ *
+ * The packed value is read from src.x.  Each field is isolated by a left
+ * shift followed by an ISHR right shift, then converted to float.
+ *
+ * \param emit  the shader emitter
+ * \param dst   destination register
+ * \param src   source register; only the X component is read
+ */
+static void
+emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
+                      const struct tgsi_full_dst_register *dst,
+                      const struct tgsi_full_src_register *src)
+{
+   struct tgsi_full_src_register shift_up =
+      make_immediate_reg_int4(emit, 22, 12, 2, 0);
+   struct tgsi_full_src_register shift_down =
+      make_immediate_reg_int4(emit, 22, 22, 22, 30);
+
+   struct tgsi_full_src_register packed = scalar_src(src, TGSI_SWIZZLE_X);
+
+   unsigned work_index = get_temp_index(emit);
+   struct tgsi_full_dst_register work_dst = make_dst_temp_reg(work_index);
+   struct tgsi_full_src_register work_src = make_src_temp_reg(work_index);
+
+   /*
+    * r = (pixel << 22) >> 22;   # signed int in [-512, 511]
+    * g = (pixel << 12) >> 22;   # signed int in [-512, 511]
+    * b = (pixel <<  2) >> 22;   # signed int in [-512, 511]
+    * a = (pixel <<  0) >> 30;   # signed int in [-2, 1]
+    * dst = i_to_f(r,g,b,a);     # convert to float
+    */
+
+   /* ISHL work, src.xxxx, (22, 12, 2, 0) */
+   emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &work_dst,
+                        &packed, &shift_up, FALSE);
+
+   /* ISHR work, work, (22, 22, 22, 30) */
+   emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &work_dst,
+                        &work_src, &shift_down, FALSE);
+
+   /* ITOF dst, work */
+   emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &work_src, FALSE);
+
+   free_temp_indexes(emit);
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_ABS instruction.
+ *
+ *   dst = abs(s0)
+ *
+ * No dedicated opcode is used; a MOV with the absolute-value source
+ * modifier does the job.
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_abs(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   struct tgsi_full_src_register magnitude = absolute_src(&inst->Src[0]);
+
+   /* MOV dst, |s0| */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                        &inst->Dst[0], &magnitude,
+                        inst->Instruction.Saturate);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
+ *
+ * The destination is the temp register that stands in for the TGSI
+ * address register (emit->address_reg_index[]):
+ *
+ *   ARL  dst, s0   ->   FTOI address_tmp, s0
+ *   UARL dst, s0   ->   MOV  address_tmp, s0
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
+              const struct tgsi_full_instruction *inst)
+{
+   const unsigned addr_index = inst->Dst[0].Register.Index;
+   struct tgsi_full_dst_register addr_tmp;
+   unsigned vgpu10_op;
+
+   assert(addr_index < MAX_VGPU10_ADDR_REGS);
+   addr_tmp = make_dst_temp_reg(emit->address_reg_index[addr_index]);
+
+   /* ARL converts float to int; anything else (UARL) is a plain copy */
+   vgpu10_op = (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
+      ? VGPU10_OPCODE_FTOI
+      : VGPU10_OPCODE_MOV;
+
+   emit_instruction_op1(emit, vgpu10_op, &addr_tmp, &inst->Src[0], FALSE);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_CAL instruction.
+ *
+ * Emits a label-type operand token followed by the TGSI label value.
+ *
+ * NOTE(review): unlike the other emitters in this file, no opcode token
+ * is emitted before the operand - confirm whether a call opcode is
+ * missing here or is expected to be supplied elsewhere.
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_cal(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   VGPU10OperandToken0 label_operand;
+
+   /* build an operand token that is all zero except for its type */
+   label_operand.value = 0;
+   label_operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
+
+   begin_emit_instruction(emit);
+   emit_dword(emit, label_operand.value);
+   emit_dword(emit, inst->Label.Label);
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_IABS instruction.
+ *
+ * Per component:
+ *   dst = (src0 < 0) ? -src0 : src0
+ *
+ * which is the same as the integer maximum of the value and its negation:
+ *   IMAX dst, src, neg(src)
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_iabs(struct svga_shader_emitter_v10 *emit,
+          const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_src_register *value = &inst->Src[0];
+   struct tgsi_full_src_register negated = negate_src(value);
+
+   /* IMAX dst, s0, -s0 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
+                        value, &negated, FALSE);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_CMP instruction.
+ *
+ * Per component:
+ *   dst = (src0 < 0) ? src1 : src2
+ *
+ * Emitted as a comparison into a temporary followed by a conditional move:
+ *   LT   tmp, src0, 0.0
+ *   MOVC dst, tmp, src1, src2
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_cmp(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+   unsigned cond_index = get_temp_index(emit);
+   struct tgsi_full_src_register cond_src = make_src_temp_reg(cond_index);
+   struct tgsi_full_dst_register cond_dst = make_dst_temp_reg(cond_index);
+
+   /* LT cond, s0, 0.0 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &cond_dst,
+                        &inst->Src[0], &zero, FALSE);
+
+   /* MOVC dst, cond, s1, s2 */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
+                        &cond_src, &inst->Src[1], &inst->Src[2],
+                        inst->Instruction.Saturate);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_DP2A instruction.
+ *
+ * Every written component of dst receives the same scalar:
+ *   src0.x * src1.x + src0.y * src1.y + src2.x
+ *
+ * Emitted as two multiply-adds into tmp.x followed by a broadcast:
+ *   MAD tmp.x, s0.y, s1.y, s2.x
+ *   MAD tmp.x, s0.x, s1.x, tmp.x
+ *   MOV dst.xyzw, tmp.xxxx
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_dp2a(struct svga_shader_emitter_v10 *emit,
+          const struct tgsi_full_instruction *inst)
+{
+   unsigned acc_index = get_temp_index(emit);
+   struct tgsi_full_src_register acc_src = make_src_temp_reg(acc_index);
+   struct tgsi_full_dst_register acc_dst = make_dst_temp_reg(acc_index);
+
+   /* only the X component of the accumulator is used */
+   struct tgsi_full_src_register acc_xxxx =
+      scalar_src(&acc_src, TGSI_SWIZZLE_X);
+   struct tgsi_full_dst_register acc_x =
+      writemask_dst(&acc_dst, TGSI_WRITEMASK_X);
+
+   struct tgsi_full_src_register a_x =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
+   struct tgsi_full_src_register a_y =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register b_x =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
+   struct tgsi_full_src_register b_y =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register c_x =
+      scalar_src(&inst->Src[2], TGSI_SWIZZLE_X);
+
+   /* MAD tmp.x, s0.y, s1.y, s2.x */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &acc_x,
+                        &a_y, &b_y, &c_x, FALSE);
+
+   /* MAD tmp.x, s0.x, s1.x, tmp.x */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &acc_x,
+                        &a_x, &b_x, &acc_xxxx, FALSE);
+
+   /* MOV dst, tmp.xxxx */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                        &acc_xxxx, inst->Instruction.Saturate);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_DPH instruction.
+ *
+ * Emitted as a 3-component dot product plus the W of the second operand:
+ *   DP3 tmp, s0, s1
+ *   ADD dst, tmp, s1.wwww
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_dph(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_src_register *s0 = &inst->Src[0];
+   const struct tgsi_full_src_register *s1 = &inst->Src[1];
+
+   struct tgsi_full_src_register s1_w =
+      swizzle_src(s1, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W,
+                  TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
+
+   unsigned dot_index = get_temp_index(emit);
+   struct tgsi_full_src_register dot_src = make_src_temp_reg(dot_index);
+   struct tgsi_full_dst_register dot_dst = make_dst_temp_reg(dot_index);
+
+   /* DP3 tmp, s0, s1 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &dot_dst, s0, s1, FALSE);
+
+   /* ADD dst, tmp, s1.wwww */
+   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0],
+                        &dot_src, &s1_w, inst->Instruction.Saturate);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_DST instruction.
+ *
+ *   dst.x = 1
+ *   dst.y = src0.y * src1.y
+ *   dst.z = src0.z
+ *   dst.w = src1.w
+ *
+ * Only the components selected by the destination write mask are
+ * computed.  The result is built in a temporary and copied to dst with a
+ * final MOV, because dst may be the same register as src0 or src1.
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_dst(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const unsigned mask = inst->Dst[0].Register.WriteMask;
+
+   struct tgsi_full_src_register a_y =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register a_z =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
+   struct tgsi_full_src_register b_y =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register b_w =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
+
+   /* staging register for the result (see function comment) */
+   unsigned stage_index = get_temp_index(emit);
+   struct tgsi_full_src_register stage_src = make_src_temp_reg(stage_index);
+   struct tgsi_full_dst_register stage_dst = make_dst_temp_reg(stage_index);
+
+   if (mask & TGSI_WRITEMASK_X) {
+      /* MOV stage.x, 1.0 */
+      struct tgsi_full_dst_register stage_x =
+         writemask_dst(&stage_dst, TGSI_WRITEMASK_X);
+      struct tgsi_full_src_register one =
+         make_immediate_reg_float(emit, 1.0f);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &stage_x, &one, FALSE);
+   }
+
+   if (mask & TGSI_WRITEMASK_Y) {
+      /* MUL stage.y, s0.y, s1.y */
+      struct tgsi_full_dst_register stage_y =
+         writemask_dst(&stage_dst, TGSI_WRITEMASK_Y);
+
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &stage_y,
+                           &a_y, &b_y, inst->Instruction.Saturate);
+   }
+
+   if (mask & TGSI_WRITEMASK_Z) {
+      /* MOV stage.z, s0.z */
+      struct tgsi_full_dst_register stage_z =
+         writemask_dst(&stage_dst, TGSI_WRITEMASK_Z);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &stage_z,
+                           &a_z, inst->Instruction.Saturate);
+   }
+
+   if (mask & TGSI_WRITEMASK_W) {
+      /* MOV stage.w, s1.w */
+      struct tgsi_full_dst_register stage_w =
+         writemask_dst(&stage_dst, TGSI_WRITEMASK_W);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &stage_w,
+                           &b_w, inst->Instruction.Saturate);
+   }
+
+   /* MOV dst, stage */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                        &stage_src, FALSE);
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+
+/**
+ * Emit code for TGSI_OPCODE_ENDPRIM (GS only): a bare CUT instruction.
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_endprim(struct svga_shader_emitter_v10 *emit,
+             const struct tgsi_full_instruction *inst)
+{
+   assert(emit->unit == PIPE_SHADER_GEOMETRY);
+
+   /* The TGSI instruction carries one operand (the vertex stream number)
+    * that must be ignored for VGPU10, so emit_simple() can't be used;
+    * the opcode is emitted by hand with no operands.
+    */
+   begin_emit_instruction(emit);
+   emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
+ *
+ * TGSI_OPCODE_EX2 computes a single value from src.x while the VGPU10
+ * instruction computes four, so the source is swizzled to .xxxx:
+ *   dst.xyzw = 2.0 ^ src.x
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_ex2(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   struct tgsi_full_src_register exponent =
+      swizzle_src(&inst->Src[0],
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+
+   /* EXP dst, s0.xxxx */
+   emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0],
+                        &exponent, inst->Instruction.Saturate);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_EXP instruction.
+ *
+ *   dst.x = 2 ^ floor(s0.x)
+ *   dst.y = s0.x - floor(s0.x)
+ *   dst.z = 2 ^ s0.x
+ *   dst.w = 1.0
+ *
+ * Only the components selected by the destination write mask are
+ * computed.  The result is built in a temporary and copied to dst with a
+ * final MOV, because dst may be the same register as the source.
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_exp(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const unsigned mask = inst->Dst[0].Register.WriteMask;
+
+   struct tgsi_full_src_register s0_x =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
+
+   /* temp holding floor(s0.x) */
+   unsigned floor_index = get_temp_index(emit);
+   struct tgsi_full_src_register floor_src = make_src_temp_reg(floor_index);
+   struct tgsi_full_dst_register floor_dst = make_dst_temp_reg(floor_index);
+
+   /* staging register for the result (see function comment) */
+   unsigned stage_index = get_temp_index(emit);
+   struct tgsi_full_src_register stage_src = make_src_temp_reg(stage_index);
+   struct tgsi_full_dst_register stage_dst = make_dst_temp_reg(stage_index);
+
+   /* only the X component of the floor temp is used */
+   floor_dst = writemask_dst(&floor_dst, TGSI_WRITEMASK_X);
+   floor_src = scalar_src(&floor_src, TGSI_SWIZZLE_X);
+
+   /* ROUND_NI floor.x, s0.x   (round toward -infinity) */
+   emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &floor_dst,
+                        &s0_x, FALSE);
+
+   if (mask & TGSI_WRITEMASK_X) {
+      /* EXP stage.x, floor.x */
+      struct tgsi_full_dst_register stage_x =
+         writemask_dst(&stage_dst, TGSI_WRITEMASK_X);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &stage_x,
+                           &floor_src, inst->Instruction.Saturate);
+   }
+
+   if (mask & TGSI_WRITEMASK_Y) {
+      /* ADD stage.y, s0.x, -floor.x */
+      struct tgsi_full_dst_register stage_y =
+         writemask_dst(&stage_dst, TGSI_WRITEMASK_Y);
+      struct tgsi_full_src_register neg_floor = negate_src(&floor_src);
+
+      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &stage_y,
+                           &s0_x, &neg_floor, inst->Instruction.Saturate);
+   }
+
+   if (mask & TGSI_WRITEMASK_Z) {
+      /* EXP stage.z, s0.x */
+      struct tgsi_full_dst_register stage_z =
+         writemask_dst(&stage_dst, TGSI_WRITEMASK_Z);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &stage_z,
+                           &s0_x, inst->Instruction.Saturate);
+   }
+
+   if (mask & TGSI_WRITEMASK_W) {
+      /* MOV stage.w, 1.0 */
+      struct tgsi_full_dst_register stage_w =
+         writemask_dst(&stage_dst, TGSI_WRITEMASK_W);
+      struct tgsi_full_src_register one =
+         make_immediate_reg_float(emit, 1.0f);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &stage_w, &one, FALSE);
+   }
+
+   /* MOV dst, stage */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                        &stage_src, FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_IF instruction.
+ *
+ * The condition register must be a scalar, i.e. all four swizzle terms
+ * are the same (asserted).
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_if(struct svga_shader_emitter_v10 *emit,
+        const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_src_register *cond = &inst->Src[0];
+   VGPU10OpcodeToken0 if_token;
+
+   /* The src register should be a scalar */
+   assert(cond->Register.SwizzleX == cond->Register.SwizzleY &&
+          cond->Register.SwizzleX == cond->Register.SwizzleZ &&
+          cond->Register.SwizzleX == cond->Register.SwizzleW);
+
+   /* The opcode token is built by hand because the
+    * VGPU10_INSTRUCTION_TEST_NONZERO flag has to be set: the branch is
+    * taken when src.x is non-zero.
+    */
+   if_token.value = 0;
+   if_token.opcodeType = VGPU10_OPCODE_IF;
+   if_token.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
+
+   begin_emit_instruction(emit);
+   emit_dword(emit, if_token.value);
+   emit_src_register(emit, cond);
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
+ * the register components are negative).
+ *
+ * The source is compared against 0.0 into a temporary.  If the source
+ * swizzle selects more than one distinct component, the four comparison
+ * results are ORed together into tmp.x.  A discard conditioned on tmp.x
+ * being non-zero is then emitted.
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_kill_if(struct svga_shader_emitter_v10 *emit,
+             const struct tgsi_full_instruction *inst)
+{
+   unsigned neg_index = get_temp_index(emit);
+   struct tgsi_full_src_register neg_src = make_src_temp_reg(neg_index);
+   struct tgsi_full_dst_register neg_dst = make_dst_temp_reg(neg_index);
+
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+
+   struct tgsi_full_dst_register neg_x =
+      writemask_dst(&neg_dst, TGSI_WRITEMASK_X);
+   struct tgsi_full_src_register neg_xxxx =
+      scalar_src(&neg_src, TGSI_SWIZZLE_X);
+
+   /* LT tmp, s0, 0.0 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &neg_dst,
+                        &inst->Src[0], &zero, FALSE);
+
+   if (!same_swizzle_terms(&inst->Src[0])) {
+      /* The swizzle is not XXXX, YYYY, ZZZZ or WWWW, so the per-component
+       * results must be logically ORed.  Most uses of KILL_IF test a
+       * single channel, which makes skipping these steps worthwhile.
+       */
+      struct tgsi_full_src_register neg_yyyy =
+         scalar_src(&neg_src, TGSI_SWIZZLE_Y);
+      struct tgsi_full_src_register neg_zzzz =
+         scalar_src(&neg_src, TGSI_SWIZZLE_Z);
+      struct tgsi_full_src_register neg_wwww =
+         scalar_src(&neg_src, TGSI_SWIZZLE_W);
+
+      /* OR tmp.x, tmp.x, tmp.y */
+      emit_instruction_op2(emit, VGPU10_OPCODE_OR, &neg_x,
+                           &neg_xxxx, &neg_yyyy, FALSE);
+      /* OR tmp.x, tmp.x, tmp.z */
+      emit_instruction_op2(emit, VGPU10_OPCODE_OR, &neg_x,
+                           &neg_xxxx, &neg_zzzz, FALSE);
+      /* OR tmp.x, tmp.x, tmp.w */
+      emit_instruction_op2(emit, VGPU10_OPCODE_OR, &neg_x,
+                           &neg_xxxx, &neg_wwww, FALSE);
+   }
+
+   /* DISCARD if tmp.x is non-zero */
+   begin_emit_instruction(emit);
+   emit_discard_opcode(emit, TRUE);
+   emit_src_register(emit, &neg_xxxx);
+   end_emit_instruction(emit);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard).
+ *
+ * Emitted as a discard-if-zero whose operand is the constant 0.0, so the
+ * condition always holds.
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_kill(struct svga_shader_emitter_v10 *emit,
+          const struct tgsi_full_instruction *inst)
+{
+   struct tgsi_full_src_register always_zero =
+      make_immediate_reg_float(emit, 0.0f);
+
+   /* DISCARD if 0.0 is zero */
+   begin_emit_instruction(emit);
+   emit_discard_opcode(emit, FALSE);
+   emit_src_register(emit, &always_zero);
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_LG2 instruction.
+ *
+ * TGSI_OPCODE_LG2 computes a single value from src.x while the VGPU10
+ * instruction computes four, so the source is swizzled to .xxxx:
+ *   dst.xyzw = log2(src.x)
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_lg2(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   struct tgsi_full_src_register operand =
+      swizzle_src(&inst->Src[0],
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+
+   /* LOG dst, s0.xxxx */
+   emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0],
+                        &operand, inst->Instruction.Saturate);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_LIT instruction.
+ *
+ *   dst.x = 1
+ *   dst.y = max(src.x, 0)
+ *   dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128)} : 0
+ *   dst.w = 1
+ *
+ * Only the components selected by the destination write mask are
+ * computed.  The result is built in a temporary and copied to dst with a
+ * final MOV, because dst may be the same register as the source.
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_lit(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const unsigned mask = inst->Dst[0].Register.WriteMask;
+   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+   /* staging register for the result (see function comment) */
+   unsigned stage_index = get_temp_index(emit);
+   struct tgsi_full_src_register stage_src = make_src_temp_reg(stage_index);
+   struct tgsi_full_dst_register stage_dst = make_dst_temp_reg(stage_index);
+
+   if (mask & TGSI_WRITEMASK_X) {
+      /* MOV stage.x, 1.0 */
+      struct tgsi_full_dst_register stage_x =
+         writemask_dst(&stage_dst, TGSI_WRITEMASK_X);
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &stage_x, &one, FALSE);
+   }
+
+   if (mask & TGSI_WRITEMASK_W) {
+      /* MOV stage.w, 1.0 */
+      struct tgsi_full_dst_register stage_w =
+         writemask_dst(&stage_dst, TGSI_WRITEMASK_W);
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &stage_w, &one, FALSE);
+   }
+
+   if (mask & TGSI_WRITEMASK_Y) {
+      /* MAX stage.y, src.x, 0.0 */
+      struct tgsi_full_dst_register stage_y =
+         writemask_dst(&stage_dst, TGSI_WRITEMASK_Y);
+      struct tgsi_full_src_register zero =
+         make_immediate_reg_float(emit, 0.0f);
+      struct tgsi_full_src_register s_x =
+         swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
+                     TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+
+      emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &stage_y, &s_x,
+                           &zero, inst->Instruction.Saturate);
+   }
+
+   /*
+    * The Z component, step by step:
+    *
+    * pw = clamp(src.w, -128, 128);
+    *   MAX pw, src.w, -128
+    *   MIN pw, pw, 128
+    *
+    * aux = max(src.y, 0);
+    *   MAX aux, src.y, 0
+    *
+    * pw = pow(aux, pw);
+    *   LOG aux, aux
+    *   MUL pw, aux, pw
+    *   EXP pw, pw
+    *
+    * pw = (src.w == 0) ? 1 : pw;
+    *   EQ aux, 0, src.w
+    *   MOVC pw, aux, 1.0, pw
+    *
+    * stage.z = (0 < src.x) ? pw : 0;
+    *   LT aux, 0, src.x
+    *   MOVC stage.z, aux, pw, 0.0
+    */
+   if (mask & TGSI_WRITEMASK_Z) {
+      struct tgsi_full_dst_register stage_z =
+         writemask_dst(&stage_dst, TGSI_WRITEMASK_Z);
+
+      unsigned pw_index = get_temp_index(emit);
+      struct tgsi_full_src_register pw_src = make_src_temp_reg(pw_index);
+      struct tgsi_full_dst_register pw_dst = make_dst_temp_reg(pw_index);
+      unsigned aux_index = get_temp_index(emit);
+      struct tgsi_full_src_register aux_src = make_src_temp_reg(aux_index);
+      struct tgsi_full_dst_register aux_dst = make_dst_temp_reg(aux_index);
+
+      struct tgsi_full_src_register s_x =
+         scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
+      struct tgsi_full_src_register s_y =
+         scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
+      struct tgsi_full_src_register s_w =
+         scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
+
+      struct tgsi_full_src_register zero =
+         make_immediate_reg_float(emit, 0.0f);
+      struct tgsi_full_src_register exp_min =
+         make_immediate_reg_float(emit, -128.0f);
+      struct tgsi_full_src_register exp_max =
+         make_immediate_reg_float(emit, 128.0f);
+
+      /* MAX pw, src.w, -128 */
+      emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &pw_dst,
+                           &s_w, &exp_min, FALSE);
+      /* MIN pw, pw, 128 */
+      emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &pw_dst,
+                           &pw_src, &exp_max, FALSE);
+      /* MAX aux, src.y, 0 */
+      emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &aux_dst,
+                           &s_y, &zero, FALSE);
+
+      /* pow(aux, pw) expanded as EXP(LOG(aux) * pw) */
+      /* LOG aux, aux */
+      emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &aux_dst,
+                           &aux_src, FALSE);
+      /* MUL pw, aux, pw */
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pw_dst,
+                           &aux_src, &pw_src, FALSE);
+      /* EXP pw, pw */
+      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &pw_dst,
+                           &pw_src, FALSE);
+
+      /* EQ aux, 0, src.w */
+      emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &aux_dst,
+                           &zero, &s_w, FALSE);
+      /* MOVC pw, aux, 1.0, pw */
+      emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &pw_dst,
+                           &aux_src, &one, &pw_src, FALSE);
+
+      /* LT aux, 0, src.x */
+      emit_instruction_op2(emit, VGPU10_OPCODE_LT, &aux_dst,
+                           &zero, &s_x, FALSE);
+      /* MOVC stage.z, aux, pw, 0.0 */
+      emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &stage_z,
+                           &aux_src, &pw_src, &zero, FALSE);
+   }
+
+   /* MOV dst, stage */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                        &stage_src, FALSE);
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Translate TGSI_OPCODE_LOG into VGPU10 instructions.
+ */
+static boolean
+emit_log(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* Results, each emitted only if its dst channel is write-enabled:
+    *   dst.x = floor(lg2(abs(s0.x)))
+    *   dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
+    *   dst.z = lg2(abs(s0.x))
+    *   dst.w = 1.0
+    */
+   const unsigned writemask = inst->Dst[0].Register.WriteMask;
+   struct tgsi_full_src_register s0_x =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
+   unsigned scratch = get_temp_index(emit);
+   struct tgsi_full_src_register scratch_src = make_src_temp_reg(scratch);
+   struct tgsi_full_dst_register scratch_dst = make_dst_temp_reg(scratch);
+   struct tgsi_full_src_register abs_s0_x = absolute_src(&s0_x);
+
+   /* all scratch traffic goes through the X channel only */
+   scratch_dst = writemask_dst(&scratch_dst, TGSI_WRITEMASK_X);
+   scratch_src = scalar_src(&scratch_src, TGSI_SWIZZLE_X);
+
+   /* LOG scratch.x, abs(s0.x) -- needed by the x, y and z results */
+   if (writemask & TGSI_WRITEMASK_XYZ) {
+      emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &scratch_dst,
+                           &abs_s0_x, FALSE);
+   }
+
+   /* MOV dst.z, scratch.x */
+   if (writemask & TGSI_WRITEMASK_Z) {
+      struct tgsi_full_dst_register z_dst =
+         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &z_dst,
+                           &scratch_src, inst->Instruction.Saturate);
+   }
+
+   /* ROUND_NI scratch.x, scratch.x  (the floor step) */
+   if (writemask & TGSI_WRITEMASK_XY) {
+      emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &scratch_dst,
+                           &scratch_src, FALSE);
+   }
+
+   /* MOV dst.x, scratch.x */
+   if (writemask & TGSI_WRITEMASK_X) {
+      struct tgsi_full_dst_register x_dst =
+         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &x_dst, &scratch_src,
+                           inst->Instruction.Saturate);
+   }
+
+   /* EXP scratch.x, scratch.x */
+   /* DIV dst.y, abs(s0.x), scratch.x */
+   if (writemask & TGSI_WRITEMASK_Y) {
+      struct tgsi_full_dst_register y_dst =
+         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &scratch_dst,
+                           &scratch_src, FALSE);
+      emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &y_dst, &abs_s0_x,
+                           &scratch_src, inst->Instruction.Saturate);
+   }
+
+   /* MOV dst.w, 1.0 */
+   if (writemask & TGSI_WRITEMASK_W) {
+      struct tgsi_full_dst_register w_dst =
+         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
+      struct tgsi_full_src_register one =
+         make_immediate_reg_float(emit, 1.0f);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &w_dst, &one, FALSE);
+   }
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Translate TGSI_OPCODE_LRP (linear interpolate) into VGPU10 instructions.
+ */
+static boolean
+emit_lrp(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst = LRP(s0, s1, s2):
+    *   dst = s0 * (s1 - s2) + s2
+    * Translates into:
+    *   ADD tmp, s1, -s2;       tmp = s1 - s2
+    *   MAD dst, s0, tmp, s2;   dst = s0 * tmp + s2
+    */
+   unsigned delta = get_temp_index(emit);
+   struct tgsi_full_src_register delta_src = make_src_temp_reg(delta);
+   struct tgsi_full_dst_register delta_dst = make_dst_temp_reg(delta);
+   struct tgsi_full_src_register minus_s2 = negate_src(&inst->Src[2]);
+
+   /* ADD delta, s1, -s2 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &delta_dst,
+                        &inst->Src[1], &minus_s2, FALSE);
+
+   /* MAD dst, s0, delta, s2 */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
+                        &inst->Src[0], &delta_src, &inst->Src[2],
+                        inst->Instruction.Saturate);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Translate TGSI_OPCODE_POW into VGPU10 instructions.
+ */
+static boolean
+emit_pow(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* TGSI_OPCODE_POW produces a single value from src0.x and src1.x,
+    * whereas the VGPU10 opcodes used here compute four values.
+    *
+    * dst = POW(src0, src1):
+    *   dst.xyzw = src0.x ^ src1.x, emitted as EXP(LOG(src0.x) * src1.x)
+    */
+   unsigned scratch = get_temp_index(emit);
+   struct tgsi_full_src_register scratch_src = make_src_temp_reg(scratch);
+   struct tgsi_full_dst_register scratch_dst = make_dst_temp_reg(scratch);
+   struct tgsi_full_src_register base_xxxx =
+      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+   struct tgsi_full_src_register exponent_xxxx =
+      swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+
+   /* LOG scratch, s0.xxxx */
+   emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &scratch_dst, &base_xxxx,
+                        FALSE);
+
+   /* MUL scratch, scratch, s1.xxxx */
+   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &scratch_dst, &scratch_src,
+                        &exponent_xxxx, FALSE);
+
+   /* EXP dst, scratch */
+   emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0],
+                        &scratch_src, inst->Instruction.Saturate);
+
+   /* release the scratch temp */
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Translate TGSI_OPCODE_RCP (reciprocal) into VGPU10 instructions.
+ */
+static boolean
+emit_rcp(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+   unsigned recip = get_temp_index(emit);
+   struct tgsi_full_src_register recip_src = make_src_temp_reg(recip);
+   struct tgsi_full_dst_register recip_dst = make_dst_temp_reg(recip);
+
+   struct tgsi_full_dst_register recip_dst_x =
+      writemask_dst(&recip_dst, TGSI_WRITEMASK_X);
+   struct tgsi_full_src_register recip_src_xxxx =
+      scalar_src(&recip_src, TGSI_SWIZZLE_X);
+
+   /* DIV recip.x, 1.0, s0 -- the quotient lands in the temp's X channel */
+   emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &recip_dst_x, &one,
+                        &inst->Src[0], FALSE);
+
+   /* MOV dst, recip.xxxx -- X feeds whichever dst channels are written */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                        &recip_src_xxxx, inst->Instruction.Saturate);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Translate TGSI_OPCODE_RSQ (reciprocal square root) into VGPU10 code.
+ */
+static boolean
+emit_rsq(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst = RSQ(src):
+    *   dst.xyzw = 1 / sqrt(src.x)
+    * Translates into:
+    *   RSQ tmp.x, src
+    *   MOV dst, tmp.xxxx
+    */
+
+   unsigned rsq_tmp = get_temp_index(emit);
+   struct tgsi_full_src_register rsq_src = make_src_temp_reg(rsq_tmp);
+   struct tgsi_full_dst_register rsq_dst = make_dst_temp_reg(rsq_tmp);
+
+   struct tgsi_full_dst_register rsq_dst_x =
+      writemask_dst(&rsq_dst, TGSI_WRITEMASK_X);
+   struct tgsi_full_src_register rsq_src_xxxx =
+      scalar_src(&rsq_src, TGSI_SWIZZLE_X);
+
+   /* RSQ rsq_tmp.x, src -- only the temp's X channel is written */
+   emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &rsq_dst_x,
+                        &inst->Src[0], FALSE);
+
+   /* MOV dst, rsq_tmp.xxxx */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                        &rsq_src_xxxx, inst->Instruction.Saturate);
+
+   /* release the temp */
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Translate TGSI_OPCODE_SCS (sine/cosine) into VGPU10 instructions.
+ */
+static boolean
+emit_scs(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst.x = cos(src.x),  dst.y = sin(src.x)
+    * dst.z = 0.0,         dst.w = 1.0
+    */
+   struct tgsi_full_dst_register cos_dst =
+      writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
+   struct tgsi_full_dst_register sin_dst =
+      writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
+   struct tgsi_full_dst_register const_dst =
+      writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW);
+
+   struct tgsi_full_src_register zw_values =
+      make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f);
+
+   /* SINCOS dst.y, dst.x, src -- the sine destination is emitted first */
+   begin_emit_instruction(emit);
+   emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate);
+   emit_dst_register(emit, &sin_dst);
+   emit_dst_register(emit, &cos_dst);
+   emit_src_register(emit, &inst->Src[0]);
+   end_emit_instruction(emit);
+
+   /* MOV dst.zw, {0, 0, 0, 1} */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                        &const_dst, &zw_values, inst->Instruction.Saturate);
+
+   return TRUE;
+}
+
+
+/**
+ * Translate TGSI_OPCODE_SEQ (Set on Equal) into VGPU10 instructions.
+ */
+static boolean
+emit_seq(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst = SEQ(s0, s1):
+    *   dst = s0 == s1 ? 1.0 : 0.0  (per component)
+    * Translates into (per component):
+    *   EQ cmp, s0, s1;           cmp = s0 == s1 ? 0xffffffff : 0
+    *   MOVC dst, cmp, 1.0, 0.0;  dst = cmp ? 1.0 : 0.0
+    */
+   unsigned cmp = get_temp_index(emit);
+   struct tgsi_full_src_register cmp_src = make_src_temp_reg(cmp);
+   struct tgsi_full_dst_register cmp_dst = make_dst_temp_reg(cmp);
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+   /* EQ cmp, s0, s1 -- produces the per-component bit mask */
+   emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &cmp_dst, &inst->Src[0],
+                        &inst->Src[1], FALSE);
+
+   /* MOVC dst, cmp, 1.0, 0.0 -- turns the mask into 1.0 / 0.0 */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &cmp_src,
+                        &one, &zero, FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Translate TGSI_OPCODE_SGE (Set on Greater than or Equal) into VGPU10.
+ */
+static boolean
+emit_sge(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst = SGE(s0, s1):
+    *   dst = s0 >= s1 ? 1.0 : 0.0  (per component)
+    * Translates into (per component):
+    *   GE cmp, s0, s1;           cmp = s0 >= s1 ? 0xffffffff : 0
+    *   MOVC dst, cmp, 1.0, 0.0;  dst = cmp ? 1.0 : 0.0
+    */
+   unsigned cmp = get_temp_index(emit);
+   struct tgsi_full_src_register cmp_src = make_src_temp_reg(cmp);
+   struct tgsi_full_dst_register cmp_dst = make_dst_temp_reg(cmp);
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+   /* GE cmp, s0, s1 -- produces the per-component bit mask */
+   emit_instruction_op2(emit, VGPU10_OPCODE_GE, &cmp_dst, &inst->Src[0],
+                        &inst->Src[1], FALSE);
+
+   /* MOVC dst, cmp, 1.0, 0.0 -- turns the mask into 1.0 / 0.0 */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &cmp_src,
+                        &one, &zero, FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Translate TGSI_OPCODE_SGT (Set on Greater Than) into VGPU10 instructions.
+ */
+static boolean
+emit_sgt(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst = SGT(s0, s1):
+    *   dst = s0 > s1 ? 1.0 : 0.0  (per component)
+    * Translates into (per component):
+    *   LT cmp, s1, s0;           cmp = s1 < s0 ? 0xffffffff : 0
+    *   MOVC dst, cmp, 1.0, 0.0;  dst = cmp ? 1.0 : 0.0
+    */
+   unsigned cmp = get_temp_index(emit);
+   struct tgsi_full_src_register cmp_src = make_src_temp_reg(cmp);
+   struct tgsi_full_dst_register cmp_dst = make_dst_temp_reg(cmp);
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+   /* LT cmp, s1, s0 -- operands exchanged so that LT expresses s0 > s1 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &cmp_dst, &inst->Src[1],
+                        &inst->Src[0], FALSE);
+
+   /* MOVC dst, cmp, 1.0, 0.0 -- turns the mask into 1.0 / 0.0 */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &cmp_src,
+                        &one, &zero, FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Translate TGSI_OPCODE_SIN and TGSI_OPCODE_COS into VGPU10 SINCOS.
+ */
+static boolean
+emit_sincos(struct svga_shader_emitter_v10 *emit,
+            const struct tgsi_full_instruction *inst)
+{
+   unsigned result = get_temp_index(emit);
+   struct tgsi_full_src_register result_src = make_src_temp_reg(result);
+   struct tgsi_full_dst_register result_dst = make_dst_temp_reg(result);
+
+   struct tgsi_full_src_register result_src_xxxx =
+      scalar_src(&result_src, TGSI_SWIZZLE_X);
+   struct tgsi_full_dst_register result_dst_x =
+      writemask_dst(&result_dst, TGSI_WRITEMASK_X);
+
+   begin_emit_instruction(emit);
+   emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE);
+   if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) {
+      /* keep the first destination register, discard the second */
+      emit_dst_register(emit, &result_dst_x);
+      emit_null_dst_register(emit);
+   }
+   else {
+      /* discard the first destination register, keep the second */
+      emit_null_dst_register(emit);
+      emit_dst_register(emit, &result_dst_x);
+   }
+   emit_src_register(emit, &inst->Src[0]);
+   end_emit_instruction(emit);
+
+   /* MOV dst, result.xxxx */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                        &result_src_xxxx, inst->Instruction.Saturate);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Translate TGSI_OPCODE_SLE (Set on Less than or Equal) into VGPU10.
+ */
+static boolean
+emit_sle(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst = SLE(s0, s1):
+    *   dst = s0 <= s1 ? 1.0 : 0.0  (per component)
+    * Translates into (per component):
+    *   GE cmp, s1, s0;           cmp = s1 >= s0 ? 0xffffffff : 0
+    *   MOVC dst, cmp, 1.0, 0.0;  dst = cmp ? 1.0 : 0.0
+    */
+   unsigned cmp = get_temp_index(emit);
+   struct tgsi_full_src_register cmp_src = make_src_temp_reg(cmp);
+   struct tgsi_full_dst_register cmp_dst = make_dst_temp_reg(cmp);
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+   /* GE cmp, s1, s0 -- operands exchanged so that GE expresses s0 <= s1 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_GE, &cmp_dst, &inst->Src[1],
+                        &inst->Src[0], FALSE);
+
+   /* MOVC dst, cmp, 1.0, 0.0 -- turns the mask into 1.0 / 0.0 */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &cmp_src,
+                        &one, &zero, FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Translate TGSI_OPCODE_SLT (Set on Less Than) into VGPU10 instructions.
+ */
+static boolean
+emit_slt(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst = SLT(s0, s1):
+    *   dst = s0 < s1 ? 1.0 : 0.0  (per component)
+    * Translates into (per component):
+    *   LT cmp, s0, s1;           cmp = s0 < s1 ? 0xffffffff : 0
+    *   MOVC dst, cmp, 1.0, 0.0;  dst = cmp ? 1.0 : 0.0
+    */
+   unsigned cmp = get_temp_index(emit);
+   struct tgsi_full_src_register cmp_src = make_src_temp_reg(cmp);
+   struct tgsi_full_dst_register cmp_dst = make_dst_temp_reg(cmp);
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+   /* LT cmp, s0, s1 -- produces the per-component bit mask */
+   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &cmp_dst, &inst->Src[0],
+                        &inst->Src[1], FALSE);
+
+   /* MOVC dst, cmp, 1.0, 0.0 -- turns the mask into 1.0 / 0.0 */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &cmp_src,
+                        &one, &zero, FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Translate TGSI_OPCODE_SNE (Set on Not Equal) into VGPU10 instructions.
+ */
+static boolean
+emit_sne(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst = SNE(s0, s1):
+    *   dst = s0 != s1 ? 1.0 : 0.0  (per component)
+    * Translates into (per component):
+    *   NE cmp, s0, s1;           cmp = s0 != s1 ? 0xffffffff : 0
+    *   MOVC dst, cmp, 1.0, 0.0;  dst = cmp ? 1.0 : 0.0
+    */
+   unsigned cmp = get_temp_index(emit);
+   struct tgsi_full_src_register cmp_src = make_src_temp_reg(cmp);
+   struct tgsi_full_dst_register cmp_dst = make_dst_temp_reg(cmp);
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+   /* NE cmp, s0, s1 -- produces the per-component bit mask */
+   emit_instruction_op2(emit, VGPU10_OPCODE_NE, &cmp_dst, &inst->Src[0],
+                        &inst->Src[1], FALSE);
+
+   /* MOVC dst, cmp, 1.0, 0.0 -- turns the mask into 1.0 / 0.0 */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &cmp_src,
+                        &one, &zero, FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Translate TGSI_OPCODE_SSG (Set Sign) into VGPU10 instructions.
+ */
+static boolean
+emit_ssg(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
+    * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
+    * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
+    * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
+    * Translates into (per component):
+    *   LT mask, src, zero;             mask = src < zero ? 0xffffffff : 0
+    *   MOVC neg_part, mask, -1.0, 0.0; neg_part = mask ? -1.0 : 0.0
+    *   LT mask, zero, src;             mask = zero < src ? 0xffffffff : 0
+    *   MOVC dst, mask, 1.0, neg_part;  dst = mask ? 1.0 : neg_part
+    */
+   struct tgsi_full_src_register zero =
+      make_immediate_reg_float(emit, 0.0f);
+   struct tgsi_full_src_register one =
+      make_immediate_reg_float(emit, 1.0f);
+   struct tgsi_full_src_register neg_one =
+      make_immediate_reg_float(emit, -1.0f);
+
+   unsigned mask = get_temp_index(emit);
+   struct tgsi_full_src_register mask_src = make_src_temp_reg(mask);
+   struct tgsi_full_dst_register mask_dst = make_dst_temp_reg(mask);
+
+   unsigned neg_part = get_temp_index(emit);
+   struct tgsi_full_src_register neg_part_src = make_src_temp_reg(neg_part);
+   struct tgsi_full_dst_register neg_part_dst = make_dst_temp_reg(neg_part);
+
+   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &mask_dst, &inst->Src[0],
+                        &zero, FALSE);
+   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &neg_part_dst, &mask_src,
+                        &neg_one, &zero, FALSE);
+   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &mask_dst, &zero,
+                        &inst->Src[0], FALSE);
+   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &mask_src,
+                        &one, &neg_part_src, FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Translate TGSI_OPCODE_ISSG (Integer Set Sign) into VGPU10 instructions.
+ */
+static boolean
+emit_issg(struct svga_shader_emitter_v10 *emit,
+          const struct tgsi_full_instruction *inst)
+{
+   /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
+    * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
+    * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
+    * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
+    * Translates into (per component):
+    *   ILT is_neg, src, 0            is_neg = src < 0 ? -1 : 0
+    *   ILT is_pos, 0, src            is_pos = 0 < src ? -1 : 0
+    *   IADD dst, is_neg, -is_pos     dst    = is_neg - is_pos
+    */
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+
+   unsigned is_neg = get_temp_index(emit);
+   struct tgsi_full_src_register is_neg_src = make_src_temp_reg(is_neg);
+   struct tgsi_full_dst_register is_neg_dst = make_dst_temp_reg(is_neg);
+
+   unsigned is_pos = get_temp_index(emit);
+   struct tgsi_full_src_register is_pos_src = make_src_temp_reg(is_pos);
+   struct tgsi_full_dst_register is_pos_dst = make_dst_temp_reg(is_pos);
+
+   struct tgsi_full_src_register minus_is_pos = negate_src(&is_pos_src);
+
+   emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &is_neg_dst,
+                        &inst->Src[0], &zero, FALSE);
+   emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &is_pos_dst,
+                        &zero, &inst->Src[0], FALSE);
+   emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
+                        &is_neg_src, &minus_is_pos, FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Translate TGSI_OPCODE_SUB into a VGPU10 ADD with a negated operand.
+ */
+static boolean
+emit_sub(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst = SUB(s0, s1):
+    *   dst = s0 - s1
+    * Translates into:
+    *   ADD dst, s0, neg(s1)
+    */
+   struct tgsi_full_src_register minus_s1 = negate_src(&inst->Src[1]);
+
+   /* ADD dst, s0, -s1 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0],
+                        &inst->Src[0], &minus_s1,
+                        inst->Instruction.Saturate);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit the comparison selected by func.  The dest register receives
+ * 0 or ~0 values according to the outcome of comparing src0 to src1.
+ */
+static void
+emit_comparison(struct svga_shader_emitter_v10 *emit,
+                SVGA3dCmpFunc func,
+                const struct tgsi_full_dst_register *dst,
+                const struct tgsi_full_src_register *src0,
+                const struct tgsi_full_src_register *src1)
+{
+   struct tgsi_full_src_register imm;
+   VGPU10OpcodeToken0 token;
+   boolean swap_operands = FALSE;
+
+   /* Guard the assumed "svga enum == gallium enum + 1" relationship */
+   STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
+   STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));
+
+   token.value = 0;
+
+   switch (func) {
+   case SVGA3D_CMP_NEVER:
+      /* MOV dst, {0} */
+      imm = make_immediate_reg_int(emit, 0);
+      begin_emit_instruction(emit);
+      emit_dword(emit, VGPU10_OPCODE_MOV);
+      emit_dst_register(emit, dst);
+      emit_src_register(emit, &imm);
+      end_emit_instruction(emit);
+      return;
+   case SVGA3D_CMP_ALWAYS:
+      /* MOV dst, {-1} */
+      imm = make_immediate_reg_int(emit, -1);
+      begin_emit_instruction(emit);
+      emit_dword(emit, VGPU10_OPCODE_MOV);
+      emit_dst_register(emit, dst);
+      emit_src_register(emit, &imm);
+      end_emit_instruction(emit);
+      return;
+   case SVGA3D_CMP_LESS:
+      token.opcodeType = VGPU10_OPCODE_LT;
+      break;
+   case SVGA3D_CMP_EQUAL:
+      token.opcodeType = VGPU10_OPCODE_EQ;
+      break;
+   case SVGA3D_CMP_LESSEQUAL:
+      /* src0 <= src1 is emitted as src1 >= src0 */
+      token.opcodeType = VGPU10_OPCODE_GE;
+      swap_operands = TRUE;
+      break;
+   case SVGA3D_CMP_GREATER:
+      /* src0 > src1 is emitted as src1 < src0 */
+      token.opcodeType = VGPU10_OPCODE_LT;
+      swap_operands = TRUE;
+      break;
+   case SVGA3D_CMP_NOTEQUAL:
+      token.opcodeType = VGPU10_OPCODE_NE;
+      break;
+   case SVGA3D_CMP_GREATEREQUAL:
+      token.opcodeType = VGPU10_OPCODE_GE;
+      break;
+   default:
+      assert(!"Unexpected comparison mode");
+      /* if the assert is compiled out, fall back to EQ */
+      token.opcodeType = VGPU10_OPCODE_EQ;
+   }
+
+   begin_emit_instruction(emit);
+   emit_dword(emit, token.value);
+   emit_dst_register(emit, dst);
+   /* When swap_operands is set the two sources are emitted in the
+    * opposite order (src1 first, then src0).
+    */
+   emit_src_register(emit, swap_operands ? src1 : src0);
+   emit_src_register(emit, swap_operands ? src0 : src1);
+   end_emit_instruction(emit);
+}
+
+
+/**
+ * Fetch the texel/address offsets of a texture instruction.
+ */
+static void
+get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
+                  const struct tgsi_full_instruction *inst, int offsets[3])
+{
+   if (inst->Texture.NumOffsets == 1) {
+      /* According to the OpenGL Shader Language spec the offsets can
+       * only come from a previously-declared immediate/literal.
+       */
+      const struct tgsi_texture_offset *tex_off = inst->TexOffsets;
+      const unsigned imm_index = tex_off[0].Index;
+      const unsigned swz_x = tex_off[0].SwizzleX;
+      const unsigned swz_y = tex_off[0].SwizzleY;
+      const unsigned swz_z = tex_off[0].SwizzleZ;
+      const union tgsi_immediate_data *values = emit->immediates[imm_index];
+
+      assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
+
+      offsets[0] = values[swz_x].Int;
+      offsets[1] = values[swz_y].Int;
+      offsets[2] = values[swz_z].Int;
+   }
+   else {
+      offsets[0] = offsets[1] = offsets[2] = 0;
+   }
+}
+
+
+/**
+ * Produce the coordinate register to use for texture sampling.
+ * Sampling from a RECT texture supplies an unnormalized coordinate which
+ * has to be scaled to a normalized coordinate.
+ * That is done by multiplying the coordinate by an "extra" constant.
+ * An alternative would be to use the RESINFO instruction to query the
+ * texture's size.
+ */
+static struct tgsi_full_src_register
+setup_texcoord(struct svga_shader_emitter_v10 *emit,
+               unsigned unit,
+               const struct tgsi_full_src_register *coord)
+{
+   if (!emit->key.tex[unit].unnormalized) {
+      /* use texcoord as-is */
+      return *coord;
+   }
+   else {
+      unsigned scale_const = emit->texcoord_scale_index[unit];
+      unsigned scaled = get_temp_index(emit);
+      struct tgsi_full_src_register scaled_src = make_src_temp_reg(scaled);
+      struct tgsi_full_dst_register scaled_dst = make_dst_temp_reg(scaled);
+      struct tgsi_full_src_register scale_src = make_src_const_reg(scale_const);
+
+      /* MUL scaled, coord, const[] */
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &scaled_dst,
+                           coord, &scale_src, FALSE);
+      return scaled_src;
+   }
+}
+
+
+/**
+ * For SAMPLE_C instructions, emit the extra src register which supplies
+ * the reference/comparison value.
+ */
+static void
+emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
+                          unsigned target,
+                          const struct tgsi_full_src_register *coord)
+{
+   struct tgsi_full_src_register ref_src;
+   unsigned ref_channel;
+
+   assert(tgsi_is_shadow_target(target));
+
+   assert(target != TGSI_TEXTURE_SHADOWCUBE_ARRAY); /* XXX not implemented */
+   /* the reference value is read from coord.w for the 2D-array and cube
+    * shadow targets, and from coord.z for every other target
+    */
+   ref_channel = (target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
+                  target == TGSI_TEXTURE_SHADOWCUBE)
+      ? TGSI_SWIZZLE_W : TGSI_SWIZZLE_Z;
+
+   ref_src = scalar_src(coord, ref_channel);
+   emit_src_register(emit, &ref_src);
+}
+
+
+/**
+ * Info for implementing texture swizzles.
+ * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
+ * functions use this to encapsulate the extra steps needed to perform
+ * a texture swizzle, or shadow/depth comparisons.
+ * The shadow/depth comparison is only done here for the cases where
+ * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
+ */
+struct tex_swizzle_info
+{
+   boolean swizzled;         /**< unit's swizzle is not the identity XYZW */
+   boolean shadow_compare;   /**< do the shadow/depth comparison here? */
+   unsigned unit;            /**< set only when the temp reg is in use */
+   unsigned texture_target;  /**< TGSI_TEXTURE_x */
+   struct tgsi_full_src_register tmp_src;   /**< temp reg, as a source */
+   struct tgsi_full_dst_register tmp_dst;   /**< temp reg, as a dest */
+   const struct tgsi_full_dst_register *inst_dst;   /**< inst->Dst[0] */
+   const struct tgsi_full_src_register *coord_src;  /**< inst->Src[0] */
+};
+
+
+/**
+ * Prepare for handling a texture swizzle and/or shadow compare.
+ * \param unit  the texture unit
+ * \param inst  the TGSI texture instruction
+ * \param shadow_compare  do shadow/depth comparison?
+ * \param swz  returns the swizzle info
+ */
+static void
+begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
+                  unsigned unit,
+                  const struct tgsi_full_instruction *inst,
+                  boolean shadow_compare,
+                  struct tex_swizzle_info *swz)
+{
+   /* anything other than the identity XYZW mapping counts as a swizzle */
+   swz->swizzled = !(emit->key.tex[unit].swizzle_r == TGSI_SWIZZLE_X &&
+                     emit->key.tex[unit].swizzle_g == TGSI_SWIZZLE_Y &&
+                     emit->key.tex[unit].swizzle_b == TGSI_SWIZZLE_Z &&
+                     emit->key.tex[unit].swizzle_a == TGSI_SWIZZLE_W);
+   swz->shadow_compare = shadow_compare;
+   swz->texture_target = inst->Texture.Texture;
+   swz->inst_dst = &inst->Dst[0];
+   swz->coord_src = &inst->Src[0];
+
+   if (swz->swizzled || shadow_compare) {
+      /* A temp register receives the result of the SAMPLE instruction
+       * and is the source of the MOV/compare/swizzle instructions.
+       */
+      unsigned sample_tmp = get_temp_index(emit);
+
+      swz->tmp_src = make_src_temp_reg(sample_tmp);
+      swz->tmp_dst = make_dst_temp_reg(sample_tmp);
+      swz->unit = unit;
+   }
+}
+
+
+/**
+ * Returns the register that should receive the SAMPLE instruction results.
+ * That is the original instruction dst reg when there is neither a swizzle
+ * nor a shadow comparison, and the temporary reg otherwise.
+ */
+static const struct tgsi_full_dst_register *
+get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
+{
+   const boolean use_temp = swz->swizzled || swz->shadow_compare;
+   return use_temp ? &swz->tmp_dst : swz->inst_dst;
+}
+
+
+/**
+ * This emits the MOV instruction that actually implements a texture swizzle
+ * and/or shadow comparison.
+ */
+static void
+end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
+                const struct tex_swizzle_info *swz)
+{
+   if (swz->shadow_compare) {
+      /* Emit extra instructions to compare the fetched texel value against
+       * a texture coordinate component.  The result of the comparison
+       * is 0.0 or 1.0.
+       */
+      struct tgsi_full_src_register coord_src;
+      struct tgsi_full_src_register texel_src =
+         scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
+      struct tgsi_full_src_register one =
+         make_immediate_reg_float(emit, 1.0f);
+      /* convert gallium comparison func to SVGA comparison func */
+      SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;
+
+      assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+      switch (swz->texture_target) {
+      case TGSI_TEXTURE_SHADOW2D:
+      case TGSI_TEXTURE_SHADOWRECT:
+      case TGSI_TEXTURE_SHADOW1D_ARRAY:
+         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
+         break;
+      case TGSI_TEXTURE_SHADOW1D:
+         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Y);
+         break;
+      case TGSI_TEXTURE_SHADOWCUBE:
+      case TGSI_TEXTURE_SHADOW2D_ARRAY:
+         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_W);
+         break;
+      default:
+         assert(!"Unexpected texture target in end_tex_swizzle()");
+         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
+      }
+
+      /* COMPARE tmp, coord, texel */
+      /* XXX it would seem that the texel and coord arguments should
+       * be transposed here, but piglit tests indicate otherwise.
+       */
+      emit_comparison(emit, compare_func,
+                      &swz->tmp_dst, &texel_src, &coord_src);
+
+      /* AND dest, tmp, {1.0} */
+      begin_emit_instruction(emit);
+      emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
+      if (swz->swizzled) {
+         emit_dst_register(emit, &swz->tmp_dst);
+      }
+      else {
+         emit_dst_register(emit, swz->inst_dst);
+      }
+      emit_src_register(emit, &swz->tmp_src);
+      emit_src_register(emit, &one);
+      end_emit_instruction(emit);
+   }
+
+   if (swz->swizzled) {
+      unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
+      unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
+      unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
+      unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
+      unsigned writemask_0 = 0, writemask_1 = 0;
+      boolean int_tex = is_integer_type(emit->key.tex[swz->unit].return_type);
+
+      /* Swizzle w/out zero/one terms */
+      struct tgsi_full_src_register src_swizzled =
+         swizzle_src(&swz->tmp_src,
+                     swz_r < PIPE_SWIZZLE_ZERO ? swz_r : PIPE_SWIZZLE_RED,
+                     swz_g < PIPE_SWIZZLE_ZERO ? swz_g : PIPE_SWIZZLE_GREEN,
+                     swz_b < PIPE_SWIZZLE_ZERO ? swz_b : PIPE_SWIZZLE_BLUE,
+                     swz_a < PIPE_SWIZZLE_ZERO ? swz_a : PIPE_SWIZZLE_ALPHA);
+
+      /* MOV dst, color(tmp).<swizzle> */
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                           swz->inst_dst, &src_swizzled, FALSE);
+
+      /* handle swizzle zero terms */
+      writemask_0 = (((swz_r == PIPE_SWIZZLE_ZERO) << 0) |
+                     ((swz_g == PIPE_SWIZZLE_ZERO) << 1) |
+                     ((swz_b == PIPE_SWIZZLE_ZERO) << 2) |
+                     ((swz_a == PIPE_SWIZZLE_ZERO) << 3));
+
+      if (writemask_0) {
+         struct tgsi_full_src_register zero = int_tex ?
+            make_immediate_reg_int(emit, 0) :
+            make_immediate_reg_float(emit, 0.0f);
+         struct tgsi_full_dst_register dst =
+            writemask_dst(swz->inst_dst, writemask_0);
+
+         /* MOV dst.writemask_0, {0,0,0,0} */
+         emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                              &dst, &zero, FALSE);
+      }
+
+      /* handle swizzle one terms */
+      writemask_1 = (((swz_r == PIPE_SWIZZLE_ONE) << 0) |
+                     ((swz_g == PIPE_SWIZZLE_ONE) << 1) |
+                     ((swz_b == PIPE_SWIZZLE_ONE) << 2) |
+                     ((swz_a == PIPE_SWIZZLE_ONE) << 3));
+
+      if (writemask_1) {
+         struct tgsi_full_src_register one = int_tex ?
+            make_immediate_reg_int(emit, 1) :
+            make_immediate_reg_float(emit, 1.0f);
+         struct tgsi_full_dst_register dst =
+            writemask_dst(swz->inst_dst, writemask_1);
+
+         /* MOV dst.writemask_1, {1,1,1,1} */
+         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE);
+      }
+   }
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SAMPLE instruction.
+ * The resource unit is taken from Src[1] and the sampler unit from Src[2].
+ * Always returns TRUE.
+ */
+static boolean
+emit_sample(struct svga_shader_emitter_v10 *emit,
+            const struct tgsi_full_instruction *inst)
+{
+   const unsigned resource_unit = inst->Src[1].Register.Index;
+   const unsigned sampler_unit = inst->Src[2].Register.Index;
+   struct tgsi_full_src_register coord;
+   int offsets[3];
+   struct tex_swizzle_info swz_info;
+
+   /* NOTE(review): the swizzle setup is keyed by sampler_unit while the
+    * texcoord setup uses resource_unit -- confirm this is intended. */
+   begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info);
+   get_texel_offsets(emit, inst, offsets);
+   coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);
+
+   /* SAMPLE dst, coord(s0), resource, sampler */
+   begin_emit_instruction(emit);
+   emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
+                      inst->Instruction.Saturate, offsets);
+   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
+   emit_src_register(emit, &coord);
+   emit_resource_register(emit, resource_unit);
+   emit_sampler_register(emit, sampler_unit);
+   end_emit_instruction(emit);
+
+   end_tex_swizzle(emit, &swz_info);
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Check if a texture instruction is valid.
+ * The one condition rejected here is a shadow-comparison target combined
+ * with an integer-valued texture.
+ * A rejected instruction is replaced in the output with:
+ *   MOV dst, {1,1,1,1};
+ * where dst is the instruction's first destination register.
+ * \return TRUE if valid, FALSE if invalid.
+ */
+static boolean
+is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
+                         const struct tgsi_full_instruction *inst)
+{
+   const unsigned tex_unit = inst->Src[1].Register.Index;
+   const unsigned tex_target = inst->Texture.Texture;
+   struct tgsi_full_src_register one;
+
+   /* XXX might check for other conditions in the future here */
+   if (!tgsi_is_shadow_target(tex_target) ||
+       !is_integer_type(emit->key.tex[tex_unit].return_type)) {
+      return TRUE;
+   }
+
+   debug_printf("Invalid SAMPLE_C with an integer texture!\n");
+
+   /* emit a MOV dst, {1,1,1,1} instruction. */
+   one = make_immediate_reg_float(emit, 1.0f);
+   begin_emit_instruction(emit);
+   emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
+   emit_dst_register(emit, &inst->Dst[0]);
+   emit_src_register(emit, &one);
+   end_emit_instruction(emit);
+
+   return FALSE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_TEX (simple texture lookup).
+ * Shadow targets are sampled with SAMPLE_C, all other targets with SAMPLE.
+ * Always returns TRUE.
+ */
+static boolean
+emit_tex(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const uint unit = inst->Src[1].Register.Index;
+   const unsigned target = inst->Texture.Texture;
+   struct tex_swizzle_info swizzle;
+   struct tgsi_full_src_register texcoord;
+   int offsets[3];
+   unsigned opcode;
+
+   /* check that the sampler returns a float */
+   if (!is_valid_tex_instruction(emit, inst)) {
+      return TRUE;
+   }
+
+   begin_tex_swizzle(emit, unit, inst, FALSE, &swizzle);
+   get_texel_offsets(emit, inst, offsets);
+   texcoord = setup_texcoord(emit, unit, &inst->Src[0]);
+
+   /* SAMPLE[_C] dst, coord(s0), resource, sampler [, refcoord] */
+   begin_emit_instruction(emit);
+
+   opcode = tgsi_is_shadow_target(target) ? VGPU10_OPCODE_SAMPLE_C
+                                          : VGPU10_OPCODE_SAMPLE;
+
+   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
+   emit_dst_register(emit, get_tex_swizzle_dst(&swizzle));
+   emit_src_register(emit, &texcoord);
+   emit_resource_register(emit, unit);
+   emit_sampler_register(emit, unit);
+   if (opcode == VGPU10_OPCODE_SAMPLE_C) {
+      /* the comparison reference is appended for SAMPLE_C only */
+      emit_tex_compare_refcoord(emit, target, &texcoord);
+   }
+   end_emit_instruction(emit);
+
+   end_tex_swizzle(emit, &swizzle);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_TXP (projective texture): coord / coord.w.
+ */
+static boolean
+emit_txp(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const uint unit = inst->Src[1].Register.Index;
+   unsigned target = inst->Texture.Texture;
+   unsigned opcode;
+   int offsets[3];
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+   struct tgsi_full_src_register src0_wwww =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
+   struct tgsi_full_src_register coord;
+   struct tex_swizzle_info swz_info;
+
+   /* check that the sampler returns a float */
+   if (!is_valid_tex_instruction(emit, inst)) {
+      /* tmp was already allocated above; release it before bailing out */
+      free_temp_indexes(emit);
+      return TRUE;
+   }
+
+   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
+   get_texel_offsets(emit, inst, offsets);
+
+   coord = setup_texcoord(emit, unit, &inst->Src[0]);
+
+   /* DIV tmp, coord, coord.wwww */
+   emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
+                        &coord, &src0_wwww, FALSE);
+
+   /* SAMPLE dst, coord(tmp), resource, sampler */
+   begin_emit_instruction(emit);
+   if (tgsi_is_shadow_target(target))
+      opcode = VGPU10_OPCODE_SAMPLE_C;
+   else
+      opcode = VGPU10_OPCODE_SAMPLE;
+   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
+   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
+   emit_src_register(emit, &tmp_src);  /* projected coord */
+   emit_resource_register(emit, unit);
+   emit_sampler_register(emit, unit);
+   if (opcode == VGPU10_OPCODE_SAMPLE_C) {
+      emit_tex_compare_refcoord(emit, target, &tmp_src);
+   }
+   end_emit_instruction(emit);
+
+   end_tex_swizzle(emit, &swz_info);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_XPD instruction.
+ * Only the components enabled in the dst writemask are computed.
+ */
+static boolean
+emit_xpd(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst.x = src0.y * src1.z - src1.y * src0.z
+    * dst.y = src0.z * src1.x - src1.z * src0.x
+    * dst.z = src0.x * src1.y - src1.x * src0.y
+    * dst.w = 1
+    */
+   struct tgsi_full_src_register s0_xxxx =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
+   struct tgsi_full_src_register s0_yyyy =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register s0_zzzz =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
+
+   struct tgsi_full_src_register s1_xxxx =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
+   struct tgsi_full_src_register s1_yyyy =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register s1_zzzz =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z);
+
+   unsigned tmp1 = get_temp_index(emit);
+   struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
+   struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
+
+   unsigned tmp2 = get_temp_index(emit);
+   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
+   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
+   struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src);
+
+   unsigned tmp3 = get_temp_index(emit);
+   struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3);
+   struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3);
+   struct tgsi_full_dst_register tmp3_dst_x =
+      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X);
+   struct tgsi_full_dst_register tmp3_dst_y =
+      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y);
+   struct tgsi_full_dst_register tmp3_dst_z =
+      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z);
+   struct tgsi_full_dst_register tmp3_dst_w =
+      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W);
+
+   /* Note: we put all the intermediate computations into tmp3 in case
+    * the XPD dest register is the same as one of the src regs (in which
+    * case we could clobber a src reg before we're done with it).
+    *
+    * Note: we could get by with just one temp register instead of three
+    * since we're doing scalar operations and there's enough room in one
+    * temp for everything.
+    */
+
+   /* MUL tmp1, src0.y, src1.z */
+   /* MUL tmp2, src1.y, src0.z */
+   /* ADD tmp3.x, tmp1, -tmp2 */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst,
+                           &s0_yyyy, &s1_zzzz, FALSE);
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst,
+                           &s1_yyyy, &s0_zzzz, FALSE);
+      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x,
+                           &tmp1_src, &neg_tmp2_src, FALSE);
+   }
+
+   /* MUL tmp1, src0.z, src1.x */
+   /* MUL tmp2, src1.z, src0.x */
+   /* ADD tmp3.y, tmp1, -tmp2 */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz,
+                           &s1_xxxx, FALSE);
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz,
+                           &s0_xxxx, FALSE);
+      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y,
+                           &tmp1_src, &neg_tmp2_src, FALSE);
+   }
+
+   /* MUL tmp1, src0.x, src1.y */
+   /* MUL tmp2, src1.x, src0.y */
+   /* ADD tmp3.z, tmp1, -tmp2 */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx,
+                           &s1_yyyy, FALSE);
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx,
+                           &s0_yyyy, FALSE);
+      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z,
+                           &tmp1_src, &neg_tmp2_src, FALSE);
+   }
+
+   /* MOV tmp3.w, 1.0 */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+      struct tgsi_full_src_register one =
+         make_immediate_reg_float(emit, 1.0f);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one, FALSE);
+   }
+
+   /* MOV dst, tmp3 */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src,
+                        inst->Instruction.Saturate);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_TXD (explicit derivatives).
+ * The texture unit is read from Src[3]; Src[1] and Src[2] are passed
+ * through as the X and Y derivative operands.  Always returns TRUE.
+ */
+static boolean
+emit_txd(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const uint unit = inst->Src[3].Register.Index;
+   unsigned target = inst->Texture.Texture;
+   int offsets[3];
+   struct tgsi_full_src_register coord;
+   struct tex_swizzle_info swz_info;
+
+   begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
+                     &swz_info);
+   get_texel_offsets(emit, inst, offsets);
+   coord = setup_texcoord(emit, unit, &inst->Src[0]);
+
+   /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
+   begin_emit_instruction(emit);
+   emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
+                      inst->Instruction.Saturate, offsets);
+   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
+   emit_src_register(emit, &coord);
+   emit_resource_register(emit, unit);
+   emit_sampler_register(emit, unit);
+   emit_src_register(emit, &inst->Src[1]);  /* Xderiv */
+   emit_src_register(emit, &inst->Src[2]);  /* Yderiv */
+   end_emit_instruction(emit);
+
+   end_tex_swizzle(emit, &swz_info);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_TXF (texel fetch).
+ * MSAA textures are read with LD_MS, which takes its sample index from
+ * the W component of Src[0]; all other textures are read with LD.
+ * Always returns TRUE.
+ */
+static boolean
+emit_txf(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const uint unit = inst->Src[1].Register.Index;
+   const unsigned multisample = emit->key.tex[unit].texture_msaa;
+   /* MSAA textures are fetched one sample at a time */
+   const unsigned opcode =
+      multisample ? VGPU10_OPCODE_LD_MS : VGPU10_OPCODE_LD;
+   struct tex_swizzle_info swizzle;
+   int offsets[3];
+
+   begin_tex_swizzle(emit, unit, inst, FALSE, &swizzle);
+
+   get_texel_offsets(emit, inst, offsets);
+
+   /* Fetch one texel specified by integer coordinate:
+    *   LD    dst, coord(s0), resource
+    *   LD_MS dst, coord(s0), resource, sampleIndex
+    */
+   begin_emit_instruction(emit);
+   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
+   emit_dst_register(emit, get_tex_swizzle_dst(&swizzle));
+   emit_src_register(emit, &inst->Src[0]);
+   emit_resource_register(emit, unit);
+
+   if (multisample) {
+      /* LD_MS only: append the sample index, taken from coord.w */
+      struct tgsi_full_src_register sampleIndex =
+         scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
+
+      emit_src_register(emit, &sampleIndex);
+   }
+
+   end_emit_instruction(emit);
+
+   end_tex_swizzle(emit, &swizzle);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_TXL (explicit LOD), TGSI_OPCODE_TXB (LOD bias)
+ * or TGSI_OPCODE_TXB2 (for cube shadow maps).
+ * TXB2 carries the bias in Src[1].x and the unit in Src[2]; the other two
+ * carry the LOD/bias in Src[0].w and the unit in Src[1].
+ */
+static boolean
+emit_txl_txb(struct svga_shader_emitter_v10 *emit,
+             const struct tgsi_full_instruction *inst)
+{
+   const unsigned tgsi_opcode = inst->Instruction.Opcode;
+   const unsigned target = inst->Texture.Texture;
+   const boolean is_txb2 = (tgsi_opcode == TGSI_OPCODE_TXB2);
+   struct tgsi_full_src_register texcoord, lod_or_bias;
+   struct tex_swizzle_info swizzle;
+   unsigned vgpu10_opcode, unit;
+   int offsets[3];
+
+   assert(tgsi_opcode == TGSI_OPCODE_TXL ||
+          tgsi_opcode == TGSI_OPCODE_TXB ||
+          tgsi_opcode == TGSI_OPCODE_TXB2);
+
+   /* locate the LOD/bias operand and the texture unit */
+   lod_or_bias = is_txb2 ? scalar_src(&inst->Src[1], TGSI_SWIZZLE_X)
+                         : scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
+   unit = is_txb2 ? inst->Src[2].Register.Index
+                  : inst->Src[1].Register.Index;
+
+   begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
+                     &swizzle);
+
+   get_texel_offsets(emit, inst, offsets);
+
+   texcoord = setup_texcoord(emit, unit, &inst->Src[0]);
+
+   /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod-or-bias */
+   begin_emit_instruction(emit);
+
+   vgpu10_opcode = (tgsi_opcode == TGSI_OPCODE_TXL) ? VGPU10_OPCODE_SAMPLE_L
+                                                    : VGPU10_OPCODE_SAMPLE_B;
+
+   emit_sample_opcode(emit, vgpu10_opcode, inst->Instruction.Saturate,
+                      offsets);
+   emit_dst_register(emit, get_tex_swizzle_dst(&swizzle));
+   emit_src_register(emit, &texcoord);
+   emit_resource_register(emit, unit);
+   emit_sampler_register(emit, unit);
+   emit_src_register(emit, &lod_or_bias);
+   end_emit_instruction(emit);
+
+   end_tex_swizzle(emit, &swizzle);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
+ */
+static boolean
+emit_txq(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const uint unit = inst->Src[1].Register.Index;
+
+   if (emit->key.tex[unit].texture_target != PIPE_BUFFER) {
+      /* RESINFO dst, srcMipLevel, resource */
+      begin_emit_instruction(emit);
+      emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
+      emit_dst_register(emit, &inst->Dst[0]);
+      emit_src_register(emit, &inst->Src[0]);
+      emit_resource_register(emit, unit);
+      end_emit_instruction(emit);
+   } else {
+      /* Texture buffers cannot be queried with RESINFO.  Their sizes are
+       * kept in shader constants instead, so TXQ becomes a plain copy:
+       * MOV dst, const[texture_buffer_size_index[unit]]
+       */
+      struct tgsi_full_src_register buffer_size =
+         make_src_const_reg(emit->texture_buffer_size_index[unit]);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                           &buffer_size, FALSE);
+   }
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit a simple instruction (like ADD, MUL, MIN, etc) operand-for-operand.
+ */
+static boolean
+emit_simple(struct svga_shader_emitter_v10 *emit,
+            const struct tgsi_full_instruction *inst)
+{
+   const unsigned tgsi_opcode = inst->Instruction.Opcode;
+   const struct tgsi_opcode_info *info = tgsi_get_opcode_info(tgsi_opcode);
+   unsigned d, s;
+
+   begin_emit_instruction(emit);
+   emit_opcode(emit, translate_opcode(tgsi_opcode),
+               inst->Instruction.Saturate);
+   for (d = 0; d < info->num_dst; d++) {
+      emit_dst_register(emit, &inst->Dst[d]);
+   }
+   for (s = 0; s < info->num_src; s++) {
+      emit_src_register(emit, &inst->Src[s]);
+   }
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit a simple VGPU10 instruction which writes to multiple dest registers,
+ * where TGSI only uses one dest register.
+ * \param dst_count  number of dest operands emitted for the instruction
+ * \param dst_index  which of them receives the TGSI dest; the rest are null
+ */
+static boolean
+emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
+                 const struct tgsi_full_instruction *inst,
+                 unsigned dst_count,
+                 unsigned dst_index)
+{
+   const unsigned tgsi_opcode = inst->Instruction.Opcode;
+   const struct tgsi_opcode_info *info = tgsi_get_opcode_info(tgsi_opcode);
+   unsigned d, s;
+
+   begin_emit_instruction(emit);
+   emit_opcode(emit, translate_opcode(tgsi_opcode),
+               inst->Instruction.Saturate);
+   for (d = 0; d < dst_count; d++) {
+      if (d != dst_index) {
+         emit_null_dst_register(emit);
+         continue;
+      }
+      emit_dst_register(emit, &inst->Dst[0]);
+   }
+   for (s = 0; s < info->num_src; s++) {
+      emit_src_register(emit, &inst->Src[s]);
+   }
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Translate a single TGSI instruction to VGPU10.
+ * \return FALSE for an unimplemented opcode, else the emitter's result.
+ */
+static boolean
+emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
+                        unsigned inst_number,
+                        const struct tgsi_full_instruction *inst)
+{
+   const unsigned opcode = inst->Instruction.Opcode;
+
+   switch (opcode) {
+   case TGSI_OPCODE_ADD:
+   case TGSI_OPCODE_AND:
+   case TGSI_OPCODE_BGNLOOP:
+   case TGSI_OPCODE_BRK:
+   case TGSI_OPCODE_CEIL:
+   case TGSI_OPCODE_CONT:
+   case TGSI_OPCODE_DDX:
+   case TGSI_OPCODE_DDY:
+   case TGSI_OPCODE_DIV:
+   case TGSI_OPCODE_DP2:
+   case TGSI_OPCODE_DP3:
+   case TGSI_OPCODE_DP4:
+   case TGSI_OPCODE_ELSE:
+   case TGSI_OPCODE_ENDIF:
+   case TGSI_OPCODE_ENDLOOP:
+   case TGSI_OPCODE_ENDSUB:
+   case TGSI_OPCODE_F2I:
+   case TGSI_OPCODE_F2U:
+   case TGSI_OPCODE_FLR:
+   case TGSI_OPCODE_FRC:
+   case TGSI_OPCODE_FSEQ:
+   case TGSI_OPCODE_FSGE:
+   case TGSI_OPCODE_FSLT:
+   case TGSI_OPCODE_FSNE:
+   case TGSI_OPCODE_I2F:
+   case TGSI_OPCODE_IMAX:
+   case TGSI_OPCODE_IMIN:
+   case TGSI_OPCODE_INEG:
+   case TGSI_OPCODE_ISGE:
+   case TGSI_OPCODE_ISHR:
+   case TGSI_OPCODE_ISLT:
+   case TGSI_OPCODE_MAD:
+   case TGSI_OPCODE_MAX:
+   case TGSI_OPCODE_MIN:
+   case TGSI_OPCODE_MOV:
+   case TGSI_OPCODE_MUL:
+   case TGSI_OPCODE_NOP:
+   case TGSI_OPCODE_NOT:
+   case TGSI_OPCODE_OR:
+   case TGSI_OPCODE_RET:
+   case TGSI_OPCODE_UADD:
+   case TGSI_OPCODE_USEQ:
+   case TGSI_OPCODE_USGE:
+   case TGSI_OPCODE_USLT:
+   case TGSI_OPCODE_UMIN:
+   case TGSI_OPCODE_UMAD:
+   case TGSI_OPCODE_UMAX:
+   case TGSI_OPCODE_ROUND:
+   case TGSI_OPCODE_SQRT:
+   case TGSI_OPCODE_SHL:
+   case TGSI_OPCODE_TRUNC:
+   case TGSI_OPCODE_U2F:
+   case TGSI_OPCODE_UCMP:
+   case TGSI_OPCODE_USHR:
+   case TGSI_OPCODE_USNE:
+   case TGSI_OPCODE_XOR:
+      /* simple instructions */
+      return emit_simple(emit, inst);
+
+   case TGSI_OPCODE_EMIT:
+      return emit_vertex(emit, inst);
+   case TGSI_OPCODE_ENDPRIM:
+      return emit_endprim(emit, inst);
+   case TGSI_OPCODE_ABS:
+      return emit_abs(emit, inst);
+   case TGSI_OPCODE_IABS:
+      return emit_iabs(emit, inst);
+   case TGSI_OPCODE_ARL:
+      /* fall-through */
+   case TGSI_OPCODE_UARL:
+      return emit_arl_uarl(emit, inst);
+   case TGSI_OPCODE_BGNSUB:
+      /* no-op */
+      return TRUE;
+   case TGSI_OPCODE_CAL:
+      return emit_cal(emit, inst);
+   case TGSI_OPCODE_CMP:
+      return emit_cmp(emit, inst);
+   case TGSI_OPCODE_COS:
+      return emit_sincos(emit, inst);
+   case TGSI_OPCODE_DP2A:
+      return emit_dp2a(emit, inst);
+   case TGSI_OPCODE_DPH:
+      return emit_dph(emit, inst);
+   case TGSI_OPCODE_DST:
+      return emit_dst(emit, inst);
+   case TGSI_OPCODE_EX2:
+      return emit_ex2(emit, inst);
+   case TGSI_OPCODE_EXP:
+      return emit_exp(emit, inst);
+   case TGSI_OPCODE_IF:
+      return emit_if(emit, inst);
+   case TGSI_OPCODE_KILL:
+      return emit_kill(emit, inst);
+   case TGSI_OPCODE_KILL_IF:
+      return emit_kill_if(emit, inst);
+   case TGSI_OPCODE_LG2:
+      return emit_lg2(emit, inst);
+   case TGSI_OPCODE_LIT:
+      return emit_lit(emit, inst);
+   case TGSI_OPCODE_LOG:
+      return emit_log(emit, inst);
+   case TGSI_OPCODE_LRP:
+      return emit_lrp(emit, inst);
+   case TGSI_OPCODE_POW:
+      return emit_pow(emit, inst);
+   case TGSI_OPCODE_RCP:
+      return emit_rcp(emit, inst);
+   case TGSI_OPCODE_RSQ:
+      return emit_rsq(emit, inst);
+   case TGSI_OPCODE_SAMPLE:
+      return emit_sample(emit, inst);
+   case TGSI_OPCODE_SCS:
+      return emit_scs(emit, inst);
+   case TGSI_OPCODE_SEQ:
+      return emit_seq(emit, inst);
+   case TGSI_OPCODE_SGE:
+      return emit_sge(emit, inst);
+   case TGSI_OPCODE_SGT:
+      return emit_sgt(emit, inst);
+   case TGSI_OPCODE_SIN:
+      return emit_sincos(emit, inst);
+   case TGSI_OPCODE_SLE:
+      return emit_sle(emit, inst);
+   case TGSI_OPCODE_SLT:
+      return emit_slt(emit, inst);
+   case TGSI_OPCODE_SNE:
+      return emit_sne(emit, inst);
+   case TGSI_OPCODE_SSG:
+      return emit_ssg(emit, inst);
+   case TGSI_OPCODE_ISSG:
+      return emit_issg(emit, inst);
+   case TGSI_OPCODE_SUB:
+      return emit_sub(emit, inst);
+   case TGSI_OPCODE_TEX:
+      return emit_tex(emit, inst);
+   case TGSI_OPCODE_TXP:
+      return emit_txp(emit, inst);
+   case TGSI_OPCODE_TXB:
+   case TGSI_OPCODE_TXB2:
+   case TGSI_OPCODE_TXL:
+      return emit_txl_txb(emit, inst);
+   case TGSI_OPCODE_TXD:
+      return emit_txd(emit, inst);
+   case TGSI_OPCODE_TXF:
+      return emit_txf(emit, inst);
+   case TGSI_OPCODE_TXQ:
+      return emit_txq(emit, inst);
+   case TGSI_OPCODE_UIF:
+      return emit_if(emit, inst);
+   case TGSI_OPCODE_XPD:
+      return emit_xpd(emit, inst);
+   case TGSI_OPCODE_UMUL_HI:
+   case TGSI_OPCODE_IMUL_HI:
+   case TGSI_OPCODE_UDIV:
+   case TGSI_OPCODE_IDIV:
+      /* These cases use only the FIRST of two destination registers */
+      return emit_simple_1dst(emit, inst, 2, 0);
+   case TGSI_OPCODE_UMUL:
+   case TGSI_OPCODE_UMOD:
+   case TGSI_OPCODE_MOD:
+      /* These cases use only the SECOND of two destination registers */
+      return emit_simple_1dst(emit, inst, 2, 1);
+   case TGSI_OPCODE_END:
+      if (!emit_post_helpers(emit))
+         return FALSE;
+      return emit_simple(emit, inst);
+
+   default:
+      debug_printf("Unimplemented tgsi instruction %s\n",
+                   tgsi_get_opcode_name(opcode));
+      return FALSE;
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Emit the extra instructions to adjust the vertex position.
+ * There are two possible adjustments:
+ * 1. Converting from Gallium to VGPU10 coordinate space by applying the
+ *    "prescale" and "pretranslate" values.
+ * 2. Undoing the viewport transformation when we use the swtnl/draw path.
+ * \param vs_pos_tmp_index  which temporary register contains the vertex pos.
+ */
+static void
+emit_vpos_instructions(struct svga_shader_emitter_v10 *emit,
+                       unsigned vs_pos_tmp_index)
+{
+   struct tgsi_full_src_register tmp_pos_src;
+   struct tgsi_full_dst_register pos_dst;
+
+   /* Don't bother to emit any extra vertex instructions if vertex position is
+    * not written out
+    */
+   if (emit->vposition.out_index == INVALID_INDEX)
+      return;
+
+   tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
+   pos_dst = make_dst_output_reg(emit->vposition.out_index);
+
+   /* If non-adjusted vertex position register index
+    * is valid, copy the vertex position from the temporary
+    * vertex position register before it is modified by the
+    * prescale computation.
+    */
+   if (emit->vposition.so_index != INVALID_INDEX) {
+      struct tgsi_full_dst_register pos_so_dst =
+         make_dst_output_reg(emit->vposition.so_index);
+
+      /* MOV pos_so, tmp_pos */
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst,
+                           &tmp_pos_src, FALSE);
+   }
+
+   if (emit->vposition.need_prescale) {
+      /* This code adjusts the vertex position to match the VGPU10 convention.
+       * If p is the position computed by the shader (usually by applying the
+       * modelview and projection matrices), the new position q is computed by:
+       *
+       * q.x = p.w * trans.x + p.x * scale.x
+       * q.y = p.w * trans.y + p.y * scale.y
+       * q.z = p.w * trans.z + p.z * scale.z;
+       * q.w = p.w * trans.w + p.w;
+       */
+      struct tgsi_full_src_register tmp_pos_src_w =
+         scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
+      struct tgsi_full_dst_register tmp_pos_dst =
+         make_dst_temp_reg(vs_pos_tmp_index);
+      struct tgsi_full_dst_register tmp_pos_dst_xyz =
+         writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
+
+      struct tgsi_full_src_register prescale_scale =
+         make_src_const_reg(emit->vposition.prescale_scale_index);
+      struct tgsi_full_src_register prescale_trans =
+         make_src_const_reg(emit->vposition.prescale_trans_index);
+
+      /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
+                           &tmp_pos_src, &prescale_scale, FALSE);
+
+      /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
+      emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
+                           &prescale_trans, &tmp_pos_src, FALSE);
+   }
+   else if (emit->key.vs.undo_viewport) {
+      /* This code computes the final vertex position from the temporary
+       * vertex position by undoing the viewport transformation and the
+       * divide-by-W operation (we convert window coords back to clip coords).
+       * This is needed when we use the 'draw' module for fallbacks.
+       * If p is the temp pos in window coords, then the NDC coord q is:
+       *   q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
+       *   q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
+       *   q.z = p.z * p.w
+       *   q.w = p.w
+       * CONST[vs_viewport_index] contains:
+       *   { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
+       */
+      struct tgsi_full_dst_register tmp_pos_dst =
+         make_dst_temp_reg(vs_pos_tmp_index);
+      struct tgsi_full_dst_register tmp_pos_dst_xy =
+         writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
+      struct tgsi_full_src_register tmp_pos_src_wwww =
+         scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
+
+      struct tgsi_full_dst_register pos_dst_xyz =
+         writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
+      struct tgsi_full_dst_register pos_dst_w =
+         writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
+
+      struct tgsi_full_src_register vp_xyzw =
+         make_src_const_reg(emit->vs.viewport_index);
+      struct tgsi_full_src_register vp_zwww =
+         swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
+                     TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
+
+      /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
+      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
+                           &tmp_pos_src, &vp_zwww, FALSE);
+
+      /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzw */
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
+                           &tmp_pos_src, &vp_xyzw, FALSE);
+
+      /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
+                           &tmp_pos_src, &tmp_pos_src_wwww, FALSE);
+
+      /* MOV pos.w, tmp_pos.w */
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w,
+                           &tmp_pos_src, FALSE);
+   }
+   else if (vs_pos_tmp_index != INVALID_INDEX) {
+      /* This code is to handle the case where the temporary vertex
+       * position register is created when the vertex shader has stream
+       * output and prescale is disabled because rasterization is to be
+       * discarded.
+       * pos_dst, set up at the top of this function, already refers to
+       * the position output register, so it is used directly here.
+       */
+
+      /* MOV pos, tmp_pos */
+      begin_emit_instruction(emit);
+      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
+      emit_dst_register(emit, &pos_dst);
+      emit_src_register(emit, &tmp_pos_src);
+      end_emit_instruction(emit);
+   }
+}
+
+/**
+ * Emit the clip-distance instructions selected by emit->clip_mode:
+ * CLIP_DISTANCE, CLIP_VERTEX or (when a position is written) CLIP_LEGACY.
+ * Any other clip mode emits nothing.
+ * This does not emit the vertex position itself.
+ */
+static void
+emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   if (emit->clip_mode == CLIP_VERTEX) {
+      /* Convert TGSI CLIPVERTEX to CLIPDIST */
+      emit_clip_vertex_instructions(emit);
+   }
+   else if (emit->clip_mode == CLIP_DISTANCE) {
+      /* Copy from clip distance temporary to CLIPDIST & the shadow copy */
+      emit_clip_distance_instructions(emit);
+   }
+
+   /**
+    * Legacy user defined clip planes are only handled when there is a
+    * valid vertex position register index, i.e. when the shader outputs
+    * a vertex position at all.
+    *
+    * NOTE: the clip distance instructions must be emitted before the
+    * emit_vpos_instructions() call since the latter function will change
+    * the TEMP[vs_pos_tmp_index] value.
+    */
+   if (emit->vposition.out_index != INVALID_INDEX &&
+       emit->clip_mode == CLIP_LEGACY) {
+      /* Emit CLIPDIST for legacy user defined clip planes */
+      emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
+   }
+}
+
+
+/**
+ * Emit the extra per-vertex instructions: the clipping instructions
+ * followed by the vertex position instructions.  This is called for
+ * each GS emit-vertex instruction and at the end of VS translation.
+ */
+static void
+emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   const unsigned saved_tmp_index = emit->vposition.tmp_index;
+
+   /* Clipping comes first; what is emitted depends on the clipping mode */
+   emit_clipping_instructions(emit);
+
+   /* While the position instructions are emitted, the temporary vertex
+    * position register index is invalidated so that emit_dst_register()
+    * will use the real vertex position output.
+    */
+   emit->vposition.tmp_index = INVALID_INDEX;
+   emit_vpos_instructions(emit, saved_tmp_index);
+
+   /* Put the original index back for the next GS vertex.
+    * It doesn't matter for VS.
+    */
+   emit->vposition.tmp_index = saved_tmp_index;
+}
+
+/**
+ * Translate the TGSI_OPCODE_EMIT GS instruction.
+ *
+ * The extra per-vertex instructions are emitted first, followed by a
+ * VGPU10 EMIT opcode with no operands.
+ *
+ * \param emit  the shader emitter; must be translating a geometry shader
+ * \param inst  the TGSI instruction.  Unused: its one operand (vertex
+ *              stream number) must be ignored for VGPU10.
+ * \return TRUE always
+ */
+static boolean
+emit_vertex(struct svga_shader_emitter_v10 *emit,
+            const struct tgsi_full_instruction *inst)
+{
+   assert(emit->unit == PIPE_SHADER_GEOMETRY);
+
+   (void) inst;
+
+   emit_vertex_instructions(emit);
+
+   /* We can't use emit_simple() because the TGSI instruction has one
+    * operand (vertex stream number) which we must ignore for VGPU10.
+    */
+   begin_emit_instruction(emit);
+   emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit the extra code to convert from VGPU10's boolean front-face
+ * register to TGSI's signed front-face register.  The +/-1 result is
+ * written to TEMP[fs.face_tmp_index].  Nothing is emitted when the
+ * shader has no face input (fs.face_input_index is INVALID_INDEX).
+ *
+ * TODO: Make temporary front-face register a scalar.
+ */
+static void
+emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   struct tgsi_full_dst_register face_tmp;
+   struct tgsi_full_src_register plus_one;
+   struct tgsi_full_src_register minus_one;
+
+   assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+   if (emit->fs.face_input_index == INVALID_INDEX) {
+      return;
+   }
+
+   /* operands for converting the vgpu10 boolean to gallium's +/-1 value */
+   face_tmp = make_dst_temp_reg(emit->fs.face_tmp_index);
+   plus_one = make_immediate_reg_float(emit, 1.0f);
+   minus_one = make_immediate_reg_float(emit, -1.0f);
+
+   /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
+   begin_emit_instruction(emit);
+   emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
+   emit_dst_register(emit, &face_tmp);
+   emit_face_register(emit);
+   emit_src_register(emit, &plus_one);
+   emit_src_register(emit, &minus_one);
+   end_emit_instruction(emit);
+}
+
+
+/**
+ * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
+ * The x/y/z components are copied unchanged; the result lives in
+ * TEMP[fs.fragcoord_tmp_index].  Nothing is emitted when the shader has
+ * no fragcoord input (fs.fragcoord_input_index is INVALID_INDEX).
+ */
+static void
+emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   struct tgsi_full_dst_register coord_tmp;
+   struct tgsi_full_dst_register coord_tmp_xyz;
+   struct tgsi_full_dst_register coord_tmp_w;
+   struct tgsi_full_src_register const_one;
+   struct tgsi_full_src_register coord_in;
+   unsigned saved_input_index;
+
+   assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+   if (emit->fs.fragcoord_input_index == INVALID_INDEX) {
+      return;
+   }
+
+   coord_tmp = make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
+   coord_tmp_xyz = writemask_dst(&coord_tmp, TGSI_WRITEMASK_XYZ);
+   coord_tmp_w = writemask_dst(&coord_tmp, TGSI_WRITEMASK_W);
+   const_one = make_immediate_reg_float(emit, 1.0f);
+   coord_in = make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
+
+   /* Remember the input index, then invalidate it to prevent
+    * substitution in emit_src_register() while we read the real input.
+    */
+   saved_input_index = emit->fs.fragcoord_input_index;
+   emit->fs.fragcoord_input_index = INVALID_INDEX;
+
+   /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
+   begin_emit_instruction(emit);
+   emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
+   emit_dst_register(emit, &coord_tmp_xyz);
+   emit_src_register(emit, &coord_in);
+   end_emit_instruction(emit);
+
+   /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
+   begin_emit_instruction(emit);
+   emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
+   emit_dst_register(emit, &coord_tmp_w);
+   emit_src_register(emit, &const_one);
+   emit_src_register(emit, &coord_in);
+   end_emit_instruction(emit);
+
+   /* the input index is valid again from here on */
+   emit->fs.fragcoord_input_index = saved_input_index;
+}
+
+
+/**
+ * Emit extra instructions to adjust VS inputs/attributes.  This can
+ * mean casting a vertex attribute from int to float, converting a
+ * packed-uint attribute, swapping red/blue for BGRA attributes, or
+ * setting the W component to 1, or a combination of these.
+ *
+ * For every attribute index that has a bit set in any of the
+ * emit->key.vs adjustment masks, the adjusted value is written to
+ * TEMP[emit->vs.adjusted_input[index]].  The masks are cleared while the
+ * instructions are emitted and restored before returning.
+ */
+static void
+emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
+   const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
+   const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
+   const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
+   const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
+   const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
+   const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
+
+   /* union of all attributes that need some kind of adjustment */
+   unsigned adjust_mask = (save_w_1_mask |
+                           save_itof_mask |
+                           save_utof_mask |
+                           save_is_bgra_mask |
+                           save_puint_to_snorm_mask |
+                           save_puint_to_uscaled_mask |
+                           save_puint_to_sscaled_mask);
+
+   assert(emit->unit == PIPE_SHADER_VERTEX);
+
+   if (adjust_mask) {
+      struct tgsi_full_src_register one =
+         make_immediate_reg_float(emit, 1.0f);
+
+      struct tgsi_full_src_register one_int =
+         make_immediate_reg_int(emit, 1);
+
+      /* We need to turn off these bitmasks while emitting the
+       * instructions below, then restore them afterward.
+       */
+      emit->key.vs.adjust_attrib_w_1 = 0;
+      emit->key.vs.adjust_attrib_itof = 0;
+      emit->key.vs.adjust_attrib_utof = 0;
+      emit->key.vs.attrib_is_bgra = 0;
+      emit->key.vs.attrib_puint_to_snorm = 0;
+      emit->key.vs.attrib_puint_to_uscaled = 0;
+      emit->key.vs.attrib_puint_to_sscaled = 0;
+
+      while (adjust_mask) {
+         unsigned index = u_bit_scan(&adjust_mask);
+         /* The masks are unsigned, so build the bit with an unsigned
+          * literal: a signed "1 << index" would overflow int if the top
+          * bit of a mask is ever set.
+          */
+         const unsigned bit = 1u << index;
+         unsigned tmp = emit->vs.adjusted_input[index];
+         struct tgsi_full_src_register input_src =
+            make_src_reg(TGSI_FILE_INPUT, index);
+
+         struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+         struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+         struct tgsi_full_dst_register tmp_dst_w =
+            writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
+
+         /* ITOF/UTOF/puint conversion/MOV tmp, input[index].
+          * Only the first matching conversion is applied.
+          */
+         if (save_itof_mask & bit) {
+            emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
+                                 &tmp_dst, &input_src, FALSE);
+         }
+         else if (save_utof_mask & bit) {
+            emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
+                                 &tmp_dst, &input_src, FALSE);
+         }
+         else if (save_puint_to_snorm_mask & bit) {
+            emit_puint_to_snorm(emit, &tmp_dst, &input_src);
+         }
+         else if (save_puint_to_uscaled_mask & bit) {
+            emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
+         }
+         else if (save_puint_to_sscaled_mask & bit) {
+            emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
+         }
+         else {
+            /* no conversion: the bit must come from the W=1 or BGRA mask */
+            assert((save_w_1_mask | save_is_bgra_mask) & bit);
+            emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                                 &tmp_dst, &input_src, FALSE);
+         }
+
+         if (save_is_bgra_mask & bit) {
+            emit_swap_r_b(emit, &tmp_dst, &tmp_src);
+         }
+
+         if (save_w_1_mask & bit) {
+            /* MOV tmp.w, 1.0 (or integer 1 for pure-int attributes) */
+            if (emit->key.vs.attrib_is_pure_int & bit) {
+               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                                    &tmp_dst_w, &one_int, FALSE);
+            }
+            else {
+               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                                    &tmp_dst_w, &one, FALSE);
+            }
+         }
+      }
+
+      /* restore the masks that were cleared above */
+      emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
+      emit->key.vs.adjust_attrib_itof = save_itof_mask;
+      emit->key.vs.adjust_attrib_utof = save_utof_mask;
+      emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
+      emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
+      emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
+      emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
+   }
+}
+
+
+/**
+ * Pre-allocate, in the immediate constant buffer, values that are
+ * frequently needed to implement some instructions (0.0, 1.0, 0.5,
+ * etc.).  The position of each allocated immediate is recorded in
+ * emit->common_immediate_pos[] and the number of entries in
+ * emit->num_common_immediates.
+ */
+static void
+alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
+{
+   unsigned count = 0;
+
+   /* These three vectors are allocated unconditionally */
+   emit->common_immediate_pos[count] =
+      alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
+   count++;
+
+   emit->common_immediate_pos[count] =
+      alloc_immediate_float4(emit, 128.0f, -128.0f, 2.0f, 3.0f);
+   count++;
+
+   emit->common_immediate_pos[count] =
+      alloc_immediate_int4(emit, 0, 1, 0, -1);
+   count++;
+
+   /* The remaining ones are only allocated when the corresponding
+    * packed-uint attribute conversion is enabled in the key.
+    */
+   if (emit->key.vs.attrib_puint_to_snorm) {
+      emit->common_immediate_pos[count] =
+         alloc_immediate_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
+      count++;
+   }
+
+   if (emit->key.vs.attrib_puint_to_uscaled) {
+      emit->common_immediate_pos[count] =
+         alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
+      count++;
+   }
+
+   if (emit->key.vs.attrib_puint_to_sscaled) {
+      emit->common_immediate_pos[count] =
+         alloc_immediate_int4(emit, 22, 12, 2, 0);
+      count++;
+
+      emit->common_immediate_pos[count] =
+         alloc_immediate_int4(emit, 22, 30, 0, 0);
+      count++;
+   }
+
+   assert(count <= Elements(emit->common_immediate_pos));
+   emit->num_common_immediates = count;
+}
+
+
+/**
+ * Emit any extra/helper declarations/code that we might need between
+ * the declaration section and code section.
+ *
+ * \return FALSE if emitting the input or output declarations failed,
+ *         TRUE otherwise.
+ */
+static boolean
+emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
+{
+   /* Properties (geometry shaders only) */
+   if (emit->unit == PIPE_SHADER_GEOMETRY) {
+      emit_property_instructions(emit);
+   }
+
+   /* Inputs first, then outputs; give up on the first failure */
+   if (!emit_input_declarations(emit) ||
+       !emit_output_declarations(emit)) {
+      return FALSE;
+   }
+
+   /* Temporaries, constants, samplers and resources, in that order */
+   emit_temporaries_declaration(emit);
+   emit_constant_declaration(emit);
+   emit_sampler_declarations(emit);
+   emit_resource_declarations(emit);
+
+   /* Clip distance output registers exist for VS and GS only */
+   switch (emit->unit) {
+   case PIPE_SHADER_VERTEX:
+   case PIPE_SHADER_GEOMETRY:
+      emit_clip_distance_declarations(emit);
+      break;
+   default:
+      break;
+   }
+
+   alloc_common_immediates(emit);
+
+   /* The alpha test needs the reference value as an immediate */
+   if (emit->unit == PIPE_SHADER_FRAGMENT &&
+       emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
+      const float ref = emit->key.fs.alpha_ref;
+
+      emit->fs.alpha_ref_index =
+         alloc_immediate_float4(emit, ref, ref, ref, ref);
+   }
+
+   /* With all immediates known (the ones declared by the shader as well
+    * as the extra ones allocated above), emit the constant block.
+    */
+   emit_vgpu10_immediates_block(emit);
+
+   /* Per-stage helper code that must precede the shader's own code */
+   switch (emit->unit) {
+   case PIPE_SHADER_FRAGMENT:
+      emit_frontface_instructions(emit);
+      emit_fragcoord_instructions(emit);
+      break;
+   case PIPE_SHADER_VERTEX:
+      emit_vertex_attrib_instructions(emit);
+      break;
+   default:
+      break;
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Emit alpha test code.  TEMP[fs_color_tmp_index].w is compared against
+ * the alpha reference immediate using key.fs.alpha_func and the fragment
+ * is discarded if the comparison fails.  Unless the color is going to be
+ * broadcast to several color buffers, the temporary color is also copied
+ * to the first color output here.
+ *
+ * \param fs_color_tmp_index  index of the temp register holding the
+ *                            fragment color
+ */
+static void
+emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
+                             unsigned fs_color_tmp_index)
+{
+   /* scratch register receiving the comparison result */
+   unsigned cmp_tmp = get_temp_index(emit);
+   struct tgsi_full_src_register cmp_src = make_src_temp_reg(cmp_tmp);
+   struct tgsi_full_src_register cmp_src_x =
+      scalar_src(&cmp_src, TGSI_SWIZZLE_X);
+   struct tgsi_full_dst_register cmp_dst = make_dst_temp_reg(cmp_tmp);
+   struct tgsi_full_src_register color =
+      make_src_temp_reg(fs_color_tmp_index);
+   struct tgsi_full_src_register color_alpha =
+      scalar_src(&color, TGSI_SWIZZLE_W);
+   struct tgsi_full_src_register alpha_ref =
+      make_src_immediate_reg(emit->fs.alpha_ref_index);
+   struct tgsi_full_dst_register color_out =
+      make_dst_output_reg(emit->fs.color_out_index[0]);
+
+   assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+   /* cmp = color.w 'alpha_func' alpha_ref */
+   emit_comparison(emit, emit->key.fs.alpha_func, &cmp_dst,
+                   &color_alpha, &alpha_ref);
+
+   /* DISCARD if cmp.x == 0 */
+   begin_emit_instruction(emit);
+   emit_discard_opcode(emit, FALSE);  /* discard if src0.x is zero */
+   emit_src_register(emit, &cmp_src_x);
+   end_emit_instruction(emit);
+
+   /* When no broadcast follows, the final color is written right here */
+   if (emit->key.fs.write_color0_to_n_cbufs < 2) {
+      /* MOV output.color, tempcolor */
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_out,
+                           &color, FALSE);     /* XXX saturate? */
+   }
+
+   free_temp_indexes(emit);
+}
+
+
+/**
+ * Emit instructions for writing a single color output to multiple
+ * color buffers: one MOV from the temporary color register to each of
+ * the first key.fs.write_color0_to_n_cbufs color outputs.
+ * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
+ * property is set and the number of render targets is greater than one.
+ * \param fs_color_tmp_index  index of the temp register that holds the
+ *                            color to broadcast.
+ */
+static void
+emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
+                                 unsigned fs_color_tmp_index)
+{
+   const unsigned num_cbufs = emit->key.fs.write_color0_to_n_cbufs;
+   struct tgsi_full_src_register temp_color =
+      make_src_temp_reg(fs_color_tmp_index);
+   unsigned cbuf;
+
+   assert(emit->unit == PIPE_SHADER_FRAGMENT);
+   assert(num_cbufs > 1);
+
+   for (cbuf = 0; cbuf < num_cbufs; cbuf++) {
+      const unsigned out_index = emit->fs.color_out_index[cbuf];
+      struct tgsi_full_dst_register out_color =
+         make_dst_output_reg(out_index);
+
+      /* Record the semantic of this output now, because
+       * emit_dst_register() will use it later.
+       */
+      emit->info.output_semantic_name[out_index] = TGSI_SEMANTIC_COLOR;
+
+      /* MOV output.color[cbuf], tempcolor */
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &out_color,
+                           &temp_color, FALSE);     /* XXX saturate? */
+   }
+}
+
+
+/**
+ * Emit extra helper code after the original shader code, but before the
+ * last END/RET instruction.
+ * For vertex shaders this means the extra per-vertex instructions; for
+ * fragment shaders it means the alpha test and/or the color broadcast,
+ * when the key asks for them.
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_post_helpers(struct svga_shader_emitter_v10 *emit)
+{
+   if (emit->unit == PIPE_SHADER_VERTEX) {
+      emit_vertex_instructions(emit);
+      return TRUE;
+   }
+
+   if (emit->unit == PIPE_SHADER_FRAGMENT) {
+      const unsigned color_tmp = emit->fs.color_tmp_index;
+
+      /* From now on emit_dst_register() must no longer substitute the
+       * temporary fragment color register for the real color output.
+       */
+      emit->fs.color_tmp_index = INVALID_INDEX;
+
+      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
+         emit_alpha_test_instructions(emit, color_tmp);
+      }
+      if (emit->key.fs.write_color0_to_n_cbufs > 1) {
+         emit_broadcast_color_instructions(emit, color_tmp);
+      }
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Translate the TGSI tokens into VGPU10 tokens.
+ *
+ * Each immediate, declaration, property and instruction token is passed
+ * to its emit_vgpu10_*() handler; other token types are ignored.  The
+ * pre-helpers are emitted once, right before the first instruction.
+ * Translation stops at the first handler that fails.
+ *
+ * \return the result of the last handler called (TRUE if none failed)
+ */
+static boolean
+emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
+                         const struct tgsi_token *tokens)
+{
+   struct tgsi_parse_context parse;
+   boolean ok = TRUE;
+   boolean need_pre_helpers = TRUE;
+   unsigned num_insts = 0;
+
+   tgsi_parse_init(&parse, tokens);
+
+   /* keep going until the tokens run out or a handler reports failure */
+   while (ok && !tgsi_parse_end_of_tokens(&parse)) {
+      tgsi_parse_token(&parse);
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         ok = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         ok = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
+         break;
+
+      case TGSI_TOKEN_TYPE_PROPERTY:
+         ok = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         if (need_pre_helpers) {
+            ok = emit_pre_helpers(emit);
+            if (!ok)
+               break;
+            need_pre_helpers = FALSE;
+         }
+         ok = emit_vgpu10_instruction(emit, num_insts++,
+                                      &parse.FullToken.FullInstruction);
+         break;
+
+      default:
+         break;
+      }
+   }
+
+   tgsi_parse_free(&parse);
+   return ok;
+}
+
+
+/**
+ * Emit the first VGPU10 shader tokens: the program token (version info
+ * and program type) followed by a placeholder for the shader length.
+ *
+ * \return FALSE if either dword could not be emitted, TRUE otherwise
+ */
+static boolean
+emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10ProgramToken ptoken;
+
+   /* Start from an all-zero token so that the dword emitted below never
+    * contains indeterminate bits.
+    * NOTE(review): presumably VGPU10ProgramToken has bits that are not
+    * covered by the three fields set here - confirm against its
+    * declaration.
+    */
+   ptoken.value = 0;
+
+   /* First token: VGPU10ProgramToken  (version info, program type (VS,GS,PS)) */
+   ptoken.majorVersion = 4;
+   ptoken.minorVersion = 0;
+   ptoken.programType = translate_shader_type(emit->unit);
+   if (!emit_dword(emit, ptoken.value))
+      return FALSE;
+
+   /* Second token: total length of shader, in tokens.  We can't fill this
+    * in until we're all done.  Emit zero for now.
+    */
+   return emit_dword(emit, 0);
+}
+
+
+/**
+ * Finish the VGPU10 shader: patch the second token of the emitted
+ * buffer with the total number of tokens.
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10ProgramToken *header = (VGPU10ProgramToken *) emit->buf;
+
+   /* header[1] is the length token that was emitted as zero earlier */
+   header[1].value = emit_get_num_tokens(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Modify the FS to read the BCOLORs and use the FACE register
+ * to choose between the front/back colors.
+ *
+ * \return the token array produced by tgsi_add_two_side()
+ */
+static const struct tgsi_token *
+transform_fs_twoside(const struct tgsi_token *tokens)
+{
+   const struct tgsi_token *twoside_tokens;
+
+   /* debug dump of the input, disabled by default */
+   if (0) {
+      debug_printf("Before tgsi_add_two_side ------------------\n");
+      tgsi_dump(tokens,0);
+   }
+
+   twoside_tokens = tgsi_add_two_side(tokens);
+
+   /* debug dump of the result, disabled by default */
+   if (0) {
+      debug_printf("After tgsi_add_two_side ------------------\n");
+      tgsi_dump(twoside_tokens, 0);
+   }
+
+   return twoside_tokens;
+}
+
+
+/**
+ * Modify the FS to do polygon stipple.
+ *
+ * On success the sampler unit chosen for the stipple texture is stored
+ * in emit->fs.pstipple_sampler_unit and the matching emit->key.tex[]
+ * entry is set up as a 2D texture with an identity swizzle.
+ *
+ * \param emit    the shader emitter whose key/fs state is updated
+ * \param tokens  the fragment shader tokens to transform
+ * \return the token array returned by
+ *         util_pstipple_create_fragment_shader(); NULL if it returned
+ *         NULL, in which case the emitter state is left untouched.
+ */
+static const struct tgsi_token *
+transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
+                      const struct tgsi_token *tokens)
+{
+   const struct tgsi_token *new_tokens;
+   /* initialized so that we never read an indeterminate value if the
+    * utility function does not write its output parameter
+    */
+   unsigned unit = 0;
+
+   if (0) {
+      debug_printf("Before pstipple ------------------\n");
+      tgsi_dump(tokens,0);
+   }
+
+   new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0);
+   if (!new_tokens) {
+      /* No shader was generated, so 'unit' is not meaningful: don't use
+       * it to index emit->key.tex[].
+       */
+      return NULL;
+   }
+
+   emit->fs.pstipple_sampler_unit = unit;
+
+   /* Setup texture state for stipple */
+   emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D;
+   emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
+   emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
+   emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
+   emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
+
+   if (0) {
+      debug_printf("After pstipple ------------------\n");
+      tgsi_dump(new_tokens, 0);
+   }
+
+   return new_tokens;
+}
+
+/**
+ * Modify the FS to support anti-aliased points.
+ *
+ * \param tokens          the fragment shader tokens to transform
+ * \param aa_coord_index  passed through to tgsi_add_aa_point()
+ * \return the token array produced by tgsi_add_aa_point()
+ */
+static const struct tgsi_token *
+transform_fs_aapoint(const struct tgsi_token *tokens,
+                     int aa_coord_index)
+{
+   const struct tgsi_token *aapoint_tokens;
+
+   /* debug dump of the input, disabled by default */
+   if (0) {
+      debug_printf("Before tgsi_add_aa_point ------------------\n");
+      tgsi_dump(tokens,0);
+   }
+
+   aapoint_tokens = tgsi_add_aa_point(tokens, aa_coord_index);
+
+   /* debug dump of the result, disabled by default */
+   if (0) {
+      debug_printf("After tgsi_add_aa_point ------------------\n");
+      tgsi_dump(aapoint_tokens, 0);
+   }
+
+   return aapoint_tokens;
+}
+
+/**
+ * This is the main entrypoint for the TGSI -> VGPU10 translator.
+ *
+ * \param svga    the context; the currently bound VS/GS are used for
+ *                linking the shader inputs
+ * \param shader  the shader to translate; its tokens are never freed here
+ * \param key     the compile key, copied into the emitter and the variant
+ * \param unit    PIPE_SHADER_VERTEX, PIPE_SHADER_GEOMETRY or
+ *                PIPE_SHADER_FRAGMENT
+ * \return a newly allocated shader variant that owns the emitted token
+ *         buffer, or NULL if allocation, a token transformation or the
+ *         translation failed (including register overflow).
+ */
+struct svga_shader_variant *
+svga_tgsi_vgpu10_translate(struct svga_context *svga,
+                           const struct svga_shader *shader,
+                           const struct svga_compile_key *key,
+                           unsigned unit)
+{
+   struct svga_shader_variant *variant = NULL;
+   struct svga_shader_emitter_v10 *emit;
+   const struct tgsi_token *tokens = shader->tokens;
+   struct svga_vertex_shader *vs = svga->curr.vs;
+   struct svga_geometry_shader *gs = svga->curr.gs;
+
+   assert(unit == PIPE_SHADER_VERTEX ||
+          unit == PIPE_SHADER_GEOMETRY ||
+          unit == PIPE_SHADER_FRAGMENT);
+
+   /* These two flags cannot be used together */
+   assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
+
+   /*
+    * Setup the code emitter
+    */
+   emit = alloc_emitter();
+   if (!emit)
+      return NULL;
+
+   emit->unit = unit;
+   emit->key = *key;
+
+   emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
+                                   emit->key.gs.need_prescale);
+   emit->vposition.tmp_index = INVALID_INDEX;
+   emit->vposition.so_index = INVALID_INDEX;
+   emit->vposition.out_index = INVALID_INDEX;
+
+   emit->fs.color_tmp_index = INVALID_INDEX;
+   emit->fs.face_input_index = INVALID_INDEX;
+   emit->fs.fragcoord_input_index = INVALID_INDEX;
+
+   emit->gs.prim_id_index = INVALID_INDEX;
+
+   emit->clip_dist_out_index = INVALID_INDEX;
+   emit->clip_dist_tmp_index = INVALID_INDEX;
+   emit->clip_dist_so_index = INVALID_INDEX;
+   emit->clip_vertex_out_index = INVALID_INDEX;
+
+   if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
+      emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
+   }
+
+   /*
+    * Apply the fragment shader token transformations.  After each step
+    * 'tokens' is either shader->tokens or a token array owned by this
+    * function; an intermediate array is freed as soon as the next
+    * transformation has replaced it.  A NULL result aborts the
+    * translation.
+    */
+   if (unit == PIPE_SHADER_FRAGMENT) {
+      if (key->fs.light_twoside) {
+         tokens = transform_fs_twoside(tokens);
+         if (!tokens)
+            goto cleanup;
+      }
+      if (key->fs.pstipple) {
+         const struct tgsi_token *new_tokens =
+            transform_fs_pstipple(emit, tokens);
+         if (tokens != shader->tokens && tokens != new_tokens) {
+            /* free the two-sided shader tokens */
+            tgsi_free_tokens(tokens);
+         }
+         tokens = new_tokens;
+         if (!tokens)
+            goto cleanup;
+      }
+      if (key->fs.aa_point) {
+         const struct tgsi_token *new_tokens =
+            transform_fs_aapoint(tokens, key->fs.aa_point_coord_index);
+         if (tokens != shader->tokens && tokens != new_tokens) {
+            /* free the two-sided / stipple shader tokens */
+            tgsi_free_tokens(tokens);
+         }
+         tokens = new_tokens;
+         if (!tokens)
+            goto cleanup;
+      }
+   }
+
+   if (SVGA_DEBUG & DEBUG_TGSI) {
+      debug_printf("#####################################\n");
+      debug_printf("### TGSI Shader %u\n", shader->id);
+      tgsi_dump(tokens, 0);
+   }
+
+   /**
+    * Rescan the header if the token string is different from the one
+    * included in the shader; otherwise, the header info is already up-to-date
+    */
+   if (tokens != shader->tokens) {
+      tgsi_scan_shader(tokens, &emit->info);
+   } else {
+      emit->info = shader->info;
+   }
+
+   emit->num_outputs = emit->info.num_outputs;
+
+   if (unit == PIPE_SHADER_FRAGMENT) {
+      /* Compute FS input remapping to match the output from VS/GS */
+      if (gs) {
+         svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage);
+      } else {
+         assert(vs);
+         svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
+      }
+   } else if (unit == PIPE_SHADER_GEOMETRY) {
+      assert(vs);
+      svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
+   }
+
+   determine_clipping_mode(emit);
+
+   if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) {
+      if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
+         /* if there is stream output declarations associated
+          * with this shader or the shader writes to ClipDistance
+          * then reserve extra registers for the non-adjusted vertex position
+          * and the ClipDistance shadow copy
+          */
+         emit->vposition.so_index = emit->num_outputs++;
+
+         if (emit->clip_mode == CLIP_DISTANCE) {
+            emit->clip_dist_so_index = emit->num_outputs++;
+            if (emit->info.num_written_clipdistance > 4)
+               emit->num_outputs++;
+         }
+      }
+   }
+
+   /*
+    * Do actual shader translation.
+    */
+   if (!emit_vgpu10_header(emit)) {
+      debug_printf("svga: emit VGPU10 header failed\n");
+      goto cleanup;
+   }
+
+   if (!emit_vgpu10_instructions(emit, tokens)) {
+      debug_printf("svga: emit VGPU10 instructions failed\n");
+      goto cleanup;
+   }
+
+   if (!emit_vgpu10_tail(emit)) {
+      debug_printf("svga: emit VGPU10 tail failed\n");
+      goto cleanup;
+   }
+
+   if (emit->register_overflow) {
+      goto cleanup;
+   }
+
+   /*
+    * Create, initialize the 'variant' object.
+    */
+   variant = CALLOC_STRUCT(svga_shader_variant);
+   if (!variant)
+      goto cleanup;
+
+   variant->shader = shader;
+   variant->nr_tokens = emit_get_num_tokens(emit);
+   variant->tokens = (const unsigned *)emit->buf;
+   emit->buf = NULL;  /* buffer is no longer owned by emitter context */
+   memcpy(&variant->key, key, sizeof(*key));
+   variant->id = UTIL_BITMASK_INVALID_INDEX;
+
+   /* The extra constant starting offset starts with the number of
+    * shader constants declared in the shader.
+    */
+   variant->extra_const_start = emit->num_shader_consts[0];
+   if (key->gs.wide_point) {
+      /**
+       * The extra constant added in the transformed shader
+       * for inverse viewport scale is to be supplied by the driver.
+       * So the extra constant starting offset needs to be reduced by 1.
+       */
+      assert(variant->extra_const_start > 0);
+      variant->extra_const_start--;
+   }
+
+   variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
+
+   /** keep track in the variant if flat interpolation is used
+    *  for any of the varyings.
+    */
+   variant->uses_flat_interp = emit->uses_flat_interp;
+
+cleanup:
+   /* Free the transformed tokens on the success path and on every error
+    * path alike; the shader's own tokens are never ours to free.
+    */
+   if (tokens && tokens != shader->tokens) {
+      tgsi_free_tokens(tokens);
+   }
+
+   free_emitter(emit);
+
+   return variant;
+}
-- 
cgit v1.2.3


From 53d07910c3a3be496f5c789b708a82d2022ecd5b Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 7 Aug 2015 14:57:22 -0600
Subject: svga: add new svga_cmd_vgpu10.c file

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/svga/svga_cmd_vgpu10.c | 1289 ++++++++++++++++++++++++++++
 1 file changed, 1289 insertions(+)
 create mode 100644 src/gallium/drivers/svga/svga_cmd_vgpu10.c

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_cmd_vgpu10.c b/src/gallium/drivers/svga/svga_cmd_vgpu10.c
new file mode 100644
index 00000000000..596ba953cd2
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_cmd_vgpu10.c
@@ -0,0 +1,1289 @@
+/**********************************************************
+ * Copyright 2008-2013 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file svga_cmd_vgpu10.c
+ *
+ * Command construction utility for the vgpu10 SVGA3D protocol.
+ *
+ * \author Mingcheng Chen
+ * \author Brian Paul
+ */
+
+
+#include "svga_winsys.h"
+#include "svga_resource_buffer.h"
+#include "svga_resource_texture.h"
+#include "svga_surface.h"
+#include "svga_cmd.h"
+
+
+/**
+ * Emit a surface relocation for a view ID (e.g. a RenderTargetViewId).
+ *
+ * \param swc      winsys context whose surface_relocation hook is called
+ * \param surface  surface whose handle is relocated; may be NULL, in which
+ *                 case a relocation with a NULL surface handle is emitted
+ * \param id       location of the view ID the relocation refers to
+ * \param flags    relocation flags, passed through unchanged
+ */
+static void
+view_relocation(struct svga_winsys_context *swc, // IN
+                struct pipe_surface *surface,    // IN
+                SVGA3dRenderTargetViewId *id,    // OUT
+                unsigned flags)                  // IN
+{
+   struct svga_surface *s;
+
+   if (!surface) {
+      /* nothing bound - still emit a relocation, with no handle */
+      swc->surface_relocation(swc, id, NULL, NULL, flags);
+      return;
+   }
+
+   s = svga_surface(surface);
+   assert(s->handle);
+   swc->surface_relocation(swc, id, NULL, s->handle, flags);
+}
+
+
+/**
+ * Emit a surface relocation for a ResourceId.
+ *
+ * \param swc      winsys context whose surface_relocation hook is called
+ * \param surface  winsys surface to relocate; may be NULL
+ * \param sid      location of the surface ID the relocation refers to
+ * \param flags    relocation flags, passed through unchanged
+ */
+static void
+surface_to_resourceid(struct svga_winsys_context *swc, // IN
+                      struct svga_winsys_surface *surface,    // IN
+                      SVGA3dSurfaceId *sid,            // OUT
+                      unsigned flags)                  // IN
+{
+   /* A NULL surface is simply forwarded as a NULL handle. */
+   swc->surface_relocation(swc, sid, NULL, surface, flags);
+}
+
+
+/**
+ * Declare a local 'cmd' of type SVGA3dCmdDX<CommandName> * and reserve space
+ * for one SVGA_3D_CMD_DX_<CommandCode> command with zero relocations.
+ * Relies on a variable named 'swc' being in scope.  NOTE: contains a hidden
+ * 'return PIPE_ERROR_OUT_OF_MEMORY' taken when the reservation fails.
+ */
+#define SVGA3D_CREATE_COMMAND(CommandName, CommandCode) \
+SVGA3dCmdDX##CommandName *cmd; \
+{ \
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_##CommandCode, \
+                            sizeof(SVGA3dCmdDX##CommandName), 0); \
+   if (!cmd) \
+      return PIPE_ERROR_OUT_OF_MEMORY; \
+}
+
+/**
+ * Like SVGA3D_CREATE_COMMAND, but the reservation also covers 'count'
+ * trailing elements of type ElementClassName.  Relies on variables named
+ * 'swc' and 'count' being in scope and asserts that count is non-zero.
+ */
+#define SVGA3D_CREATE_CMD_COUNT(CommandName, CommandCode, ElementClassName) \
+SVGA3dCmdDX##CommandName *cmd; \
+{ \
+   assert(count > 0); \
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_##CommandCode, \
+                            sizeof(SVGA3dCmdDX##CommandName) + \
+                            count * sizeof(ElementClassName), 0); \
+   if (!cmd) \
+      return PIPE_ERROR_OUT_OF_MEMORY; \
+}
+
+/**
+ * SVGA3D_COPY_BASIC[_n]: copy n like-named local variables (typically
+ * function parameters) into the same-named fields of 'cmd', in the order
+ * the names are listed.
+ */
+#define SVGA3D_COPY_BASIC(VariableName) \
+{ \
+   cmd->VariableName = VariableName; \
+}
+
+#define SVGA3D_COPY_BASIC_2(VariableName1, VariableName2) \
+{ \
+   cmd->VariableName1 = VariableName1; \
+   cmd->VariableName2 = VariableName2; \
+}
+
+#define SVGA3D_COPY_BASIC_3(VariableName1, VariableName2, VariableName3) \
+{ \
+   cmd->VariableName1 = VariableName1; \
+   cmd->VariableName2 = VariableName2; \
+   cmd->VariableName3 = VariableName3; \
+}
+
+#define SVGA3D_COPY_BASIC_4(VariableName1, VariableName2, VariableName3, \
+                            VariableName4) \
+{ \
+   cmd->VariableName1 = VariableName1; \
+   cmd->VariableName2 = VariableName2; \
+   cmd->VariableName3 = VariableName3; \
+   cmd->VariableName4 = VariableName4; \
+}
+
+#define SVGA3D_COPY_BASIC_5(VariableName1, VariableName2, VariableName3, \
+                            VariableName4, VariableName5) \
+{ \
+   cmd->VariableName1 = VariableName1; \
+   cmd->VariableName2 = VariableName2; \
+   cmd->VariableName3 = VariableName3; \
+   cmd->VariableName4 = VariableName4; \
+   cmd->VariableName5 = VariableName5; \
+}
+
+#define SVGA3D_COPY_BASIC_6(VariableName1, VariableName2, VariableName3, \
+                            VariableName4, VariableName5, VariableName6) \
+{ \
+   cmd->VariableName1 = VariableName1; \
+   cmd->VariableName2 = VariableName2; \
+   cmd->VariableName3 = VariableName3; \
+   cmd->VariableName4 = VariableName4; \
+   cmd->VariableName5 = VariableName5; \
+   cmd->VariableName6 = VariableName6; \
+}
+
+#define SVGA3D_COPY_BASIC_7(VariableName1, VariableName2, VariableName3, \
+                            VariableName4, VariableName5, VariableName6, \
+                            VariableName7) \
+{ \
+   cmd->VariableName1 = VariableName1; \
+   cmd->VariableName2 = VariableName2; \
+   cmd->VariableName3 = VariableName3; \
+   cmd->VariableName4 = VariableName4; \
+   cmd->VariableName5 = VariableName5; \
+   cmd->VariableName6 = VariableName6; \
+   cmd->VariableName7 = VariableName7; \
+}
+
+#define SVGA3D_COPY_BASIC_8(VariableName1, VariableName2, VariableName3, \
+                            VariableName4, VariableName5, VariableName6, \
+                            VariableName7, VariableName8) \
+{ \
+   cmd->VariableName1 = VariableName1; \
+   cmd->VariableName2 = VariableName2; \
+   cmd->VariableName3 = VariableName3; \
+   cmd->VariableName4 = VariableName4; \
+   cmd->VariableName5 = VariableName5; \
+   cmd->VariableName6 = VariableName6; \
+   cmd->VariableName7 = VariableName7; \
+   cmd->VariableName8 = VariableName8; \
+}
+
+#define SVGA3D_COPY_BASIC_9(VariableName1, VariableName2, VariableName3, \
+                            VariableName4, VariableName5, VariableName6, \
+                            VariableName7, VariableName8, VariableName9) \
+{ \
+   cmd->VariableName1 = VariableName1; \
+   cmd->VariableName2 = VariableName2; \
+   cmd->VariableName3 = VariableName3; \
+   cmd->VariableName4 = VariableName4; \
+   cmd->VariableName5 = VariableName5; \
+   cmd->VariableName6 = VariableName6; \
+   cmd->VariableName7 = VariableName7; \
+   cmd->VariableName8 = VariableName8; \
+   cmd->VariableName9 = VariableName9; \
+}
+
+
+/**
+ * Emit a SVGA_3D_CMD_DX_PRED_COPY_REGION command.
+ *
+ * The destination surface is relocated for writing and the source surface
+ * for reading; the subresource indices and the copy box are stored as given.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_PredCopyRegion(struct svga_winsys_context *swc,
+                             struct svga_winsys_surface *dstSurf,
+                             uint32 dstSubResource,
+                             struct svga_winsys_surface *srcSurf,
+                             uint32 srcSubResource,
+                             const SVGA3dCopyBox *box)
+{
+   SVGA3dCmdDXPredCopyRegion *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_PRED_COPY_REGION,
+                            sizeof *cmd,
+                            2);  /* two relocations: dst and src */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* destination */
+   swc->surface_relocation(swc, &cmd->dstSid, NULL, dstSurf, SVGA_RELOC_WRITE);
+   cmd->dstSubResource = dstSubResource;
+
+   /* source */
+   swc->surface_relocation(swc, &cmd->srcSid, NULL, srcSurf, SVGA_RELOC_READ);
+   cmd->srcSubResource = srcSubResource;
+
+   cmd->box = *box;
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a SVGA_3D_CMD_DX_PRED_COPY command.
+ *
+ * The destination surface is relocated for writing and the source surface
+ * for reading.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_PredCopy(struct svga_winsys_context *swc,
+                       struct svga_winsys_surface *dstSurf,
+                       struct svga_winsys_surface *srcSurf)
+{
+   SVGA3dCmdDXPredCopy *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_PRED_COPY,
+                            sizeof *cmd,
+                            2);  /* two relocations: dst and src */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->surface_relocation(swc, &cmd->dstSid, NULL, dstSurf, SVGA_RELOC_WRITE);
+   swc->surface_relocation(swc, &cmd->srcSid, NULL, srcSurf, SVGA_RELOC_READ);
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_SET_VIEWPORTS command followed by 'count'
+ * SVGA3dViewport entries copied from 'viewports'.  'count' must be non-zero
+ * (asserted).
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetViewports(struct svga_winsys_context *swc,
+                           unsigned count,
+                           const SVGA3dViewport *viewports)
+{
+   SVGA3dCmdDXSetViewports *cmd;
+
+   assert(count > 0);
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_VIEWPORTS,
+                            sizeof(SVGA3dCmdDXSetViewports) +
+                            count * sizeof(SVGA3dViewport),
+                            0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* the viewport array immediately follows the fixed-size command */
+   memcpy(cmd + 1, viewports, count * sizeof(SVGA3dViewport));
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a SVGA_3D_CMD_DX_SET_SHADER command.
+ *
+ * A shader relocation for 'gbshader' is emitted against the command's
+ * shaderId field, after which the field is set to 'shaderId'.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetShader(struct svga_winsys_context *swc,
+                        SVGA3dShaderType type,
+                        struct svga_winsys_gb_shader *gbshader,
+                        SVGA3dShaderId shaderId)
+{
+   SVGA3dCmdDXSetShader *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_SHADER,
+                            sizeof(SVGA3dCmdDXSetShader),
+                            1);  /* one relocation */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* Relocation first; the ID field is written only afterwards. */
+   swc->shader_relocation(swc, &cmd->shaderId, NULL, NULL, gbshader, 0);
+   cmd->shaderId = shaderId;
+
+   cmd->type = type;
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a SVGA_3D_CMD_DX_SET_SHADER_RESOURCES command followed by 'count'
+ * shader resource view IDs.
+ *
+ * For each entry a read relocation for views[i] is emitted against the ID
+ * slot, and the slot is then set to ids[i].
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetShaderResources(struct svga_winsys_context *swc,
+                                 SVGA3dShaderType type,
+                                 uint32 startView,
+                                 unsigned count,
+                                 const SVGA3dShaderResourceViewId ids[],
+                                 struct svga_winsys_surface **views)
+{
+   SVGA3dCmdDXSetShaderResources *cmd;
+   SVGA3dShaderResourceViewId *slots;
+   unsigned i;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_SHADER_RESOURCES,
+                            sizeof *cmd +
+                            count * sizeof(SVGA3dShaderResourceViewId),
+                            count); /* one relocation per view */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->type = type;
+   cmd->startView = startView;
+
+   /* the ID array immediately follows the fixed-size command */
+   slots = (SVGA3dShaderResourceViewId *) (cmd + 1);
+   for (i = 0; i < count; i++) {
+      SVGA3dShaderResourceViewId *slot = &slots[i];
+
+      swc->surface_relocation(swc, slot, NULL, views[i], SVGA_RELOC_READ);
+      *slot = ids[i];
+   }
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a SVGA_3D_CMD_DX_SET_SAMPLERS command followed by 'count' sampler
+ * IDs copied from 'samplerIds'.  'count' must be non-zero (asserted).
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetSamplers(struct svga_winsys_context *swc,
+                          unsigned count,
+                          uint32 startSampler,
+                          SVGA3dShaderType type,
+                          const SVGA3dSamplerId *samplerIds)
+{
+   SVGA3dCmdDXSetSamplers *cmd;
+
+   assert(count > 0);
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_SAMPLERS,
+                            sizeof(SVGA3dCmdDXSetSamplers) +
+                            count * sizeof(SVGA3dSamplerId),
+                            0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->startSampler = startSampler;
+   cmd->type = type;
+
+   /* the sampler ID array immediately follows the fixed-size command */
+   memcpy(cmd + 1, samplerIds, count * sizeof(SVGA3dSamplerId));
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a SVGA_3D_CMD_DX_CLEAR_RENDERTARGET_VIEW command for 'color_surf'
+ * using the four floats at 'rgba' as the clear value.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_ClearRenderTargetView(struct svga_winsys_context *swc,
+                                    struct pipe_surface *color_surf,
+                                    const float *rgba)
+{
+   struct svga_surface *rtv = svga_surface(color_surf);
+   SVGA3dCmdDXClearRenderTargetView *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_CLEAR_RENDERTARGET_VIEW,
+                            sizeof *cmd,
+                            1); /* one relocation */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* NOTE: this is subtle.  The view/surface relocation needs a pointer to
+    * an ID that lies inside the command space reserved above, so it is
+    * pointed at renderTargetViewId.  Once the relocation has been emitted
+    * the field is overwritten with the original RenderTargetViewId.
+    */
+   view_relocation(swc, color_surf, &cmd->renderTargetViewId,
+                   SVGA_RELOC_WRITE);
+   cmd->renderTargetViewId = rtv->view_id;
+
+   COPY_4V(cmd->rgba.value, rgba);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a SVGA_3D_CMD_DX_SET_RENDERTARGETS command binding one depth/stencil
+ * view and 'color_count' color views.
+ *
+ * A write relocation is emitted for every slot, including slots whose
+ * surface is NULL; those slots get SVGA3D_INVALID_ID as their view ID.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetRenderTargets(struct svga_winsys_context *swc,
+                               unsigned color_count,
+                               struct pipe_surface **color_surfs,
+                               struct pipe_surface *depth_stencil_surf)
+{
+   /* one relocation per color buffer plus one for depth/stencil */
+   const unsigned surf_count = color_count + 1;
+   SVGA3dCmdDXSetRenderTargets *cmd;
+   SVGA3dRenderTargetViewId *color_ids;
+   struct svga_surface *ss;
+   unsigned i;
+
+   assert(surf_count > 0);
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_RENDERTARGETS,
+                            sizeof *cmd +
+                            color_count * sizeof(SVGA3dRenderTargetViewId),
+                            surf_count);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* In each case below the relocation is emitted against the ID field
+    * inside the command and the field is then overwritten with the real
+    * view ID (or SVGA3D_INVALID_ID when nothing is bound).
+    */
+
+   /* Depth / Stencil buffer */
+   ss = depth_stencil_surf ? svga_surface(depth_stencil_surf) : NULL;
+   view_relocation(swc, depth_stencil_surf, &cmd->depthStencilViewId,
+                   SVGA_RELOC_WRITE);
+   cmd->depthStencilViewId = ss ? ss->view_id : SVGA3D_INVALID_ID;
+
+   /* Color buffers - the ID array follows the fixed-size command */
+   color_ids = (SVGA3dRenderTargetViewId *) (cmd + 1);
+   for (i = 0; i < color_count; i++) {
+      struct pipe_surface *surf = color_surfs[i];
+
+      ss = surf ? svga_surface(surf) : NULL;
+      view_relocation(swc, surf, &color_ids[i], SVGA_RELOC_WRITE);
+      color_ids[i] = ss ? ss->view_id : SVGA3D_INVALID_ID;
+   }
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a SVGA_3D_CMD_DX_SET_BLEND_STATE command.  Four floats are copied
+ * from 'blendFactor' into the command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetBlendState(struct svga_winsys_context *swc,
+                            SVGA3dBlendStateId blendId,
+                            const float *blendFactor,
+                            uint32 sampleMask)
+{
+   SVGA3dCmdDXSetBlendState *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_BLEND_STATE,
+                            sizeof(SVGA3dCmdDXSetBlendState), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->blendId = blendId;
+   cmd->sampleMask = sampleMask;
+   memcpy(cmd->blendFactor, blendFactor, sizeof(float) * 4);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_SET_DEPTHSTENCIL_STATE command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetDepthStencilState(struct svga_winsys_context *swc,
+                                   SVGA3dDepthStencilStateId depthStencilId,
+                                   uint32 stencilRef)
+{
+   SVGA3dCmdDXSetDepthStencilState *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_DEPTHSTENCIL_STATE,
+                            sizeof(SVGA3dCmdDXSetDepthStencilState), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->depthStencilId = depthStencilId;
+   cmd->stencilRef = stencilRef;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_SET_RASTERIZER_STATE command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetRasterizerState(struct svga_winsys_context *swc,
+                                 SVGA3dRasterizerStateId rasterizerId)
+{
+   SVGA3dCmdDXSetRasterizerState *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_RASTERIZER_STATE,
+                            sizeof(SVGA3dCmdDXSetRasterizerState), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->rasterizerId = rasterizerId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_SET_PREDICATION command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetPredication(struct svga_winsys_context *swc,
+                             SVGA3dQueryId queryId,
+                             uint32 predicateValue)
+{
+   SVGA3dCmdDXSetPredication *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_PREDICATION,
+                         sizeof(SVGA3dCmdDXSetPredication),
+                         0);  /* no relocations */
+
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->queryId = queryId;
+   cmd->predicateValue = predicateValue;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_SET_SOTARGETS command followed by 'count'
+ * SVGA3dSoTarget entries.
+ *
+ * For a non-NULL surfaces[i] the offset and size are taken from targets[i];
+ * for a NULL surface the entry gets offset 0 and size ~0u.  A write
+ * relocation is emitted for every entry in either case.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetSOTargets(struct svga_winsys_context *swc,
+                           unsigned count,
+                           const SVGA3dSoTarget *targets,
+                           struct svga_winsys_surface **surfaces)
+{
+   SVGA3dCmdDXSetSOTargets *cmd;
+   SVGA3dSoTarget *sot;
+   unsigned i;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_SOTARGETS,
+                            sizeof *cmd + count * sizeof(SVGA3dSoTarget),
+                            count);  /* one relocation per target */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->pad0 = 0;
+
+   /* the target array immediately follows the fixed-size command */
+   sot = (SVGA3dSoTarget *)(cmd + 1);
+   for (i = 0; i < count; i++) {
+      SVGA3dSoTarget *dst = &sot[i];
+
+      if (surfaces[i]) {
+         dst->offset = targets[i].offset;
+         dst->sizeInBytes = targets[i].sizeInBytes;
+      }
+      else {
+         dst->offset = 0;
+         dst->sizeInBytes = ~0u;
+      }
+
+      /* surfaces[i] may be NULL here; the relocation is still emitted */
+      swc->surface_relocation(swc, &dst->sid, NULL, surfaces[i],
+                              SVGA_RELOC_WRITE);
+   }
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_SET_SCISSORRECTS command followed by 'count'
+ * rectangles copied from 'rects'.  'count' must be non-zero (asserted).
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetScissorRects(struct svga_winsys_context *swc,
+                              unsigned count,
+                              const SVGASignedRect *rects)
+{
+   const unsigned rects_size = count * sizeof(SVGASignedRect);
+   SVGA3dCmdDXSetScissorRects *cmd;
+
+   assert(count > 0);
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_SCISSORRECTS,
+                            sizeof(SVGA3dCmdDXSetScissorRects) + rects_size,
+                            0);  /* no relocations */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* the rectangle array immediately follows the fixed-size command */
+   memcpy(cmd + 1, rects, rects_size);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_SET_STREAMOUTPUT command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetStreamOutput(struct svga_winsys_context *swc,
+                              SVGA3dStreamOutputId soid)
+{
+   SVGA3dCmdDXSetStreamOutput *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_STREAMOUTPUT,
+                            sizeof(SVGA3dCmdDXSetStreamOutput), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->soid = soid;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DRAW command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_Draw(struct svga_winsys_context *swc,
+                   uint32 vertexCount,
+                   uint32 startVertexLocation)
+{
+   SVGA3dCmdDXDraw *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DRAW,
+                            sizeof(SVGA3dCmdDXDraw), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->vertexCount = vertexCount;
+   cmd->startVertexLocation = startVertexLocation;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DRAW_INDEXED command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DrawIndexed(struct svga_winsys_context *swc,
+                          uint32 indexCount,
+                          uint32 startIndexLocation,
+                          int32 baseVertexLocation)
+{
+   SVGA3dCmdDXDrawIndexed *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DRAW_INDEXED,
+                            sizeof(SVGA3dCmdDXDrawIndexed), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->indexCount = indexCount;
+   cmd->startIndexLocation = startIndexLocation;
+   cmd->baseVertexLocation = baseVertexLocation;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DRAW_INSTANCED command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DrawInstanced(struct svga_winsys_context *swc,
+                            uint32 vertexCountPerInstance,
+                            uint32 instanceCount,
+                            uint32 startVertexLocation,
+                            uint32 startInstanceLocation)
+{
+   SVGA3dCmdDXDrawInstanced *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DRAW_INSTANCED,
+                            sizeof(SVGA3dCmdDXDrawInstanced), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->vertexCountPerInstance = vertexCountPerInstance;
+   cmd->instanceCount = instanceCount;
+   cmd->startVertexLocation = startVertexLocation;
+   cmd->startInstanceLocation = startInstanceLocation;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DrawIndexedInstanced(struct svga_winsys_context *swc,
+                                   uint32 indexCountPerInstance,
+                                   uint32 instanceCount,
+                                   uint32 startIndexLocation,
+                                   int32  baseVertexLocation,
+                                   uint32 startInstanceLocation)
+{
+   SVGA3dCmdDXDrawIndexedInstanced *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED,
+                            sizeof(SVGA3dCmdDXDrawIndexedInstanced), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->indexCountPerInstance = indexCountPerInstance;
+   cmd->instanceCount = instanceCount;
+   cmd->startIndexLocation = startIndexLocation;
+   cmd->baseVertexLocation = baseVertexLocation;
+   cmd->startInstanceLocation = startInstanceLocation;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DRAW_AUTO command.  No command fields are written.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DrawAuto(struct svga_winsys_context *swc)
+{
+   SVGA3dCmdDXDrawAuto *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DRAW_AUTO,
+                            sizeof(SVGA3dCmdDXDrawAuto), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DEFINE_QUERY command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DefineQuery(struct svga_winsys_context *swc,
+                          SVGA3dQueryId queryId,
+                          SVGA3dQueryType type,
+                          SVGA3dDXQueryFlags flags)
+{
+   SVGA3dCmdDXDefineQuery *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_QUERY,
+                            sizeof(SVGA3dCmdDXDefineQuery), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->queryId = queryId;
+   cmd->type = type;
+   cmd->flags = flags;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DESTROY_QUERY command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DestroyQuery(struct svga_winsys_context *swc,
+                           SVGA3dQueryId queryId)
+{
+   SVGA3dCmdDXDestroyQuery *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DESTROY_QUERY,
+                            sizeof(SVGA3dCmdDXDestroyQuery), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->queryId = queryId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_BIND_QUERY command.  A query relocation for
+ * 'gbQuery' is emitted against the command's mobid field.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_BindQuery(struct svga_winsys_context *swc,
+                        struct svga_winsys_gb_query *gbQuery,
+                        SVGA3dQueryId queryId)
+{
+   SVGA3dCmdDXBindQuery *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_BIND_QUERY,
+                            sizeof(SVGA3dCmdDXBindQuery),
+                            1);  /* one relocation */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->queryId = queryId;
+   swc->query_relocation(swc, &cmd->mobid, gbQuery);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_SET_QUERY_OFFSET command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetQueryOffset(struct svga_winsys_context *swc,
+                             SVGA3dQueryId queryId,
+                             uint32 mobOffset)
+{
+   SVGA3dCmdDXSetQueryOffset *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_QUERY_OFFSET,
+                            sizeof(SVGA3dCmdDXSetQueryOffset), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->queryId = queryId;
+   cmd->mobOffset = mobOffset;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_BEGIN_QUERY command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_BeginQuery(struct svga_winsys_context *swc,
+                         SVGA3dQueryId queryId)
+{
+   SVGA3dCmdDXBeginQuery *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_BEGIN_QUERY,
+                            sizeof(SVGA3dCmdDXBeginQuery), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->queryId = queryId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_END_QUERY command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_EndQuery(struct svga_winsys_context *swc,
+                       SVGA3dQueryId queryId)
+{
+   SVGA3dCmdDXEndQuery *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_END_QUERY,
+                            sizeof(SVGA3dCmdDXEndQuery), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->queryId = queryId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a SVGA_3D_CMD_DX_CLEAR_DEPTHSTENCIL_VIEW command for 'ds_surf' with
+ * the given flags, stencil value and depth value.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_ClearDepthStencilView(struct svga_winsys_context *swc,
+                                    struct pipe_surface *ds_surf,
+                                    uint16 flags,
+                                    uint16 stencil,
+                                    float depth)
+{
+   struct svga_surface *dsv = svga_surface(ds_surf);
+   SVGA3dCmdDXClearDepthStencilView *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_CLEAR_DEPTHSTENCIL_VIEW,
+                            sizeof *cmd,
+                            1); /* one relocation */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* NOTE: this is subtle.  The view/surface relocation needs a pointer to
+    * an ID that lies inside the command space reserved above, so it is
+    * pointed at depthStencilViewId.  Once the relocation has been emitted
+    * the field is overwritten with the original DepthStencilViewId.
+    */
+   view_relocation(swc, ds_surf, &cmd->depthStencilViewId,
+                   SVGA_RELOC_WRITE);
+   cmd->depthStencilViewId = dsv->view_id;
+
+   cmd->flags = flags;
+   cmd->stencil = stencil;
+   cmd->depth = depth;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DEFINE_SHADERRESOURCE_VIEW command.  'surface' is
+ * relocated for reading and '*desc' is copied into the command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DefineShaderResourceView(struct svga_winsys_context *swc,
+                             SVGA3dShaderResourceViewId shaderResourceViewId,
+                             struct svga_winsys_surface *surface,
+                             SVGA3dSurfaceFormat format,
+                             SVGA3dResourceType resourceDimension,
+                             const SVGA3dShaderResourceViewDesc *desc)
+{
+   SVGA3dCmdDXDefineShaderResourceView *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_SHADERRESOURCE_VIEW,
+                         sizeof *cmd,
+                         1); /* one relocation */
+
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->shaderResourceViewId = shaderResourceViewId;
+   cmd->format = format;
+   cmd->resourceDimension = resourceDimension;
+
+   swc->surface_relocation(swc, &cmd->sid, NULL, surface,
+                           SVGA_RELOC_READ);
+
+   cmd->desc = *desc;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DESTROY_SHADERRESOURCE_VIEW command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DestroyShaderResourceView(struct svga_winsys_context *swc,
+                             SVGA3dShaderResourceViewId shaderResourceViewId)
+{
+   SVGA3dCmdDXDestroyShaderResourceView *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DESTROY_SHADERRESOURCE_VIEW,
+                            sizeof(SVGA3dCmdDXDestroyShaderResourceView), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->shaderResourceViewId = shaderResourceViewId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DEFINE_RENDERTARGET_VIEW command.  '*desc' is
+ * copied into the command and 'surface' is relocated for reading and
+ * writing.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DefineRenderTargetView(struct svga_winsys_context *swc,
+                                  SVGA3dRenderTargetViewId renderTargetViewId,
+                                  struct svga_winsys_surface *surface,
+                                  SVGA3dSurfaceFormat format,
+                                  SVGA3dResourceType resourceDimension,
+                                  const SVGA3dRenderTargetViewDesc *desc)
+{
+   SVGA3dCmdDXDefineRenderTargetView *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_RENDERTARGET_VIEW,
+                         sizeof *cmd,
+                         1); /* one relocation */
+
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->renderTargetViewId = renderTargetViewId;
+   cmd->format = format;
+   cmd->resourceDimension = resourceDimension;
+   cmd->desc = *desc;
+
+   surface_to_resourceid(swc, surface, &cmd->sid,
+                         SVGA_RELOC_READ | SVGA_RELOC_WRITE);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DESTROY_RENDERTARGET_VIEW command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DestroyRenderTargetView(struct svga_winsys_context *swc,
+                                 SVGA3dRenderTargetViewId renderTargetViewId)
+{
+   SVGA3dCmdDXDestroyRenderTargetView *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DESTROY_RENDERTARGET_VIEW,
+                            sizeof(SVGA3dCmdDXDestroyRenderTargetView), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->renderTargetViewId = renderTargetViewId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW command.
+ *
+ * Only the 'tex' member of '*desc' is consulted (mipSlice, firstArraySlice
+ * and arraySize).  'surface' is relocated for reading and writing.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DefineDepthStencilView(struct svga_winsys_context *swc,
+                                  SVGA3dDepthStencilViewId depthStencilViewId,
+                                  struct svga_winsys_surface *surface,
+                                  SVGA3dSurfaceFormat format,
+                                  SVGA3dResourceType resourceDimension,
+                                  const SVGA3dRenderTargetViewDesc *desc)
+{
+   SVGA3dCmdDXDefineDepthStencilView *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW,
+                         sizeof *cmd,
+                         1); /* one relocation */
+
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->depthStencilViewId = depthStencilViewId;
+   cmd->format = format;
+   cmd->resourceDimension = resourceDimension;
+
+   cmd->mipSlice = desc->tex.mipSlice;
+   cmd->firstArraySlice = desc->tex.firstArraySlice;
+   cmd->arraySize = desc->tex.arraySize;
+
+   surface_to_resourceid(swc, surface, &cmd->sid,
+                         SVGA_RELOC_READ | SVGA_RELOC_WRITE);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_VIEW command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DestroyDepthStencilView(struct svga_winsys_context *swc,
+                                 SVGA3dDepthStencilViewId depthStencilViewId)
+{
+   SVGA3dCmdDXDestroyDepthStencilView *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_VIEW,
+                            sizeof(SVGA3dCmdDXDestroyDepthStencilView), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->depthStencilViewId = depthStencilViewId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DEFINE_ELEMENTLAYOUT command followed by 'count'
+ * input element descriptors copied from 'elements'.
+ *
+ * In debug builds every element's alignedByteOffset is asserted to be a
+ * multiple of four.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DefineElementLayout(struct svga_winsys_context *swc,
+                                  unsigned count,
+                                  SVGA3dElementLayoutId elementLayoutId,
+                                  const SVGA3dInputElementDesc *elements)
+{
+   const unsigned elements_size = count * sizeof(SVGA3dInputElementDesc);
+   SVGA3dCmdDXDefineElementLayout *cmd;
+   unsigned i;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_ELEMENTLAYOUT,
+                            sizeof(SVGA3dCmdDXDefineElementLayout) +
+                            elements_size,
+                            0);  /* no relocations */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* sanity check: all offsets must be multiples of four */
+   for (i = 0; i < count; i++) {
+      assert(elements[i].alignedByteOffset % 4 == 0);
+   }
+   (void) i; /* silence unused var in release build */
+
+   cmd->elementLayoutId = elementLayoutId;
+
+   /* the element array immediately follows the fixed-size command */
+   memcpy(cmd + 1, elements, elements_size);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DESTROY_ELEMENTLAYOUT command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DestroyElementLayout(struct svga_winsys_context *swc,
+                                   SVGA3dElementLayoutId elementLayoutId)
+{
+   SVGA3dCmdDXDestroyElementLayout *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DESTROY_ELEMENTLAYOUT,
+                            sizeof(SVGA3dCmdDXDestroyElementLayout), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->elementLayoutId = elementLayoutId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DEFINE_BLEND_STATE command.
+ *
+ * sizeof(cmd->perRT) bytes are copied from 'perRT', so the caller's array
+ * must be at least that large.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DefineBlendState(struct svga_winsys_context *swc,
+                               SVGA3dBlendStateId blendId,
+                               uint8 alphaToCoverageEnable,
+                               uint8 independentBlendEnable,
+                               const SVGA3dDXBlendStatePerRT *perRT)
+{
+   SVGA3dCmdDXDefineBlendState *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_BLEND_STATE,
+                            sizeof(SVGA3dCmdDXDefineBlendState), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->blendId = blendId;
+   cmd->alphaToCoverageEnable = alphaToCoverageEnable;
+   cmd->independentBlendEnable = independentBlendEnable;
+   memcpy(cmd->perRT, perRT, sizeof(cmd->perRT));
+   cmd->pad0 = 0;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DESTROY_BLEND_STATE command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DestroyBlendState(struct svga_winsys_context *swc,
+                                SVGA3dBlendStateId blendId)
+{
+   SVGA3dCmdDXDestroyBlendState *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DESTROY_BLEND_STATE,
+                            sizeof(SVGA3dCmdDXDestroyBlendState), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->blendId = blendId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_STATE command.  Every parameter
+ * after 'swc' is stored in the like-named field of the command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DefineDepthStencilState(struct svga_winsys_context *swc,
+                                      SVGA3dDepthStencilStateId depthStencilId,
+                                      uint8 depthEnable,
+                                      SVGA3dDepthWriteMask depthWriteMask,
+                                      SVGA3dComparisonFunc depthFunc,
+                                      uint8 stencilEnable,
+                                      uint8 frontEnable,
+                                      uint8 backEnable,
+                                      uint8 stencilReadMask,
+                                      uint8 stencilWriteMask,
+                                      uint8 frontStencilFailOp,
+                                      uint8 frontStencilDepthFailOp,
+                                      uint8 frontStencilPassOp,
+                                      SVGA3dComparisonFunc frontStencilFunc,
+                                      uint8 backStencilFailOp,
+                                      uint8 backStencilDepthFailOp,
+                                      uint8 backStencilPassOp,
+                                      SVGA3dComparisonFunc backStencilFunc)
+{
+   SVGA3dCmdDXDefineDepthStencilState *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_STATE,
+                            sizeof(SVGA3dCmdDXDefineDepthStencilState), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* depth test and shared stencil settings */
+   cmd->depthStencilId = depthStencilId;
+   cmd->depthEnable = depthEnable;
+   cmd->depthWriteMask = depthWriteMask;
+   cmd->depthFunc = depthFunc;
+   cmd->stencilEnable = stencilEnable;
+   cmd->frontEnable = frontEnable;
+   cmd->backEnable = backEnable;
+   cmd->stencilReadMask = stencilReadMask;
+   cmd->stencilWriteMask = stencilWriteMask;
+
+   /* front-face stencil ops */
+   cmd->frontStencilFailOp = frontStencilFailOp;
+   cmd->frontStencilDepthFailOp = frontStencilDepthFailOp;
+   cmd->frontStencilPassOp = frontStencilPassOp;
+   cmd->frontStencilFunc = frontStencilFunc;
+
+   /* back-face stencil ops */
+   cmd->backStencilFailOp = backStencilFailOp;
+   cmd->backStencilDepthFailOp = backStencilDepthFailOp;
+   cmd->backStencilPassOp = backStencilPassOp;
+   cmd->backStencilFunc = backStencilFunc;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_STATE command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DestroyDepthStencilState(struct svga_winsys_context *swc,
+                                    SVGA3dDepthStencilStateId depthStencilId)
+{
+   SVGA3dCmdDXDestroyDepthStencilState *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_STATE,
+                            sizeof(SVGA3dCmdDXDestroyDepthStencilState), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->depthStencilId = depthStencilId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE command.  Every parameter
+ * after 'swc' is stored in the like-named field of the command.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if no command space could
+ *         be reserved.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DefineRasterizerState(struct svga_winsys_context *swc,
+                                    SVGA3dRasterizerStateId rasterizerId,
+                                    uint8 fillMode,
+                                    SVGA3dCullMode cullMode,
+                                    uint8 frontCounterClockwise,
+                                    int32 depthBias,
+                                    float depthBiasClamp,
+                                    float slopeScaledDepthBias,
+                                    uint8 depthClipEnable,
+                                    uint8 scissorEnable,
+                                    uint8 multisampleEnable,
+                                    uint8 antialiasedLineEnable,
+                                    float lineWidth,
+                                    uint8 lineStippleEnable,
+                                    uint8 lineStippleFactor,
+                                    uint16 lineStipplePattern,
+                                    uint8 provokingVertexLast)
+{
+   SVGA3dCmdDXDefineRasterizerState *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE,
+                            sizeof(SVGA3dCmdDXDefineRasterizerState), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->rasterizerId = rasterizerId;
+   cmd->fillMode = fillMode;
+   cmd->cullMode = cullMode;
+   cmd->frontCounterClockwise = frontCounterClockwise;
+
+   /* depth bias / clip */
+   cmd->depthBias = depthBias;
+   cmd->depthBiasClamp = depthBiasClamp;
+   cmd->slopeScaledDepthBias = slopeScaledDepthBias;
+   cmd->depthClipEnable = depthClipEnable;
+
+   cmd->scissorEnable = scissorEnable;
+   cmd->multisampleEnable = multisampleEnable;
+
+   /* line rasterization */
+   cmd->antialiasedLineEnable = antialiasedLineEnable;
+   cmd->lineWidth = lineWidth;
+   cmd->lineStippleEnable = lineStippleEnable;
+   cmd->lineStippleFactor = lineStippleFactor;
+   cmd->lineStipplePattern = lineStipplePattern;
+
+   cmd->provokingVertexLast = provokingVertexLast;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyRasterizerState(struct svga_winsys_context *swc,
+                                     SVGA3dRasterizerStateId rasterizerId)
+{
+   SVGA3D_CREATE_COMMAND(DestroyRasterizerState, DESTROY_RASTERIZER_STATE);
+   /* 'cmd' is presumably declared by the macro above; only the id is set. */
+   cmd->rasterizerId = rasterizerId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DefineSamplerState(struct svga_winsys_context *swc,
+                                 SVGA3dSamplerId samplerId,
+                                 SVGA3dFilter filter,
+                                 uint8 addressU,
+                                 uint8 addressV,
+                                 uint8 addressW,
+                                 float mipLODBias,
+                                 uint8 maxAnisotropy,
+                                 uint8 comparisonFunc,
+                                 SVGA3dRGBAFloat borderColor,
+                                 float minLOD,
+                                 float maxLOD)
+{
+   SVGA3D_CREATE_COMMAND(DefineSamplerState, DEFINE_SAMPLER_STATE);
+   /* NOTE(review): COPY_BASIC_n presumably assigns cmd->x = x for each name. */
+   SVGA3D_COPY_BASIC_6(samplerId, filter,
+                       addressU, addressV,
+                       addressW, mipLODBias);
+   SVGA3D_COPY_BASIC_5(maxAnisotropy, comparisonFunc,
+                       borderColor, minLOD,
+                       maxLOD);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DestroySamplerState(struct svga_winsys_context *swc,
+                                  SVGA3dSamplerId samplerId)
+{
+   SVGA3D_CREATE_COMMAND(DestroySamplerState, DESTROY_SAMPLER_STATE);
+   /* 'cmd' is presumably declared by the macro above; only the id is set. */
+   cmd->samplerId = samplerId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/* Emit a DXDefineShader + DXBindShader pair from a single reservation. */
+enum pipe_error
+SVGA3D_vgpu10_DefineAndBindShader(struct svga_winsys_context *swc,
+                                  struct svga_winsys_gb_shader *gbshader,
+                                  SVGA3dShaderId shaderId,
+                                  SVGA3dShaderType type,
+                                  uint32 sizeInBytes)
+{
+   SVGA3dCmdHeader *defineHdr, *bindHdr;
+   SVGA3dCmdDXDefineShader *defineCmd;
+   SVGA3dCmdDXBindShader *bindCmd;
+   const unsigned bytesNeeded = sizeof(*defineHdr) + sizeof(*defineCmd) +
+                                sizeof(*bindHdr) + sizeof(*bindCmd);
+
+   /* Both header+body pairs live back to back in one reserved region. */
+   defineHdr = swc->reserve(swc, bytesNeeded, 2);
+   if (defineHdr == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* Carve up the region: header, define body, header, bind body. */
+   defineCmd = (SVGA3dCmdDXDefineShader *)(defineHdr + 1);
+   bindHdr = (SVGA3dCmdHeader *)(defineCmd + 1);
+   bindCmd = (SVGA3dCmdDXBindShader *)(bindHdr + 1);
+
+   /* First command: DXDefineShader */
+   defineHdr->id = SVGA_3D_CMD_DX_DEFINE_SHADER;
+   defineHdr->size = sizeof(*defineCmd);
+   defineCmd->shaderId = shaderId;
+   defineCmd->type = type;
+   defineCmd->sizeInBytes = sizeInBytes;
+
+   /* Second command: DXBindShader */
+   bindHdr->id = SVGA_3D_CMD_DX_BIND_SHADER;
+   bindHdr->size = sizeof(*bindCmd);
+   bindCmd->cid = swc->cid;
+   bindCmd->shid = shaderId;
+   /* mobid and offsetInBytes are handed to the winsys shader relocation. */
+   swc->shader_relocation(swc, NULL, &bindCmd->mobid,
+                          &bindCmd->offsetInBytes, gbshader, 0);
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyShader(struct svga_winsys_context *swc,
+                            SVGA3dShaderId shaderId)
+{
+   SVGA3D_CREATE_COMMAND(DestroyShader, DESTROY_SHADER);
+   /* 'cmd' is presumably declared by the macro above; only the id is set. */
+   cmd->shaderId = shaderId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DefineStreamOutput(struct svga_winsys_context *swc,
+       SVGA3dStreamOutputId soid,
+       uint32 numOutputStreamEntries,
+       uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS],
+       const SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS])
+{
+   unsigned n;
+   SVGA3D_CREATE_COMMAND(DefineStreamOutput, DEFINE_STREAMOUTPUT);
+
+   /* The full fixed-size declaration table is copied, used or not. */
+   memcpy(cmd->decl, decl,
+          SVGA3D_MAX_STREAMOUT_DECLS * sizeof(decl[0]));
+
+   /* Copy as many strides as the command's stride array holds. */
+   for (n = 0; n < Elements(cmd->streamOutputStrideInBytes); n++)
+      cmd->streamOutputStrideInBytes[n] = streamOutputStrideInBytes[n];
+
+   cmd->numOutputStreamEntries = numOutputStreamEntries;
+   cmd->soid = soid;
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyStreamOutput(struct svga_winsys_context *swc,
+                                  SVGA3dStreamOutputId soid)
+{
+   SVGA3D_CREATE_COMMAND(DestroyStreamOutput, DESTROY_STREAMOUTPUT);
+   /* 'cmd' is presumably declared by the macro above; only the id is set. */
+   cmd->soid = soid;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_SetInputLayout(struct svga_winsys_context *swc,
+                             SVGA3dElementLayoutId elementLayoutId)
+{
+   SVGA3D_CREATE_COMMAND(SetInputLayout, SET_INPUT_LAYOUT);
+   /* 'cmd' is presumably declared by the macro above; set the layout id. */
+   cmd->elementLayoutId = elementLayoutId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_SetVertexBuffers(struct svga_winsys_context *swc,
+                               unsigned count,
+                               uint32 startBuffer,
+                               const SVGA3dVertexBuffer *bufferInfo,
+                               struct svga_winsys_surface **surfaces)
+{
+   SVGA3dVertexBuffer *bufs;
+   SVGA3dCmdDXSetVertexBuffers *cmd;
+   unsigned i;
+
+   assert(count > 0);
+
+   /* Reserve the fixed header plus 'count' trailing entries, with one
+    * surface relocation per entry. */
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS,
+                            sizeof(*cmd) + count * sizeof(*bufs), count);
+   if (cmd == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->startBuffer = startBuffer;
+
+   /* The entry array starts right after the fixed-size command struct. */
+   bufs = (SVGA3dVertexBuffer *)(cmd + 1);
+   for (i = 0; i < count; i++) {
+      bufs[i].offset = bufferInfo[i].offset;
+      bufs[i].stride = bufferInfo[i].stride;
+      assert(bufs[i].stride % 4 == 0);
+      assert(bufs[i].offset % 4 == 0);
+      swc->surface_relocation(swc, &bufs[i].sid, NULL, surfaces[i],
+                              SVGA_RELOC_READ);
+   }
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_SetTopology(struct svga_winsys_context *swc,
+                          SVGA3dPrimitiveType topology)
+{
+   SVGA3D_CREATE_COMMAND(SetTopology, SET_TOPOLOGY);
+   /* 'cmd' is presumably declared by the macro above; set the topology. */
+   cmd->topology = topology;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_SetIndexBuffer(struct svga_winsys_context *swc,
+                             struct svga_winsys_surface *indexes,
+                             SVGA3dSurfaceFormat format,
+                             uint32 offset)
+{
+   SVGA3dCmdDXSetIndexBuffer *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_INDEX_BUFFER,
+                            sizeof(SVGA3dCmdDXSetIndexBuffer),
+                            1); /* one relocation */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   /* The index buffer surface id is written through a read relocation. */
+   swc->surface_relocation(swc, &cmd->sid, NULL, indexes, SVGA_RELOC_READ);
+   SVGA3D_COPY_BASIC_2(format, offset);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_SetSingleConstantBuffer(struct svga_winsys_context *swc,
+                                      unsigned slot,
+                                      SVGA3dShaderType type,
+                                      struct svga_winsys_surface *surface,
+                                      uint32 offsetInBytes,
+                                      uint32 sizeInBytes)
+{
+   SVGA3dCmdDXSetSingleConstantBuffer *cmd;
+
+   /* Sanity checks: aligned offset, and a non-zero size iff a surface. */
+   assert(offsetInBytes % 256 == 0);
+   if (surface) {
+      assert(sizeInBytes > 0);
+   }
+   else {
+      assert(sizeInBytes == 0);
+   }
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_SINGLE_CONSTANT_BUFFER,
+                            sizeof(*cmd), 1);  /* one relocation: sid */
+   if (cmd == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->surface_relocation(swc, &cmd->sid, NULL, surface, SVGA_RELOC_READ);
+   cmd->type = type;
+   cmd->slot = slot;
+   cmd->sizeInBytes = sizeInBytes;
+   cmd->offsetInBytes = offsetInBytes;
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_vgpu10_ReadbackSubResource(struct svga_winsys_context *swc,
+                                  struct svga_winsys_surface *surface,
+                                  unsigned subResource)
+{
+   const unsigned relocFlags = SVGA_RELOC_READ | SVGA_RELOC_INTERNAL;
+   SVGA3dCmdDXReadbackSubResource *cmd;
+
+   /* Reserve the command together with its single surface relocation. */
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_READBACK_SUBRESOURCE,
+                            sizeof(*cmd), 1);
+   if (cmd == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->subResource = subResource;
+   swc->surface_relocation(swc, &cmd->sid, NULL, surface, relocFlags);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_UpdateSubResource(struct svga_winsys_context *swc,
+                                struct svga_winsys_surface *surface,
+                                const SVGA3dBox *box,
+                                unsigned subResource)
+{
+   const unsigned relocFlags = SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL;
+   SVGA3dCmdDXUpdateSubResource *cmd;
+
+   /* Reserve the command together with its single surface relocation. */
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE,
+                            sizeof(*cmd), 1);
+   if (cmd == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->box = *box;
+   cmd->subResource = subResource;
+   swc->surface_relocation(swc, &cmd->sid, NULL, surface, relocFlags);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
-- 
cgit v1.2.3


From 9cb2d9ddfacfcf7fe3f91942d0ad147ccafd29cf Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 7 Aug 2015 15:21:10 -0600
Subject: svga: add new svga_link.[ch] files

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/svga/svga_link.c | 120 +++++++++++++++++++++++++++++++++++
 src/gallium/drivers/svga/svga_link.h |  20 ++++++
 2 files changed, 140 insertions(+)
 create mode 100644 src/gallium/drivers/svga/svga_link.c
 create mode 100644 src/gallium/drivers/svga/svga_link.h

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_link.c b/src/gallium/drivers/svga/svga_link.c
new file mode 100644
index 00000000000..f3e524d38ea
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_link.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2013 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#include "svga_context.h"
+#include "svga_link.h"
+
+#include "tgsi/tgsi_strings.h"
+
+
+#define INVALID_INDEX 255
+
+
+/**
+ * Link the outputs of one shader stage to the inputs of the next.
+ * For each input of inshader_info, linkage->input_map[] gets the index of
+ * the outshader_info output with the same semantic name/index.  Inputs
+ * with no matching output are given register slots past the outputs.
+ */
+void
+svga_link_shaders(const struct tgsi_shader_info *outshader_info,
+                  const struct tgsi_shader_info *inshader_info,
+                  struct shader_linkage *linkage)
+{
+   unsigned i, free_slot;
+
+   for (i = 0; i < Elements(linkage->input_map); i++) {
+      linkage->input_map[i] = INVALID_INDEX;
+   }
+
+   /* Assign input slots for input shader inputs.
+    * Basically, we want to use the same index for the output shader's outputs
+    * and the input shader's inputs that should be linked together.
+    * We'll modify the input shader's inputs to match the output shader.
+    */
+   assert(inshader_info->num_inputs <=
+          Elements(inshader_info->input_semantic_name));
+
+   /* free register index that can be used for built-in varyings */
+   free_slot = outshader_info->num_outputs + 1;
+
+   for (i = 0; i < inshader_info->num_inputs; i++) {
+      unsigned sem_name = inshader_info->input_semantic_name[i];
+      unsigned sem_index = inshader_info->input_semantic_index[i];
+      unsigned j;
+      /**
+       * Get the clip distance inputs from the output shader's
+       * clip distance shadow copy.
+       */
+      if (sem_name == TGSI_SEMANTIC_CLIPDIST) {
+         linkage->input_map[i] = outshader_info->num_outputs + 1 + sem_index;
+         /* make sure free_slot includes this extra output */
+         free_slot = MAX2(free_slot, linkage->input_map[i] + 1);
+      }
+      else {
+         /* search output shader outputs for same item */
+         for (j = 0; j < outshader_info->num_outputs; j++) {
+            assert(j < Elements(outshader_info->output_semantic_name));
+            if (outshader_info->output_semantic_name[j] == sem_name &&
+                outshader_info->output_semantic_index[j] == sem_index) {
+               linkage->input_map[i] = j;
+               break;
+            }
+         }
+      }
+   }
+
+   linkage->num_inputs = inshader_info->num_inputs;
+
+   /* Things like the front-face register are handled here */
+   for (i = 0; i < inshader_info->num_inputs; i++) {
+      if (linkage->input_map[i] == INVALID_INDEX) {
+         unsigned j = free_slot++;
+         linkage->input_map[i] = j;
+      }
+   }
+
+   /* Debug */
+   if (0) {
+      unsigned j;
+      for (i = 0; i < linkage->num_inputs; i++) {
+
+         assert(linkage->input_map[i] != INVALID_INDEX);
+
+         debug_printf("input shader input[%u] slot %u  %s %u %s\n",
+                      i,
+                      linkage->input_map[i],
+                      tgsi_semantic_names[inshader_info->input_semantic_name[i]],
+                      inshader_info->input_semantic_index[i],
+                      tgsi_interpolate_names[inshader_info->input_interpolate[i]]);
+
+         /* Make sure no register index repeats.  Entries are compared
+          * directly: a "1 << slot" bitmask is undefined for slots >= 31.
+          */
+         for (j = 0; j < i; j++)
+            assert(linkage->input_map[j] != linkage->input_map[i]);
+      }
+   }
+}
diff --git a/src/gallium/drivers/svga/svga_link.h b/src/gallium/drivers/svga/svga_link.h
new file mode 100644
index 00000000000..724c61194a2
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_link.h
@@ -0,0 +1,20 @@
+
+#ifndef SVGA_LINK_H
+#define SVGA_LINK_H
+
+#include "pipe/p_defines.h"
+
+struct svga_context;
+struct tgsi_shader_info;  /* used by the prototype below */
+
+struct shader_linkage {
+   unsigned num_inputs;  /* number of input_map[] entries in use */
+   ubyte input_map[PIPE_MAX_SHADER_INPUTS];  /* input index -> slot */
+};
+
+void
+svga_link_shaders(const struct tgsi_shader_info *outshader_info,
+                  const struct tgsi_shader_info *inshader_info,
+                  struct shader_linkage *linkage);
+
+#endif /* SVGA_LINK_H */
-- 
cgit v1.2.3


From 7ce20cf59a1171c4fa2e17a6c0c2f7e24b1150d2 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 7 Aug 2015 15:21:29 -0600
Subject: svga: add new svga_pipe_gs.c file

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/svga/svga_pipe_gs.c | 142 ++++++++++++++++++++++++++++++++
 1 file changed, 142 insertions(+)
 create mode 100644 src/gallium/drivers/svga/svga_pipe_gs.c

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_pipe_gs.c b/src/gallium/drivers/svga/svga_pipe_gs.c
new file mode 100644
index 00000000000..752c8c700e0
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_gs.c
@@ -0,0 +1,142 @@
+/**********************************************************
+ * Copyright 2014 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "draw/draw_context.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_bitmask.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_text.h"
+
+#include "svga_context.h"
+#include "svga_cmd.h"
+#include "svga_debug.h"
+#include "svga_shader.h"
+#include "svga_streamout.h"
+
+/** pipe_context::create_gs_state hook: build the driver's GS object. */
+static void *
+svga_create_gs_state(struct pipe_context *pipe,
+                     const struct pipe_shader_state *templ)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_geometry_shader *gs = CALLOC_STRUCT(svga_geometry_shader);
+
+   if (!gs)
+      return NULL;
+
+   /* Private copy of the tokens; don't scan a NULL stream if the dup fails */
+   gs->base.tokens = tgsi_dup_tokens(templ->tokens);
+   if (!gs->base.tokens) {
+      FREE(gs);
+      return NULL;
+   }
+   /* Collect basic info that we'll need later */
+   tgsi_scan_shader(gs->base.tokens, &gs->base.info);
+   gs->draw_shader = draw_create_geometry_shader(svga->swtnl.draw, templ);
+   gs->base.id = svga->debug.shader_id++;
+   gs->generic_outputs = svga_get_generic_outputs_mask(&gs->base.info);
+
+   /* check for any stream output declarations */
+   if (templ->stream_output.num_outputs) {
+      gs->base.stream_output = svga_create_stream_output(svga, &gs->base,
+                                                         &templ->stream_output);
+   }
+   return gs;
+}
+
+
+static void
+svga_bind_gs_state(struct pipe_context *pipe, void *shader)
+{
+   struct svga_context *svga = svga_context(pipe);
+
+   /* Remember the bound GS and flag the change via SVGA_NEW_GS. */
+   svga->curr.user_gs = (struct svga_geometry_shader *) shader;
+   svga->dirty |= SVGA_NEW_GS;
+}
+
+
+/** delete_gs_state hook: free a GS and the shaders chained after it. */
+static void
+svga_delete_gs_state(struct pipe_context *pipe, void *shader)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_geometry_shader *gs = (struct svga_geometry_shader *)shader;
+   struct svga_geometry_shader *next_gs;
+   struct svga_shader_variant *variant, *tmp;
+   enum pipe_error ret;
+
+   svga_hwtnl_flush_retry(svga);
+
+   /* Start deletion from the original geometry shader state */
+   if (gs->base.parent != NULL)
+      gs = (struct svga_geometry_shader *)gs->base.parent;
+
+   /* Free the list of geometry shaders */
+   while (gs) {
+      next_gs = (struct svga_geometry_shader *)gs->base.next;
+
+      if (gs->base.stream_output != NULL)
+         svga_delete_stream_output(svga, gs->base.stream_output);
+
+      draw_delete_geometry_shader(svga->swtnl.draw, gs->draw_shader);
+
+      for (variant = gs->base.variants; variant; variant = tmp) {
+         tmp = variant->next;  /* read before 'variant' goes to the destroy call */
+         /* Check if deleting currently bound shader */
+         if (variant == svga->state.hw_draw.gs) {
+            ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, NULL);
+            if (ret != PIPE_OK) {
+               svga_context_flush(svga, NULL);
+               ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, NULL);
+               assert(ret == PIPE_OK);
+            }
+            svga->state.hw_draw.gs = NULL;
+         }
+         /* As above: on failure, flush the context and retry once */
+         ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_GS, variant);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_GS,
+                                              variant);
+            assert(ret == PIPE_OK);
+         }
+      }
+
+      FREE((void *)gs->base.tokens);
+      FREE(gs);
+      gs = next_gs;
+   }
+}
+
+/** Install the geometry shader create/bind/delete hooks on svga->pipe. */
+void
+svga_init_gs_functions(struct svga_context *svga)
+{
+   svga->pipe.create_gs_state = svga_create_gs_state;
+   svga->pipe.bind_gs_state = svga_bind_gs_state;
+   svga->pipe.delete_gs_state = svga_delete_gs_state;
+}
-- 
cgit v1.2.3


From ff85bcdba21ccf88f1a359f2055847e2c820e85f Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 7 Aug 2015 15:21:46 -0600
Subject: svga: add new svga_pipe_streamout.c file

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/svga/svga_pipe_streamout.c | 320 +++++++++++++++++++++++++
 1 file changed, 320 insertions(+)
 create mode 100644 src/gallium/drivers/svga/svga_pipe_streamout.c

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_pipe_streamout.c b/src/gallium/drivers/svga/svga_pipe_streamout.c
new file mode 100644
index 00000000000..05672a95165
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_streamout.c
@@ -0,0 +1,320 @@
+/**********************************************************
+ * Copyright 2014 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "util/u_memory.h"
+#include "util/u_bitmask.h"
+
+#include "svga_cmd.h"
+#include "svga_context.h"
+#include "svga_resource_buffer.h"
+#include "svga_shader.h"
+#include "svga_debug.h"
+#include "svga_streamout.h"
+
+struct svga_stream_output_target {
+   struct pipe_stream_output_target base;  /* pointers to it get cast to us */
+};
+
+/** Cast a pipe_stream_output_target pointer to the svga struct wrapping it. */
+static INLINE struct svga_stream_output_target *
+svga_stream_output_target(struct pipe_stream_output_target *s)
+{
+   return (struct svga_stream_output_target *)s;
+}
+
+/** Create a stream output object for 'shader' as described by 'info'.
+ *  Returns NULL on non-vgpu10 devices or when an allocation/command fails.
+ */
+struct svga_stream_output *
+svga_create_stream_output(struct svga_context *svga,
+                          struct svga_shader *shader,
+                          const struct pipe_stream_output_info *info)
+{
+   struct svga_stream_output *streamout;
+   SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS];
+   unsigned strides[SVGA3D_DX_MAX_SOTARGETS];
+   unsigned i;
+   enum pipe_error ret;
+   unsigned id;
+
+   assert(info->num_outputs <= PIPE_MAX_SO_OUTPUTS);
+
+   /* Gallium utility creates shaders with stream output.
+    * For non-DX10, just return NULL. */
+   if (!svga_have_vgpu10(svga))
+      return NULL;
+
+   assert(info->num_outputs <= SVGA3D_MAX_STREAMOUT_DECLS);
+
+   /* Allocate an integer ID for the stream output */
+   id = util_bitmask_add(svga->stream_output_id_bm);
+   if (id == UTIL_BITMASK_INVALID_INDEX)
+      return NULL;
+
+   /* Allocate the streamout data structure; give the ID back on failure */
+   streamout = CALLOC_STRUCT(svga_stream_output);
+   if (streamout == NULL) {
+      util_bitmask_clear(svga->stream_output_id_bm, id);
+      return NULL;
+   }
+
+   streamout->info = *info;
+   streamout->id = id;
+   streamout->pos_out_index = -1;
+
+   SVGA_DBG(DEBUG_STREAMOUT, "%s, num_outputs=%d id=%d\n", __FUNCTION__,
+            info->num_outputs, id);
+
+   /* init whole decls and stride arrays to zero to avoid garbage values */
+   memset(decls, 0, sizeof(decls));
+   memset(strides, 0, sizeof(strides));
+
+   for (i = 0; i < info->num_outputs; i++) {
+      unsigned reg_idx = info->output[i].register_index;
+      unsigned buf_idx = info->output[i].output_buffer;
+      const unsigned sem_name = shader->info.output_semantic_name[reg_idx];
+      /* buf_idx indexes stride[]/strides[]: it must be below the limit */
+      assert(buf_idx < PIPE_MAX_SO_BUFFERS);
+
+      if (sem_name == TGSI_SEMANTIC_POSITION) {
+         /**
+          * Check if streaming out POSITION. If so, replace the
+          * register index with the index for NON_ADJUSTED POSITION.
+          */
+         decls[i].registerIndex = shader->info.num_outputs;
+
+         /* Save this output index, so we can tell later if this stream output
+          * includes an output of a vertex position
+          */
+         streamout->pos_out_index = i;
+      }
+      else if (sem_name == TGSI_SEMANTIC_CLIPDIST) {
+         /**
+          * Use the shadow copy for clip distance because
+          * CLIPDIST instruction is only emitted for enabled clip planes.
+          * It's valid to write to ClipDistance variable for non-enabled
+          * clip planes.
+          */
+         decls[i].registerIndex = shader->info.num_outputs + 1 +
+                                  shader->info.output_semantic_index[reg_idx];
+      }
+      else {
+         decls[i].registerIndex = reg_idx;
+      }
+
+      decls[i].outputSlot = buf_idx;
+      decls[i].registerMask =
+         ((1 << info->output[i].num_components) - 1)
+            << info->output[i].start_component;
+
+      SVGA_DBG(DEBUG_STREAMOUT, "%d slot=%d regIdx=%d regMask=0x%x\n",
+               i, decls[i].outputSlot, decls[i].registerIndex,
+               decls[i].registerMask);
+
+      strides[buf_idx] = info->stride[buf_idx] * sizeof(float);
+   }
+
+   ret = SVGA3D_vgpu10_DefineStreamOutput(svga->swc, id,
+                                          info->num_outputs,
+                                          strides, decls);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_DefineStreamOutput(svga->swc, id,
+                                             info->num_outputs,
+                                             strides, decls);
+      if (ret != PIPE_OK) {
+         util_bitmask_clear(svga->stream_output_id_bm, id);
+         FREE(streamout);
+         streamout = NULL;
+      }
+   }
+   return streamout;
+}
+
+/** Make 'streamout' the current stream output (NULL -> SVGA3D_INVALID_ID). */
+enum pipe_error
+svga_set_stream_output(struct svga_context *svga,
+                       struct svga_stream_output *streamout)
+{
+   enum pipe_error ret = PIPE_OK;
+   unsigned id = streamout ? streamout->id : SVGA3D_INVALID_ID;
+
+   if (!svga_have_vgpu10(svga))
+      return PIPE_OK;
+
+   SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=%p id=%d\n", __FUNCTION__,
+            (void *) streamout, id);
+
+   if (svga->current_so != streamout) {
+      /* Save current SO state */
+      svga->current_so = streamout;
+
+      ret = SVGA3D_vgpu10_SetStreamOutput(svga->swc, id);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_vgpu10_SetStreamOutput(svga->swc, id);
+      }
+   }
+
+   return ret;
+}
+
+/** Issue DestroyStreamOutput for 'streamout', release its ID and free it. */
+void
+svga_delete_stream_output(struct svga_context *svga,
+                          struct svga_stream_output *streamout)
+{
+   enum pipe_error ret;
+
+   SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=%p\n", __FUNCTION__,
+            (void *) streamout);
+
+   assert(svga_have_vgpu10(svga));
+   assert(streamout != NULL);
+
+   ret = SVGA3D_vgpu10_DestroyStreamOutput(svga->swc, streamout->id);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_DestroyStreamOutput(svga->swc, streamout->id);
+   }
+
+   /* Release the ID, then free the streamout structure itself */
+   util_bitmask_clear(svga->stream_output_id_bm, streamout->id);
+   FREE(streamout);
+}
+
+static struct pipe_stream_output_target *
+svga_create_stream_output_target(struct pipe_context *pipe,
+                                 struct pipe_resource *buffer,
+                                 unsigned buffer_offset,
+                                 unsigned buffer_size)
+{
+   struct svga_stream_output_target *sot;
+   struct svga_context *svga = svga_context(pipe);
+
+   SVGA_DBG(DEBUG_STREAMOUT, "%s offset=%d size=%d\n", __FUNCTION__,
+            buffer_offset, buffer_size);
+
+   assert(svga_have_vgpu10(svga));
+   (void) svga;
+
+   sot = CALLOC_STRUCT(svga_stream_output_target);
+   if (sot == NULL)
+      return NULL;
+
+   /* Initialize the embedded pipe_stream_output_target. */
+   pipe_reference_init(&sot->base.reference, 1);
+   sot->base.context = pipe;
+   sot->base.buffer_offset = buffer_offset;
+   sot->base.buffer_size = buffer_size;
+   pipe_resource_reference(&sot->base.buffer, buffer);
+   sot->base.buffer = buffer;
+   return &sot->base;
+}
+
+static void
+svga_destroy_stream_output_target(struct pipe_context *pipe,
+                                  struct pipe_stream_output_target *target)
+{
+   struct svga_stream_output_target *sot;
+
+   SVGA_DBG(DEBUG_STREAMOUT, "%s\n", __FUNCTION__);
+   sot = svga_stream_output_target(target);
+   pipe_resource_reference(&sot->base.buffer, NULL); /* drop buffer pointer */
+   FREE(sot);
+}
+
+/** set_stream_output_targets hook; the 'offsets' argument is not used here. */
+static void
+svga_set_stream_output_targets(struct pipe_context *pipe,
+                               unsigned num_targets,
+                               struct pipe_stream_output_target **targets,
+                               const unsigned *offsets)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct SVGA3dSoTarget soBindings[SVGA3D_DX_MAX_SOTARGETS];
+   enum pipe_error ret;
+   unsigned i;
+   unsigned num_so_targets;
+
+   SVGA_DBG(DEBUG_STREAMOUT, "%s num_targets=%d\n", __FUNCTION__,
+            num_targets);
+
+   assert(svga_have_vgpu10(svga));
+
+   /* Mark the streamout buffers as dirty so that we'll issue readbacks
+    * before mapping. */
+   for (i = 0; i < svga->num_so_targets; i++) {
+      struct svga_buffer *sbuf = svga_buffer(svga->so_targets[i]->buffer);
+      sbuf->dirty = TRUE;
+   }
+
+   assert(num_targets <= SVGA3D_DX_MAX_SOTARGETS);
+   /* Entries past num_targets are still passed to SetSOTargets below when
+    * old targets get unbound; don't leave them as uninitialized stack data. */
+   memset(soBindings, 0, sizeof(soBindings));
+
+   for (i = 0; i < num_targets; i++) {
+      struct svga_stream_output_target *sot
+         = svga_stream_output_target(targets[i]);
+      struct svga_buffer *sbuf = svga_buffer(sot->base.buffer);
+
+      assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_STREAM_OUTPUT);
+      (void) sbuf;
+
+      svga->so_surfaces[i] = svga_buffer_handle(svga, sot->base.buffer);
+      svga->so_targets[i] = &sot->base;
+      soBindings[i].offset = sot->base.buffer_offset;
+      /* The size cannot extend beyond the end of the buffer.  Clamp it. */
+      soBindings[i].sizeInBytes =
+         MIN2(sot->base.buffer_size,
+              sot->base.buffer->width0 - sot->base.buffer_offset);
+   }
+
+   /* unbind any previously bound stream output buffers */
+   for (; i < svga->num_so_targets; i++) {
+      svga->so_surfaces[i] = NULL;
+      svga->so_targets[i] = NULL;
+   }
+
+   num_so_targets = MAX2(svga->num_so_targets, num_targets);
+   ret = SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,
+                                    soBindings, svga->so_surfaces);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,
+                                       soBindings, svga->so_surfaces);
+   }
+
+   svga->num_so_targets = num_targets;
+}
+/** Install the stream output target hooks on svga->pipe. */
+void
+svga_init_stream_output_functions(struct svga_context *svga)
+{
+   svga->pipe.create_stream_output_target = svga_create_stream_output_target;
+   svga->pipe.stream_output_target_destroy = svga_destroy_stream_output_target;
+   svga->pipe.set_stream_output_targets = svga_set_stream_output_targets;
+}
-- 
cgit v1.2.3


From a633948e7ea2445ff88c469edf47c44cfec8d962 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 7 Aug 2015 15:22:01 -0600
Subject: svga: add new svga_state_gs.c file

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/svga/svga_state_gs.c | 264 +++++++++++++++++++++++++++++++
 1 file changed, 264 insertions(+)
 create mode 100644 src/gallium/drivers/svga/svga_state_gs.c

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_state_gs.c b/src/gallium/drivers/svga/svga_state_gs.c
new file mode 100644
index 00000000000..9f6885b95a2
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_gs.c
@@ -0,0 +1,264 @@
+/**********************************************************
+ * Copyright 2014 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_bitmask.h"
+#include "translate/translate.h"
+#include "tgsi/tgsi_ureg.h"
+
+#include "svga_context.h"
+#include "svga_cmd.h"
+#include "svga_shader.h"
+#include "svga_tgsi.h"
+#include "svga_streamout.h"
+#include "svga_format.h"
+
+/**
+ * If we fail to compile a geometry shader we'll use a dummy/fallback shader
+ * that simply emits the incoming vertices.  Not implemented yet: returns NULL.
+ */
+static const struct tgsi_token *
+get_dummy_geometry_shader(void)
+{
+   /* XXX: no dummy shader exists yet, so compile_gs() fails without one */
+   return NULL;
+}
+
+
+static struct svga_shader_variant *
+translate_geometry_program(struct svga_context *svga,
+                           const struct svga_geometry_shader *gs,
+                           const struct svga_compile_key *key)
+{
+   /* Devices without VGPU10 go through the VGPU9 translator instead. */
+   if (!svga_have_vgpu10(svga))
+      return svga_tgsi_vgpu9_translate(&gs->base, key, PIPE_SHADER_GEOMETRY);
+
+   /* VGPU10 path: this translator also takes the context. */
+   return svga_tgsi_vgpu10_translate(svga, &gs->base, key,
+                                     PIPE_SHADER_GEOMETRY);
+}
+
+
+/**
+ * Translate TGSI shader into an svga shader variant.
+ *
+ * If translation fails, the shader's tokens are replaced by the dummy
+ * geometry shader (when one is available) and translation is retried.
+ * The resulting variant is then defined with svga_define_shader().
+ *
+ * \param out_variant  receives the new variant; written only on success
+ * \return PIPE_OK on success, otherwise an error code
+ */
+static enum pipe_error
+compile_gs(struct svga_context *svga,
+           struct svga_geometry_shader *gs,
+           const struct svga_compile_key *key,
+           struct svga_shader_variant **out_variant)
+{
+   struct svga_shader_variant *variant =
+      translate_geometry_program(svga, gs, key);
+   enum pipe_error ret;
+
+   if (!variant) {
+      /* translation failed: retry with the dummy shader, if there is one */
+      const struct tgsi_token *dummy = get_dummy_geometry_shader();
+      if (dummy == NULL)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+
+      debug_printf("Failed to compile geometry shader, using dummy shader instead.\n");
+      FREE((void *) gs->base.tokens);
+      gs->base.tokens = dummy;
+      variant = translate_geometry_program(svga, gs, key);
+      if (!variant)
+         return PIPE_ERROR;
+   }
+
+   ret = svga_define_shader(svga, SVGA3D_SHADERTYPE_GS, variant);
+   if (ret != PIPE_OK) {
+      /* defining the shader failed: release the variant we built */
+      svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_GS, variant);
+      return ret;
+   }
+
+   *out_variant = variant;
+   return PIPE_OK;
+}
+
+static void
+make_gs_key(struct svga_context *svga, struct svga_compile_key *key)
+{
+   struct svga_geometry_shader *gs = svga->curr.gs;  /* used unchecked below */
+
+   memset(key, 0, sizeof *key);   /* start from an all-zero key */
+
+   /* Fill the compile key for the current GS from context state.
+    * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER
+    */
+   svga_init_shader_key_common(svga, PIPE_SHADER_GEOMETRY, key);
+
+   memcpy(key->generic_remap_table, gs->generic_remap_table,
+          sizeof(gs->generic_remap_table));
+
+   key->gs.vs_generic_outputs = svga->curr.vs->generic_outputs;
+
+   key->gs.need_prescale = svga->state.hw_clear.prescale.enabled;
+
+   key->gs.writes_psize = gs->base.info.writes_psize;
+   key->gs.wide_point = gs->wide_point;
+   key->sprite_coord_enable = svga->curr.rast->templ.sprite_coord_enable;
+   key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode
+                                    == PIPE_SPRITE_COORD_LOWER_LEFT);
+
+   /* SVGA_NEW_RAST */
+   key->clip_plane_enable = svga->curr.rast->templ.clip_plane_enable;
+}
+
+
+/**
+ * svga_reemit_gs_bindings - Reemit the geometry shader bindings
+ *
+ * Re-emits either the SetShader command or only the GB shader rebind for
+ * the hw-bound GS variant, and clears the GS rebind flag on success.
+ */
+enum pipe_error
+svga_reemit_gs_bindings(struct svga_context *svga)
+{
+   struct svga_winsys_gb_shader *gbshader = NULL;
+   SVGA3dShaderId shaderId = SVGA3D_INVALID_ID;
+   enum pipe_error ret;
+
+   assert(svga->rebind.flags.gs);
+   assert(svga_have_gb_objects(svga));
+   /* Geometry Shader is only supported in vgpu10 */
+   assert(svga_have_vgpu10(svga));
+
+   if (svga->state.hw_draw.gs != NULL) {
+      shaderId = svga->state.hw_draw.gs->id;
+      gbshader = svga->state.hw_draw.gs->gb_shader;
+   }
+
+   if (svga_need_to_rebind_resources(svga)) {
+      ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_GS,
+                                    gbshader, shaderId);
+   }
+   else {
+      ret = svga->swc->resource_rebind(svga->swc, NULL, gbshader,
+                                       SVGA_RELOC_READ);
+   }
+
+   /* Only drop the rebind request once the command was emitted. */
+   if (ret == PIPE_OK)
+      svga->rebind.flags.gs = FALSE;
+   return ret;
+}
+
+/** Bind the GS variant for the current state, compiling it on demand. */
+static enum pipe_error
+emit_hw_gs(struct svga_context *svga, unsigned dirty)
+{
+   struct svga_shader_variant *variant;
+   struct svga_geometry_shader *gs = svga->curr.gs;
+   enum pipe_error ret = PIPE_OK;
+   struct svga_compile_key key;
+
+   /* A user-defined GS must come with a derived GS; that should have been
+    * resolved in update_tgsi_transform(). */
+   assert(!svga->curr.user_gs || svga->curr.gs);
+
+   if (gs == NULL) {
+      if (svga->state.hw_draw.gs != NULL) {
+         /* The previous geometry shader is made inactive: unbind it.
+          * Keep hw_draw.gs if that fails so a later call retries the unbind.
+          */
+         ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, NULL);
+         if (ret == PIPE_OK)
+            svga->state.hw_draw.gs = NULL;
+      }
+      return ret;
+   }
+
+   /* If there is stream output info for this geometry shader, then use
+    * it instead of the one from the vertex shader.
+    */
+   if (svga_have_gs_streamout(svga)) {
+      ret = svga_set_stream_output(svga, gs->base.stream_output);
+   }
+   else if (!svga_have_vs_streamout(svga)) {
+      /* turn off stream out */
+      ret = svga_set_stream_output(svga, NULL);
+   }
+   if (ret != PIPE_OK)
+      return ret;   /* don't silently drop a failed stream output update */
+
+   /* SVGA_NEW_NEED_SWTNL */
+   if (svga->state.sw.need_swtnl && !svga_have_vgpu10(svga)) {
+      /* No geometry shader is needed */
+      variant = NULL;
+   }
+   else {
+      make_gs_key(svga, &key);
+
+      /* See if we already have a GS variant that matches the key */
+      variant = svga_search_shader_key(&gs->base, &key);
+
+      if (!variant) {
+         ret = compile_gs(svga, gs, &key, &variant);
+         if (ret != PIPE_OK)
+            return ret;
+
+         /* insert the new variant at head of linked list */
+         assert(variant);
+         variant->next = gs->base.variants;
+         gs->base.variants = variant;
+      }
+   }
+
+   if (variant != svga->state.hw_draw.gs) {
+      /* Bind the new variant */
+      ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, variant);
+      if (ret != PIPE_OK)
+         return ret;
+      svga->rebind.flags.gs = FALSE;
+      svga->dirty |= SVGA_NEW_GS_VARIANT;
+      svga->state.hw_draw.gs = variant;
+   }
+
+   return PIPE_OK;
+}
+
+struct svga_tracked_state svga_hw_gs =
+{
+   "geometry shader (hwtnl)",   /* name of this tracked state */
+   (SVGA_NEW_VS |               /* SVGA_NEW_* bits listed for this state; */
+    SVGA_NEW_FS |               /* presumably the dirty flags that trigger */
+    SVGA_NEW_GS |               /* emit_hw_gs -- TODO confirm */
+    SVGA_NEW_TEXTURE_BINDING |
+    SVGA_NEW_SAMPLER |
+    SVGA_NEW_RAST |
+    SVGA_NEW_NEED_SWTNL),
+   emit_hw_gs                   /* callback taking (svga, dirty) */
+};
-- 
cgit v1.2.3


From 26d8bae88908b490a37b6ba39b88f44945bb5613 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 7 Aug 2015 15:22:18 -0600
Subject: svga: add new svga_state_sampler.c file

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/svga/svga_state_sampler.c | 339 ++++++++++++++++++++++++++
 1 file changed, 339 insertions(+)
 create mode 100644 src/gallium/drivers/svga/svga_state_sampler.c

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_state_sampler.c b/src/gallium/drivers/svga/svga_state_sampler.c
new file mode 100644
index 00000000000..e350f540335
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_sampler.c
@@ -0,0 +1,339 @@
+/*
+ * Copyright 2013 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+/**
+ * VGPU10 sampler and sampler view functions.
+ */
+
+
+#include "pipe/p_defines.h"
+#include "util/u_bitmask.h"
+#include "util/u_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "svga_cmd.h"
+#include "svga_context.h"
+#include "svga_format.h"
+#include "svga_resource_buffer.h"
+#include "svga_resource_texture.h"
+#include "svga_shader.h"
+#include "svga_state.h"
+#include "svga_sampler_view.h"
+
+
+/**
+ * Return the winsys surface handle of a resource.  The handle is looked
+ * up through svga_buffer() for buffers and svga_texture() otherwise.
+ */
+static INLINE struct svga_winsys_surface *
+svga_resource_handle(struct pipe_resource *res)
+{
+   if (res->target != PIPE_BUFFER)
+      return svga_texture(res)->handle;
+   return svga_buffer(res)->handle;
+}
+
+
+/**
+ * Report whether the given surface backs any sampler view currently bound
+ * to a shader stage (PIPE_SHADER_VERTEX through PIPE_SHADER_GEOMETRY).
+ * Always FALSE when the screen's no_surface_view debug option is set.
+ */
+boolean
+svga_check_sampler_view_resource_collision(struct svga_context *svga,
+                                           struct svga_winsys_surface *res)
+{
+   unsigned stage, slot;
+
+   if (svga_screen(svga->pipe.screen)->debug.no_surface_view)
+      return FALSE;
+
+   for (stage = PIPE_SHADER_VERTEX; stage <= PIPE_SHADER_GEOMETRY; stage++) {
+      for (slot = 0; slot < svga->curr.num_sampler_views[stage]; slot++) {
+         struct svga_pipe_sampler_view *view =
+            svga_pipe_sampler_view(svga->curr.sampler_views[stage][slot]);
+         /* empty slots cannot collide */
+         if (view == NULL)
+            continue;
+         if (svga_resource_handle(view->base.texture) == res)
+            return TRUE;
+      }
+   }
+
+   return FALSE;
+}
+
+
+/**
+ * Create a DX ShaderResourceSamplerView for the given pipe_sampler_view,
+ * if needed, i.e. if the view has no id yet.  When defining the view
+ * fails, the newly allocated id is released and the error is returned.
+ */
+static enum pipe_error
+svga_validate_pipe_sampler_view(struct svga_context *svga,
+                                struct svga_pipe_sampler_view *sv)
+{
+   struct svga_screen *ss;
+   struct pipe_resource *texture;
+   struct svga_winsys_surface *surface;
+   SVGA3dSurfaceFormat format;
+   SVGA3dResourceType resourceDim;
+   SVGA3dShaderResourceViewDesc viewDesc;
+   enum pipe_error ret;
+
+   /* the view has already been defined */
+   if (sv->id != SVGA3D_INVALID_ID)
+      return PIPE_OK;
+
+   ss = svga_screen(svga->pipe.screen);
+   texture = sv->base.texture;
+   surface = svga_resource_handle(texture);
+   format = svga_translate_format(ss, sv->base.format,
+                                  PIPE_BIND_SAMPLER_VIEW);
+   assert(format != SVGA3D_FORMAT_INVALID);
+
+   if (texture->target == PIPE_BUFFER) {
+      viewDesc.buffer.firstElement = sv->base.u.buf.first_element;
+      viewDesc.buffer.numElements = (sv->base.u.buf.last_element -
+                                     sv->base.u.buf.first_element + 1);
+   }
+   else {
+      viewDesc.tex.mostDetailedMip = sv->base.u.tex.first_level;
+      viewDesc.tex.firstArraySlice = sv->base.u.tex.first_layer;
+      viewDesc.tex.mipLevels = (sv->base.u.tex.last_level -
+                                sv->base.u.tex.first_level + 1);
+   }
+
+   /* arraySize in viewDesc specifies the number of array slices in a
+    * texture array. For 3D texture, last_layer in
+    * pipe_sampler_view specifies the last slice of the texture
+    * which is different from the last slice in a texture array,
+    * hence we need to set arraySize to 1 explicitly.
+    */
+   if (texture->target == PIPE_TEXTURE_3D || texture->target == PIPE_BUFFER)
+      viewDesc.tex.arraySize = 1;
+   else
+      viewDesc.tex.arraySize =
+         (sv->base.u.tex.last_layer - sv->base.u.tex.first_layer + 1);
+
+   switch (texture->target) {
+   case PIPE_BUFFER:
+      resourceDim = SVGA3D_RESOURCE_BUFFER;
+      break;
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+      resourceDim = SVGA3D_RESOURCE_TEXTURE1D;
+      break;
+   case PIPE_TEXTURE_RECT:
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_2D_ARRAY:
+      resourceDim = SVGA3D_RESOURCE_TEXTURE2D;
+      break;
+   case PIPE_TEXTURE_3D:
+      resourceDim = SVGA3D_RESOURCE_TEXTURE3D;
+      break;
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      resourceDim = SVGA3D_RESOURCE_TEXTURECUBE;
+      break;
+   default:
+      assert(!"Unexpected texture type");
+      resourceDim = SVGA3D_RESOURCE_TEXTURE2D;
+   }
+
+   sv->id = util_bitmask_add(svga->sampler_view_id_bm);
+   ret = SVGA3D_vgpu10_DefineShaderResourceView(svga->swc, sv->id, surface,
+                                                format, resourceDim, &viewDesc);
+   if (ret != PIPE_OK) {
+      util_bitmask_clear(svga->sampler_view_id_bm, sv->id);
+      sv->id = SVGA3D_INVALID_ID;
+   }
+
+   return ret;
+}
+
+
+static enum pipe_error
+update_sampler_resources(struct svga_context *svga, unsigned dirty)
+{  /* Emit the sampler view (shader resource) bindings of every stage. */
+   enum pipe_error ret = PIPE_OK;
+   unsigned shader;
+
+   if (!svga_have_vgpu10(svga))
+      return PIPE_OK;   /* nothing is emitted here without VGPU10 */
+
+   for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
+      SVGA3dShaderResourceViewId ids[PIPE_MAX_SAMPLERS];
+      struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS];
+      unsigned count;
+      unsigned nviews;
+      unsigned i;
+
+      count = svga->curr.num_sampler_views[shader];
+      for (i = 0; i < count; i++) {
+         struct svga_pipe_sampler_view *sv =
+            svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]);
+         struct svga_winsys_surface *surface;
+
+         if (sv) {
+            surface = svga_resource_handle(sv->base.texture);
+
+            ret = svga_validate_pipe_sampler_view(svga, sv);
+            if (ret != PIPE_OK)
+               return ret;
+
+            assert(sv->id != SVGA3D_INVALID_ID);
+            ids[i] = sv->id;
+         }
+         else {
+            surface = NULL;
+            ids[i] = SVGA3D_INVALID_ID;
+         }
+         surfaces[i] = surface;
+      }
+
+      for (; i < Elements(ids); i++) {   /* mark the remaining slots unbound */
+         ids[i] = SVGA3D_INVALID_ID;
+         surfaces[i] = NULL;
+      }
+
+      if (shader == PIPE_SHADER_FRAGMENT) {
+         /* Handle polygon stipple sampler view */
+         if (svga->curr.rast->templ.poly_stipple_enable) {
+            const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+            struct svga_pipe_sampler_view *sv =
+               svga->polygon_stipple.sampler_view;
+
+            assert(sv);
+            if (!sv) {
+               return PIPE_OK;  /* probably out of memory */
+            }
+
+            ret = svga_validate_pipe_sampler_view(svga, sv);
+            if (ret != PIPE_OK)
+               return ret;
+
+            ids[unit] = sv->id;
+            surfaces[unit] = svga_resource_handle(sv->base.texture);
+            count = MAX2(count, unit+1);   /* so the stipple unit is emitted */
+         }
+      }
+
+      /* Number of ShaderResources that need to be modified. This includes
+       * the ones that need to be unbound.
+       */
+      nviews = MAX2(svga->state.hw_draw.num_sampler_views[shader], count);
+      if (nviews > 0) {
+         ret = SVGA3D_vgpu10_SetShaderResources(svga->swc,
+                                                svga_shader_type(shader),
+                                                0, /* startView */
+                                                nviews,
+                                                ids,
+                                                surfaces);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+
+      /* Number of sampler views enabled in the device */
+      svga->state.hw_draw.num_sampler_views[shader] = count;
+   }
+
+   return ret;
+}
+
+
+struct svga_tracked_state svga_hw_sampler_bindings = {
+   "shader resources emit",      /* name of this tracked state */
+   SVGA_NEW_STIPPLE |            /* SVGA_NEW_* bits for this state; presumably */
+   SVGA_NEW_TEXTURE_BINDING,     /* its dirty-flag triggers -- TODO confirm */
+   update_sampler_resources      /* callback taking (svga, dirty) */
+};
+
+
+
+/**
+ * Emit the sampler state ids bound to each shader stage, plus the polygon
+ * stipple sampler when polygon stippling is enabled (VGPU10 only).
+ */
+static enum pipe_error
+update_samplers(struct svga_context *svga, unsigned dirty )
+{
+   enum pipe_error ret = PIPE_OK;
+   unsigned stage;
+
+   /* without VGPU10 there is nothing to emit here */
+   if (!svga_have_vgpu10(svga))
+      return PIPE_OK;
+
+   for (stage = PIPE_SHADER_VERTEX; stage <= PIPE_SHADER_GEOMETRY; stage++) {
+      const unsigned num = svga->curr.num_samplers[stage];
+      SVGA3dSamplerId ids[PIPE_MAX_SAMPLERS];
+      unsigned slot;
+
+      /* collect the ids of the bound samplers; empty slots get INVALID_ID */
+      for (slot = 0; slot < num; slot++) {
+         if (!svga->curr.sampler[stage][slot]) {
+            ids[slot] = SVGA3D_INVALID_ID;
+            continue;
+         }
+         ids[slot] = svga->curr.sampler[stage][slot]->id;
+         assert(ids[slot] != SVGA3D_INVALID_ID);
+      }
+
+      if (num == 0)
+         continue;
+
+      ret = SVGA3D_vgpu10_SetSamplers(svga->swc, num, 0 /* start */,
+                                      svga_shader_type(stage) /* type */, ids);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+
+   /* Handle polygon stipple sampler texture */
+   if (svga->curr.rast->templ.poly_stipple_enable) {
+      const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+      struct svga_sampler_state *sampler = svga->polygon_stipple.sampler;
+
+      assert(sampler);
+      if (sampler == NULL)
+         return PIPE_OK; /* probably out of memory */
+
+      ret = SVGA3D_vgpu10_SetSamplers(svga->swc, 1 /* count */,
+                                      unit /* start */,
+                                      SVGA3D_SHADERTYPE_PS, &sampler->id);
+   }
+
+   return ret;
+}
+
+
+struct svga_tracked_state svga_hw_sampler = {
+   "texture sampler emit",       /* name of this tracked state */
+   (SVGA_NEW_SAMPLER |           /* SVGA_NEW_* bits for this state; presumably */
+    SVGA_NEW_STIPPLE |           /* its dirty-flag triggers -- TODO confirm */
+    SVGA_NEW_TEXTURE_FLAGS),
+   update_samplers               /* callback taking (svga, dirty) */
+};
-- 
cgit v1.2.3


From 8ddf98d671f39df9c97e66f7bbe9d00aefc8707a Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 7 Aug 2015 16:04:03 -0600
Subject: svga: add new svga_state_tgsi_transform.c file

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 .../drivers/svga/svga_state_tgsi_transform.c       | 293 +++++++++++++++++++++
 1 file changed, 293 insertions(+)
 create mode 100644 src/gallium/drivers/svga/svga_state_tgsi_transform.c

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_state_tgsi_transform.c b/src/gallium/drivers/svga/svga_state_tgsi_transform.c
new file mode 100644
index 00000000000..023c5862d27
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_tgsi_transform.c
@@ -0,0 +1,293 @@
+/**********************************************************
+ * Copyright 2014 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_bitmask.h"
+#include "util/u_simple_shaders.h"
+#include "tgsi/tgsi_ureg.h"
+#include "tgsi/tgsi_point_sprite.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "svga_context.h"
+#include "svga_shader.h"
+#include "svga_tgsi.h"
+
+
+/**
+ * Make the given GS the derived current geometry shader and flag the
+ * GS state dirty.  The user-specified GS (curr.user_gs) is left alone.
+ */
+static void
+bind_gs_state(struct svga_context *svga,
+              struct svga_geometry_shader *gs)
+{
+   svga->dirty |= SVGA_NEW_GS;
+   svga->curr.gs = gs;
+}
+
+
+/**
+ * emulate_point_sprite searches the shader variants list to see if there is
+ * a shader variant with a token string that matches the emulation
+ * requirement. If there isn't, then it will use a tgsi utility
+ * tgsi_add_point_sprite to transform the original token string to support
+ * point sprite. A new geometry shader state will be created with the
+ * transformed token string and added to the shader variants list of the
+ * original geometry shader. The new geometry shader state will then be
+ * bound as the current geometry shader.
+ */
+static struct svga_shader *
+emulate_point_sprite(struct svga_context *svga,
+                     struct svga_shader *shader,
+                     const struct tgsi_token *tokens)
+{
+   struct svga_token_key key;
+   struct tgsi_token *new_tokens;
+   const struct tgsi_token *orig_tokens;
+   struct svga_geometry_shader *orig_gs = (struct svga_geometry_shader *)shader;
+   struct svga_geometry_shader *gs = NULL;
+   struct pipe_shader_state templ;   /* tokens + stream_output set below */
+   struct svga_stream_output *streamout = NULL;
+   int pos_out_index = -1;           /* -1: position is not streamed out */
+   int aa_point_coord_index = -1;
+
+   assert(tokens != NULL);
+
+   orig_tokens = tokens;
+
+   /* Create a token key */
+   memset(&key, 0, sizeof key);
+   key.gs.writes_psize = 1;
+   key.gs.sprite_coord_enable = svga->curr.rast->templ.sprite_coord_enable;
+
+   key.gs.sprite_origin_upper_left =
+      !(svga->curr.rast->templ.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT);
+
+   key.gs.aa_point = svga->curr.rast->templ.point_smooth;
+
+   if (orig_gs != NULL) {
+
+      /* Check if the original geometry shader has stream output and
+       * if position is one of the outputs.
+       */
+      streamout = orig_gs->base.stream_output;
+      if (streamout != NULL) {
+         pos_out_index = streamout->pos_out_index;
+         key.gs.point_pos_stream_out = pos_out_index != -1;
+      }
+
+      /* Search the shader lists to see if there is a variant that matches
+       * this token key.
+       */
+      gs = (struct svga_geometry_shader *)
+              svga_search_shader_token_key(&orig_gs->base, &key);
+   }
+
+   /* If there isn't, then call the tgsi utility tgsi_add_point_sprite
+    * to transform the original tokens to support point sprite.
+    * Flip the sprite origin as SVGA3D device only supports an
+    * upper-left origin.
+    */
+   if (!gs) {
+      new_tokens = tgsi_add_point_sprite(orig_tokens,
+                                         key.gs.sprite_coord_enable,
+                                         key.gs.sprite_origin_upper_left,
+                                         key.gs.point_pos_stream_out,
+                                         key.gs.aa_point ?
+                                            &aa_point_coord_index : NULL);
+
+      if (new_tokens == NULL) {
+         /* if no new tokens are generated for whatever reason, just return */
+         return NULL;
+      }
+
+      if (0) {
+         debug_printf("Before tgsi_add_point_sprite ---------------\n");
+         tgsi_dump(orig_tokens, 0);
+         debug_printf("After tgsi_add_point_sprite --------------\n");
+         tgsi_dump(new_tokens, 0);
+      }
+
+      templ.tokens = new_tokens;
+      templ.stream_output.num_outputs = 0;
+
+      if (streamout != NULL) {
+         templ.stream_output = streamout->info;
+         /* The tgsi_add_point_sprite utility adds an extra output
+          * for the original point position for stream output purpose.
+          * We need to replace the position output register index in the
+          * stream output declaration with the new register index.
+          */
+         if (pos_out_index != -1) {
+            assert(orig_gs != NULL);
+            templ.stream_output.output[pos_out_index].register_index =
+               orig_gs->base.info.num_outputs;
+         }
+      }
+
+      /* Create a new geometry shader state with the new tokens */
+      gs = svga->pipe.create_gs_state(&svga->pipe, &templ);
+
+      /* Don't need the token string anymore. There is a local copy
+       * in the shader state.
+       */
+      FREE(new_tokens);
+
+      if (!gs) {
+         return NULL;
+      }
+
+      gs->wide_point = TRUE;
+      gs->aa_point_coord_index = aa_point_coord_index;
+      gs->base.token_key = key;
+      gs->base.parent = &orig_gs->base;   /* NOTE(review): orig_gs NULL here? */
+      gs->base.next = NULL;
+
+      /* Add the new geometry shader to the head of the shader list
+       * pointed to by the original geometry shader.
+       */
+      if (orig_gs != NULL) {
+         gs->base.next = orig_gs->base.next;
+         orig_gs->base.next = &gs->base;
+      }
+   }
+
+   /* Bind the new geometry shader state */
+   bind_gs_state(svga, gs);
+
+   return &gs->base;
+}
+
+/**
+ * Generate a geometry shader that emits a wide point by drawing a quad.
+ * This function first creates a passthrough geometry shader and then
+ * calls emulate_point_sprite() to transform the geometry shader to
+ * support point sprite.  Returns NULL if either step fails.
+ */
+static struct svga_shader *
+add_point_sprite_shader(struct svga_context *svga)
+{
+   struct svga_vertex_shader *vs = svga->curr.vs;
+   struct svga_geometry_shader *orig_gs = vs->gs;
+   struct svga_geometry_shader *new_gs;
+   const struct tgsi_token *tokens;
+
+   if (orig_gs == NULL) {
+
+      /* If this is the first time adding a geometry shader to this
+       * vertex shader to support point sprite, then create
+       * a passthrough geometry shader first.
+       */
+      orig_gs = (struct svga_geometry_shader *)
+                   util_make_geometry_passthrough_shader(
+                      &svga->pipe, vs->base.info.num_outputs,
+                      vs->base.info.output_semantic_name,
+                      vs->base.info.output_semantic_index);
+
+      if (orig_gs == NULL)
+         return NULL;
+   }
+   else {
+      if (orig_gs->base.parent)
+         orig_gs = (struct svga_geometry_shader *)orig_gs->base.parent;
+   }
+   tokens = orig_gs->base.tokens;
+
+   /* Call emulate_point_sprite to find or create a transformed
+    * geometry shader for supporting point sprite.
+    */
+   new_gs = (struct svga_geometry_shader *)
+               emulate_point_sprite(svga, &orig_gs->base, tokens);
+
+   /* If this is the first time creating a geometry shader to
+    * support vertex point size, then add the new geometry shader
+    * to the vertex shader.
+    */
+   if (vs->gs == NULL) {
+      vs->gs = new_gs;
+   }
+
+   /* emulate_point_sprite() may fail; don't take a member address of NULL */
+   return new_gs ? &new_gs->base : NULL;
+}
+
+/* update_tgsi_transform provides a hook to transform a shader if needed.
+ * The helpers called here bind the derived GS themselves (bind_gs_state),
+ * so their return values are not needed.
+ */
+static enum pipe_error
+update_tgsi_transform(struct svga_context *svga, unsigned dirty)
+{
+   struct svga_geometry_shader *gs = svga->curr.user_gs;   /* current gs */
+   struct svga_vertex_shader *vs = svga->curr.vs;     /* currently bound vs */
+   struct svga_shader *orig_gs;                       /* original gs */
+
+   if (!svga_have_vgpu10(svga))
+      return PIPE_OK;
+
+   if (svga->curr.reduced_prim == PIPE_PRIM_POINTS) {
+      /* If the current prim type is POINTS and the current geometry shader
+       * emits wide points, transform the shader to emulate wide points using
+       * quads.
+       */
+      if (gs != NULL && (gs->base.info.writes_psize || gs->wide_point)) {
+         orig_gs = gs->base.parent ? gs->base.parent : &gs->base;
+         (void) emulate_point_sprite(svga, orig_gs, orig_gs->tokens);
+      }
+
+      /* If there is not an active geometry shader and the current vertex
+       * shader emits wide point then create a new geometry shader to emulate
+       * wide point.
+       */
+      else if (gs == NULL &&
+               (svga->curr.rast->pointsize > 1.0 ||
+                vs->base.info.writes_psize)) {
+         (void) add_point_sprite_shader(svga);
+      }
+      else {
+         /* use the user's GS */
+         bind_gs_state(svga, svga->curr.user_gs);
+      }
+   }
+   else if (svga->curr.gs != svga->curr.user_gs) {
+      /* If current primitive type is not POINTS, then make sure
+       * we don't bind to any of the generated geometry shader
+       */
+      bind_gs_state(svga, svga->curr.user_gs);
+   }
+
+   return PIPE_OK;
+}
+
+struct svga_tracked_state svga_need_tgsi_transform =
+{
+   "transform shader for optimization",   /* name of this tracked state */
+   (SVGA_NEW_VS |                 /* SVGA_NEW_* bits for this state; */
+    SVGA_NEW_FS |                 /* presumably its dirty-flag triggers */
+    SVGA_NEW_GS |                 /* -- TODO confirm */
+    SVGA_NEW_REDUCED_PRIMITIVE |
+    SVGA_NEW_RAST),
+   update_tgsi_transform          /* callback taking (svga, dirty) */
+};
-- 
cgit v1.2.3


From e8c20d97eb711babcb4f54cc1ac174eaacc0b8a0 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 7 Aug 2015 15:23:51 -0600
Subject: svga: add new svga_streamout.h file

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/svga/svga_streamout.h | 50 +++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 src/gallium/drivers/svga/svga_streamout.h

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_streamout.h b/src/gallium/drivers/svga/svga_streamout.h
new file mode 100644
index 00000000000..da0c4457d2e
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_streamout.h
@@ -0,0 +1,50 @@
+/**********************************************************
+ * Copyright 2014 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_STREAMOUT_H
+#define SVGA_STREAMOUT_H
+
+struct svga_shader;
+/* NOTE(review): struct svga_context (used by the prototypes below) is never forward-declared here. */
+struct svga_stream_output {
+   struct pipe_stream_output_info info;     // by value, so it needs the full type; this header includes nothing
+   unsigned pos_out_index;                  // position output index
+   unsigned id;                             // NOTE(review): presumably a device-side object id -- confirm
+};
+
+struct svga_stream_output *
+svga_create_stream_output(struct svga_context *svga,
+                          struct svga_shader *shader,
+                          const struct pipe_stream_output_info *info);
+
+enum pipe_error
+svga_set_stream_output(struct svga_context *svga,
+                       struct svga_stream_output *streamout);
+
+void
+svga_delete_stream_output(struct svga_context *svga,
+                          struct svga_stream_output *streamout);
+
+#endif /* SVGA_STREAMOUT_H */
-- 
cgit v1.2.3


From 656dac120d27e060986d97e790334f67a34a6e23 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 7 Aug 2015 15:41:17 -0600
Subject: svga: add new version 10 device command prototypes

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/svga/svga_cmd.h | 333 ++++++++++++++++++++++++++++++++++++
 1 file changed, 333 insertions(+)

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_cmd.h b/src/gallium/drivers/svga/svga_cmd.h
index 2348aa65c1d..271ee8ed7ad 100644
--- a/src/gallium/drivers/svga/svga_cmd.h
+++ b/src/gallium/drivers/svga/svga_cmd.h
@@ -47,6 +47,7 @@ struct svga_winsys_context;
 struct svga_winsys_buffer;
 struct svga_winsys_surface;
 struct svga_winsys_gb_shader;
+struct svga_winsys_gb_query;
 
 
 /*
@@ -305,4 +306,336 @@ SVGA3D_WaitForQuery(struct svga_winsys_context *swc,
                     SVGA3dQueryType type,
                     struct svga_winsys_buffer *buffer);
 
+
+
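+/* VGPU10 commands.
+ * Each prototype below returns enum pipe_error and takes the
+ * struct svga_winsys_context as its first argument. */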
+
+enum pipe_error
+SVGA3D_vgpu10_PredCopyRegion(struct svga_winsys_context *swc,
+                             struct svga_winsys_surface *dstSurf,
+                             uint32 dstSubResource,
+                             struct svga_winsys_surface *srcSurf,
+                             uint32 srcSubResource,
+                             const SVGA3dCopyBox *box);
+
+enum pipe_error
+SVGA3D_vgpu10_PredCopy(struct svga_winsys_context *swc,
+                       struct svga_winsys_surface *dstSurf,
+                       struct svga_winsys_surface *srcSurf);
+
+enum pipe_error
+SVGA3D_vgpu10_SetViewports(struct svga_winsys_context *swc,
+                           unsigned count, const SVGA3dViewport *viewports);
+
+enum pipe_error
+SVGA3D_vgpu10_SetShader(struct svga_winsys_context *swc,
+                        SVGA3dShaderType type,
+                        struct svga_winsys_gb_shader *gbshader,
+                        SVGA3dShaderId shaderId);
+
+enum pipe_error
+SVGA3D_vgpu10_SetShaderResources(struct svga_winsys_context *swc,
+                                 SVGA3dShaderType type,
+                                 uint32 startView,
+                                 unsigned count,
+                                 const SVGA3dShaderResourceViewId ids[],
+                                 struct svga_winsys_surface **views);
+
+enum pipe_error
+SVGA3D_vgpu10_SetSamplers(struct svga_winsys_context *swc,
+                          unsigned count,         /* NOTE(review): order here is count, start, type, whereas */
+                          uint32 startSampler,    /* SVGA3D_vgpu10_SetShaderResources takes type, start, count */
+                          SVGA3dShaderType type,
+                          const SVGA3dSamplerId *samplerIds);
+
+enum pipe_error
+SVGA3D_vgpu10_SetRenderTargets(struct svga_winsys_context *swc,
+                               unsigned color_count,
+                               struct pipe_surface **color_surfs,
+                               struct pipe_surface *depth_stencil_surf);
+
+enum pipe_error
+SVGA3D_vgpu10_SetBlendState(struct svga_winsys_context *swc,
+                            SVGA3dBlendStateId blendId,
+                            const float *blendFactor, uint32 sampleMask);
+
+enum pipe_error
+SVGA3D_vgpu10_SetDepthStencilState(struct svga_winsys_context *swc,
+                                   SVGA3dDepthStencilStateId depthStencilId,
+                                   uint32 stencilRef);
+
+enum pipe_error
+SVGA3D_vgpu10_SetRasterizerState(struct svga_winsys_context *swc,
+                                 SVGA3dRasterizerStateId rasterizerId);
+
+enum pipe_error
+SVGA3D_vgpu10_SetPredication(struct svga_winsys_context *swc,
+                             SVGA3dQueryId queryId,
+                             uint32 predicateValue);
+
+enum pipe_error
+SVGA3D_vgpu10_SetSOTargets(struct svga_winsys_context *swc,
+                           unsigned count, const SVGA3dSoTarget *targets,
+                           struct svga_winsys_surface **surfaces);
+
+enum pipe_error
+SVGA3D_vgpu10_SetScissorRects(struct svga_winsys_context *swc,
+                              unsigned count,
+                              const SVGASignedRect *rects);
+
+enum pipe_error
+SVGA3D_vgpu10_SetStreamOutput(struct svga_winsys_context *swc,
+                              SVGA3dStreamOutputId soid);
+
+enum pipe_error
+SVGA3D_vgpu10_Draw(struct svga_winsys_context *swc,
+                   uint32 vertexCount, uint32 startVertexLocation);
+
+enum pipe_error
+SVGA3D_vgpu10_DrawIndexed(struct svga_winsys_context *swc,
+                          uint32 indexCount, uint32 startIndexLocation,
+                          int32 baseVertexLocation);
+
+enum pipe_error
+SVGA3D_vgpu10_DrawInstanced(struct svga_winsys_context *swc,
+                            uint32 vertexCountPerInstance,
+                            uint32 instanceCount,
+                            uint32 startVertexLocation,
+                            uint32 startInstanceLocation);
+
+enum pipe_error
+SVGA3D_vgpu10_DrawIndexedInstanced(struct svga_winsys_context *swc,
+                                   uint32 indexCountPerInstance,
+                                   uint32 instanceCount,
+                                   uint32 startIndexLocation,
+                                   int32  baseVertexLocation,
+                                   uint32 startInstanceLocation);
+
+enum pipe_error
+SVGA3D_vgpu10_DrawAuto(struct svga_winsys_context *swc);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineQuery(struct svga_winsys_context *swc,
+                          SVGA3dQueryId queryId,
+                          SVGA3dQueryType type,
+                          SVGA3dDXQueryFlags flags);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyQuery(struct svga_winsys_context *swc,
+                           SVGA3dQueryId queryId);
+
+enum pipe_error
+SVGA3D_vgpu10_BindQuery(struct svga_winsys_context *swc,
+                        struct svga_winsys_gb_query *gbQuery,
+                        SVGA3dQueryId queryId);
+
+enum pipe_error
+SVGA3D_vgpu10_SetQueryOffset(struct svga_winsys_context *swc,
+                             SVGA3dQueryId queryId,
+                             uint32 mobOffset);
+
+enum pipe_error
+SVGA3D_vgpu10_BeginQuery(struct svga_winsys_context *swc,
+                         SVGA3dQueryId queryId);
+
+enum pipe_error
+SVGA3D_vgpu10_EndQuery(struct svga_winsys_context *swc,
+                       SVGA3dQueryId queryId);
+
+enum pipe_error
+SVGA3D_vgpu10_ClearRenderTargetView(struct svga_winsys_context *swc,
+                                    struct pipe_surface *color_surf,
+                                    const float *rgba);
+
+enum pipe_error
+SVGA3D_vgpu10_ClearDepthStencilView(struct svga_winsys_context *swc,
+                                    struct pipe_surface *ds_surf,
+                                    uint16 flags, uint16 stencil, float depth);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineShaderResourceView(struct svga_winsys_context *swc,
+                             SVGA3dShaderResourceViewId shaderResourceViewId,
+                             struct svga_winsys_surface *surf,
+                             SVGA3dSurfaceFormat format,
+                             SVGA3dResourceType resourceDimension,
+                             const SVGA3dShaderResourceViewDesc *desc);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyShaderResourceView(struct svga_winsys_context *swc,
+                            SVGA3dShaderResourceViewId shaderResourceViewId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineRenderTargetView(struct svga_winsys_context *swc,
+                                  SVGA3dRenderTargetViewId renderTargetViewId,
+                                  struct svga_winsys_surface *surface,
+                                  SVGA3dSurfaceFormat format,
+                                  SVGA3dResourceType resourceDimension,
+                                  const SVGA3dRenderTargetViewDesc *desc);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyRenderTargetView(struct svga_winsys_context *swc,
+                                SVGA3dRenderTargetViewId renderTargetViewId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineDepthStencilView(struct svga_winsys_context *swc,
+                                  SVGA3dDepthStencilViewId depthStencilViewId,
+                                  struct svga_winsys_surface *surface,
+                                  SVGA3dSurfaceFormat format,
+                                  SVGA3dResourceType resourceDimension,
+                                  const SVGA3dRenderTargetViewDesc *desc); /* NOTE(review): same desc type as DefineRenderTargetView -- confirm intended */
+
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyDepthStencilView(struct svga_winsys_context *swc,
+                                SVGA3dDepthStencilViewId depthStencilViewId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineElementLayout(struct svga_winsys_context *swc,
+                               unsigned count,
+                               SVGA3dElementLayoutId elementLayoutId,
+                               const SVGA3dInputElementDesc *elements);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyElementLayout(struct svga_winsys_context *swc,
+                                   SVGA3dElementLayoutId elementLayoutId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineBlendState(struct svga_winsys_context *swc,
+                               SVGA3dBlendStateId blendId,
+                               uint8 alphaToCoverageEnable,
+                               uint8 independentBlendEnable,
+                               const SVGA3dDXBlendStatePerRT *perRT);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyBlendState(struct svga_winsys_context *swc,
+                                SVGA3dBlendStateId blendId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineDepthStencilState(struct svga_winsys_context *swc,
+                                      SVGA3dDepthStencilStateId depthStencilId,
+                                      uint8 depthEnable,
+                                      SVGA3dDepthWriteMask depthWriteMask,
+                                      SVGA3dComparisonFunc depthFunc,
+                                      uint8 stencilEnable,
+                                      uint8 frontEnable,
+                                      uint8 backEnable,
+                                      uint8 stencilReadMask,
+                                      uint8 stencilWriteMask,
+                                      uint8 frontStencilFailOp,
+                                      uint8 frontStencilDepthFailOp,
+                                      uint8 frontStencilPassOp,
+                                      SVGA3dComparisonFunc frontStencilFunc,
+                                      uint8 backStencilFailOp,
+                                      uint8 backStencilDepthFailOp,
+                                      uint8 backStencilPassOp,
+                                      SVGA3dComparisonFunc backStencilFunc);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyDepthStencilState(struct svga_winsys_context *swc,
+                                       SVGA3dDepthStencilStateId depthStencilId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineRasterizerState(struct svga_winsys_context *swc,
+                                    SVGA3dRasterizerStateId rasterizerId,
+                                    uint8 fillMode,
+                                    SVGA3dCullMode cullMode,
+                                    uint8 frontCounterClockwise,
+                                    int32 depthBias,
+                                    float depthBiasClamp,
+                                    float slopeScaledDepthBias,
+                                    uint8 depthClipEnable,
+                                    uint8 scissorEnable,
+                                    uint8 multisampleEnable,
+                                    uint8 antialiasedLineEnable,
+                                    float lineWidth,
+                                    uint8 lineStippleEnable,
+                                    uint8 lineStippleFactor,
+                                    uint16 lineStipplePattern,
+                                    uint8 provokingVertexLast);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyRasterizerState(struct svga_winsys_context *swc,
+                                     SVGA3dRasterizerStateId rasterizerId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineSamplerState(struct svga_winsys_context *swc,
+                                 SVGA3dSamplerId samplerId,
+                                 SVGA3dFilter filter,
+                                 uint8 addressU,
+                                 uint8 addressV,
+                                 uint8 addressW,
+                                 float mipLODBias,
+                                 uint8 maxAnisotropy,
+                                 uint8 comparisonFunc,
+                                 SVGA3dRGBAFloat borderColor,
+                                 float minLOD,
+                                 float maxLOD);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroySamplerState(struct svga_winsys_context *swc,
+                                  SVGA3dSamplerId samplerId);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyShader(struct svga_winsys_context *swc,
+                            SVGA3dShaderId shaderId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineAndBindShader(struct svga_winsys_context *swc,
+                                  struct svga_winsys_gb_shader *gbshader,
+                                  SVGA3dShaderId shaderId,
+                                  SVGA3dShaderType type,
+                                  uint32 sizeInBytes);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineStreamOutput(struct svga_winsys_context *swc,
+      SVGA3dStreamOutputId soid,
+      uint32 numOutputStreamEntries,
+      uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS], /* adjusted to uint32 *: bound not enforced; non-const, unlike decl */
+      const SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS]); /* likewise adjusted to a pointer to const */
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyStreamOutput(struct svga_winsys_context *swc,
+                                  SVGA3dStreamOutputId soid);
+
+enum pipe_error
+SVGA3D_vgpu10_ReadbackSubResource(struct svga_winsys_context *swc,
+                                  struct svga_winsys_surface *surface,
+                                  unsigned subResource);
+
+enum pipe_error
+SVGA3D_vgpu10_SetInputLayout(struct svga_winsys_context *swc,
+                             SVGA3dElementLayoutId elementLayoutId);
+
+enum pipe_error
+SVGA3D_vgpu10_SetVertexBuffers(struct svga_winsys_context *swc,
+                               unsigned count,
+                               uint32 startBuffer,
+                               const SVGA3dVertexBuffer *bufferInfo,
+                               struct svga_winsys_surface **surfaces);
+
+enum pipe_error
+SVGA3D_vgpu10_SetTopology(struct svga_winsys_context *swc,
+                          SVGA3dPrimitiveType topology);
+
+enum pipe_error
+SVGA3D_vgpu10_SetIndexBuffer(struct svga_winsys_context *swc,
+                             struct svga_winsys_surface *indexes,
+                             SVGA3dSurfaceFormat format, uint32 offset);
+
+enum pipe_error
+SVGA3D_vgpu10_SetSingleConstantBuffer(struct svga_winsys_context *swc,
+                                      unsigned slot,
+                                      SVGA3dShaderType type,
+                                      struct svga_winsys_surface *surface,
+                                      uint32 offsetInBytes,
+                                      uint32 sizeInBytes);
+
+enum pipe_error
+SVGA3D_vgpu10_UpdateSubResource(struct svga_winsys_context *swc,
+                                struct svga_winsys_surface *surface,
+                                const SVGA3dBox *box,
+                                unsigned subResource);
+
 #endif /* __SVGA3D_H__ */
-- 
cgit v1.2.3


From e054251ed11e25a080f64b92db9334c9b07c8c76 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 13 Aug 2015 11:00:58 -0700
Subject: svga: update driver for version 10 GPU interface

This is a squash commit of roughly two years of development work.
Authors include:
  Brian Paul
  Charmaine Lee
  Thomas Hellstrom
  Jakob Bornecrantz
  Sinclair Yeh
  Mingcheng Chen
  Kai Ninomiya
  MengLin Wu

The driver supports OpenGL 3.3.

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/svga/Makefile.sources          |   29 +-
 src/gallium/drivers/svga/svga_cmd.c                |   16 +-
 src/gallium/drivers/svga/svga_context.c            |  218 +-
 src/gallium/drivers/svga/svga_context.h            |  250 ++-
 src/gallium/drivers/svga/svga_debug.h              |   31 +-
 src/gallium/drivers/svga/svga_draw.c               |  597 ++++--
 src/gallium/drivers/svga/svga_draw.h               |   66 +-
 src/gallium/drivers/svga/svga_draw_arrays.c        |   22 +-
 src/gallium/drivers/svga/svga_draw_elements.c      |   28 +-
 src/gallium/drivers/svga/svga_draw_private.h       |   44 +-
 src/gallium/drivers/svga/svga_format.c             | 2075 ++++++++++++++++----
 src/gallium/drivers/svga/svga_format.h             |   40 +
 src/gallium/drivers/svga/svga_pipe_blend.c         |  190 +-
 src/gallium/drivers/svga/svga_pipe_blit.c          |   28 +-
 src/gallium/drivers/svga/svga_pipe_clear.c         |  135 +-
 src/gallium/drivers/svga/svga_pipe_constants.c     |   42 +-
 src/gallium/drivers/svga/svga_pipe_depthstencil.c  |  128 +-
 src/gallium/drivers/svga/svga_pipe_draw.c          |  111 +-
 src/gallium/drivers/svga/svga_pipe_fs.c            |   33 +-
 src/gallium/drivers/svga/svga_pipe_gs.c            |    6 +-
 src/gallium/drivers/svga/svga_pipe_misc.c          |   73 +-
 src/gallium/drivers/svga/svga_pipe_query.c         |  992 ++++++++--
 src/gallium/drivers/svga/svga_pipe_rasterizer.c    |  158 +-
 src/gallium/drivers/svga/svga_pipe_sampler.c       |  231 ++-
 src/gallium/drivers/svga/svga_pipe_streamout.c     |    2 +-
 src/gallium/drivers/svga/svga_pipe_vertex.c        |  249 ++-
 src/gallium/drivers/svga/svga_pipe_vs.c            |   60 +-
 src/gallium/drivers/svga/svga_resource.c           |   11 +-
 src/gallium/drivers/svga/svga_resource_buffer.c    |   58 +-
 src/gallium/drivers/svga/svga_resource_buffer.h    |    5 +
 .../drivers/svga/svga_resource_buffer_upload.c     |   98 +-
 src/gallium/drivers/svga/svga_resource_texture.c   |  507 +++--
 src/gallium/drivers/svga/svga_resource_texture.h   |   54 +-
 src/gallium/drivers/svga/svga_sampler_view.c       |    9 +-
 src/gallium/drivers/svga/svga_sampler_view.h       |    4 +
 src/gallium/drivers/svga/svga_screen.c             |  441 +++--
 src/gallium/drivers/svga/svga_screen.h             |    6 +-
 src/gallium/drivers/svga/svga_screen_cache.c       |   95 +-
 src/gallium/drivers/svga/svga_screen_cache.h       |    8 +-
 src/gallium/drivers/svga/svga_shader.c             |  463 ++++-
 src/gallium/drivers/svga/svga_shader.h             |  278 ++-
 src/gallium/drivers/svga/svga_state.c              |   74 +-
 src/gallium/drivers/svga/svga_state.h              |   10 +
 src/gallium/drivers/svga/svga_state_constants.c    |  588 ++++--
 src/gallium/drivers/svga/svga_state_framebuffer.c  |  340 +++-
 src/gallium/drivers/svga/svga_state_fs.c           |  298 +--
 src/gallium/drivers/svga/svga_state_need_swtnl.c   |    3 +-
 src/gallium/drivers/svga/svga_state_rss.c          |  162 +-
 src/gallium/drivers/svga/svga_state_sampler.c      |   10 +-
 src/gallium/drivers/svga/svga_state_tss.c          |  291 ++-
 src/gallium/drivers/svga/svga_state_vdecl.c        |   47 +-
 src/gallium/drivers/svga/svga_state_vs.c           |  299 ++-
 src/gallium/drivers/svga/svga_surface.c            |  419 +++-
 src/gallium/drivers/svga/svga_surface.h            |   14 +-
 src/gallium/drivers/svga/svga_swtnl_backend.c      |   74 +-
 src/gallium/drivers/svga/svga_swtnl_draw.c         |   33 +-
 src/gallium/drivers/svga/svga_swtnl_private.h      |    4 +
 src/gallium/drivers/svga/svga_swtnl_state.c        |  169 +-
 src/gallium/drivers/svga/svga_tgsi.c               |  145 +-
 src/gallium/drivers/svga/svga_tgsi.h               |  125 +-
 src/gallium/drivers/svga/svga_tgsi_decl_sm30.c     |   16 +-
 src/gallium/drivers/svga/svga_tgsi_emit.h          |    3 +
 src/gallium/drivers/svga/svga_tgsi_insn.c          |  105 +-
 src/gallium/drivers/svga/svga_tgsi_vgpu10.c        |    4 +-
 64 files changed, 8705 insertions(+), 2419 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/svga/Makefile.sources b/src/gallium/drivers/svga/Makefile.sources
index 276e6a8e204..0bd4f289781 100644
--- a/src/gallium/drivers/svga/Makefile.sources
+++ b/src/gallium/drivers/svga/Makefile.sources
@@ -1,6 +1,7 @@
 C_SOURCES := \
 	svga_cmd.c \
 	svga_cmd.h \
+	svga_cmd_vgpu10.c \
 	svga_context.c \
 	svga_context.h \
 	svga_debug.h \
@@ -12,6 +13,7 @@ C_SOURCES := \
 	svga_format.c \
 	svga_format.h \
 	svga_hw_reg.h \
+	svga_link.c \
 	svga_pipe_blend.c \
 	svga_pipe_blit.c \
 	svga_pipe_clear.c \
@@ -20,10 +22,12 @@ C_SOURCES := \
 	svga_pipe_draw.c \
 	svga_pipe_flush.c \
 	svga_pipe_fs.c \
+	svga_pipe_gs.c \
 	svga_pipe_misc.c \
 	svga_pipe_query.c \
 	svga_pipe_rasterizer.c \
 	svga_pipe_sampler.c \
+        svga_pipe_streamout.c \
 	svga_pipe_vertex.c \
 	svga_pipe_vs.c \
 	svga_public.h \
@@ -44,14 +48,17 @@ C_SOURCES := \
 	svga_shader.c \
 	svga_shader.h \
 	svga_state.c \
+	svga_state.h \
 	svga_state_constants.c \
 	svga_state_framebuffer.c \
-	svga_state_fs.c \
-	svga_state.h \
 	svga_state_need_swtnl.c \
 	svga_state_rss.c \
 	svga_state_tss.c \
 	svga_state_vdecl.c \
+	svga_state_sampler.c \
+	svga_state_fs.c \
+	svga_state_gs.c \
+	svga_state_tgsi_transform.c \
 	svga_state_vs.c \
 	svga_surface.c \
 	svga_surface.h \
@@ -65,6 +72,7 @@ C_SOURCES := \
 	svga_tgsi_emit.h \
 	svga_tgsi.h \
 	svga_tgsi_insn.c \
+	svga_tgsi_vgpu10.c \
 	svga_winsys.h \
 	\
 	svgadump/svga_dump.c \
@@ -74,20 +82,3 @@ C_SOURCES := \
 	svgadump/svga_shader.h \
 	svgadump/svga_shader_op.c \
 	svgadump/svga_shader_op.h
-
-SVGA_H_FILES := \
-	include/includeCheck.h \
-	include/svga3d_caps.h \
-	include/svga3d_cmd.h \
-	include/svga3d_devcaps.h \
-	include/svga3d_limits.h \
-	include/svga3d_reg.h \
-	include/svga3d_shaderdefs.h \
-	include/svga3d_surfacedefs.h \
-	include/svga3d_types.h \
-	include/svga_escape.h \
-	include/svga_overlay.h \
-	include/svga_reg.h \
-	include/svga_types.h \
-	include/vmware_pack_begin.h \
-	include/vmware_pack_end.h
diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c
index 1f64980a8ef..d3cf52f08e2 100644
--- a/src/gallium/drivers/svga/svga_cmd.c
+++ b/src/gallium/drivers/svga/svga_cmd.c
@@ -66,7 +66,7 @@ surface_to_surfaceid(struct svga_winsys_context *swc, // IN
    if (surface) {
       struct svga_surface *s = svga_surface(surface);
       swc->surface_relocation(swc, &id->sid, NULL, s->handle, flags);
-      id->face = s->real_face; /* faces have the same order */
+      id->face = s->real_layer; /* faces have the same order */
       id->mipmap = s->real_level;
    }
    else {
@@ -460,7 +460,7 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc,
 
    swc->surface_relocation(swc, &cmd->host.sid, NULL,
                            texture->handle, surface_flags);
-   cmd->host.face = st->face; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */
+   cmd->host.face = st->slice; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */
    cmd->host.mipmap = st->base.level;
 
    cmd->transfer = transfer;
@@ -842,6 +842,8 @@ SVGA3D_SetShader(struct svga_winsys_context *swc,
 {
    SVGA3dCmdSetShader *cmd;
 
+   assert(type == SVGA3D_SHADERTYPE_VS || type == SVGA3D_SHADERTYPE_PS);
+
    cmd = SVGA3D_FIFOReserve(swc,
                             SVGA_3D_CMD_SET_SHADER, sizeof *cmd,
                             0);
@@ -1385,7 +1387,7 @@ SVGA3D_BeginGBQuery(struct svga_winsys_context *swc,
    if(!cmd)
       return PIPE_ERROR_OUT_OF_MEMORY;
 
-   swc->context_relocation(swc, &cmd->cid);
+   cmd->cid = swc->cid;
    cmd->type = type;
 
    swc->commit(swc);
@@ -1465,7 +1467,7 @@ SVGA3D_EndGBQuery(struct svga_winsys_context *swc,
    if(!cmd)
       return PIPE_ERROR_OUT_OF_MEMORY;
 
-   swc->context_relocation(swc, &cmd->cid);
+   cmd->cid = swc->cid;
    cmd->type = type;
 
    swc->mob_relocation(swc, &cmd->mobid, &cmd->offset, buffer,
@@ -1552,7 +1554,7 @@ SVGA3D_WaitForGBQuery(struct svga_winsys_context *swc,
    if(!cmd)
       return PIPE_ERROR_OUT_OF_MEMORY;
 
-   swc->context_relocation(swc, &cmd->cid);
+   cmd->cid = swc->cid;
    cmd->type = type;
 
    swc->mob_relocation(swc, &cmd->mobid, &cmd->offset, buffer,
@@ -1642,6 +1644,8 @@ SVGA3D_SetGBShader(struct svga_winsys_context *swc,
                    struct svga_winsys_gb_shader *gbshader)
 {
    SVGA3dCmdSetShader *cmd;
+
+   assert(type == SVGA3D_SHADERTYPE_VS || type == SVGA3D_SHADERTYPE_PS);
    
    cmd = SVGA3D_FIFOReserve(swc,
                             SVGA_3D_CMD_SET_SHADER,
@@ -1650,7 +1654,7 @@ SVGA3D_SetGBShader(struct svga_winsys_context *swc,
    if (!cmd)
       return PIPE_ERROR_OUT_OF_MEMORY;
    
-   swc->context_relocation(swc, &cmd->cid);
+   cmd->cid = swc->cid;
    cmd->type = type;
    if (gbshader)
       swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader, 0);
diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
index 673d17ad4a7..2bf795de22d 100644
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -30,6 +30,7 @@
 #include "pipe/p_screen.h"
 #include "util/u_memory.h"
 #include "util/u_bitmask.h"
+#include "util/u_upload_mgr.h"
 
 #include "svga_context.h"
 #include "svga_screen.h"
@@ -42,6 +43,10 @@
 #include "svga_draw.h"
 #include "svga_debug.h"
 #include "svga_state.h"
+#include "svga_winsys.h"
+
+#define CONST0_UPLOAD_DEFAULT_SIZE 65536
+#define CONST0_UPLOAD_ALIGNMENT 256
 
 DEBUG_GET_ONCE_BOOL_OPTION(no_swtnl, "SVGA_NO_SWTNL", FALSE)
 DEBUG_GET_ONCE_BOOL_OPTION(force_swtnl, "SVGA_FORCE_SWTNL", FALSE);
@@ -53,27 +58,67 @@ DEBUG_GET_ONCE_BOOL_OPTION(force_hw_line_stipple, "SVGA_FORCE_HW_LINE_STIPPLE",
 static void svga_destroy( struct pipe_context *pipe )
 {
    struct svga_context *svga = svga_context( pipe );
-   struct svga_winsys_screen *sws = svga_screen(pipe->screen)->sws;
-   unsigned shader;
+   unsigned shader, i;
+
+   /* free any alternate rasterizer states used for point sprite */
+   for (i = 0; i < Elements(svga->rasterizer_no_cull); i++) {
+      if (svga->rasterizer_no_cull[i]) {
+         pipe->delete_rasterizer_state(pipe, svga->rasterizer_no_cull[i]);
+      }
+   }
+
+   /* free polygon stipple state */
+   if (svga->polygon_stipple.sampler) {
+      pipe->delete_sampler_state(pipe, svga->polygon_stipple.sampler);
+   }
+   if (svga->polygon_stipple.sampler_view) {
+      pipe->sampler_view_destroy(pipe,
+                                 &svga->polygon_stipple.sampler_view->base);
+   }
+   pipe_resource_reference(&svga->polygon_stipple.texture, NULL);
+
+   /* free HW constant buffers */
+   for (shader = 0; shader < Elements(svga->state.hw_draw.constbuf); shader++) {
+      pipe_resource_reference(&svga->state.hw_draw.constbuf[shader], NULL);
+   }
+
+   pipe->delete_blend_state(pipe, svga->noop_blend);
+
+   /* free query gb object */
+   if (svga->gb_query) {
+      pipe->destroy_query(pipe, NULL);
+      svga->gb_query = NULL;
+   }
 
    util_blitter_destroy(svga->blitter);
 
    svga_cleanup_framebuffer( svga );
    svga_cleanup_tss_binding( svga );
 
-   svga_hwtnl_destroy( svga->hwtnl );
-
    svga_cleanup_vertex_state(svga);
    
-   svga->swc->destroy(svga->swc);
-   
    svga_destroy_swtnl( svga );
+   svga_hwtnl_destroy( svga->hwtnl );
 
-   util_bitmask_destroy( svga->shader_id_bm );
+   svga->swc->destroy(svga->swc);
 
+   util_bitmask_destroy(svga->blend_object_id_bm);
+   util_bitmask_destroy(svga->ds_object_id_bm);
+   util_bitmask_destroy(svga->input_element_object_id_bm);
+   util_bitmask_destroy(svga->rast_object_id_bm);
+   util_bitmask_destroy(svga->sampler_object_id_bm);
+   util_bitmask_destroy(svga->sampler_view_id_bm);
+   util_bitmask_destroy(svga->shader_id_bm);
+   util_bitmask_destroy(svga->surface_view_id_bm);
+   util_bitmask_destroy(svga->stream_output_id_bm);
+   util_bitmask_destroy(svga->query_id_bm);
+   u_upload_destroy(svga->const0_upload);
+
+   /* free user's constant buffers */
    for (shader = 0; shader < PIPE_SHADER_TYPES; ++shader) {
-      pipe_resource_reference( &svga->curr.cbufs[shader].buffer, NULL );
-      sws->surface_reference(sws, &svga->state.hw_draw.hw_cb[shader], NULL);
+      for (i = 0; i < Elements(svga->curr.constbufs[shader]); ++i) {
+         pipe_resource_reference(&svga->curr.constbufs[shader][i].buffer, NULL);
+      }
    }
 
    FREE( svga );
@@ -90,7 +135,7 @@ struct pipe_context *svga_context_create(struct pipe_screen *screen,
 
    svga = CALLOC_STRUCT(svga_context);
    if (svga == NULL)
-      goto no_svga;
+      goto cleanup;
 
    LIST_INITHEAD(&svga->dirty_buffers);
 
@@ -100,8 +145,8 @@ struct pipe_context *svga_context_create(struct pipe_screen *screen,
    svga->pipe.clear = svga_clear;
 
    svga->swc = svgascreen->sws->context_create(svgascreen->sws);
-   if(!svga->swc)
-      goto no_swc;
+   if (!svga->swc)
+      goto cleanup;
 
    svga_init_resource_functions(svga);
    svga_init_blend_functions(svga);
@@ -114,11 +159,15 @@ struct pipe_context *svga_context_create(struct pipe_screen *screen,
    svga_init_sampler_functions(svga);
    svga_init_fs_functions(svga);
    svga_init_vs_functions(svga);
+   svga_init_gs_functions(svga);
    svga_init_vertex_functions(svga);
    svga_init_constbuffer_functions(svga);
    svga_init_query_functions(svga);
    svga_init_surface_functions(svga);
+   svga_init_stream_output_functions(svga);
 
+   /* init misc state */
+   svga->curr.sample_mask = ~0;
 
    /* debug */
    svga->debug.no_swtnl = debug_get_option_no_swtnl();
@@ -128,21 +177,54 @@ struct pipe_context *svga_context_create(struct pipe_screen *screen,
    svga->debug.no_line_width = debug_get_option_no_line_width();
    svga->debug.force_hw_line_stipple = debug_get_option_force_hw_line_stipple();
 
-   svga->shader_id_bm = util_bitmask_create();
-   if (svga->shader_id_bm == NULL)
-      goto no_shader_bm;
+   if (!(svga->blend_object_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->ds_object_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->input_element_object_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->rast_object_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->sampler_object_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->sampler_view_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->shader_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->surface_view_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->stream_output_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->query_id_bm = util_bitmask_create()))
+      goto cleanup;
 
    svga->hwtnl = svga_hwtnl_create(svga);
    if (svga->hwtnl == NULL)
-      goto no_hwtnl;
+      goto cleanup;
 
    if (!svga_init_swtnl(svga))
-      goto no_swtnl;
+      goto cleanup;
 
    ret = svga_emit_initial_state( svga );
    if (ret != PIPE_OK)
-      goto no_state;
-   
+      goto cleanup;
+
+   svga->const0_upload = u_upload_create(&svga->pipe,
+                                         CONST0_UPLOAD_DEFAULT_SIZE,
+                                         CONST0_UPLOAD_ALIGNMENT,
+                                         PIPE_BIND_CONSTANT_BUFFER);
+   if (!svga->const0_upload)
+      goto cleanup;
+
    /* Avoid shortcircuiting state with initial value of zero.
     */
    memset(&svga->state.hw_clear, 0xcd, sizeof(svga->state.hw_clear));
@@ -151,24 +233,64 @@ struct pipe_context *svga_context_create(struct pipe_screen *screen,
 
    memset(&svga->state.hw_draw, 0xcd, sizeof(svga->state.hw_draw));
    memset(&svga->state.hw_draw.views, 0x0, sizeof(svga->state.hw_draw.views));
+   memset(&svga->state.hw_draw.num_sampler_views, 0,
+      sizeof(svga->state.hw_draw.num_sampler_views));
    svga->state.hw_draw.num_views = 0;
-   memset(&svga->state.hw_draw.hw_cb, 0x0, sizeof(svga->state.hw_draw.hw_cb));
+
+   /* Initialize the shader pointers */
+   svga->state.hw_draw.vs = NULL;
+   svga->state.hw_draw.gs = NULL;
+   svga->state.hw_draw.fs = NULL;
+   memset(svga->state.hw_draw.constbuf, 0,
+          sizeof(svga->state.hw_draw.constbuf));
+   memset(svga->state.hw_draw.default_constbuf_size, 0,
+          sizeof(svga->state.hw_draw.default_constbuf_size));
+   memset(svga->state.hw_draw.enabled_constbufs, 0,
+          sizeof(svga->state.hw_draw.enabled_constbufs));
+
+   /* Create a no-operation blend state which we will bind whenever the
+    * requested blend state is impossible (e.g. due to having an integer
+    * render target attached).
+    *
+    * XXX: We will probably actually need 16 of these, one for each possible
+    * RGBA color mask (4 bits).  Then, we would bind the one with a color mask
+    * matching the blend state it is replacing.
+    */
+   {
+      struct pipe_blend_state noop_tmpl = {0};
+      unsigned i;
+
+      for (i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) {
+         // Set the color mask to all-ones.  Later this may change.
+         noop_tmpl.rt[i].colormask = PIPE_MASK_RGBA;
+      }
+      svga->noop_blend = svga->pipe.create_blend_state(&svga->pipe, &noop_tmpl);
+   }
 
    svga->dirty = ~0;
 
    return &svga->pipe;
 
-no_state:
+cleanup:
    svga_destroy_swtnl(svga);
-no_swtnl:
-   svga_hwtnl_destroy( svga->hwtnl );
-no_hwtnl:
-   util_bitmask_destroy( svga->shader_id_bm );
-no_shader_bm:
-   svga->swc->destroy(svga->swc);
-no_swc:
+
+   if (svga->const0_upload)
+      u_upload_destroy(svga->const0_upload);
+   if (svga->hwtnl)
+      svga_hwtnl_destroy(svga->hwtnl);
+   if (svga->swc)
+      svga->swc->destroy(svga->swc);
+   util_bitmask_destroy(svga->blend_object_id_bm);
+   util_bitmask_destroy(svga->ds_object_id_bm);
+   util_bitmask_destroy(svga->input_element_object_id_bm);
+   util_bitmask_destroy(svga->rast_object_id_bm);
+   util_bitmask_destroy(svga->sampler_object_id_bm);
+   util_bitmask_destroy(svga->sampler_view_id_bm);
+   util_bitmask_destroy(svga->shader_id_bm);
+   util_bitmask_destroy(svga->surface_view_id_bm);
+   util_bitmask_destroy(svga->stream_output_id_bm);
+   util_bitmask_destroy(svga->query_id_bm);
    FREE(svga);
-no_svga:
    return NULL;
 }
 
@@ -195,11 +317,19 @@ void svga_context_flush( struct svga_context *svga,
    /* To force the re-emission of rendertargets and texture sampler bindings on
     * the next command buffer.
     */
-   svga->rebind.rendertargets = TRUE;
-   svga->rebind.texture_samplers = TRUE;
+   svga->rebind.flags.rendertargets = TRUE;
+   svga->rebind.flags.texture_samplers = TRUE;
+
    if (svga_have_gb_objects(svga)) {
-      svga->rebind.vs = TRUE;
-      svga->rebind.fs = TRUE;
+
+      svga->rebind.flags.constbufs = TRUE;
+      svga->rebind.flags.vs = TRUE;
+      svga->rebind.flags.fs = TRUE;
+      svga->rebind.flags.gs = TRUE;
+
+      if (svga_need_to_rebind_resources(svga)) {
+         svga->rebind.flags.query = TRUE;
+      }
    }
 
    if (SVGA_DEBUG & DEBUG_SYNC) {
@@ -215,6 +345,26 @@ void svga_context_flush( struct svga_context *svga,
 }
 
 
+/**
+ * Flush pending commands and wait for completion with a fence.
+ */
+void
+svga_context_finish(struct svga_context *svga)
+{
+   struct pipe_screen *screen = svga->pipe.screen;
+   struct pipe_fence_handle *fence = NULL;
+
+   svga_context_flush(svga, &fence);
+   svga->pipe.screen->fence_finish(screen, fence, PIPE_TIMEOUT_INFINITE);
+   screen->fence_reference(screen, &fence, NULL);
+}
+
+
+/**
+ * Emit pending drawing commands to the command buffer.
+ * If the command buffer overflows, we flush it and retry.
+ * \sa svga_hwtnl_flush()
+ */
 void svga_hwtnl_flush_retry( struct svga_context *svga )
 {
    enum pipe_error ret = PIPE_OK;
@@ -225,7 +375,7 @@ void svga_hwtnl_flush_retry( struct svga_context *svga )
       ret = svga_hwtnl_flush( svga->hwtnl );
    }
 
-   assert(ret == 0);
+   assert(ret == PIPE_OK);
 }
 
 
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index 2726346bc50..e8575f36c3b 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -38,7 +38,6 @@
 
 #include "svga_screen.h"
 #include "svga_state.h"
-#include "svga_tgsi.h"
 #include "svga_winsys.h"
 #include "svga_hw_reg.h"
 #include "svga3d_shaderdefs.h"
@@ -48,7 +47,19 @@
 #define SVGA_QUERY_DRAW_CALLS   (PIPE_QUERY_DRIVER_SPECIFIC + 0)
 #define SVGA_QUERY_FALLBACKS    (PIPE_QUERY_DRIVER_SPECIFIC + 1)
 #define SVGA_QUERY_MEMORY_USED  (PIPE_QUERY_DRIVER_SPECIFIC + 2)
+#define SVGA_QUERY_MAX          (PIPE_QUERY_DRIVER_SPECIFIC + 3)
 
+/**
+ * Maximum supported number of constant buffers per shader
+ */
+#define SVGA_MAX_CONST_BUFS 14
+
+/**
+ * Maximum constant buffer size that can be set with the
+ * DXSetSingleConstantBuffer command: the DX10 constant buffer
+ * element count (4096) times four 4-byte components per element.
+ */
+#define SVGA_MAX_CONST_BUF_SIZE (4096 * 4 * sizeof(int))
 
 struct draw_vertex_shader;
 struct draw_fragment_shader;
@@ -57,49 +68,16 @@ struct SVGACmdMemory;
 struct util_bitmask;
 
 
-struct svga_shader
-{
-   const struct tgsi_token *tokens;
-
-   struct tgsi_shader_info info;
-
-   /** Head of linked list of variants */
-   struct svga_shader_variant *variants;
-
-   unsigned id;  /**< for debugging only */
-};
-
-
-struct svga_fragment_shader
-{
-   struct svga_shader base;
-
-   struct draw_fragment_shader *draw_shader;
-
-   /** Mask of which generic varying variables are read by this shader */
-   unsigned generic_inputs;
-   /** Table mapping original TGSI generic indexes to low integers */
-   int8_t generic_remap_table[MAX_GENERIC_VARYING];
-};
-
-
-struct svga_vertex_shader
-{
-   struct svga_shader base;
-
-   struct draw_vertex_shader *draw_shader;
-};
-
-
 struct svga_cache_context;
 struct svga_tracked_state;
 
 struct svga_blend_state {
+   unsigned need_white_fragments:1;
+   unsigned independent_blend_enable:1;
+   unsigned alpha_to_coverage:1;
+   unsigned blend_color_alpha:1;  /**< set blend color to alpha value */
 
-   boolean need_white_fragments;
-
-   /* Should be per-render-target:
-    */
+   /** Per-render target state */
    struct {
       uint8_t writemask;
 
@@ -112,8 +90,9 @@ struct svga_blend_state {
       uint8_t srcblend_alpha;
       uint8_t dstblend_alpha;
       uint8_t blendeq_alpha;
+   } rt[PIPE_MAX_COLOR_BUFS];
 
-   } rt[1];
+   SVGA3dBlendStateId id;  /**< vgpu10 */
 };
 
 struct svga_depth_stencil_state {
@@ -139,6 +118,8 @@ struct svga_depth_stencil_state {
    unsigned stencil_writemask:8;
 
    float    alpharef;
+
+   SVGA3dDepthStencilStateId id;  /**< vgpu10 */
 };
 
 #define SVGA_UNFILLED_DISABLE 0
@@ -167,11 +148,13 @@ struct svga_rasterizer_state {
    float pointsize;
    float linewidth;
    
-   unsigned hw_unfilled:16;         /* PIPE_POLYGON_MODE_x */
+   unsigned hw_fillmode:2;         /* PIPE_POLYGON_MODE_x */
 
    /** Which prims do we need help for?  Bitmask of (1 << PIPE_PRIM_x) flags */
    unsigned need_pipeline:16;
 
+   SVGA3dRasterizerStateId id;    /**< vgpu10 */
+
    /** For debugging: */
    const char* need_pipeline_tris_str;
    const char* need_pipeline_lines_str;
@@ -195,15 +178,45 @@ struct svga_sampler_state {
    unsigned min_lod;
    unsigned view_min_lod;
    unsigned view_max_lod;
+
+   SVGA3dSamplerId id;
 };
 
+
+struct svga_pipe_sampler_view
+{
+   struct pipe_sampler_view base;
+
+   SVGA3dShaderResourceViewId id;
+};
+
+
+static inline struct svga_pipe_sampler_view *
+svga_pipe_sampler_view(struct pipe_sampler_view *v)
+{
+   return (struct svga_pipe_sampler_view *) v;
+}
+
+
 struct svga_velems_state {
    unsigned count;
    struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
    SVGA3dDeclType decl_type[PIPE_MAX_ATTRIBS]; /**< vertex attrib formats */
-   unsigned adjust_attrib_range; /* bitmask of attrs needing range adjustment */
-   unsigned adjust_attrib_w_1;   /* bitmask of attrs needing w = 1 */
+
+   /** Bitmasks indicating which attributes need format conversion */
+   unsigned adjust_attrib_range;     /**< range adjustment */
+   unsigned attrib_is_pure_int;      /**< pure int */
+   unsigned adjust_attrib_w_1;       /**< set w = 1 */
+   unsigned adjust_attrib_itof;      /**< int->float */
+   unsigned adjust_attrib_utof;      /**< uint->float */
+   unsigned attrib_is_bgra;          /**< R / B swizzling */
+   unsigned attrib_puint_to_snorm;   /**< 10_10_10_2 packed uint -> snorm */
+   unsigned attrib_puint_to_uscaled; /**< 10_10_10_2 packed uint -> uscaled */
+   unsigned attrib_puint_to_sscaled; /**< 10_10_10_2 packed uint -> sscaled */
+
    boolean need_swvfetch;
+
+   SVGA3dElementLayoutId id; /**< VGPU10 */
 };
 
 /* Use to calculate differences between state emitted to hardware and
@@ -214,16 +227,22 @@ struct svga_state
    const struct svga_blend_state *blend;
    const struct svga_depth_stencil_state *depth;
    const struct svga_rasterizer_state *rast;
-   const struct svga_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+   const struct svga_sampler_state *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
    const struct svga_velems_state *velems;
 
-   struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; /* or texture ID's? */
+   struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; /* or texture ID's? */
    struct svga_fragment_shader *fs;
    struct svga_vertex_shader *vs;
+   struct svga_geometry_shader *user_gs; /* user-specified GS */
+   struct svga_geometry_shader *gs;      /* derived GS */
 
    struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
    struct pipe_index_buffer ib;
-   struct pipe_constant_buffer cbufs[PIPE_SHADER_TYPES];
+   /** Constant buffers for each shader.
+    * The size should probably always match with that of
+    * svga_shader_emitter_v10.num_shader_consts.
+    */
+   struct pipe_constant_buffer constbufs[PIPE_SHADER_TYPES][SVGA_MAX_CONST_BUFS];
 
    struct pipe_framebuffer_state framebuffer;
    float depthscale;
@@ -240,8 +259,8 @@ struct svga_state
    struct pipe_clip_state clip;
    struct pipe_viewport_state viewport;
 
-   unsigned num_samplers;
-   unsigned num_sampler_views;
+   unsigned num_samplers[PIPE_SHADER_TYPES];
+   unsigned num_sampler_views[PIPE_SHADER_TYPES];
    unsigned num_vertex_buffers;
    unsigned reduced_prim;
 
@@ -249,6 +268,8 @@ struct svga_state
       unsigned flag_1d;
       unsigned flag_srgb;
    } tex_flags;
+
+   unsigned sample_mask;
 };
 
 struct svga_prescale {
@@ -262,9 +283,7 @@ struct svga_prescale {
  */
 struct svga_hw_clear_state
 {
-   struct {
-      unsigned x,y,w,h;
-   } viewport;
+   SVGA3dRect viewport;
 
    struct {
       float zmin, zmax;
@@ -291,16 +310,29 @@ struct svga_hw_draw_state
    unsigned ts[SVGA3D_PIXEL_SAMPLERREG_MAX][SVGA3D_TS_MAX];
    float cb[PIPE_SHADER_TYPES][SVGA3D_CONSTREG_MAX][4];
 
-   /**
-    * For guest backed shader constants only.
-    */
-   struct svga_winsys_surface *hw_cb[PIPE_SHADER_TYPES];
-
    struct svga_shader_variant *fs;
    struct svga_shader_variant *vs;
+   struct svga_shader_variant *gs;
    struct svga_hw_view_state views[PIPE_MAX_SAMPLERS];
-
    unsigned num_views;
+   struct pipe_resource *constbuf[PIPE_SHADER_TYPES];
+
+   /* Bitmask of enabled constant buffers */
+   unsigned enabled_constbufs[PIPE_SHADER_TYPES];
+
+   /* VGPU10 HW state (used to prevent emitting redundant state) */
+   SVGA3dDepthStencilStateId depth_stencil_id;
+   unsigned stencil_ref;
+   SVGA3dBlendStateId blend_id;
+   float blend_factor[4];
+   unsigned blend_sample_mask;
+   SVGA3dRasterizerStateId rasterizer_id;
+   SVGA3dElementLayoutId layout_id;
+   SVGA3dPrimitiveType topology;
+
+   /* used for rebinding */
+   unsigned num_sampler_views[PIPE_SHADER_TYPES];
+   unsigned default_constbuf_size[PIPE_SHADER_TYPES];
 };
 
 
@@ -326,12 +358,14 @@ struct svga_sw_state
 struct svga_hw_queue;
 
 struct svga_query;
+struct svga_qmem_alloc_entry;
 
 struct svga_context
 {
    struct pipe_context pipe;
    struct svga_winsys_context *swc;
    struct blitter_context *blitter;
+   struct u_upload_mgr *const0_upload;
 
    struct {
       boolean no_swtnl;
@@ -355,12 +389,42 @@ struct svga_context
       boolean new_vdecl;
    } swtnl;
 
+   /* Bitmask of blend state objects IDs */
+   struct util_bitmask *blend_object_id_bm;
+
+   /* Bitmask of depth/stencil state objects IDs */
+   struct util_bitmask *ds_object_id_bm;
+
+   /* Bitmask of input element object IDs */
+   struct util_bitmask *input_element_object_id_bm;
+
+   /* Bitmask of rasterizer object IDs */
+   struct util_bitmask *rast_object_id_bm;
+
+   /* Bitmask of sampler state objects IDs */
+   struct util_bitmask *sampler_object_id_bm;
+
+   /* Bitmask of sampler view IDs */
+   struct util_bitmask *sampler_view_id_bm;
+
    /* Bitmask of used shader IDs */
    struct util_bitmask *shader_id_bm;
 
+   /* Bitmask of used surface view IDs */
+   struct util_bitmask *surface_view_id_bm;
+
+   /* Bitmask of used stream output IDs */
+   struct util_bitmask *stream_output_id_bm;
+
+   /* Bitmask of used query IDs */
+   struct util_bitmask *query_id_bm;
+
    struct {
       unsigned dirty[SVGA_STATE_MAX];
 
+      /** bitmasks of which const buffers are changed */
+      unsigned dirty_constbufs[PIPE_SHADER_TYPES];
+
       unsigned texture_timestamp;
 
       /* 
@@ -373,17 +437,28 @@ struct svga_context
    struct svga_state curr;      /* state from the state tracker */
    unsigned dirty;              /* statechanges since last update_state() */
 
-   struct {
-      unsigned rendertargets:1;
-      unsigned texture_samplers:1;
-      unsigned vs:1;
-      unsigned fs:1;
+   union {
+      struct {
+         unsigned rendertargets:1;
+         unsigned texture_samplers:1;
+         unsigned constbufs:1;
+         unsigned vs:1;
+         unsigned fs:1;
+         unsigned gs:1;
+         unsigned query:1;
+      } flags;
+      unsigned val;
    } rebind;
 
    struct svga_hwtnl *hwtnl;
 
-   /** The occlusion query currently in progress */
-   struct svga_query *sq;
+   /** Queries states */
+   struct svga_winsys_gb_query *gb_query;     /**< gb query object, one per context */
+   unsigned gb_query_len;                     /**< gb query object size */
+   struct util_bitmask *gb_query_alloc_mask;  /**< gb query object allocation mask */
+   struct svga_qmem_alloc_entry *gb_query_map[SVGA_QUERY_MAX];
+                                              /**< query mem block mapping */
+   struct svga_query *sq[SVGA_QUERY_MAX];     /**< queries currently in progress */
 
    /** List of buffers with queued transfers */
    struct list_head dirty_buffers;
@@ -391,12 +466,32 @@ struct svga_context
    /** performance / info queries */
    uint64_t num_draw_calls;  /**< SVGA_QUERY_DRAW_CALLS */
    uint64_t num_fallbacks;   /**< SVGA_QUERY_FALLBACKS */
+
+   /** The currently bound stream output targets */
+   unsigned num_so_targets;
+   struct svga_winsys_surface *so_surfaces[SVGA3D_DX_MAX_SOTARGETS];
+   struct pipe_stream_output_target *so_targets[SVGA3D_DX_MAX_SOTARGETS];
+   struct svga_stream_output *current_so;
+
+   /** A blend state with blending disabled, for falling back to when blending
+    * is illegal (e.g. an integer texture is bound)
+    */
+   struct svga_blend_state *noop_blend;
+
+   struct {
+      struct pipe_resource *texture;
+      struct svga_pipe_sampler_view *sampler_view;
+      void *sampler;
+   } polygon_stipple;
+
+   /** Alternate rasterizer states created for point sprite */
+   struct svga_rasterizer_state *rasterizer_no_cull[2];
 };
 
 /* A flag for each state_tracker state object:
  */
 #define SVGA_NEW_BLEND               0x1
-#define SVGA_NEW_DEPTH_STENCIL       0x2
+#define SVGA_NEW_DEPTH_STENCIL_ALPHA 0x2
 #define SVGA_NEW_RAST                0x4
 #define SVGA_NEW_SAMPLER             0x8
 #define SVGA_NEW_TEXTURE             0x10
@@ -422,7 +517,9 @@ struct svga_context
 #define SVGA_NEW_VS_VARIANT          0x1000000
 #define SVGA_NEW_TEXTURE_FLAGS       0x4000000
 #define SVGA_NEW_STENCIL_REF         0x8000000
-
+#define SVGA_NEW_GS                  0x10000000
+#define SVGA_NEW_GS_CONST_BUFFER     0x20000000
+#define SVGA_NEW_GS_VARIANT          0x40000000
 
 
@@ -457,11 +554,13 @@ void svga_init_rasterizer_functions( struct svga_context *svga );
 void svga_init_sampler_functions( struct svga_context *svga );
 void svga_init_fs_functions( struct svga_context *svga );
 void svga_init_vs_functions( struct svga_context *svga );
+void svga_init_gs_functions( struct svga_context *svga );
 void svga_init_vertex_functions( struct svga_context *svga );
 void svga_init_constbuffer_functions( struct svga_context *svga );
 void svga_init_draw_functions( struct svga_context *svga );
 void svga_init_query_functions( struct svga_context *svga );
 void svga_init_surface_functions(struct svga_context *svga);
+void svga_init_stream_output_functions( struct svga_context *svga );
 
 void svga_cleanup_vertex_state( struct svga_context *svga );
 void svga_cleanup_tss_binding( struct svga_context *svga );
@@ -470,6 +569,8 @@ void svga_cleanup_framebuffer( struct svga_context *svga );
 void svga_context_flush( struct svga_context *svga,
                          struct pipe_fence_handle **pfence );
 
+void svga_context_finish(struct svga_context *svga);
+
 void svga_hwtnl_flush_retry( struct svga_context *svga );
 void svga_hwtnl_flush_buffer( struct svga_context *svga,
                               struct pipe_resource *buffer );
@@ -504,5 +605,22 @@ svga_have_gb_dma(const struct svga_context *svga)
    return svga_screen(svga->pipe.screen)->sws->have_gb_dma;
 }
 
+static inline boolean
+svga_have_vgpu10(const struct svga_context *svga)
+{
+   return svga_screen(svga->pipe.screen)->sws->have_vgpu10;
+}
+
+static inline boolean
+svga_need_to_rebind_resources(const struct svga_context *svga)
+{
+   return svga_screen(svga->pipe.screen)->sws->need_to_rebind_resources;
+}
+
+static inline boolean
+svga_rects_equal(const SVGA3dRect *r1, const SVGA3dRect *r2)
+{
+   return memcmp(r1, r2, sizeof(*r1)) == 0;
+}
 
 #endif
diff --git a/src/gallium/drivers/svga/svga_debug.h b/src/gallium/drivers/svga/svga_debug.h
index 82c9b602d5d..039f79d4e99 100644
--- a/src/gallium/drivers/svga/svga_debug.h
+++ b/src/gallium/drivers/svga/svga_debug.h
@@ -29,21 +29,22 @@
 #include "pipe/p_compiler.h"
 #include "util/u_debug.h"
 
-#define DEBUG_DMA      0x1
-#define DEBUG_TGSI     0x4
-#define DEBUG_PIPE     0x8
-#define DEBUG_STATE    0x10
-#define DEBUG_SCREEN   0x20
-#define DEBUG_TEX      0x40
-#define DEBUG_SWTNL    0x80
-#define DEBUG_CONSTS   0x100
-#define DEBUG_VIEWPORT 0x200
-#define DEBUG_VIEWS    0x400
-#define DEBUG_PERF     0x800    /* print something when we hit any slow path operation */
-#define DEBUG_FLUSH    0x1000   /* flush after every draw */
-#define DEBUG_SYNC     0x2000   /* sync after every flush */
-#define DEBUG_QUERY    0x4000
-#define DEBUG_CACHE    0x8000
+#define DEBUG_DMA          0x1
+#define DEBUG_TGSI         0x4
+#define DEBUG_PIPE         0x8
+#define DEBUG_STATE        0x10
+#define DEBUG_SCREEN       0x20
+#define DEBUG_TEX          0x40
+#define DEBUG_SWTNL        0x80
+#define DEBUG_CONSTS       0x100
+#define DEBUG_VIEWPORT     0x200
+#define DEBUG_VIEWS        0x400
+#define DEBUG_PERF         0x800    /* print something when we hit any slow path operation */
+#define DEBUG_FLUSH        0x1000   /* flush after every draw */
+#define DEBUG_SYNC         0x2000   /* sync after every flush */
+#define DEBUG_QUERY        0x4000
+#define DEBUG_CACHE        0x8000
+#define DEBUG_STREAMOUT    0x10000
 
 #ifdef DEBUG
 extern int SVGA_DEBUG;
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
index 366a2dccdf1..9b6451da2f9 100644
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -26,17 +26,19 @@
 #include "pipe/p_compiler.h"
 #include "util/u_inlines.h"
 #include "pipe/p_defines.h"
+#include "util/u_helpers.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
-#include "util/u_upload_mgr.h"
 
 #include "svga_context.h"
 #include "svga_draw.h"
 #include "svga_draw_private.h"
 #include "svga_debug.h"
 #include "svga_screen.h"
+#include "svga_resource.h"
 #include "svga_resource_buffer.h"
 #include "svga_resource_texture.h"
+#include "svga_shader.h"
 #include "svga_surface.h"
 #include "svga_winsys.h"
 #include "svga_cmd.h"
@@ -71,8 +73,8 @@ svga_hwtnl_destroy(struct svga_hwtnl *hwtnl)
       }
    }
 
-   for (i = 0; i < hwtnl->cmd.vdecl_count; i++)
-      pipe_resource_reference(&hwtnl->cmd.vdecl_vb[i], NULL);
+   for (i = 0; i < hwtnl->cmd.vbuf_count; i++)
+      pipe_resource_reference(&hwtnl->cmd.vbufs[i].buffer, NULL);
 
    for (i = 0; i < hwtnl->cmd.prim_count; i++)
       pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL);
@@ -85,45 +87,55 @@ void
 svga_hwtnl_set_flatshade(struct svga_hwtnl *hwtnl,
                          boolean flatshade, boolean flatshade_first)
 {
-   hwtnl->hw_pv = PV_FIRST;
+   struct svga_screen *svgascreen = svga_screen(hwtnl->svga->pipe.screen);
+
+   /* User-specified PV */
    hwtnl->api_pv = (flatshade && !flatshade_first) ? PV_LAST : PV_FIRST;
+
+   /* Device supported PV */
+   if (svgascreen->haveProvokingVertex) {
+      /* use the mode specified by the user */
+      hwtnl->hw_pv = hwtnl->api_pv;
+   }
+   else {
+      /* the device only support first provoking vertex */
+      hwtnl->hw_pv = PV_FIRST;
+   }
 }
 
 
 void
-svga_hwtnl_set_unfilled(struct svga_hwtnl *hwtnl, unsigned mode)
+svga_hwtnl_set_fillmode(struct svga_hwtnl *hwtnl, unsigned mode)
 {
    hwtnl->api_fillmode = mode;
 }
 
 
 void
-svga_hwtnl_reset_vdecl(struct svga_hwtnl *hwtnl, unsigned count)
+svga_hwtnl_vertex_decls(struct svga_hwtnl *hwtnl,
+                        unsigned count,
+                        const SVGA3dVertexDecl * decls,
+                        const unsigned *buffer_indexes,
+                        SVGA3dElementLayoutId layout_id)
 {
-   unsigned i;
-
    assert(hwtnl->cmd.prim_count == 0);
-
-   for (i = count; i < hwtnl->cmd.vdecl_count; i++) {
-      pipe_resource_reference(&hwtnl->cmd.vdecl_vb[i], NULL);
-   }
-
    hwtnl->cmd.vdecl_count = count;
+   hwtnl->cmd.vdecl_layout_id = layout_id;
+   memcpy(hwtnl->cmd.vdecl, decls, count * sizeof(*decls));
+   memcpy(hwtnl->cmd.vdecl_buffer_index, buffer_indexes,
+          count * sizeof(unsigned));
 }
 
 
+/**
+ * Specify vertex buffers for hardware drawing.
+ */
 void
-svga_hwtnl_vdecl(struct svga_hwtnl *hwtnl,
-                 unsigned i,
-                 const SVGA3dVertexDecl * decl, struct pipe_resource *vb)
+svga_hwtnl_vertex_buffers(struct svga_hwtnl *hwtnl,
+                          unsigned count, struct pipe_vertex_buffer *buffers)
 {
-   assert(hwtnl->cmd.prim_count == 0);
-
-   assert(i < hwtnl->cmd.vdecl_count);
-
-   hwtnl->cmd.vdecl[i] = *decl;
-
-   pipe_resource_reference(&hwtnl->cmd.vdecl_vb[i], vb);
+   util_set_vertex_buffers_count(hwtnl->cmd.vbufs,
+                                 &hwtnl->cmd.vbuf_count, buffers, 0, count);
 }
 
 
@@ -145,8 +157,8 @@ svga_hwtnl_is_buffer_referred(struct svga_hwtnl *hwtnl,
       return FALSE;
    }
 
-   for (i = 0; i < hwtnl->cmd.vdecl_count; ++i) {
-      if (hwtnl->cmd.vdecl_vb[i] == buffer) {
+   for (i = 0; i < hwtnl->cmd.vbuf_count; ++i) {
+      if (hwtnl->cmd.vbufs[i].buffer == buffer) {
          return TRUE;
       }
    }
@@ -161,120 +173,444 @@ svga_hwtnl_is_buffer_referred(struct svga_hwtnl *hwtnl,
 }
 
 
-enum pipe_error
-svga_hwtnl_flush(struct svga_hwtnl *hwtnl)
+static enum pipe_error
+draw_vgpu9(struct svga_hwtnl *hwtnl)
 {
    struct svga_winsys_context *swc = hwtnl->cmd.swc;
    struct svga_context *svga = hwtnl->svga;
    enum pipe_error ret;
+   struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
+   struct svga_winsys_surface *ib_handle[QSZ];
+   struct svga_winsys_surface *handle;
+   SVGA3dVertexDecl *vdecl;
+   SVGA3dPrimitiveRange *prim;
+   unsigned i;
 
-   if (hwtnl->cmd.prim_count) {
-      struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
-      struct svga_winsys_surface *ib_handle[QSZ];
-      struct svga_winsys_surface *handle;
-      SVGA3dVertexDecl *vdecl;
-      SVGA3dPrimitiveRange *prim;
-      unsigned i;
+   for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
+      unsigned j = hwtnl->cmd.vdecl_buffer_index[i];
+      handle = svga_buffer_handle(svga, hwtnl->cmd.vbufs[j].buffer);
+      if (handle == NULL)
+         return PIPE_ERROR_OUT_OF_MEMORY;
 
-      for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
-         assert(!svga_buffer_is_user_buffer(hwtnl->cmd.vdecl_vb[i]));
-         handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]);
+      vb_handle[i] = handle;
+   }
+
+   for (i = 0; i < hwtnl->cmd.prim_count; i++) {
+      if (hwtnl->cmd.prim_ib[i]) {
+         handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]);
          if (handle == NULL)
             return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+      else
+         handle = NULL;
+
+      ib_handle[i] = handle;
+   }
+
+   if (svga->rebind.flags.rendertargets) {
+      ret = svga_reemit_framebuffer_bindings(svga);
+      if (ret != PIPE_OK) {
+         return ret;
+      }
+   }
+
+   if (svga->rebind.flags.texture_samplers) {
+      ret = svga_reemit_tss_bindings(svga);
+      if (ret != PIPE_OK) {
+         return ret;
+      }
+   }
+
+   if (svga->rebind.flags.vs) {
+      ret = svga_reemit_vs_bindings(svga);
+      if (ret != PIPE_OK) {
+         return ret;
+      }
+   }
+
+   if (svga->rebind.flags.fs) {
+      ret = svga_reemit_fs_bindings(svga);
+      if (ret != PIPE_OK) {
+         return ret;
+      }
+   }
+
+   SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n",
+            svga->curr.framebuffer.cbufs[0] ?
+            svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL,
+            hwtnl->cmd.prim_count);
 
-         vb_handle[i] = handle;
+   ret = SVGA3D_BeginDrawPrimitives(swc,
+                                    &vdecl,
+                                    hwtnl->cmd.vdecl_count,
+                                    &prim, hwtnl->cmd.prim_count);
+   if (ret != PIPE_OK)
+      return ret;
+
+   memcpy(vdecl,
+          hwtnl->cmd.vdecl,
+          hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]);
+
+   for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
+      /* check for 4-byte alignment */
+      assert(vdecl[i].array.offset % 4 == 0);
+      assert(vdecl[i].array.stride % 4 == 0);
+
+      /* Given rangeHint is considered to be relative to indexBias, and
+       * indexBias varies per primitive, we cannot accurately supply a
+       * rangeHint when emitting more than one primitive per draw command.
+       */
+      if (hwtnl->cmd.prim_count == 1) {
+         vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0];
+         vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1;
+      }
+      else {
+         vdecl[i].rangeHint.first = 0;
+         vdecl[i].rangeHint.last = 0;
       }
 
-      for (i = 0; i < hwtnl->cmd.prim_count; i++) {
-         if (hwtnl->cmd.prim_ib[i]) {
-            assert(!svga_buffer_is_user_buffer(hwtnl->cmd.prim_ib[i]));
-            handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]);
-            if (handle == NULL)
-               return PIPE_ERROR_OUT_OF_MEMORY;
+      swc->surface_relocation(swc,
+                              &vdecl[i].array.surfaceId,
+                              NULL, vb_handle[i], SVGA_RELOC_READ);
+   }
+
+   memcpy(prim,
+          hwtnl->cmd.prim, hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]);
+
+   for (i = 0; i < hwtnl->cmd.prim_count; i++) {
+      swc->surface_relocation(swc,
+                              &prim[i].indexArray.surfaceId,
+                              NULL, ib_handle[i], SVGA_RELOC_READ);
+      pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL);
+   }
+
+   SVGA_FIFOCommitAll(swc);
+
+   hwtnl->cmd.prim_count = 0;
+
+   return PIPE_OK;
+}
+
+
+static SVGA3dSurfaceFormat
+xlate_index_format(unsigned indexWidth)
+{
+   if (indexWidth == 2) {
+      return SVGA3D_R16_UINT;
+   }
+   else if (indexWidth == 4) {
+      return SVGA3D_R32_UINT;
+   }
+   else {
+      assert(!"Bad indexWidth");
+      return SVGA3D_R32_UINT;
+   }
+}
+
+
+static enum pipe_error
+validate_sampler_resources(struct svga_context *svga)
+{
+   unsigned shader;
+
+   assert(svga_have_vgpu10(svga));   /* only used on the VGPU10 draw path */
+
+   for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
+      unsigned count = svga->curr.num_sampler_views[shader];
+      unsigned i;
+      struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS] = { NULL };
+      enum pipe_error ret;
+
+      /*
+       * Reference bound sampler resources so the device notices pending
+       * updates.  surfaces[] starts all-NULL: the stipple unit may leave gaps.
+       */
+      for (i = 0; i < count; i++) {
+         struct svga_pipe_sampler_view *sv =
+            svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]);
+
+         if (sv) {
+            if (sv->base.texture->target == PIPE_BUFFER) {
+               surfaces[i] = svga_buffer_handle(svga, sv->base.texture);
+            }
+            else {
+               surfaces[i] = svga_texture(sv->base.texture)->handle;
+            }
          }
          else {
-            handle = NULL;
+            surfaces[i] = NULL;
          }
-
-         ib_handle[i] = handle;
       }
 
-      if (svga->rebind.rendertargets) {
-         ret = svga_reemit_framebuffer_bindings(svga);
-         if (ret != PIPE_OK) {
-            return ret;
-         }
+      if (shader == PIPE_SHADER_FRAGMENT &&
+          svga->curr.rast->templ.poly_stipple_enable) {
+         const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+         struct svga_pipe_sampler_view *sv =
+            svga->polygon_stipple.sampler_view;
+
+         assert(sv);
+         surfaces[unit] = svga_texture(sv->base.texture)->handle;
+         count = MAX2(count, unit+1);   /* make the loop below reach 'unit' */
       }
 
-      if (svga->rebind.texture_samplers) {
-         ret = svga_reemit_tss_bindings(svga);
-         if (ret != PIPE_OK) {
-            return ret;
+      /* rebind every collected (non-NULL) surface while a rebind is pending */
+      if (svga->rebind.flags.texture_samplers) {
+         for (i = 0; i < count; i++) {
+            if (surfaces[i]) {
+               ret = svga->swc->resource_rebind(svga->swc,
+                                                surfaces[i],
+                                                NULL,
+                                                SVGA_RELOC_READ);
+               if (ret != PIPE_OK)
+                  return ret;
+            }
          }
       }
+   }
+   svga->rebind.flags.texture_samplers = FALSE;   /* all stages processed */
 
-      if (svga->rebind.vs) {
-         ret = svga_reemit_vs_bindings(svga);
-         if (ret != PIPE_OK) {
-            return ret;
+   return PIPE_OK;
+}
+
+
+static enum pipe_error
+validate_constant_buffers(struct svga_context *svga)
+{
+   unsigned shader;
+
+   assert(svga_have_vgpu10(svga));   /* only used on the VGPU10 draw path */
+
+   for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
+      enum pipe_error ret;
+      struct svga_buffer *buffer;
+      struct svga_winsys_surface *handle;
+      unsigned enabled_constbufs;
+
+      /* Rebind the default constant buffer if a constbuf rebind is pending */
+      if (svga->rebind.flags.constbufs) {
+         buffer = svga_buffer(svga->state.hw_draw.constbuf[shader]);
+         if (buffer) {
+            ret = svga->swc->resource_rebind(svga->swc,
+                                             buffer->handle,
+                                             NULL,
+                                             SVGA_RELOC_READ);
+            if (ret != PIPE_OK)
+               return ret;
          }
       }
 
-      if (svga->rebind.fs) {
-         ret = svga_reemit_fs_bindings(svga);
-         if (ret != PIPE_OK) {
-            return ret;
+      /*
+       * Reference the other bound constant buffers so the device notices
+       * pending updates; bit 0 is skipped (presumably the default buffer).
+       */
+      enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] & ~1u;
+      while (enabled_constbufs) {
+         unsigned i = u_bit_scan(&enabled_constbufs);
+         buffer = svga_buffer(svga->curr.constbufs[shader][i].buffer);
+         if (buffer) {
+            handle = svga_buffer_handle(svga, &buffer->b.b);
+
+            if (svga->rebind.flags.constbufs) {
+               ret = svga->swc->resource_rebind(svga->swc,
+                                                handle,
+                                                NULL,
+                                                SVGA_RELOC_READ);
+               if (ret != PIPE_OK)
+                  return ret;
+            }
          }
       }
+   }
+   svga->rebind.flags.constbufs = FALSE;   /* all stages processed */
+
+   return PIPE_OK;
+}
 
-      SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n",
-               svga->curr.framebuffer.cbufs[0] ?
-               svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL,
-               hwtnl->cmd.prim_count);
 
-      ret = SVGA3D_BeginDrawPrimitives(swc, &vdecl, hwtnl->cmd.vdecl_count,
-                                       &prim, hwtnl->cmd.prim_count);
+static enum pipe_error
+draw_vgpu10(struct svga_hwtnl *hwtnl,
+            const SVGA3dPrimitiveRange *range,
+            unsigned vcount,
+            unsigned min_index,   /* min_index/max_index are not read here */
+            unsigned max_index, struct pipe_resource *ib,
+            unsigned start_instance, unsigned instance_count)
+{
+   struct svga_context *svga = hwtnl->svga;
+   struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
+   struct svga_winsys_surface *ib_handle;
+   const unsigned vbuf_count = hwtnl->cmd.vbuf_count;
+   enum pipe_error ret;
+   unsigned i;
+
+   assert(svga_have_vgpu10(svga));
+   assert(hwtnl->cmd.prim_count == 0);   /* nothing may be queued here */
+
+   /* We need to reemit all the current resource bindings along with the Draw
+    * command to be sure that the referenced resources are available for the
+    * Draw command, just in case the surfaces associated with the resources
+    * are paged out.
+    */
+   if (svga->rebind.val) {
+      ret = svga_rebind_framebuffer_bindings(svga);
       if (ret != PIPE_OK)
          return ret;
 
-      memcpy(vdecl, hwtnl->cmd.vdecl,
-             hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]);
-
-      for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
-         /* Given rangeHint is considered to be relative to indexBias, and 
-          * indexBias varies per primitive, we cannot accurately supply an 
-          * rangeHint when emitting more than one primitive per draw command.
-          */
-         if (hwtnl->cmd.prim_count == 1) {
-            vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0];
-            vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1;
-         }
-         else {
-            vdecl[i].rangeHint.first = 0;
-            vdecl[i].rangeHint.last = 0;
-         }
+      ret = svga_rebind_shaders(svga);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+
+   ret = validate_sampler_resources(svga);
+   if (ret != PIPE_OK)
+      return ret;
 
-         swc->surface_relocation(swc, &vdecl[i].array.surfaceId, NULL,
-                                 vb_handle[i], SVGA_RELOC_READ);
+   ret = validate_constant_buffers(svga);
+   if (ret != PIPE_OK)
+      return ret;
+
+   /* Get handle for each referenced vertex buffer (NULL for empty slots) */
+   for (i = 0; i < vbuf_count; i++) {
+      struct svga_buffer *sbuf = svga_buffer(hwtnl->cmd.vbufs[i].buffer);
+
+      if (sbuf) {
+         assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_VERTEX_BUFFER);
+         vb_handle[i] = svga_buffer_handle(svga, &sbuf->b.b);
+         if (vb_handle[i] == NULL)
+            return PIPE_ERROR_OUT_OF_MEMORY;
       }
+      else {
+         vb_handle[i] = NULL;
+      }
+   }
 
-      memcpy(prim, hwtnl->cmd.prim,
-             hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]);
+   /* Get the handle for the index buffer, if this is an indexed draw */
+   if (ib) {
+      struct svga_buffer *sbuf = svga_buffer(ib);
 
-      for (i = 0; i < hwtnl->cmd.prim_count; i++) {
-         swc->surface_relocation(swc, &prim[i].indexArray.surfaceId, NULL,
-                                 ib_handle[i], SVGA_RELOC_READ);
-         pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL);
+      assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_INDEX_BUFFER);
+      (void) sbuf; /* silence unused var warning */
+
+      ib_handle = svga_buffer_handle(svga, ib);
+      if (ib_handle == NULL)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+   else {
+      ib_handle = NULL;
+   }
+
+   /* setup vertex attribute input layout (skipped when already current) */
+   if (svga->state.hw_draw.layout_id != hwtnl->cmd.vdecl_layout_id) {
+      ret = SVGA3D_vgpu10_SetInputLayout(svga->swc,
+                                         hwtnl->cmd.vdecl_layout_id);
+      if (ret != PIPE_OK)
+         return ret;
+
+      svga->state.hw_draw.layout_id = hwtnl->cmd.vdecl_layout_id;
+   }
+
+   /* setup vertex buffers: stride/offset here, surfaces via vb_handle[] */
+   {
+      SVGA3dVertexBuffer buffers[PIPE_MAX_ATTRIBS];
+
+      for (i = 0; i < vbuf_count; i++) {
+         buffers[i].stride = hwtnl->cmd.vbufs[i].stride;
+         buffers[i].offset = hwtnl->cmd.vbufs[i].buffer_offset;
+      }
+      if (vbuf_count > 0) {
+         ret = SVGA3D_vgpu10_SetVertexBuffers(svga->swc, vbuf_count,
+                                              0,    /* startBuffer */
+                                              buffers, vb_handle);
+         if (ret != PIPE_OK)
+            return ret;
       }
+   }
+
+   /* Set primitive type (line, tri, etc); skipped when already current */
+   if (svga->state.hw_draw.topology != range->primType) {
+      ret = SVGA3D_vgpu10_SetTopology(svga->swc, range->primType);
+      if (ret != PIPE_OK)
+         return ret;
+
+      svga->state.hw_draw.topology = range->primType;
+   }
 
-      SVGA_FIFOCommitAll(swc);
-      hwtnl->cmd.prim_count = 0;
+   if (ib_handle) {
+      /* indexed drawing */
+      SVGA3dSurfaceFormat indexFormat = xlate_index_format(range->indexWidth);
+
+      /* setup index buffer */
+      ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, ib_handle,
+                                         indexFormat,
+                                         range->indexArray.offset);
+      if (ret != PIPE_OK)
+         return ret;
+
+      if (instance_count > 1) {
+         ret = SVGA3D_vgpu10_DrawIndexedInstanced(svga->swc,
+                                                  vcount,
+                                                  instance_count,
+                                                  0, /* startIndexLocation */
+                                                  range->indexBias,
+                                                  start_instance);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+      else {
+         /* non-instanced drawing */
+         ret = SVGA3D_vgpu10_DrawIndexed(svga->swc,
+                                         vcount,
+                                         0,      /* startIndexLocation */
+                                         range->indexBias);
+         if (ret != PIPE_OK)
+            return ret;
+      }
    }
+   else {
+      /* non-indexed drawing */
+      if (instance_count > 1) {
+         ret = SVGA3D_vgpu10_DrawInstanced(svga->swc,
+                                           vcount,
+                                           instance_count,
+                                           range->indexBias,
+                                           start_instance);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+      else {
+         /* non-instanced */
+         ret = SVGA3D_vgpu10_Draw(svga->swc,
+                                  vcount,
+                                  range->indexBias);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+   }
+
+   hwtnl->cmd.prim_count = 0;
 
    return PIPE_OK;
 }
 
 
+
+/**
+ * Emit any pending drawing commands to the command buffer.
+ * When we receive VGPU9 drawing commands we accumulate them and don't
+ * immediately emit them into the command buffer.
+ * This function needs to be called before we change state that could
+ * affect those pending draws.
+ */
+enum pipe_error
+svga_hwtnl_flush(struct svga_hwtnl *hwtnl)
+{
+   /* primitives are only queued for VGPU9; otherwise nothing is pending */
+   if (svga_have_vgpu10(hwtnl->svga) || hwtnl->cmd.prim_count == 0) {
+      return PIPE_OK;
+   }
+   return draw_vgpu9(hwtnl);
+}
+
+
 void
 svga_hwtnl_set_index_bias(struct svga_hwtnl *hwtnl, int index_bias)
 {
@@ -298,18 +634,28 @@ check_draw_params(struct svga_hwtnl *hwtnl,
 {
    unsigned i;
 
+   assert(!svga_have_vgpu10(hwtnl->svga));
+
    for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
-      struct pipe_resource *vb = hwtnl->cmd.vdecl_vb[i];
-      unsigned size = vb ? vb->width0 : 0;
+      unsigned j = hwtnl->cmd.vdecl_buffer_index[i];
+      const struct pipe_vertex_buffer *vb = &hwtnl->cmd.vbufs[j];
+      unsigned size = vb->buffer ? vb->buffer->width0 : 0;
       unsigned offset = hwtnl->cmd.vdecl[i].array.offset;
       unsigned stride = hwtnl->cmd.vdecl[i].array.stride;
       int index_bias = (int) range->indexBias + hwtnl->index_bias;
       unsigned width;
 
+      if (size == 0)
+         continue;
+
       assert(vb);
       assert(size);
       assert(offset < size);
       assert(min_index <= max_index);
+      (void) width;
+      (void) stride;
+      (void) offset;
+      (void) size;
 
       switch (hwtnl->cmd.vdecl[i].identity.type) {
       case SVGA3D_DECLTYPE_FLOAT1:
@@ -390,6 +736,9 @@ check_draw_params(struct svga_hwtnl *hwtnl,
       assert(size);
       assert(offset < size);
       assert(stride);
+      (void) size;
+      (void) offset;
+      (void) stride;
 
       switch (range->primType) {
       case SVGA3D_PRIMITIVE_POINTLIST:
@@ -421,33 +770,57 @@ check_draw_params(struct svga_hwtnl *hwtnl,
 }
 
 
+/**
+ * All drawing funnels into this function, directly on the hardware path or
+ * after software vertex processing: VGPU10 draws now, VGPU9 queues the prim.
+ */
 enum pipe_error
 svga_hwtnl_prim(struct svga_hwtnl *hwtnl,
                 const SVGA3dPrimitiveRange * range,
+                unsigned vcount,
                 unsigned min_index,
-                unsigned max_index, struct pipe_resource *ib)
+                unsigned max_index, struct pipe_resource *ib,
+                unsigned start_instance, unsigned instance_count)
 {
    enum pipe_error ret = PIPE_OK;
 
+   if (svga_have_vgpu10(hwtnl->svga)) {
+      /* draw immediately; on failure flush the context and retry once */
+      ret = draw_vgpu10(hwtnl, range, vcount, min_index, max_index, ib,
+                        start_instance, instance_count);
+      if (ret != PIPE_OK) {
+         svga_context_flush(hwtnl->svga, NULL);
+         ret = draw_vgpu10(hwtnl, range, vcount, min_index, max_index, ib,
+                           start_instance, instance_count);
+         assert(ret == PIPE_OK);
+      }
+   }
+   else {
+      /* batch up drawing commands */
 #ifdef DEBUG
-   check_draw_params(hwtnl, range, min_index, max_index, ib);
+      check_draw_params(hwtnl, range, min_index, max_index, ib);
+      assert(start_instance == 0);
+      assert(instance_count <= 1);
+#else
+      (void) check_draw_params;  /* presumably avoids an unused warning */
 #endif
 
-   if (hwtnl->cmd.prim_count + 1 >= QSZ) {
-      ret = svga_hwtnl_flush(hwtnl);
-      if (ret != PIPE_OK)
-         return ret;
-   }
+      if (hwtnl->cmd.prim_count + 1 >= QSZ) {   /* queue about to fill up */
+         ret = svga_hwtnl_flush(hwtnl);
+         if (ret != PIPE_OK)
+            return ret;
+      }
 
-   /* min/max indices are relative to bias */
-   hwtnl->cmd.min_index[hwtnl->cmd.prim_count] = min_index;
-   hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index;
+      /* min/max indices are relative to bias */
+      hwtnl->cmd.min_index[hwtnl->cmd.prim_count] = min_index;
+      hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index;
 
-   hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range;
-   hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias;
+      hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range;
+      hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias;
 
-   pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib);
-   hwtnl->cmd.prim_count++;
+      pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib);
+      hwtnl->cmd.prim_count++;
+   }
 
    return ret;
 }
diff --git a/src/gallium/drivers/svga/svga_draw.h b/src/gallium/drivers/svga/svga_draw.h
index 1db79cd91c7..af8ecabcbae 100644
--- a/src/gallium/drivers/svga/svga_draw.h
+++ b/src/gallium/drivers/svga/svga_draw.h
@@ -35,54 +35,50 @@ struct svga_winsys_context;
 struct svga_screen;
 struct svga_context;
 struct pipe_resource;
+struct u_upload_mgr;
 
-struct svga_hwtnl *
-svga_hwtnl_create(struct svga_context *svga);
+struct svga_hwtnl *svga_hwtnl_create(struct svga_context *svga);
 
-void svga_hwtnl_destroy( struct svga_hwtnl *hwtnl );
+void svga_hwtnl_destroy(struct svga_hwtnl *hwtnl);
 
-void svga_hwtnl_set_flatshade( struct svga_hwtnl *hwtnl,
-                               boolean flatshade,
-                               boolean flatshade_first );
+void svga_hwtnl_set_flatshade(struct svga_hwtnl *hwtnl,
+                              boolean flatshade, boolean flatshade_first);
 
-void svga_hwtnl_set_unfilled( struct svga_hwtnl *hwtnl,
-                              unsigned mode );
+void svga_hwtnl_set_fillmode(struct svga_hwtnl *hwtnl, unsigned mode);
 
-void svga_hwtnl_vdecl( struct svga_hwtnl *hwtnl,
-                       unsigned i,
-                       const SVGA3dVertexDecl *decl,
-                       struct pipe_resource *vb);
+void
+svga_hwtnl_vertex_decls(struct svga_hwtnl *hwtnl,
+                        unsigned count,
+                        const SVGA3dVertexDecl * decls,
+                        const unsigned *buffer_indexes,
+                        SVGA3dElementLayoutId layoutId);
 
-void svga_hwtnl_reset_vdecl( struct svga_hwtnl *hwtnl,
-                             unsigned count );
+void
+svga_hwtnl_vertex_buffers(struct svga_hwtnl *hwtnl,
+                          unsigned count, struct pipe_vertex_buffer *buffers);
 
-
-enum pipe_error 
-svga_hwtnl_draw_arrays( struct svga_hwtnl *hwtnl,
-                        unsigned prim, 
-                        unsigned start, 
-                        unsigned count);
+enum pipe_error
+svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
+                       unsigned prim, unsigned start, unsigned count,
+                       unsigned start_instance, unsigned instance_count);
 
 enum pipe_error
-svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl,
-                                struct pipe_resource *indexBuffer,
-                                unsigned index_size,
-                                int index_bias,
-                                unsigned min_index,
-                                unsigned max_index,
-                                unsigned prim, 
-                                unsigned start, 
-                                unsigned count );
+svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
+                               struct pipe_resource *indexBuffer,
+                               unsigned index_size,
+                               int index_bias,
+                               unsigned min_index,
+                               unsigned max_index,
+                               unsigned prim, unsigned start, unsigned count,
+                               unsigned start_instance, unsigned instance_count);
 
 boolean
-svga_hwtnl_is_buffer_referred( struct svga_hwtnl *hwtnl,
-                               struct pipe_resource *buffer );
+svga_hwtnl_is_buffer_referred(struct svga_hwtnl *hwtnl,
+                              struct pipe_resource *buffer);
 
-enum pipe_error
-svga_hwtnl_flush( struct svga_hwtnl *hwtnl );
+enum pipe_error svga_hwtnl_flush(struct svga_hwtnl *hwtnl);
 
-void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl,
-                                int index_bias);
+void svga_hwtnl_set_index_bias(struct svga_hwtnl *hwtnl, int index_bias);
 
 
 #endif /* SVGA_DRAW_H_ */
diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c
index d4d77200f59..5635411d938 100644
--- a/src/gallium/drivers/svga/svga_draw_arrays.c
+++ b/src/gallium/drivers/svga/svga_draw_arrays.c
@@ -49,8 +49,8 @@ generate_indices(struct svga_hwtnl *hwtnl,
    struct pipe_resource *dst = NULL;
    void *dst_map = NULL;
 
-   dst = pipe_buffer_create(pipe->screen,
-                            PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_DEFAULT, size);
+   dst = pipe_buffer_create(pipe->screen, PIPE_BIND_INDEX_BUFFER,
+                            PIPE_USAGE_IMMUTABLE, size);
    if (dst == NULL)
       goto fail;
 
@@ -168,7 +168,8 @@ retrieve_or_generate_indices(struct svga_hwtnl *hwtnl,
 
 static enum pipe_error
 simple_draw_arrays(struct svga_hwtnl *hwtnl,
-                   unsigned prim, unsigned start, unsigned count)
+                   unsigned prim, unsigned start, unsigned count,
+                   unsigned start_instance, unsigned instance_count)
 {
    SVGA3dPrimitiveRange range;
    unsigned hw_prim;
@@ -191,13 +192,16 @@ simple_draw_arrays(struct svga_hwtnl *hwtnl,
     * looking at those numbers knows to adjust them by
     * range.indexBias.
     */
-   return svga_hwtnl_prim(hwtnl, &range, 0, count - 1, NULL);
+   return svga_hwtnl_prim(hwtnl, &range, count,
+                          0, count - 1, NULL,
+                          start_instance, instance_count);
 }
 
 
 enum pipe_error
 svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
-                       unsigned prim, unsigned start, unsigned count)
+                       unsigned prim, unsigned start, unsigned count,
+                       unsigned start_instance, unsigned instance_count)
 {
    unsigned gen_prim, gen_size, gen_nr, gen_type;
    u_generate_func gen_func;
@@ -228,7 +232,8 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
    }
 
    if (gen_type == U_GENERATE_LINEAR) {
-      return simple_draw_arrays(hwtnl, gen_prim, start, count);
+      return simple_draw_arrays(hwtnl, gen_prim, start, count,
+                                start_instance, instance_count);
    }
    else {
       struct pipe_resource *gen_buf = NULL;
@@ -250,8 +255,9 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
                                                   start,
                                                   0,
                                                   count - 1,
-                                                  gen_prim, 0, gen_nr);
-
+                                                  gen_prim, 0, gen_nr,
+                                                  start_instance,
+                                                  instance_count);
       if (ret != PIPE_OK)
          goto done;
 
diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c
index 038500a35bd..9df8f6e9beb 100644
--- a/src/gallium/drivers/svga/svga_draw_elements.c
+++ b/src/gallium/drivers/svga/svga_draw_elements.c
@@ -25,6 +25,7 @@
 
 #include "util/u_inlines.h"
 #include "util/u_prim.h"
+#include "util/u_upload_mgr.h"
 #include "indices/u_indices.h"
 
 #include "svga_cmd.h"
@@ -45,7 +46,7 @@ translate_indices(struct svga_hwtnl *hwtnl, struct pipe_resource *src,
    struct pipe_context *pipe = &hwtnl->svga->pipe;
    struct pipe_transfer *src_transfer = NULL;
    struct pipe_transfer *dst_transfer = NULL;
-   unsigned size;
+   unsigned size = index_size * nr;
    const void *src_map = NULL;
    struct pipe_resource *dst = NULL;
    void *dst_map = NULL;
@@ -98,7 +99,9 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl,
                                       unsigned index_size, int index_bias,
                                       unsigned min_index, unsigned max_index,
                                       unsigned prim, unsigned start,
-                                      unsigned count)
+                                      unsigned count,
+                                      unsigned start_instance,
+                                      unsigned instance_count)
 {
    SVGA3dPrimitiveRange range;
    unsigned hw_prim;
@@ -109,12 +112,6 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl,
    if (hw_count == 0)
       return PIPE_OK; /* nothing to draw */
 
-   /* We should never see user-space buffers in the driver.  The vbuf
-    * module should have converted them into real buffers.
-    */
-   if (index_buffer)
-      assert(!svga_buffer_is_user_buffer(index_buffer));
-
    range.primType = hw_prim;
    range.primitiveCount = hw_count;
    range.indexArray.offset = index_offset;
@@ -122,7 +119,9 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl,
    range.indexWidth = index_size;
    range.indexBias = index_bias;
 
-   return svga_hwtnl_prim(hwtnl, &range, min_index, max_index, index_buffer);
+   return svga_hwtnl_prim(hwtnl, &range, count,
+                          min_index, max_index, index_buffer,
+                          start_instance, instance_count);
 }
 
 
@@ -131,7 +130,8 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
                                struct pipe_resource *index_buffer,
                                unsigned index_size, int index_bias,
                                unsigned min_index, unsigned max_index,
-                               unsigned prim, unsigned start, unsigned count)
+                               unsigned prim, unsigned start, unsigned count,
+                               unsigned start_instance, unsigned instance_count)
 {
    unsigned gen_prim, gen_size, gen_nr, gen_type;
    u_translate_func gen_func;
@@ -165,7 +165,9 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
                                                    index_bias,
                                                    min_index,
                                                    max_index,
-                                                   gen_prim, start, count);
+                                                   gen_prim, start, count,
+                                                   start_instance,
+                                                   instance_count);
    }
    else {
       struct pipe_resource *gen_buf = NULL;
@@ -190,7 +192,9 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
                                                   index_bias,
                                                   min_index,
                                                   max_index,
-                                                  gen_prim, 0, gen_nr);
+                                                  gen_prim, 0, gen_nr,
+                                                  start_instance,
+                                                  instance_count);
       if (ret != PIPE_OK)
          goto done;
 
diff --git a/src/gallium/drivers/svga/svga_draw_private.h b/src/gallium/drivers/svga/svga_draw_private.h
index 9ab87e8259a..c8217422c96 100644
--- a/src/gallium/drivers/svga/svga_draw_private.h
+++ b/src/gallium/drivers/svga/svga_draw_private.h
@@ -46,7 +46,11 @@ static const unsigned svga_hw_prims =
     (1 << PIPE_PRIM_LINE_STRIP) |
     (1 << PIPE_PRIM_TRIANGLES) |
     (1 << PIPE_PRIM_TRIANGLE_STRIP) |
-    (1 << PIPE_PRIM_TRIANGLE_FAN));
+    (1 << PIPE_PRIM_TRIANGLE_FAN) |
+    (1 << PIPE_PRIM_LINES_ADJACENCY) |
+    (1 << PIPE_PRIM_LINE_STRIP_ADJACENCY) |
+    (1 << PIPE_PRIM_TRIANGLES_ADJACENCY) |
+    (1 << PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY));
 
 
 /**
@@ -57,8 +61,8 @@ static const unsigned svga_hw_prims =
  * PIPE_PRIM_QUADS, PIPE_PRIM_QUAD_STRIP or PIPE_PRIM_POLYGON.  We convert
  * those to other types of primitives with index/translation code.
  */
-static inline unsigned
-svga_translate_prim(unsigned mode, unsigned vcount,unsigned *prim_count)
+static inline SVGA3dPrimitiveType
+svga_translate_prim(unsigned mode, unsigned vcount, unsigned *prim_count)
 {
    switch (mode) {
    case PIPE_PRIM_POINTS:
@@ -85,6 +89,22 @@ svga_translate_prim(unsigned mode, unsigned vcount,unsigned *prim_count)
       *prim_count = vcount - 2;
       return SVGA3D_PRIMITIVE_TRIANGLEFAN; 
 
+   case PIPE_PRIM_LINES_ADJACENCY:
+      *prim_count = vcount / 4;
+      return SVGA3D_PRIMITIVE_LINELIST_ADJ;
+
+   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+      *prim_count = vcount - 3;
+      return SVGA3D_PRIMITIVE_LINESTRIP_ADJ;
+
+   case PIPE_PRIM_TRIANGLES_ADJACENCY:
+      *prim_count = vcount / 6;
+      return SVGA3D_PRIMITIVE_TRIANGLELIST_ADJ;
+
+   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+      *prim_count = vcount / 2 - 2 ;
+      return SVGA3D_PRIMITIVE_TRIANGLESTRIP_ADJ;
+
    default:
       assert(0);
       *prim_count = 0;
@@ -110,13 +130,19 @@ struct index_cache {
 struct draw_cmd {
    struct svga_winsys_context *swc;
 
+   /* vertex layout info */
    SVGA3dVertexDecl vdecl[SVGA3D_INPUTREG_MAX];
-   struct pipe_resource *vdecl_vb[SVGA3D_INPUTREG_MAX];
    unsigned vdecl_count;
+   SVGA3dElementLayoutId vdecl_layout_id;
+   unsigned vdecl_buffer_index[SVGA3D_INPUTREG_MAX];
+
+   /* vertex buffer info */
+   struct pipe_vertex_buffer vbufs[SVGA3D_INPUTREG_MAX];
+   unsigned vbuf_count;
 
    SVGA3dPrimitiveRange prim[QSZ];
    struct pipe_resource *prim_ib[QSZ];
-   unsigned prim_count;
+   unsigned prim_count;   /**< number of primitives for this draw */
    unsigned min_index[QSZ];
    unsigned max_index[QSZ];
 };
@@ -158,9 +184,11 @@ struct svga_hwtnl {
 enum pipe_error 
 svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
                  const SVGA3dPrimitiveRange *range,
+                 unsigned vcount,
                  unsigned min_index,
                  unsigned max_index,
-                 struct pipe_resource *ib );
+                 struct pipe_resource *ib,
+                 unsigned start_instance, unsigned instance_count);
 
 enum pipe_error
 svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl,
@@ -171,7 +199,9 @@ svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl,
                                        unsigned max_index,
                                        unsigned prim, 
                                        unsigned start,
-                                       unsigned count );
+                                       unsigned count,
+                                       unsigned start_instance,
+                                       unsigned instance_count);
 
 
 #endif
diff --git a/src/gallium/drivers/svga/svga_format.c b/src/gallium/drivers/svga/svga_format.c
index 8c1b161e6fa..67f6e51fce6 100644
--- a/src/gallium/drivers/svga/svga_format.c
+++ b/src/gallium/drivers/svga/svga_format.c
@@ -26,6 +26,7 @@
 
 #include "pipe/p_format.h"
 #include "util/u_debug.h"
+#include "util/u_format.h"
 #include "util/u_memory.h"
 
 #include "svga_winsys.h"
@@ -33,6 +34,319 @@
 #include "svga_format.h"
 
 
+/** Describes mapping from gallium formats to SVGA vertex/pixel formats */
+struct vgpu10_format_entry
+{
+   enum pipe_format pformat;           /**< gallium format described by this entry */
+   SVGA3dSurfaceFormat vertex_format;  /**< SVGA3D format used for vertex/index buffer binds */
+   SVGA3dSurfaceFormat pixel_format;   /**< SVGA3D format used for all other binds */
+   unsigned flags;                     /**< vertex format flags, handed back to callers as vf_flags */
+};
+
+
+static const struct vgpu10_format_entry format_conversion_table[] =
+{  /* Lookups index this table directly by pipe_format value, so row order is significant. */
+   /* Gallium format                    SVGA3D vertex format        SVGA3D pixel format          Flags */
+   { PIPE_FORMAT_NONE,                  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_B8G8R8A8_UNORM,        SVGA3D_B8G8R8A8_UNORM,      SVGA3D_B8G8R8A8_UNORM,       0 },
+   { PIPE_FORMAT_B8G8R8X8_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_B8G8R8X8_UNORM,       0 },
+   { PIPE_FORMAT_A8R8G8B8_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_X8R8G8B8_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_B5G5R5A1_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_B5G5R5A1_UNORM,       0 },
+   { PIPE_FORMAT_B4G4R4A4_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_A4R4G4B4,             0 },
+   { PIPE_FORMAT_B5G6R5_UNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_B5G6R5_UNORM,         0 },
+   { PIPE_FORMAT_R10G10B10A2_UNORM,     SVGA3D_R10G10B10A2_UNORM,   SVGA3D_R10G10B10A2_UNORM,    0 },
+   { PIPE_FORMAT_L8_UNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A8_UNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_A8_UNORM,             0 },
+   { PIPE_FORMAT_I8_UNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L8A8_UNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16_UNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_UYVY,                  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_YUYV,                  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_Z16_UNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_Z_D16,                0 },
+   { PIPE_FORMAT_Z32_UNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_Z32_FLOAT,             SVGA3D_FORMAT_INVALID,      SVGA3D_D32_FLOAT,            0 },
+   { PIPE_FORMAT_Z24_UNORM_S8_UINT,     SVGA3D_FORMAT_INVALID,      SVGA3D_D24_UNORM_S8_UINT,    0 },
+   { PIPE_FORMAT_S8_UINT_Z24_UNORM,     SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_Z24X8_UNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_D24_UNORM_S8_UINT,    0 },
+   { PIPE_FORMAT_X8Z24_UNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_S8_UINT,               SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R64_FLOAT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R64G64_FLOAT,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R64G64B64_FLOAT,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R64G64B64A64_FLOAT,    SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32_FLOAT,             SVGA3D_R32_FLOAT,           SVGA3D_R32_FLOAT,            0 },
+   { PIPE_FORMAT_R32G32_FLOAT,          SVGA3D_R32G32_FLOAT,        SVGA3D_R32G32_FLOAT,         0 },
+   { PIPE_FORMAT_R32G32B32_FLOAT,       SVGA3D_R32G32B32_FLOAT,     SVGA3D_R32G32B32_FLOAT,      0 },
+   { PIPE_FORMAT_R32G32B32A32_FLOAT,    SVGA3D_R32G32B32A32_FLOAT,  SVGA3D_R32G32B32A32_FLOAT,   0 },
+   { PIPE_FORMAT_R32_UNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32_UNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32B32_UNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32B32A32_UNORM,    SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32_USCALED,           SVGA3D_R32_UINT,            SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R32G32_USCALED,        SVGA3D_R32G32_UINT,         SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R32G32B32_USCALED,     SVGA3D_R32G32B32_UINT,      SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R32G32B32A32_USCALED,  SVGA3D_R32G32B32A32_UINT,   SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R32_SNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32_SNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32B32_SNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32B32A32_SNORM,    SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32_SSCALED,           SVGA3D_R32_SINT,            SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R32G32_SSCALED,        SVGA3D_R32G32_SINT,         SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R32G32B32_SSCALED,     SVGA3D_R32G32B32_SINT,      SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R32G32B32A32_SSCALED,  SVGA3D_R32G32B32A32_SINT,   SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R16_UNORM,             SVGA3D_R16_UNORM,           SVGA3D_R16_UNORM,            0 },
+   { PIPE_FORMAT_R16G16_UNORM,          SVGA3D_R16G16_UNORM,        SVGA3D_R16G16_UNORM,         0 },
+   { PIPE_FORMAT_R16G16B16_UNORM,       SVGA3D_R16G16B16A16_UNORM,  SVGA3D_FORMAT_INVALID,       VF_W_TO_1 },
+   { PIPE_FORMAT_R16G16B16A16_UNORM,    SVGA3D_R16G16B16A16_UNORM,  SVGA3D_R16G16B16A16_UNORM,   0 },
+   { PIPE_FORMAT_R16_USCALED,           SVGA3D_R16_UINT,            SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R16G16_USCALED,        SVGA3D_R16G16_UINT,         SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R16G16B16_USCALED,     SVGA3D_R16G16B16A16_UINT,   SVGA3D_FORMAT_INVALID,       VF_W_TO_1 | VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R16G16B16A16_USCALED,  SVGA3D_R16G16B16A16_UINT,   SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R16_SNORM,             SVGA3D_R16_SNORM,           SVGA3D_R16_SNORM,            0 },
+   { PIPE_FORMAT_R16G16_SNORM,          SVGA3D_R16G16_SNORM,        SVGA3D_R16G16_SNORM,         0 },
+   { PIPE_FORMAT_R16G16B16_SNORM,       SVGA3D_R16G16B16A16_SNORM,  SVGA3D_FORMAT_INVALID,       VF_W_TO_1 },
+   { PIPE_FORMAT_R16G16B16A16_SNORM,    SVGA3D_R16G16B16A16_SNORM,  SVGA3D_R16G16B16A16_SNORM,   0 },
+   { PIPE_FORMAT_R16_SSCALED,           SVGA3D_R16_SINT,            SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R16G16_SSCALED,        SVGA3D_R16G16_SINT,         SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R16G16B16_SSCALED,     SVGA3D_R16G16B16A16_SINT,   SVGA3D_FORMAT_INVALID,       VF_W_TO_1 | VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R16G16B16A16_SSCALED,  SVGA3D_R16G16B16A16_SINT,   SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R8_UNORM,              SVGA3D_R8_UNORM,            SVGA3D_R8_UNORM,             0 },
+   { PIPE_FORMAT_R8G8_UNORM,            SVGA3D_R8G8_UNORM,          SVGA3D_R8G8_UNORM,           0 },
+   { PIPE_FORMAT_R8G8B8_UNORM,          SVGA3D_R8G8B8A8_UNORM,      SVGA3D_FORMAT_INVALID,       VF_W_TO_1 },
+   { PIPE_FORMAT_R8G8B8A8_UNORM,        SVGA3D_R8G8B8A8_UNORM,      SVGA3D_R8G8B8A8_UNORM,       0 },
+   { PIPE_FORMAT_X8B8G8R8_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8_USCALED,            SVGA3D_R8_UINT,             SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R8G8_USCALED,          SVGA3D_R8G8_UINT,           SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R8G8B8_USCALED,        SVGA3D_R8G8B8A8_UINT,       SVGA3D_FORMAT_INVALID,       VF_W_TO_1 | VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R8G8B8A8_USCALED,      SVGA3D_R8G8B8A8_UINT,       SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { 73,                                SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 }, /* placeholder row at index 73; presumably an unnamed enum value */
+   { PIPE_FORMAT_R8_SNORM,              SVGA3D_R8_SNORM,            SVGA3D_R8_SNORM,             0 },
+   { PIPE_FORMAT_R8G8_SNORM,            SVGA3D_R8G8_SNORM,          SVGA3D_R8G8_SNORM,           0 },
+   { PIPE_FORMAT_R8G8B8_SNORM,          SVGA3D_R8G8B8A8_SNORM,      SVGA3D_FORMAT_INVALID,       VF_W_TO_1 },
+   { PIPE_FORMAT_R8G8B8A8_SNORM,        SVGA3D_R8G8B8A8_SNORM,      SVGA3D_R8G8B8A8_SNORM,       0 },
+   { 78,                                SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 }, /* placeholder row at index 78 */
+   { 79,                                SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 }, /* placeholder row at index 79 */
+   { 80,                                SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 }, /* placeholder row at index 80 */
+   { 81,                                SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 }, /* placeholder row at index 81 */
+   { PIPE_FORMAT_R8_SSCALED,            SVGA3D_R8_SINT,             SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R8G8_SSCALED,          SVGA3D_R8G8_SINT,           SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R8G8B8_SSCALED,        SVGA3D_R8G8B8A8_SINT,       SVGA3D_FORMAT_INVALID,       VF_W_TO_1 | VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R8G8B8A8_SSCALED,      SVGA3D_R8G8B8A8_SINT,       SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { 86,                                SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 }, /* placeholder row at index 86 */
+   { PIPE_FORMAT_R32_FIXED,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32_FIXED,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32B32_FIXED,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32B32A32_FIXED,    SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16_FLOAT,             SVGA3D_R16_FLOAT,           SVGA3D_R16_FLOAT,            0 },
+   { PIPE_FORMAT_R16G16_FLOAT,          SVGA3D_R16G16_FLOAT,        SVGA3D_R16G16_FLOAT,         0 },
+   { PIPE_FORMAT_R16G16B16_FLOAT,       SVGA3D_R16G16B16A16_FLOAT,  SVGA3D_FORMAT_INVALID,       VF_W_TO_1 },
+   { PIPE_FORMAT_R16G16B16A16_FLOAT,    SVGA3D_R16G16B16A16_FLOAT,  SVGA3D_R16G16B16A16_FLOAT,   0 },
+   { PIPE_FORMAT_L8_SRGB,               SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L8A8_SRGB,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8G8B8_SRGB,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A8B8G8R8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_X8B8G8R8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_B8G8R8A8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_B8G8R8A8_UNORM_SRGB,  0 },
+   { PIPE_FORMAT_B8G8R8X8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_B8G8R8X8_UNORM_SRGB,  0 },
+   { PIPE_FORMAT_A8R8G8B8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_X8R8G8B8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8G8B8A8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_R8G8B8A8_UNORM_SRGB,  0 },
+   { PIPE_FORMAT_DXT1_RGB,              SVGA3D_FORMAT_INVALID,      SVGA3D_DXT1,                 0 },
+   { PIPE_FORMAT_DXT1_RGBA,             SVGA3D_FORMAT_INVALID,      SVGA3D_DXT1,                 0 },
+   { PIPE_FORMAT_DXT3_RGBA,             SVGA3D_FORMAT_INVALID,      SVGA3D_DXT3,                 0 },
+   { PIPE_FORMAT_DXT5_RGBA,             SVGA3D_FORMAT_INVALID,      SVGA3D_DXT5,                 0 },
+   { PIPE_FORMAT_DXT1_SRGB,             SVGA3D_FORMAT_INVALID,      SVGA3D_DXT1,                 0 },
+   { PIPE_FORMAT_DXT1_SRGBA,            SVGA3D_FORMAT_INVALID,      SVGA3D_DXT1,                 0 },
+   { PIPE_FORMAT_DXT3_SRGBA,            SVGA3D_FORMAT_INVALID,      SVGA3D_DXT3,                 0 },
+   { PIPE_FORMAT_DXT5_SRGBA,            SVGA3D_FORMAT_INVALID,      SVGA3D_DXT5,                 0 },
+   { PIPE_FORMAT_RGTC1_UNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_BC4_UNORM,            0 },
+   { PIPE_FORMAT_RGTC1_SNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_BC4_SNORM,            0 },
+   { PIPE_FORMAT_RGTC2_UNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_BC5_UNORM,            0 },
+   { PIPE_FORMAT_RGTC2_SNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_BC5_SNORM,            0 },
+   { PIPE_FORMAT_R8G8_B8G8_UNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_G8R8_G8B8_UNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8SG8SB8UX8U_NORM,     SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R5SG5SB6U_NORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A8B8G8R8_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_B5G5R5X1_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R10G10B10A2_USCALED,   SVGA3D_R10G10B10A2_UNORM,   SVGA3D_FORMAT_INVALID,       VF_PUINT_TO_USCALED },
+   { PIPE_FORMAT_R11G11B10_FLOAT,       SVGA3D_FORMAT_INVALID,      SVGA3D_R11G11B10_FLOAT,      0 },
+   { PIPE_FORMAT_R9G9B9E5_FLOAT,        SVGA3D_FORMAT_INVALID,      SVGA3D_R9G9B9E5_SHAREDEXP,   0 },
+   { PIPE_FORMAT_Z32_FLOAT_S8X24_UINT,  SVGA3D_FORMAT_INVALID,      SVGA3D_D32_FLOAT_S8X24_UINT, 0 },
+   { PIPE_FORMAT_R1_UNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R10G10B10X2_USCALED,   SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R10G10B10X2_SNORM,     SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L4A4_UNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_B10G10R10A2_UNORM,     SVGA3D_R10G10B10A2_UNORM,   SVGA3D_FORMAT_INVALID,       VF_BGRA },
+   { PIPE_FORMAT_R10SG10SB10SA2U_NORM,  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8G8Bx_SNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8G8B8X8_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_B4G4R4X4_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_X24S8_UINT,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_S8X24_UINT,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_X32_S8X24_UINT,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_B2G3R3_UNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16A16_UNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A16_UNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I16_UNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_LATC1_UNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_LATC1_SNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_LATC2_UNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_LATC2_SNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A8_SNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L8_SNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L8A8_SNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I8_SNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A16_SNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16_SNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16A16_SNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I16_SNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A16_FLOAT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16_FLOAT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16A16_FLOAT,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I16_FLOAT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A32_FLOAT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L32_FLOAT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L32A32_FLOAT,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I32_FLOAT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_YV12,                  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_YV16,                  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_IYUV,                  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_NV12,                  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_NV21,                  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A4R4_UNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R4A4_UNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8A8_UNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A8R8_UNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R10G10B10A2_SSCALED,   SVGA3D_R32_UINT,            SVGA3D_FORMAT_INVALID,       VF_PUINT_TO_SSCALED },
+   { PIPE_FORMAT_R10G10B10A2_SNORM,     SVGA3D_R10G10B10A2_UNORM,   SVGA3D_FORMAT_INVALID,       VF_PUINT_TO_SNORM },
+   { PIPE_FORMAT_B10G10R10A2_USCALED,   SVGA3D_R10G10B10A2_UNORM,   SVGA3D_FORMAT_INVALID,       VF_BGRA | VF_PUINT_TO_USCALED },
+   { PIPE_FORMAT_B10G10R10A2_SSCALED,   SVGA3D_R32_UINT,            SVGA3D_FORMAT_INVALID,       VF_BGRA | VF_PUINT_TO_SSCALED },
+   { PIPE_FORMAT_B10G10R10A2_SNORM,     SVGA3D_R10G10B10A2_UNORM,   SVGA3D_FORMAT_INVALID,       VF_BGRA | VF_PUINT_TO_SNORM },
+   { PIPE_FORMAT_R8_UINT,               SVGA3D_R8_UINT,             SVGA3D_R8_UINT,              0 },
+   { PIPE_FORMAT_R8G8_UINT,             SVGA3D_R8G8_UINT,           SVGA3D_R8G8_UINT,            0 },
+   { PIPE_FORMAT_R8G8B8_UINT,           SVGA3D_R8G8B8A8_UINT,       SVGA3D_FORMAT_INVALID,       VF_W_TO_1 },
+   { PIPE_FORMAT_R8G8B8A8_UINT,         SVGA3D_R8G8B8A8_UINT,       SVGA3D_R8G8B8A8_UINT,        0 },
+   { PIPE_FORMAT_R8_SINT,               SVGA3D_R8_SINT,             SVGA3D_R8_SINT,              0 },
+   { PIPE_FORMAT_R8G8_SINT,             SVGA3D_R8G8_SINT,           SVGA3D_R8G8_SINT,            0 },
+   { PIPE_FORMAT_R8G8B8_SINT,           SVGA3D_R8G8B8A8_SINT,       SVGA3D_FORMAT_INVALID,       VF_W_TO_1 },
+   { PIPE_FORMAT_R8G8B8A8_SINT,         SVGA3D_R8G8B8A8_SINT,       SVGA3D_R8G8B8A8_SINT,        0 },
+   { PIPE_FORMAT_R16_UINT,              SVGA3D_R16_UINT,            SVGA3D_R16_UINT,             0 },
+   { PIPE_FORMAT_R16G16_UINT,           SVGA3D_R16G16_UINT,         SVGA3D_R16G16_UINT,          0 },
+   { PIPE_FORMAT_R16G16B16_UINT,        SVGA3D_R16G16B16A16_UINT,   SVGA3D_FORMAT_INVALID,       VF_W_TO_1 },
+   { PIPE_FORMAT_R16G16B16A16_UINT,     SVGA3D_R16G16B16A16_UINT,   SVGA3D_R16G16B16A16_UINT,    0 },
+   { PIPE_FORMAT_R16_SINT,              SVGA3D_R16_SINT,            SVGA3D_R16_SINT,             0 },
+   { PIPE_FORMAT_R16G16_SINT,           SVGA3D_R16G16_SINT,         SVGA3D_R16G16_SINT,          0 },
+   { PIPE_FORMAT_R16G16B16_SINT,        SVGA3D_R16G16B16A16_SINT,   SVGA3D_FORMAT_INVALID,       VF_W_TO_1 },
+   { PIPE_FORMAT_R16G16B16A16_SINT,     SVGA3D_R16G16B16A16_SINT,   SVGA3D_R16G16B16A16_SINT,    0 },
+   { PIPE_FORMAT_R32_UINT,              SVGA3D_R32_UINT,            SVGA3D_R32_UINT,             0 },
+   { PIPE_FORMAT_R32G32_UINT,           SVGA3D_R32G32_UINT,         SVGA3D_R32G32_UINT,          0 },
+   { PIPE_FORMAT_R32G32B32_UINT,        SVGA3D_R32G32B32_UINT,      SVGA3D_R32G32B32_UINT,       0 },
+   { PIPE_FORMAT_R32G32B32A32_UINT,     SVGA3D_R32G32B32A32_UINT,   SVGA3D_R32G32B32A32_UINT,    0 },
+   { PIPE_FORMAT_R32_SINT,              SVGA3D_R32_SINT,            SVGA3D_R32_SINT,             0 },
+   { PIPE_FORMAT_R32G32_SINT,           SVGA3D_R32G32_SINT,         SVGA3D_R32G32_SINT,          0 },
+   { PIPE_FORMAT_R32G32B32_SINT,        SVGA3D_R32G32B32_SINT,      SVGA3D_R32G32B32_SINT,       0 },
+   { PIPE_FORMAT_R32G32B32A32_SINT,     SVGA3D_R32G32B32A32_SINT,   SVGA3D_R32G32B32A32_SINT,    0 },
+   { PIPE_FORMAT_A8_UINT,               SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I8_UINT,               SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L8_UINT,               SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L8A8_UINT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A8_SINT,               SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I8_SINT,               SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L8_SINT,               SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L8A8_SINT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A16_UINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I16_UINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16_UINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16A16_UINT,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A16_SINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I16_SINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16_SINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16A16_SINT,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A32_UINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I32_UINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L32_UINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L32A32_UINT,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A32_SINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I32_SINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L32_SINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L32A32_SINT,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_B10G10R10A2_UINT,      SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC1_RGB8,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8G8_R8B8_UNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_G8R8_B8R8_UNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8G8B8X8_SNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8G8B8X8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8G8B8X8_UINT,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8G8B8X8_SINT,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_B10G10R10X2_UNORM,     SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16G16B16X16_UNORM,    SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16G16B16X16_SNORM,    SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16G16B16X16_FLOAT,    SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16G16B16X16_UINT,     SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16G16B16X16_SINT,     SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32B32X32_FLOAT,    SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32B32X32_UINT,     SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32B32X32_SINT,     SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8A8_SNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16A16_UNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16A16_SNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16A16_FLOAT,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32A32_FLOAT,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8A8_UINT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8A8_SINT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16A16_UINT,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16A16_SINT,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32A32_UINT,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32A32_SINT,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R10G10B10A2_UINT,      SVGA3D_R10G10B10A2_UINT,    SVGA3D_R10G10B10A2_UINT,     0 },
+   { PIPE_FORMAT_B5G6R5_SRGB,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_BPTC_RGBA_UNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_BPTC_SRGBA,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_BPTC_RGB_FLOAT,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_BPTC_RGB_UFLOAT,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A8L8_UNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A8L8_SNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A8L8_SRGB,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A16L16_UNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_G8R8_UNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_G8R8_SNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_G16R16_UNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_G16R16_SNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A8B8G8R8_SNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_X8B8G8R8_SNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_RGB8,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_SRGB8,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_RGB8A1,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_SRGB8A1,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_RGBA8,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_SRGBA8,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_R11_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_R11_SNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_RG11_UNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_RG11_SNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+};
+
+
+/**
+ * Fetch the vgpu10 vertex format and flags recorded for a gallium format.
+ * A format beyond the table asserts, then uses the PIPE_FORMAT_NONE entry.
+ */
+void
+svga_translate_vertex_format_vgpu10(enum pipe_format format,
+                                    SVGA3dSurfaceFormat *svga_format,
+                                    unsigned *vf_flags)
+{
+   const unsigned idx =
+      format < Elements(format_conversion_table) ? format : PIPE_FORMAT_NONE;
+
+   assert(format < Elements(format_conversion_table));
+   *svga_format = format_conversion_table[idx].vertex_format;
+   *vf_flags = format_conversion_table[idx].flags;
+}
+
+
 /*
  * Translate from gallium format to SVGA3D format.
  */
@@ -41,8 +355,16 @@ svga_translate_format(struct svga_screen *ss,
                       enum pipe_format format,
                       unsigned bind)
 {
-   switch(format) {
+   if (ss->sws->have_vgpu10) {
+      if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) {
+         return format_conversion_table[format].vertex_format;
+      }
+      else {
+         return format_conversion_table[format].pixel_format;
+      }
+   }
 
+   switch(format) {
    case PIPE_FORMAT_B8G8R8A8_UNORM:
       return SVGA3D_A8R8G8B8;
    case PIPE_FORMAT_B8G8R8X8_UNORM:
@@ -70,10 +392,13 @@ svga_translate_format(struct svga_screen *ss,
       return SVGA3D_A16B16G16R16;
 
    case PIPE_FORMAT_Z16_UNORM:
+      assert(!ss->sws->have_vgpu10);
       return bind & PIPE_BIND_SAMPLER_VIEW ? ss->depth.z16 : SVGA3D_Z_D16;
    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+      assert(!ss->sws->have_vgpu10);
       return bind & PIPE_BIND_SAMPLER_VIEW ? ss->depth.s8z24 : SVGA3D_Z_D24S8;
    case PIPE_FORMAT_X8Z24_UNORM:
+      assert(!ss->sws->have_vgpu10);
       return bind & PIPE_BIND_SAMPLER_VIEW ? ss->depth.x8z24 : SVGA3D_Z_D24X8;
 
    case PIPE_FORMAT_A8_UNORM:
@@ -116,12 +441,17 @@ svga_translate_format(struct svga_screen *ss,
  * Format capability description entry.
  */
 struct format_cap {
+   const char *name;
+
    SVGA3dSurfaceFormat format;
 
    /*
     * Capability index corresponding to the format.
     */
-   SVGA3dDevCapIndex index;
+   SVGA3dDevCapIndex devcap;
+
+   /* size of each pixel/block */
+   unsigned block_width, block_height, block_bytes;
 
    /*
     * Mask of supported SVGA3dFormatOp operations, to be inferred when the
@@ -134,598 +464,1637 @@ struct format_cap {
 /*
  * Format capability description table.
  *
- * Ordererd by increasing SVGA3dSurfaceFormat value, but with gaps.
+ * Ordered by increasing SVGA3dSurfaceFormat value, but with gaps.
+ *
+ * Note: there are some special cases below where we set devcap=0 and
+ * avoid querying the host.  In particular, this applies to depth/stencil
+ * formats which can be both rendered to and sampled from.  For example,
+ * the gallium format PIPE_FORMAT_Z24_UNORM_S8_UINT is converted to
+ * SVGA3D_D24_UNORM_S8_UINT for rendering but to SVGA3D_R24_UNORM_X8_TYPELESS
+ * for sampling.  If we ask the host whether a format supports both
+ * rendering and sampling, it will say no for both SVGA3D_D24_UNORM_S8_UINT
+ * and SVGA3D_R24_UNORM_X8_TYPELESS.  So we override the host query for
+ * those formats and report that both can do rendering and sampling.
  */
 static const struct format_cap format_cap_table[] = {
    {
+      "SVGA3D_FORMAT_INVALID",
+      SVGA3D_FORMAT_INVALID, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_X8R8G8B8",
       SVGA3D_X8R8G8B8,
       SVGA3D_DEVCAP_SURFACEFMT_X8R8G8B8,
+      1, 1, 4,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
       SVGA3DFORMAT_OP_DISPLAYMODE |
-      SVGA3DFORMAT_OP_3DACCELERATION |
-      SVGA3DFORMAT_OP_CONVERT_TO_ARGB |
-      SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_A8R8G8B8",
       SVGA3D_A8R8G8B8,
       SVGA3D_DEVCAP_SURFACEFMT_A8R8G8B8,
+      1, 1, 4,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_CONVERT_TO_ARGB |
-      SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
-      SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_R5G6B5",
       SVGA3D_R5G6B5,
       SVGA3D_DEVCAP_SURFACEFMT_R5G6B5,
+      1, 1, 2,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
       SVGA3DFORMAT_OP_DISPLAYMODE |
-      SVGA3DFORMAT_OP_3DACCELERATION |
-      SVGA3DFORMAT_OP_CONVERT_TO_ARGB |
-      SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_X1R5G5B5",
       SVGA3D_X1R5G5B5,
       SVGA3D_DEVCAP_SURFACEFMT_X1R5G5B5,
+      1, 1, 2,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_CONVERT_TO_ARGB |
-      SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_A1R5G5B5",
       SVGA3D_A1R5G5B5,
       SVGA3D_DEVCAP_SURFACEFMT_A1R5G5B5,
+      1, 1, 2,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_CONVERT_TO_ARGB |
-      SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
-      SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_A4R4G4B4",
       SVGA3D_A4R4G4B4,
       SVGA3D_DEVCAP_SURFACEFMT_A4R4G4B4,
+      1, 1, 2,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
-      SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
-   /*
-    * SVGA3D_Z_D32 is not yet supported, and has no corresponding
-    * SVGA3D_DEVCAP_xxx.
-    */
    {
+      /*
+       * SVGA3D_Z_D32 is not yet supported, and has no corresponding
+       * SVGA3D_DEVCAP_xxx.
+       */
+      "SVGA3D_Z_D32",
+      SVGA3D_Z_D32, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_Z_D16",
       SVGA3D_Z_D16,
       SVGA3D_DEVCAP_SURFACEFMT_Z_D16,
-      SVGA3DFORMAT_OP_ZSTENCIL |
-      SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH
+      1, 1, 2,
+      SVGA3DFORMAT_OP_ZSTENCIL
    },
    {
+      "SVGA3D_Z_D24S8",
       SVGA3D_Z_D24S8,
       SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8,
-      SVGA3DFORMAT_OP_ZSTENCIL |
-      SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH
+      1, 1, 4,
+      SVGA3DFORMAT_OP_ZSTENCIL
    },
    {
+      "SVGA3D_Z_D15S1",
       SVGA3D_Z_D15S1,
       SVGA3D_DEVCAP_MAX,
-      SVGA3DFORMAT_OP_ZSTENCIL |
-      SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH
+      1, 1, 2,
+      SVGA3DFORMAT_OP_ZSTENCIL
    },
    {
+      "SVGA3D_LUMINANCE8",
       SVGA3D_LUMINANCE8,
       SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8,
+      1, 1, 1,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_VOLUMETEXTURE
    },
    {
-      SVGA3D_LUMINANCE8_ALPHA8,
-      SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8_ALPHA8,
-      SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      /*
+       * SVGA3D_LUMINANCE4_ALPHA4 is not supported, and has no corresponding
+       * SVGA3D_DEVCAP_xxx.
+       */
+      "SVGA3D_LUMINANCE4_ALPHA4",
+      SVGA3D_LUMINANCE4_ALPHA4, 0, 0, 0, 0, 0
    },
-   /*
-    * SVGA3D_LUMINANCE4_ALPHA4 is not supported, and has no corresponding
-    * SVGA3D_DEVCAP_xxx.
-    */
    {
+      "SVGA3D_LUMINANCE16",
       SVGA3D_LUMINANCE16,
       SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE16,
+      1, 1, 2,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_VOLUMETEXTURE
+   },
+   {
+      "SVGA3D_LUMINANCE8_ALPHA8",
+      SVGA3D_LUMINANCE8_ALPHA8,
+      SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8_ALPHA8,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE
    },
    {
+      "SVGA3D_DXT1",
       SVGA3D_DXT1,
       SVGA3D_DEVCAP_SURFACEFMT_DXT1,
+      4, 4, 8,
       SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
+      "SVGA3D_DXT2",
       SVGA3D_DXT2,
       SVGA3D_DEVCAP_SURFACEFMT_DXT2,
+      4, 4, 8,
       SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
+      "SVGA3D_DXT3",
       SVGA3D_DXT3,
       SVGA3D_DEVCAP_SURFACEFMT_DXT3,
+      4, 4, 16,
       SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
+      "SVGA3D_DXT4",
       SVGA3D_DXT4,
       SVGA3D_DEVCAP_SURFACEFMT_DXT4,
+      4, 4, 16,
       SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
+      "SVGA3D_DXT5",
       SVGA3D_DXT5,
       SVGA3D_DEVCAP_SURFACEFMT_DXT5,
+      4, 4, 16, /* 4x4 block, 16 bytes per block (same as DXT3/DXT4) */
       SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
+      "SVGA3D_BUMPU8V8",
       SVGA3D_BUMPU8V8,
       SVGA3D_DEVCAP_SURFACEFMT_BUMPU8V8,
+      1, 1, 2,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_BUMPMAP |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_VOLUMETEXTURE
    },
-   /*
-    * SVGA3D_BUMPL6V5U5 is unsupported; it has no corresponding
-    * SVGA3D_DEVCAP_xxx.
-    */
    {
+      /*
+       * SVGA3D_BUMPL6V5U5 is unsupported; it has no corresponding
+       * SVGA3D_DEVCAP_xxx.
+       */
+      "SVGA3D_BUMPL6V5U5",
+      SVGA3D_BUMPL6V5U5, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BUMPX8L8V8U8",
       SVGA3D_BUMPX8L8V8U8,
       SVGA3D_DEVCAP_SURFACEFMT_BUMPX8L8V8U8,
+      1, 1, 4,
       SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_BUMPMAP |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_CUBETEXTURE
+   },
+   {
+      "SVGA3D_FORMAT_DEAD1",
+      SVGA3D_FORMAT_DEAD1, 0, 0, 0, 0, 0
    },
-   /*
-    * SVGA3D_BUMPL8V8U8 is unsupported; it has no corresponding
-    * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPX8L8V8U8 should be used instead.
-    */
    {
+      "SVGA3D_ARGB_S10E5",
       SVGA3D_ARGB_S10E5,
       SVGA3D_DEVCAP_SURFACEFMT_ARGB_S10E5,
+      1, 1, 8, /* four 16-bit (S10E5) channels = 8 bytes per pixel */
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_ARGB_S23E8",
       SVGA3D_ARGB_S23E8,
       SVGA3D_DEVCAP_SURFACEFMT_ARGB_S23E8,
+      1, 1, 16, /* four 32-bit (S23E8) channels = 16 bytes per pixel */
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_A2R10G10B10",
       SVGA3D_A2R10G10B10,
       SVGA3D_DEVCAP_SURFACEFMT_A2R10G10B10,
+      1, 1, 4,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_CONVERT_TO_ARGB |
-      SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
-   /*
-    * SVGA3D_V8U8 is unsupported; it has no corresponding
-    * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPU8V8 should be used instead.
-    */
    {
+      /*
+       * SVGA3D_V8U8 is unsupported; it has no corresponding
+       * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPU8V8 should be used instead.
+       */
+      "SVGA3D_V8U8",
+      SVGA3D_V8U8, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_Q8W8V8U8",
       SVGA3D_Q8W8V8U8,
       SVGA3D_DEVCAP_SURFACEFMT_Q8W8V8U8,
+      1, 1, 4,
       SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_BUMPMAP |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
+      "SVGA3D_CxV8U8",
       SVGA3D_CxV8U8,
       SVGA3D_DEVCAP_SURFACEFMT_CxV8U8,
-      SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_BUMPMAP |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE
+   },
+   {
+      /*
+       * SVGA3D_X8L8V8U8 is unsupported; it has no corresponding
+       * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPX8L8V8U8 should be used instead.
+       */
+      "SVGA3D_X8L8V8U8",
+      SVGA3D_X8L8V8U8, 0, 0, 0, 0, 0
    },
-   /*
-    * SVGA3D_X8L8V8U8 is unsupported; it has no corresponding
-    * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPX8L8V8U8 should be used instead.
-    */
    {
+      "SVGA3D_A2W10V10U10",
       SVGA3D_A2W10V10U10,
       SVGA3D_DEVCAP_SURFACEFMT_A2W10V10U10,
-      SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_BUMPMAP |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE
    },
    {
+      "SVGA3D_ALPHA8",
       SVGA3D_ALPHA8,
       SVGA3D_DEVCAP_SURFACEFMT_ALPHA8,
+      1, 1, 1,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_VOLUMETEXTURE
    },
    {
+      "SVGA3D_R_S10E5",
       SVGA3D_R_S10E5,
       SVGA3D_DEVCAP_SURFACEFMT_R_S10E5,
+      1, 1, 2,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_R_S23E8",
       SVGA3D_R_S23E8,
       SVGA3D_DEVCAP_SURFACEFMT_R_S23E8,
+      1, 1, 4,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_RG_S10E5",
       SVGA3D_RG_S10E5,
       SVGA3D_DEVCAP_SURFACEFMT_RG_S10E5,
+      1, 1, 4, /* two 16-bit (S10E5) channels = 4 bytes per pixel */
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_RG_S23E8",
       SVGA3D_RG_S23E8,
       SVGA3D_DEVCAP_SURFACEFMT_RG_S23E8,
+      1, 1, 8, /* two 32-bit (S23E8) channels = 8 bytes per pixel */
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
-   /*
-    * SVGA3D_BUFFER is a placeholder format for index/vertex buffers.
-    */
    {
+      /*
+       * SVGA3D_BUFFER is a placeholder format for index/vertex buffers.
+       */
+      "SVGA3D_BUFFER",
+      SVGA3D_BUFFER, 0, 1, 1, 1, 0
+   },
+   {
+      "SVGA3D_Z_D24X8",
       SVGA3D_Z_D24X8,
       SVGA3D_DEVCAP_SURFACEFMT_Z_D24X8,
-      SVGA3DFORMAT_OP_ZSTENCIL |
-      SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH
+      1, 1, 4,
+      SVGA3DFORMAT_OP_ZSTENCIL
    },
    {
+      "SVGA3D_V16U16",
       SVGA3D_V16U16,
       SVGA3D_DEVCAP_SURFACEFMT_V16U16,
+      1, 1, 4,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_BUMPMAP |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_VOLUMETEXTURE
    },
    {
+      "SVGA3D_G16R16",
       SVGA3D_G16R16,
       SVGA3D_DEVCAP_SURFACEFMT_G16R16,
+      1, 1, 4,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_A16B16G16R16",
       SVGA3D_A16B16G16R16,
       SVGA3D_DEVCAP_SURFACEFMT_A16B16G16R16,
+      1, 1, 8,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_UYVY",
       SVGA3D_UYVY,
       SVGA3D_DEVCAP_SURFACEFMT_UYVY,
+      0, 0, 0,
       0
    },
    {
+      "SVGA3D_YUY2",
       SVGA3D_YUY2,
       SVGA3D_DEVCAP_SURFACEFMT_YUY2,
+      0, 0, 0,
       0
    },
    {
+      "SVGA3D_NV12",
       SVGA3D_NV12,
       SVGA3D_DEVCAP_SURFACEFMT_NV12,
+      0, 0, 0,
       0
    },
    {
+      "SVGA3D_AYUV",
       SVGA3D_AYUV,
       SVGA3D_DEVCAP_SURFACEFMT_AYUV,
+      0, 0, 0,
       0
    },
    {
-      SVGA3D_Z_DF16,
-      SVGA3D_DEVCAP_SURFACEFMT_Z_DF16,
-      0
+      "SVGA3D_R32G32B32A32_TYPELESS",
+      SVGA3D_R32G32B32A32_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R32G32B32A32_TYPELESS,
+      1, 1, 16,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
-      SVGA3D_Z_DF24,
-      SVGA3D_DEVCAP_SURFACEFMT_Z_DF24,
-      0
+      "SVGA3D_R32G32B32A32_UINT",
+      SVGA3D_R32G32B32A32_UINT,
+      SVGA3D_DEVCAP_DXFMT_R32G32B32A32_UINT,
+      1, 1, 16,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
-      SVGA3D_Z_D24S8_INT,
-      SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8_INT,
-      0
+      "SVGA3D_R32G32B32A32_SINT",
+      SVGA3D_R32G32B32A32_SINT,
+      SVGA3D_DEVCAP_DXFMT_R32G32B32A32_SINT,
+      1, 1, 16,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
-};
-
-
-/*
- * Get format capabilities from the host.  It takes in consideration
- * deprecated/unsupported formats, and formats which are implicitely assumed to
- * be supported when the host does not provide an explicit capability entry.
- */
-void
-svga_get_format_cap(struct svga_screen *ss,
-                    SVGA3dSurfaceFormat format,
-                    SVGA3dSurfaceFormatCaps *caps)
-{
-   const struct format_cap *entry;
-
-   for (entry = format_cap_table; entry < format_cap_table + Elements(format_cap_table); ++entry) {
-      if (entry->format == format) {
-         struct svga_winsys_screen *sws = ss->sws;
-         SVGA3dDevCapResult result;
-
-         if (sws->get_cap(sws, entry->index, &result)) {
-            /* Explicitly advertised format */
-            caps->value = result.u;
-         } else {
-            /* Implicitly advertised format -- use default caps */
-            caps->value = entry->defaultOperations;
-         }
-
-         return;
-      }
-   }
-
-   /* Unsupported format */
-   caps->value = 0;
-}
-
-
-/**
- * Return block size and bytes per block for the given SVGA3D format.
- * block_width and block_height are one for uncompressed formats and
- * greater than one for compressed formats.
- * Note: we don't handle formats that are unsupported, according to
- * the format_cap_table above.
- */
-void
-svga_format_size(SVGA3dSurfaceFormat format,
-                 unsigned *block_width,
-                 unsigned *block_height,
-                 unsigned *bytes_per_block)
-{
-   *block_width = *block_height = 1;
-
-   switch (format) {
-   case SVGA3D_X8R8G8B8:
-   case SVGA3D_A8R8G8B8:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_R5G6B5:
-   case SVGA3D_X1R5G5B5:
-   case SVGA3D_A1R5G5B5:
-   case SVGA3D_A4R4G4B4:
-      *bytes_per_block = 2;
-      return;
-
-   case SVGA3D_Z_D32:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_Z_D16:
-      *bytes_per_block = 2;
-      return;
-
-   case SVGA3D_Z_D24S8:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_Z_D15S1:
-      *bytes_per_block = 2;
-      return;
-
-   case SVGA3D_LUMINANCE8:
-   case SVGA3D_LUMINANCE4_ALPHA4:
-      *bytes_per_block = 1;
-      return;
-
-   case SVGA3D_LUMINANCE16:
-   case SVGA3D_LUMINANCE8_ALPHA8:
-      *bytes_per_block = 2;
-      return;
-
-   case SVGA3D_DXT1:
-   case SVGA3D_DXT2:
-      *block_width = *block_height = 4;
-      *bytes_per_block = 8;
-      return;
-
-   case SVGA3D_DXT3:
-   case SVGA3D_DXT4:
-   case SVGA3D_DXT5:
-      *block_width = *block_height = 4;
-      *bytes_per_block = 16;
-      return;
-
-   case SVGA3D_BUMPU8V8:
-   case SVGA3D_BUMPL6V5U5:
-      *bytes_per_block = 2;
-      return;
-
-   case SVGA3D_BUMPX8L8V8U8:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_ARGB_S10E5:
-      *bytes_per_block = 8;
-      return;
-
-   case SVGA3D_ARGB_S23E8:
-      *bytes_per_block = 16;
-      return;
-
-   case SVGA3D_A2R10G10B10:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_Q8W8V8U8:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_CxV8U8:
-      *bytes_per_block = 2;
-      return;
-
-   case SVGA3D_X8L8V8U8:
-   case SVGA3D_A2W10V10U10:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_ALPHA8:
-      *bytes_per_block = 1;
-      return;
-
-   case SVGA3D_R_S10E5:
-      *bytes_per_block = 2;
-      return;
-   case SVGA3D_R_S23E8:
-      *bytes_per_block = 4;
-      return;
-   case SVGA3D_RG_S10E5:
-      *bytes_per_block = 4;
-      return;
-   case SVGA3D_RG_S23E8:
-      *bytes_per_block = 8;
-      return;
-
-   case SVGA3D_BUFFER:
-      *bytes_per_block = 1;
-      return;
-
-   case SVGA3D_Z_D24X8:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_V16U16:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_G16R16:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_A16B16G16R16:
-      *bytes_per_block = 8;
-      return;
-
-   case SVGA3D_Z_DF16:
-      *bytes_per_block = 2;
-      return;
-   case SVGA3D_Z_DF24:
-      *bytes_per_block = 4;
-      return;
-   case SVGA3D_Z_D24S8_INT:
-      *bytes_per_block = 4;
-      return;
-
+   {
+      "SVGA3D_R32G32B32_TYPELESS",
+      SVGA3D_R32G32B32_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R32G32B32_TYPELESS,
+      1, 1, 12,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32G32B32_FLOAT",
+      SVGA3D_R32G32B32_FLOAT,
+      SVGA3D_DEVCAP_DXFMT_R32G32B32_FLOAT,
+      1, 1, 12,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32G32B32_UINT",
+      SVGA3D_R32G32B32_UINT,
+      SVGA3D_DEVCAP_DXFMT_R32G32B32_UINT,
+      1, 1, 12,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32G32B32_SINT",
+      SVGA3D_R32G32B32_SINT,
+      SVGA3D_DEVCAP_DXFMT_R32G32B32_SINT,
+      1, 1, 12,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16B16A16_TYPELESS",
+      SVGA3D_R16G16B16A16_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R16G16B16A16_TYPELESS,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16B16A16_UINT",
+      SVGA3D_R16G16B16A16_UINT,
+      SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UINT,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16B16A16_SNORM",
+      SVGA3D_R16G16B16A16_SNORM,
+      SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SNORM,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16B16A16_SINT",
+      SVGA3D_R16G16B16A16_SINT,
+      SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SINT,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32G32_TYPELESS",
+      SVGA3D_R32G32_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R32G32_TYPELESS,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32G32_UINT",
+      SVGA3D_R32G32_UINT,
+      SVGA3D_DEVCAP_DXFMT_R32G32_UINT,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32G32_SINT",
+      SVGA3D_R32G32_SINT,
+      SVGA3D_DEVCAP_DXFMT_R32G32_SINT,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32G8X24_TYPELESS",
+      SVGA3D_R32G8X24_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R32G8X24_TYPELESS,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      /* Special case: no devcap / report sampler and depth/stencil ability
+       */
+      "SVGA3D_D32_FLOAT_S8X24_UINT",
+      SVGA3D_D32_FLOAT_S8X24_UINT,
+      0, /*SVGA3D_DEVCAP_DXFMT_D32_FLOAT_S8X24_UINT*/
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      /* Special case: no devcap / report sampler and depth/stencil ability
+       */
+      "SVGA3D_R32_FLOAT_X8X24_TYPELESS",
+      SVGA3D_R32_FLOAT_X8X24_TYPELESS,
+      0, /*SVGA3D_DEVCAP_DXFMT_R32_FLOAT_X8X24_TYPELESS*/
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      "SVGA3D_X32_TYPELESS_G8X24_UINT",
+      SVGA3D_X32_TYPELESS_G8X24_UINT,
+      SVGA3D_DEVCAP_DXFMT_X32_TYPELESS_G8X24_UINT,
+      1, 1, 8, /* 32 + 8 + 24 bits = 8 bytes, like the other 32/8/24 views */
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R10G10B10A2_TYPELESS",
+      SVGA3D_R10G10B10A2_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R10G10B10A2_TYPELESS,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R10G10B10A2_UINT",
+      SVGA3D_R10G10B10A2_UINT,
+      SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UINT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R11G11B10_FLOAT",
+      SVGA3D_R11G11B10_FLOAT,
+      SVGA3D_DEVCAP_DXFMT_R11G11B10_FLOAT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8G8B8A8_TYPELESS",
+      SVGA3D_R8G8B8A8_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R8G8B8A8_TYPELESS,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8G8B8A8_UNORM",
+      SVGA3D_R8G8B8A8_UNORM,
+      SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8G8B8A8_UNORM_SRGB",
+      SVGA3D_R8G8B8A8_UNORM_SRGB,
+      SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM_SRGB,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8G8B8A8_UINT",
+      SVGA3D_R8G8B8A8_UINT,
+      SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UINT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+      },
+   {
+      "SVGA3D_R8G8B8A8_SINT",
+      SVGA3D_R8G8B8A8_SINT,
+      SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SINT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16_TYPELESS",
+      SVGA3D_R16G16_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R16G16_TYPELESS,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16_UINT",
+      SVGA3D_R16G16_UINT,
+      SVGA3D_DEVCAP_DXFMT_R16G16_UINT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16_SINT",
+      SVGA3D_R16G16_SINT,
+      SVGA3D_DEVCAP_DXFMT_R16G16_SINT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32_TYPELESS",
+      SVGA3D_R32_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R32_TYPELESS,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      /* Special case: no devcap / report sampler and depth/stencil ability
+       */
+      "SVGA3D_D32_FLOAT",
+      SVGA3D_D32_FLOAT,
+      0, /*SVGA3D_DEVCAP_DXFMT_D32_FLOAT*/
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      "SVGA3D_R32_UINT",
+      SVGA3D_R32_UINT,
+      SVGA3D_DEVCAP_DXFMT_R32_UINT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32_SINT",
+      SVGA3D_R32_SINT,
+      SVGA3D_DEVCAP_DXFMT_R32_SINT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R24G8_TYPELESS",
+      SVGA3D_R24G8_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R24G8_TYPELESS,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      /* Special case: no devcap / report sampler and depth/stencil ability
+       */
+      "SVGA3D_D24_UNORM_S8_UINT",
+      SVGA3D_D24_UNORM_S8_UINT,
+      0, /*SVGA3D_DEVCAP_DXFMT_D24_UNORM_S8_UINT*/
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      /* Special case: no devcap / report sampler and depth/stencil ability
+       */
+      "SVGA3D_R24_UNORM_X8_TYPELESS",
+      SVGA3D_R24_UNORM_X8_TYPELESS,
+      0, /*SVGA3D_DEVCAP_DXFMT_R24_UNORM_X8_TYPELESS*/
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      "SVGA3D_X24_TYPELESS_G8_UINT",
+      SVGA3D_X24_TYPELESS_G8_UINT,
+      SVGA3D_DEVCAP_DXFMT_X24_TYPELESS_G8_UINT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      "SVGA3D_R8G8_TYPELESS",
+      SVGA3D_R8G8_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R8G8_TYPELESS,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8G8_UNORM",
+      SVGA3D_R8G8_UNORM,
+      SVGA3D_DEVCAP_DXFMT_R8G8_UNORM,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8G8_UINT",
+      SVGA3D_R8G8_UINT,
+      SVGA3D_DEVCAP_DXFMT_R8G8_UINT,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8G8_SINT",
+      SVGA3D_R8G8_SINT,
+      SVGA3D_DEVCAP_DXFMT_R8G8_SINT,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16_TYPELESS",
+      SVGA3D_R16_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R16_TYPELESS,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_ZSTENCIL |
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16_UNORM",
+      SVGA3D_R16_UNORM,
+      SVGA3D_DEVCAP_DXFMT_R16_UNORM,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16_UINT",
+      SVGA3D_R16_UINT,
+      SVGA3D_DEVCAP_DXFMT_R16_UINT,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16_SNORM",
+      SVGA3D_R16_SNORM,
+      SVGA3D_DEVCAP_DXFMT_R16_SNORM,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16_SINT",
+      SVGA3D_R16_SINT,
+      SVGA3D_DEVCAP_DXFMT_R16_SINT,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8_TYPELESS",
+      SVGA3D_R8_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R8_TYPELESS,
+      1, 1, 1,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8_UNORM",
+      SVGA3D_R8_UNORM,
+      SVGA3D_DEVCAP_DXFMT_R8_UNORM,
+      1, 1, 1,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8_UINT",
+      SVGA3D_R8_UINT,
+      SVGA3D_DEVCAP_DXFMT_R8_UINT,
+      1, 1, 1,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8_SNORM",
+      SVGA3D_R8_SNORM,
+      SVGA3D_DEVCAP_DXFMT_R8_SNORM,
+      1, 1, 1,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8_SINT",
+      SVGA3D_R8_SINT,
+      SVGA3D_DEVCAP_DXFMT_R8_SINT,
+      1, 1, 1,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_P8",
+      SVGA3D_P8, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_R9G9B9E5_SHAREDEXP",
+      SVGA3D_R9G9B9E5_SHAREDEXP,
+      SVGA3D_DEVCAP_DXFMT_R9G9B9E5_SHAREDEXP,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8G8_B8G8_UNORM",
+      SVGA3D_R8G8_B8G8_UNORM, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_G8R8_G8B8_UNORM",
+      SVGA3D_G8R8_G8B8_UNORM, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC1_TYPELESS",
+      SVGA3D_BC1_TYPELESS, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC1_UNORM_SRGB",
+      SVGA3D_BC1_UNORM_SRGB, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC2_TYPELESS",
+      SVGA3D_BC2_TYPELESS, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC2_UNORM_SRGB",
+      SVGA3D_BC2_UNORM_SRGB, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC3_TYPELESS",
+      SVGA3D_BC3_TYPELESS, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC3_UNORM_SRGB",
+      SVGA3D_BC3_UNORM_SRGB, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC4_TYPELESS",
+      SVGA3D_BC4_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_BC4_TYPELESS,
+      4, 4, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE
+   },
+   {
+      "SVGA3D_ATI1",
+      SVGA3D_ATI1, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC4_SNORM",
+      SVGA3D_BC4_SNORM,
+      SVGA3D_DEVCAP_DXFMT_BC4_SNORM,
+      4, 4, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE
+   },
+   {
+      "SVGA3D_BC5_TYPELESS",
+      SVGA3D_BC5_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_BC5_TYPELESS,
+      4, 4, 16,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE
+   },
+   {
+      "SVGA3D_ATI2",
+      SVGA3D_ATI2, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC5_SNORM",
+      SVGA3D_BC5_SNORM,
+      SVGA3D_DEVCAP_DXFMT_BC5_SNORM,
+      4, 4, 16,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE
+   },
+   {
+      "SVGA3D_R10G10B10_XR_BIAS_A2_UNORM",
+      SVGA3D_R10G10B10_XR_BIAS_A2_UNORM, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_B8G8R8A8_TYPELESS",
+      SVGA3D_B8G8R8A8_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_B8G8R8A8_TYPELESS,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_B8G8R8A8_UNORM_SRGB",
+      SVGA3D_B8G8R8A8_UNORM_SRGB,
+      SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM_SRGB,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_B8G8R8X8_TYPELESS",
+      SVGA3D_B8G8R8X8_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_B8G8R8X8_TYPELESS,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_B8G8R8X8_UNORM_SRGB",
+      SVGA3D_B8G8R8X8_UNORM_SRGB,
+      SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM_SRGB,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_Z_DF16",
+      SVGA3D_Z_DF16,
+      SVGA3D_DEVCAP_SURFACEFMT_Z_DF16,
+      1, 1, 2,
+      0
+   },
+   {
+      "SVGA3D_Z_DF24",
+      SVGA3D_Z_DF24,
+      SVGA3D_DEVCAP_SURFACEFMT_Z_DF24,
+      1, 1, 4,
+      0
+   },
+   {
+      "SVGA3D_Z_D24S8_INT",
+      SVGA3D_Z_D24S8_INT,
+      SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8_INT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      "SVGA3D_YV12",
+      SVGA3D_YV12, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_R32G32B32A32_FLOAT",
+      SVGA3D_R32G32B32A32_FLOAT,
+      SVGA3D_DEVCAP_DXFMT_R32G32B32A32_FLOAT,
+      1, 1, 16,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16B16A16_FLOAT",
+      SVGA3D_R16G16B16A16_FLOAT,
+      SVGA3D_DEVCAP_DXFMT_R16G16B16A16_FLOAT,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16B16A16_UNORM",
+      SVGA3D_R16G16B16A16_UNORM,
+      SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UNORM,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32G32_FLOAT",
+      SVGA3D_R32G32_FLOAT,
+      SVGA3D_DEVCAP_DXFMT_R32G32_FLOAT,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R10G10B10A2_UNORM",
+      SVGA3D_R10G10B10A2_UNORM,
+      SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UNORM,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8G8B8A8_SNORM",
+      SVGA3D_R8G8B8A8_SNORM,
+      SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SNORM,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16_FLOAT",
+      SVGA3D_R16G16_FLOAT,
+      SVGA3D_DEVCAP_DXFMT_R16G16_FLOAT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16_UNORM",
+      SVGA3D_R16G16_UNORM,
+      SVGA3D_DEVCAP_DXFMT_R16G16_UNORM,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16_SNORM",
+      SVGA3D_R16G16_SNORM,
+      SVGA3D_DEVCAP_DXFMT_R16G16_SNORM,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      /* Special case: no devcap / report sampler, render target and
+       * depth/stencil ability
+       */
+      "SVGA3D_R32_FLOAT",
+      SVGA3D_R32_FLOAT,
+      0, /*SVGA3D_DEVCAP_DXFMT_R32_FLOAT*/
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      "SVGA3D_R8G8_SNORM",
+      SVGA3D_R8G8_SNORM,
+      SVGA3D_DEVCAP_DXFMT_R8G8_SNORM,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16_FLOAT",
+      SVGA3D_R16_FLOAT,
+      SVGA3D_DEVCAP_DXFMT_R16_FLOAT,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_D16_UNORM",
+      SVGA3D_D16_UNORM,
+      SVGA3D_DEVCAP_DXFMT_D16_UNORM,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      "SVGA3D_A8_UNORM",
+      SVGA3D_A8_UNORM,
+      SVGA3D_DEVCAP_DXFMT_A8_UNORM,
+      1, 1, 1,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_BC1_UNORM",
+      SVGA3D_BC1_UNORM, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC2_UNORM",
+      SVGA3D_BC2_UNORM, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC3_UNORM",
+      SVGA3D_BC3_UNORM, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_B5G6R5_UNORM",
+      SVGA3D_B5G6R5_UNORM,
+      SVGA3D_DEVCAP_DXFMT_B5G6R5_UNORM,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_DISPLAYMODE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_B5G5R5A1_UNORM",
+      SVGA3D_B5G5R5A1_UNORM,
+      SVGA3D_DEVCAP_DXFMT_B5G5R5A1_UNORM,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_DISPLAYMODE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_B8G8R8A8_UNORM",
+      SVGA3D_B8G8R8A8_UNORM,
+      SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_B8G8R8X8_UNORM",
+      SVGA3D_B8G8R8X8_UNORM,
+      SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_DISPLAYMODE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_BC4_UNORM",
+     SVGA3D_BC4_UNORM,
+     SVGA3D_DEVCAP_DXFMT_BC4_UNORM,
+     4, 4, 8,
+     SVGA3DFORMAT_OP_TEXTURE |
+     SVGA3DFORMAT_OP_CUBETEXTURE
+   },
+   {
+      "SVGA3D_BC5_UNORM",
+     SVGA3D_BC5_UNORM,
+     SVGA3D_DEVCAP_DXFMT_BC5_UNORM,
+     4, 4, 16,
+     SVGA3DFORMAT_OP_TEXTURE |
+     SVGA3DFORMAT_OP_CUBETEXTURE
+   }
+};
+
+
+/**
+ * Debug only:
+ * 1. check that format_cap_table[i] matches the i-th SVGA3D format.
+ * 2. check that format_conversion_table[i].pformat == i.
+ */
+static void
+check_format_tables(void)
+{
+   static boolean first_call = TRUE;
+
+   if (first_call) {
+      unsigned i;
+
+      STATIC_ASSERT(Elements(format_cap_table) == SVGA3D_FORMAT_MAX);
+      for (i = 0; i < Elements(format_cap_table); i++) {
+         assert(format_cap_table[i].format == i);
+      }
+
+      STATIC_ASSERT(Elements(format_conversion_table) == PIPE_FORMAT_COUNT);
+      for (i = 0; i < Elements(format_conversion_table); i++) {
+         assert(format_conversion_table[i].pformat == i);
+      }
+
+      first_call = FALSE;
+   }
+}
+
+
+/*
+ * Get format capabilities from the host.  It takes into consideration
+ * deprecated/unsupported formats, and formats which are implicitly assumed to
+ * be supported when the host does not provide an explicit capability entry.
+ */
+void
+svga_get_format_cap(struct svga_screen *ss,
+                    SVGA3dSurfaceFormat format,
+                    SVGA3dSurfaceFormatCaps *caps)
+{
+   struct svga_winsys_screen *sws = ss->sws;
+   SVGA3dDevCapResult result;
+   const struct format_cap *entry;
+
+#ifdef DEBUG
+   check_format_tables();
+#else
+   (void) check_format_tables;
+#endif
+
+   assert(format < Elements(format_cap_table));
+   entry = &format_cap_table[format];
+   assert(entry->format == format);
+
+   if (entry->devcap && sws->get_cap(sws, entry->devcap, &result)) {
+      /* Explicitly advertised format */
+      if (entry->devcap > SVGA3D_DEVCAP_DX) {
+         /* Translate DX/VGPU10 format cap to VGPU9 cap */
+         caps->value = 0;
+         if (result.u & SVGA3D_DXFMT_COLOR_RENDERTARGET)
+            caps->value |= SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET;
+         if (!(result.u & SVGA3D_DXFMT_BLENDABLE))
+            caps->value |= SVGA3DFORMAT_OP_NOALPHABLEND;
+         if (result.u & SVGA3D_DXFMT_DEPTH_RENDERTARGET)
+            caps->value |= SVGA3DFORMAT_OP_ZSTENCIL;
+         if (result.u & SVGA3D_DXFMT_SHADER_SAMPLE)
+            caps->value |= (SVGA3DFORMAT_OP_TEXTURE |
+                            SVGA3DFORMAT_OP_CUBETEXTURE);
+         if (result.u & SVGA3D_DXFMT_VOLUME)
+            caps->value |= SVGA3DFORMAT_OP_VOLUMETEXTURE;
+      }
+      else {
+         /* Return VGPU9 format cap as-is */
+         caps->value = result.u;
+      }
+
+   } else {
+      /* Implicitly advertised format -- use default caps */
+      caps->value = entry->defaultOperations;
+   }
+}
+
+
+void
+svga_format_size(SVGA3dSurfaceFormat format,
+                 unsigned *block_width,
+                 unsigned *block_height,
+                 unsigned *bytes_per_block)
+{
+   assert(format < Elements(format_cap_table));
+   *block_width = format_cap_table[format].block_width;
+   *block_height = format_cap_table[format].block_height;
+   *bytes_per_block = format_cap_table[format].block_bytes;
+   /* Make sure the table entry was valid */
+   if (*block_width == 0)
+      debug_printf("Bad table entry for %s\n", svga_format_name(format));
+   assert(*block_width);
+   assert(*block_height);
+   assert(*bytes_per_block);
+}
+
+
+const char *
+svga_format_name(SVGA3dSurfaceFormat format)
+{
+   assert(format < Elements(format_cap_table));
+   return format_cap_table[format].name;
+}
+
+
+/**
+ * Is the given SVGA3dSurfaceFormat a signed or unsigned integer color format?
+ */
+boolean
+svga_format_is_integer(SVGA3dSurfaceFormat format)
+{
+   switch (format) {
+   case SVGA3D_R32G32B32A32_SINT:
+   case SVGA3D_R32G32B32_SINT:
+   case SVGA3D_R32G32_SINT:
+   case SVGA3D_R32_SINT:
+   case SVGA3D_R16G16B16A16_SINT:
+   case SVGA3D_R16G16_SINT:
+   case SVGA3D_R16_SINT:
+   case SVGA3D_R8G8B8A8_SINT:
+   case SVGA3D_R8G8_SINT:
+   case SVGA3D_R8_SINT:
+   case SVGA3D_R32G32B32A32_UINT:
+   case SVGA3D_R32G32B32_UINT:
+   case SVGA3D_R32G32_UINT:
+   case SVGA3D_R32_UINT:
+   case SVGA3D_R16G16B16A16_UINT:
+   case SVGA3D_R16G16_UINT:
+   case SVGA3D_R16_UINT:
+   case SVGA3D_R8G8B8A8_UINT:
+   case SVGA3D_R8G8_UINT:
+   case SVGA3D_R8_UINT:
+   case SVGA3D_R10G10B10A2_UINT:
+      return TRUE;
+   default:
+      return FALSE;
+   }
+}
+
+
+/**
+ * Given a texture format, return the expected data type returned from
+ * the texture sampler.  For example, UNORM8 formats return floating point
+ * values while SINT formats return signed integer values.
+ * Note: this function could be moved into the gallium u_format.[ch] code
+ * if it's useful to anyone else.
+ */
+enum tgsi_return_type
+svga_get_texture_datatype(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+   enum tgsi_return_type t;
+
+   if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ) {
+      if (util_format_is_depth_or_stencil(format)) {
+         t = TGSI_RETURN_TYPE_FLOAT; /* XXX revisit this */
+      }
+      else if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) {
+         t = TGSI_RETURN_TYPE_FLOAT;
+      }
+      else if (desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+         t = desc->channel[0].normalized ? TGSI_RETURN_TYPE_UNORM : TGSI_RETURN_TYPE_UINT;
+      }
+      else if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
+         t = desc->channel[0].normalized ? TGSI_RETURN_TYPE_SNORM : TGSI_RETURN_TYPE_SINT;
+      }
+      else {
+         assert(!"Unexpected channel type in svga_get_texture_datatype()");
+         t = TGSI_RETURN_TYPE_FLOAT;
+      }
+   }
+   else {
+      /* compressed format, shared exponent format, etc. */
+      switch (format) {
+      case PIPE_FORMAT_DXT1_RGB:
+      case PIPE_FORMAT_DXT1_RGBA:
+      case PIPE_FORMAT_DXT3_RGBA:
+      case PIPE_FORMAT_DXT5_RGBA:
+      case PIPE_FORMAT_DXT1_SRGB:
+      case PIPE_FORMAT_DXT1_SRGBA:
+      case PIPE_FORMAT_DXT3_SRGBA:
+      case PIPE_FORMAT_DXT5_SRGBA:
+      case PIPE_FORMAT_RGTC1_UNORM:
+      case PIPE_FORMAT_RGTC2_UNORM:
+      case PIPE_FORMAT_LATC1_UNORM:
+      case PIPE_FORMAT_LATC2_UNORM:
+      case PIPE_FORMAT_ETC1_RGB8:
+         t = TGSI_RETURN_TYPE_UNORM;
+         break;
+      case PIPE_FORMAT_RGTC1_SNORM:
+      case PIPE_FORMAT_RGTC2_SNORM:
+      case PIPE_FORMAT_LATC1_SNORM:
+      case PIPE_FORMAT_LATC2_SNORM:
+      case PIPE_FORMAT_R10G10B10X2_SNORM:
+         t = TGSI_RETURN_TYPE_SNORM;
+         break;
+      case PIPE_FORMAT_R11G11B10_FLOAT:
+      case PIPE_FORMAT_R9G9B9E5_FLOAT:
+         t = TGSI_RETURN_TYPE_FLOAT;
+         break;
+      default:
+         assert(!"Unexpected channel type in svga_get_texture_datatype()");
+         t = TGSI_RETURN_TYPE_FLOAT;
+      }
+   }
+
+   return t;
+}
+
+
+/**
+ * Given an svga context, return true iff there are currently any integer color
+ * buffers attached to the framebuffer.
+ */
+boolean
+svga_has_any_integer_cbufs(const struct svga_context *svga)
+{
+   unsigned i;
+   for (i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) {
+      struct pipe_surface *cbuf = svga->curr.framebuffer.cbufs[i];
+
+      if (cbuf && util_format_is_pure_integer(cbuf->format)) {
+         return TRUE;
+      }
+   }
+   return FALSE;
+}
+
+
+/**
+ * Given an SVGA format, return the corresponding typeless format.
+ * If there is no typeless format, return the format unchanged.
+ */
+SVGA3dSurfaceFormat
+svga_typeless_format(SVGA3dSurfaceFormat format)
+{
+   switch (format) {
+   case SVGA3D_R32G32B32A32_UINT:
+   case SVGA3D_R32G32B32A32_SINT:
+   case SVGA3D_R32G32B32A32_FLOAT:
+      return SVGA3D_R32G32B32A32_TYPELESS;
+   case SVGA3D_R32G32B32_FLOAT:
+   case SVGA3D_R32G32B32_UINT:
+   case SVGA3D_R32G32B32_SINT:
+      return SVGA3D_R32G32B32_TYPELESS;
+   case SVGA3D_R16G16B16A16_UINT:
+   case SVGA3D_R16G16B16A16_UNORM:
+   case SVGA3D_R16G16B16A16_SNORM:
+   case SVGA3D_R16G16B16A16_SINT:
+   case SVGA3D_R16G16B16A16_FLOAT:
+      return SVGA3D_R16G16B16A16_TYPELESS;
+   case SVGA3D_R32G32_UINT:
+   case SVGA3D_R32G32_SINT:
+   case SVGA3D_R32G32_FLOAT:
+      return SVGA3D_R32G32_TYPELESS;
+   case SVGA3D_D32_FLOAT_S8X24_UINT:
+      return SVGA3D_R32G8X24_TYPELESS;
+   case SVGA3D_X32_TYPELESS_G8X24_UINT:
+      return SVGA3D_R32_FLOAT_X8X24_TYPELESS;
+   case SVGA3D_R10G10B10A2_UINT:
+   case SVGA3D_R10G10B10A2_UNORM:
+      return SVGA3D_R10G10B10A2_TYPELESS;
+   case SVGA3D_R8G8B8A8_UNORM:
+   case SVGA3D_R8G8B8A8_SNORM:
+   case SVGA3D_R8G8B8A8_UNORM_SRGB:
+   case SVGA3D_R8G8B8A8_UINT:
+   case SVGA3D_R8G8B8A8_SINT:
+      return SVGA3D_R8G8B8A8_TYPELESS;
+   case SVGA3D_R16G16_UINT:
+   case SVGA3D_R16G16_SINT:
+   case SVGA3D_R16G16_UNORM:
+   case SVGA3D_R16G16_SNORM:
+   case SVGA3D_R16G16_FLOAT:
+      return SVGA3D_R16G16_TYPELESS;
+   case SVGA3D_D32_FLOAT:
+   case SVGA3D_R32_FLOAT:
+   case SVGA3D_R32_UINT:
+   case SVGA3D_R32_SINT:
+      return SVGA3D_R32_TYPELESS;
+   case SVGA3D_D24_UNORM_S8_UINT:
+      return SVGA3D_R24G8_TYPELESS;
+   case SVGA3D_X24_TYPELESS_G8_UINT:
+      return SVGA3D_R24_UNORM_X8_TYPELESS;
+   case SVGA3D_R8G8_UNORM:
+   case SVGA3D_R8G8_SNORM:
+   case SVGA3D_R8G8_UINT:
+   case SVGA3D_R8G8_SINT:
+      return SVGA3D_R8G8_TYPELESS;
+   case SVGA3D_R16_UNORM:
+   case SVGA3D_R16_UINT:
+   case SVGA3D_R16_SNORM:
+   case SVGA3D_R16_SINT:
+   case SVGA3D_R16_FLOAT:
+      return SVGA3D_R16_TYPELESS;
+   case SVGA3D_R8_UNORM:
+   case SVGA3D_R8_UINT:
+   case SVGA3D_R8_SNORM:
+   case SVGA3D_R8_SINT:
+      return SVGA3D_R8_TYPELESS;
+   case SVGA3D_B8G8R8A8_UNORM_SRGB:
+   case SVGA3D_B8G8R8A8_UNORM:
+      return SVGA3D_B8G8R8A8_TYPELESS;
+   case SVGA3D_B8G8R8X8_UNORM_SRGB:
+   case SVGA3D_B8G8R8X8_UNORM:
+      return SVGA3D_B8G8R8X8_TYPELESS;
+   case SVGA3D_BC4_UNORM:
+   case SVGA3D_BC4_SNORM:
+      return SVGA3D_BC4_TYPELESS;
+   case SVGA3D_BC5_UNORM:
+   case SVGA3D_BC5_SNORM:
+      return SVGA3D_BC5_TYPELESS;
+
+   /* Special cases (no corresponding _TYPELESS formats) */
+   case SVGA3D_A8_UNORM:
+   case SVGA3D_A4R4G4B4:
+   case SVGA3D_B5G5R5A1_UNORM:
+   case SVGA3D_B5G6R5_UNORM:
+   case SVGA3D_DXT1:
+   case SVGA3D_DXT2:
+   case SVGA3D_DXT3:
+   case SVGA3D_DXT4:
+   case SVGA3D_DXT5:
+   case SVGA3D_R11G11B10_FLOAT:
+   case SVGA3D_R9G9B9E5_SHAREDEXP:
+   case SVGA3D_Z_D32:
+   case SVGA3D_Z_D16:
+      return format;
    default:
-      debug_printf("format %u\n", (unsigned) format);
-      assert(!"unexpected format in svga_format_size()");
-      *bytes_per_block = 4;
+      debug_printf("Unexpected format %s in %s\n",
+                   svga_format_name(format), __FUNCTION__);
+      return format;
    }
 }
diff --git a/src/gallium/drivers/svga/svga_format.h b/src/gallium/drivers/svga/svga_format.h
index 94c867acf77..0af218cb01a 100644
--- a/src/gallium/drivers/svga/svga_format.h
+++ b/src/gallium/drivers/svga/svga_format.h
@@ -28,6 +28,7 @@
 
 
 #include "pipe/p_format.h"
+#include "svga_context.h"
 #include "svga_types.h"
 #include "svga_reg.h"
 #include "svga3d_reg.h"
@@ -36,6 +37,27 @@
 struct svga_screen;
 
 
+/**
+ * Vertex format flags.  These are used to specify that some vertex formats
+ * need extra processing/conversion in the vertex shader.  For example,
+ * setting the W component to 1, or swapping R/B, or converting packed uint
+ * types to signed int/snorm.
+ */
+#define VF_ADJUST_RANGE     (1 << 0)
+#define VF_W_TO_1           (1 << 1)
+#define VF_U_TO_F_CAST      (1 << 2)  /* convert uint to float */
+#define VF_I_TO_F_CAST      (1 << 3)  /* convert sint to float */
+#define VF_BGRA             (1 << 4)  /* swap R/B */
+#define VF_PUINT_TO_SNORM   (1 << 5)  /* 10_10_10_2 to snorm */
+#define VF_PUINT_TO_USCALED (1 << 6)  /* 10_10_10_2 to uscaled */
+#define VF_PUINT_TO_SSCALED (1 << 7)  /* 10_10_10_2 to sscaled */
+
+
+void
+svga_translate_vertex_format_vgpu10(enum pipe_format format,
+                                    SVGA3dSurfaceFormat *svga_format,
+                                    unsigned *vf_flags);
+
 enum SVGA3dSurfaceFormat
 svga_translate_format(struct svga_screen *ss,
                       enum pipe_format format,
@@ -52,5 +74,23 @@ svga_format_size(SVGA3dSurfaceFormat format,
                  unsigned *block_height,
                  unsigned *bytes_per_block);
 
+const char *
+svga_format_name(SVGA3dSurfaceFormat format);
+
+boolean
+svga_format_is_integer(SVGA3dSurfaceFormat format);
+
+enum tgsi_return_type
+svga_get_texture_datatype(enum pipe_format format);
+
+
+// XXX: Move this to svga_context?
+boolean
+svga_has_any_integer_cbufs(const struct svga_context *svga);
+
+
+SVGA3dSurfaceFormat
+svga_typeless_format(SVGA3dSurfaceFormat format);
+
 
 #endif /* SVGA_FORMAT_H_ */
diff --git a/src/gallium/drivers/svga/svga_pipe_blend.c b/src/gallium/drivers/svga/svga_pipe_blend.c
index 2890516c0cf..06bb3e3bd7e 100644
--- a/src/gallium/drivers/svga/svga_pipe_blend.c
+++ b/src/gallium/drivers/svga/svga_pipe_blend.c
@@ -27,14 +27,15 @@
 #include "pipe/p_defines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "util/u_bitmask.h"
 
 #include "svga_context.h"
-
 #include "svga_hw_reg.h"
+#include "svga_cmd.h"
 
 
 static inline unsigned
-svga_translate_blend_factor(unsigned factor)
+svga_translate_blend_factor(const struct svga_context *svga, unsigned factor)
 {
    switch (factor) {
    case PIPE_BLENDFACTOR_ZERO:            return SVGA3D_BLENDOP_ZERO;
@@ -50,8 +51,21 @@ svga_translate_blend_factor(unsigned factor)
    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return SVGA3D_BLENDOP_SRCALPHASAT;
    case PIPE_BLENDFACTOR_CONST_COLOR:     return SVGA3D_BLENDOP_BLENDFACTOR;
    case PIPE_BLENDFACTOR_INV_CONST_COLOR: return SVGA3D_BLENDOP_INVBLENDFACTOR;
-   case PIPE_BLENDFACTOR_CONST_ALPHA:     return SVGA3D_BLENDOP_BLENDFACTOR; /* ? */
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return SVGA3D_BLENDOP_INVBLENDFACTOR; /* ? */
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      if (svga_have_vgpu10(svga))
+         return SVGA3D_BLENDOP_BLENDFACTORALPHA;
+      else
+         return SVGA3D_BLENDOP_BLENDFACTOR; /* as close as we can get */
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      if (svga_have_vgpu10(svga))
+         return SVGA3D_BLENDOP_INVBLENDFACTORALPHA;
+      else
+         return SVGA3D_BLENDOP_INVBLENDFACTOR; /* as close as we can get */
+   case PIPE_BLENDFACTOR_SRC1_COLOR:      return SVGA3D_BLENDOP_SRC1COLOR;
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:  return SVGA3D_BLENDOP_INVSRC1COLOR;
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:      return SVGA3D_BLENDOP_SRC1ALPHA;
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:  return SVGA3D_BLENDOP_INVSRC1ALPHA;
+   case 0:                                return SVGA3D_BLENDOP_ONE;
    default:
       assert(0);
       return SVGA3D_BLENDOP_ZERO;
@@ -74,18 +88,64 @@ svga_translate_blend_func(unsigned mode)
 }
 
 
+/**
+ * Define a vgpu10 blend state object for the given
+ * svga blend state.
+ */
+static void
+define_blend_state_object(struct svga_context *svga,
+                          struct svga_blend_state *bs)
+{
+   SVGA3dDXBlendStatePerRT perRT[SVGA3D_MAX_RENDER_TARGETS];
+   unsigned try;
+   int i;
+
+   assert(svga_have_vgpu10(svga));
+
+   bs->id = util_bitmask_add(svga->blend_object_id_bm);
+
+   for (i = 0; i < SVGA3D_DX_MAX_RENDER_TARGETS; i++) {
+      perRT[i].blendEnable = bs->rt[i].blend_enable;
+      perRT[i].srcBlend = bs->rt[i].srcblend;
+      perRT[i].destBlend = bs->rt[i].dstblend;
+      perRT[i].blendOp = bs->rt[i].blendeq;
+      perRT[i].srcBlendAlpha = bs->rt[i].srcblend_alpha;
+      perRT[i].destBlendAlpha = bs->rt[i].dstblend_alpha;
+      perRT[i].blendOpAlpha = bs->rt[i].blendeq_alpha;
+      perRT[i].renderTargetWriteMask = bs->rt[i].writemask;
+      perRT[i].logicOpEnable = 0;
+      perRT[i].logicOp = SVGA3D_LOGICOP_COPY;
+      assert(perRT[i].srcBlend == perRT[0].srcBlend);
+   }
+
+   /* Loop in case command buffer is full and we need to flush and retry */
+   for (try = 0; try < 2; try++) {
+      enum pipe_error ret;
+
+      ret = SVGA3D_vgpu10_DefineBlendState(svga->swc,
+                                           bs->id,
+                                           bs->alpha_to_coverage,
+                                           bs->independent_blend_enable,
+                                           perRT);
+      if (ret == PIPE_OK)
+         return;
+      svga_context_flush(svga, NULL);
+   }
+}
+
+
 static void *
 svga_create_blend_state(struct pipe_context *pipe,
                         const struct pipe_blend_state *templ)
 {
+   struct svga_context *svga = svga_context(pipe);
    struct svga_blend_state *blend = CALLOC_STRUCT( svga_blend_state );
    unsigned i;
 
- 
    /* Fill in the per-rendertarget blend state.  We currently only
-    * have one rendertarget.
+    * support independent blend enable and colormask per render target.
     */
-   for (i = 0; i < 1; i++) {
+   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
       /* No way to set this in SVGA3D, and no way to correctly implement it on
        * top of D3D9 API.  Instead we try to simulate with various blend modes.
        */
@@ -107,6 +167,9 @@ svga_create_blend_state(struct pipe_context *pipe,
             break;
          case PIPE_LOGICOP_COPY:
             blend->rt[i].blend_enable = FALSE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_ADD;
             break;
          case PIPE_LOGICOP_COPY_INVERTED:
             blend->rt[i].blend_enable   = TRUE;
@@ -169,38 +232,99 @@ svga_create_blend_state(struct pipe_context *pipe,
          case PIPE_LOGICOP_EQUIV:
             /* Fill these in with plausible values */
             blend->rt[i].blend_enable = FALSE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_ADD;
             break;
          default:
             assert(0);
             break;
          }
+         blend->rt[i].srcblend_alpha = blend->rt[i].srcblend;
+         blend->rt[i].dstblend_alpha = blend->rt[i].dstblend;
+         blend->rt[i].blendeq_alpha = blend->rt[i].blendeq;
       }
       else {
-         blend->rt[i].blend_enable   = templ->rt[0].blend_enable;
-
-         if (templ->rt[0].blend_enable) {
-            blend->rt[i].srcblend       = svga_translate_blend_factor(templ->rt[0].rgb_src_factor);
-            blend->rt[i].dstblend       = svga_translate_blend_factor(templ->rt[0].rgb_dst_factor);
-            blend->rt[i].blendeq        = svga_translate_blend_func(templ->rt[0].rgb_func);
-            blend->rt[i].srcblend_alpha = svga_translate_blend_factor(templ->rt[0].alpha_src_factor);
-            blend->rt[i].dstblend_alpha = svga_translate_blend_factor(templ->rt[0].alpha_dst_factor);
-            blend->rt[i].blendeq_alpha  = svga_translate_blend_func(templ->rt[0].alpha_func);
+         /* Note: the vgpu10 device does not yet support independent
+          * blend terms per render target.  Target[0] always specifies the
+          * blending terms.
+          */
+         if (templ->independent_blend_enable || templ->rt[0].blend_enable) {
+            /* always use the 0th target's blending terms for now */
+            blend->rt[i].srcblend =
+               svga_translate_blend_factor(svga, templ->rt[0].rgb_src_factor);
+            blend->rt[i].dstblend =
+               svga_translate_blend_factor(svga, templ->rt[0].rgb_dst_factor);
+            blend->rt[i].blendeq =
+               svga_translate_blend_func(templ->rt[0].rgb_func);
+            blend->rt[i].srcblend_alpha =
+               svga_translate_blend_factor(svga, templ->rt[0].alpha_src_factor);
+            blend->rt[i].dstblend_alpha =
+               svga_translate_blend_factor(svga, templ->rt[0].alpha_dst_factor);
+            blend->rt[i].blendeq_alpha =
+               svga_translate_blend_func(templ->rt[0].alpha_func);
 
             if (blend->rt[i].srcblend_alpha != blend->rt[i].srcblend ||
                 blend->rt[i].dstblend_alpha != blend->rt[i].dstblend ||
-                blend->rt[i].blendeq_alpha  != blend->rt[i].blendeq)
-            {
+                blend->rt[i].blendeq_alpha  != blend->rt[i].blendeq) {
                blend->rt[i].separate_alpha_blend_enable = TRUE;
             }
          }
+         else {
+            /* disabled - default blend terms */
+            blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD;
+            blend->rt[i].srcblend_alpha = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].dstblend_alpha = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].blendeq_alpha = SVGA3D_BLENDEQ_ADD;
+         }
+
+         if (templ->independent_blend_enable) {
+            blend->rt[i].blend_enable = templ->rt[i].blend_enable;
+         }
+         else {
+            blend->rt[i].blend_enable = templ->rt[0].blend_enable;
+         }
       }
 
-      blend->rt[i].writemask = templ->rt[0].colormask;
+      /* Some GL blend modes are not supported by the VGPU9 device (there's
+       * no equivalent of PIPE_BLENDFACTOR_[INV_]CONST_ALPHA).
+       * When we set this flag, we copy the constant blend alpha value
+       * to the R, G, B components.
+       * This works as long as the src/dst RGB blend factors don't use
+       * PIPE_BLENDFACTOR_CONST_COLOR and PIPE_BLENDFACTOR_CONST_ALPHA
+       * at the same time.  There's no work-around for that.
+       */
+      if (!svga_have_vgpu10(svga)) {
+         if (templ->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_CONST_ALPHA ||
+             templ->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_CONST_ALPHA ||
+             templ->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_INV_CONST_ALPHA ||
+             templ->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_CONST_ALPHA) {
+            blend->blend_color_alpha = TRUE;
+         }
+      }
+
+      if (templ->independent_blend_enable) {
+         blend->rt[i].writemask = templ->rt[i].colormask;
+      }
+      else {
+         blend->rt[i].writemask = templ->rt[0].colormask;
+      }
+   }
+
+   blend->independent_blend_enable = templ->independent_blend_enable;
+
+   blend->alpha_to_coverage = templ->alpha_to_coverage;
+
+   if (svga_have_vgpu10(svga)) {
+      define_blend_state_object(svga, blend);
    }
 
    return blend;
 }
 
+
 static void svga_bind_blend_state(struct pipe_context *pipe,
                                   void *blend)
 {
@@ -210,9 +334,30 @@ static void svga_bind_blend_state(struct pipe_context *pipe,
    svga->dirty |= SVGA_NEW_BLEND;
 }
 
-
-static void svga_delete_blend_state(struct pipe_context *pipe, void *blend)
+static void svga_delete_blend_state(struct pipe_context *pipe,
+                                    void *blend)
 {
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_blend_state *bs = (struct svga_blend_state *) blend;
+
+   /* Free a blend state.  svga_create_blend_state() only defines a device
+    * blend object on vgpu10, so only destroy one (and its id) on vgpu10.
+    */
+   if (svga_have_vgpu10(svga) && bs->id != SVGA3D_INVALID_ID) {
+      enum pipe_error ret;
+
+      ret = SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id);
+         assert(ret == PIPE_OK);
+      }
+
+      if (bs->id == svga->state.hw_draw.blend_id)
+         svga->state.hw_draw.blend_id = SVGA3D_INVALID_ID;
+      util_bitmask_clear(svga->blend_object_id_bm, bs->id);
+      bs->id = SVGA3D_INVALID_ID;
+   }
    FREE(blend);
 }
 
@@ -235,6 +380,3 @@ void svga_init_blend_functions( struct svga_context *svga )
 
    svga->pipe.set_blend_color = svga_set_blend_color;
 }
-
-
-
diff --git a/src/gallium/drivers/svga/svga_pipe_blit.c b/src/gallium/drivers/svga/svga_pipe_blit.c
index dbb9f4b5172..2b34f964096 100644
--- a/src/gallium/drivers/svga/svga_pipe_blit.c
+++ b/src/gallium/drivers/svga/svga_pipe_blit.c
@@ -29,6 +29,7 @@
 #include "svga_cmd.h"
 #include "svga_surface.h"
 
+//#include "util/u_blit_sw.h"
 #include "util/u_format.h"
 #include "util/u_surface.h"
 
@@ -159,7 +160,8 @@ static void svga_blit(struct pipe_context *pipe,
    struct svga_context *svga = svga_context(pipe);
    struct pipe_blit_info info = *blit_info;
 
-   if (info.src.resource->nr_samples > 1 &&
+   if (!svga_have_vgpu10(svga) &&
+       info.src.resource->nr_samples > 1 &&
        info.dst.resource->nr_samples <= 1 &&
        !util_format_is_depth_or_stencil(info.src.resource->format) &&
        !util_format_is_pure_integer(info.src.resource->format)) {
@@ -171,12 +173,8 @@ static void svga_blit(struct pipe_context *pipe,
       return; /* done */
    }
 
-   if (info.mask & PIPE_MASK_S) {
-      debug_printf("svga: cannot blit stencil, skipping\n");
-      info.mask &= ~PIPE_MASK_S;
-   }
-
-   if (!util_blitter_is_blit_supported(svga->blitter, &info)) {
+   if ((info.mask & PIPE_MASK_S) ||
+       !util_blitter_is_blit_supported(svga->blitter, &info)) {
       debug_printf("svga: blit unsupported %s -> %s\n",
                    util_format_short_name(info.src.resource->format),
                    util_format_short_name(info.dst.resource->format));
@@ -188,9 +186,9 @@ static void svga_blit(struct pipe_context *pipe,
    util_blitter_save_vertex_buffer_slot(svga->blitter, svga->curr.vb);
    util_blitter_save_vertex_elements(svga->blitter, (void*)svga->curr.velems);
    util_blitter_save_vertex_shader(svga->blitter, svga->curr.vs);
-   /*util_blitter_save_geometry_shader(svga->blitter, svga->curr.gs);*/
-   /*util_blitter_save_so_targets(svga->blitter, svga->num_so_targets,
-                     (struct pipe_stream_output_target**)svga->so_targets);*/
+   util_blitter_save_geometry_shader(svga->blitter, svga->curr.user_gs);
+   util_blitter_save_so_targets(svga->blitter, svga->num_so_targets,
+                     (struct pipe_stream_output_target**)svga->so_targets);
    util_blitter_save_rasterizer(svga->blitter, (void*)svga->curr.rast);
    util_blitter_save_viewport(svga->blitter, &svga->curr.viewport);
    util_blitter_save_scissor(svga->blitter, &svga->curr.scissor);
@@ -199,14 +197,14 @@ static void svga_blit(struct pipe_context *pipe,
    util_blitter_save_depth_stencil_alpha(svga->blitter,
                                          (void*)svga->curr.depth);
    util_blitter_save_stencil_ref(svga->blitter, &svga->curr.stencil_ref);
-   /*util_blitter_save_sample_mask(svga->blitter, svga->sample_mask);*/
+   util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask);
    util_blitter_save_framebuffer(svga->blitter, &svga->curr.framebuffer);
    util_blitter_save_fragment_sampler_states(svga->blitter,
-                     svga->curr.num_samplers,
-                     (void**)svga->curr.sampler);
+                     svga->curr.num_samplers[PIPE_SHADER_FRAGMENT],
+                     (void**)svga->curr.sampler[PIPE_SHADER_FRAGMENT]);
    util_blitter_save_fragment_sampler_views(svga->blitter,
-                     svga->curr.num_sampler_views,
-                     svga->curr.sampler_views);
+                     svga->curr.num_sampler_views[PIPE_SHADER_FRAGMENT],
+                     svga->curr.sampler_views[PIPE_SHADER_FRAGMENT]);
    /*util_blitter_save_render_condition(svga->blitter, svga->render_cond_query,
                                       svga->render_cond_cond, svga->render_cond_mode);*/
    util_blitter_blit(svga->blitter, &info);
diff --git a/src/gallium/drivers/svga/svga_pipe_clear.c b/src/gallium/drivers/svga/svga_pipe_clear.c
index c4edced9bae..bab61780610 100644
--- a/src/gallium/drivers/svga/svga_pipe_clear.c
+++ b/src/gallium/drivers/svga/svga_pipe_clear.c
@@ -34,6 +34,78 @@
 #include "svga_surface.h"
 
 
+/**
+ * Clear the whole color buffer(s) by drawing a quad.  For VGPU10 we use
+ * this when an integer render target's clear color doesn't fit in floats.
+ * Depth and/or stencil are cleared too if clear_buffers specifies them.
+ */
+static void
+clear_buffers_with_quad(struct svga_context *svga,
+                        unsigned clear_buffers,
+                        const union pipe_color_union *color,
+                        double depth, unsigned stencil)
+{
+   const struct pipe_framebuffer_state *fb = &svga->curr.framebuffer;
+   /* NOTE(review): svga_blit() saves curr.user_gs where we save curr.gs. */
+   util_blitter_save_vertex_buffer_slot(svga->blitter, svga->curr.vb);
+   util_blitter_save_vertex_elements(svga->blitter, (void*)svga->curr.velems);
+   util_blitter_save_vertex_shader(svga->blitter, svga->curr.vs);
+   util_blitter_save_geometry_shader(svga->blitter, svga->curr.gs);
+   util_blitter_save_so_targets(svga->blitter, svga->num_so_targets,
+                     (struct pipe_stream_output_target**)svga->so_targets);
+   util_blitter_save_rasterizer(svga->blitter, (void*)svga->curr.rast);
+   util_blitter_save_viewport(svga->blitter, &svga->curr.viewport);
+   util_blitter_save_scissor(svga->blitter, &svga->curr.scissor);
+   util_blitter_save_fragment_shader(svga->blitter, svga->curr.fs);
+   util_blitter_save_blend(svga->blitter, (void*)svga->curr.blend);
+   util_blitter_save_depth_stencil_alpha(svga->blitter,
+                                         (void*)svga->curr.depth);
+   util_blitter_save_stencil_ref(svga->blitter, &svga->curr.stencil_ref);
+   util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask);
+   /* clear at the framebuffer's size; num_layers is fixed at 1 here */
+   util_blitter_clear(svga->blitter,
+                      fb->width, fb->height,
+                      1, /* num_layers */
+                      clear_buffers, color,
+                      depth, stencil);
+}
+
+
+/**
+ * Is any color buffer selected in 'buffers' bound with a pure-integer format?
+ */
+static boolean
+is_integer_target(struct pipe_framebuffer_state *fb, unsigned buffers)
+{
+   unsigned idx;
+
+   for (idx = 0; idx < fb->nr_cbufs; idx++) {
+      const struct pipe_surface *cbuf = fb->cbufs[idx];
+      /* skip color buffers that are not being cleared or not bound */
+      if (!(buffers & (PIPE_CLEAR_COLOR0 << idx)) || !cbuf)
+         continue;
+      if (util_format_is_pure_integer(cbuf->format))
+         return TRUE;
+   }
+   return FALSE;
+}
+
+
+/**
+ * Check if the integer clear color values are exactly representable as
+ * floats (each within [-2^24, 2^24]).  If so, we can use the VGPU10
+ * ClearRenderTargetView command.  Otherwise, we need to clear with a quad.
+ */
+static boolean
+ints_fit_in_floats(const union pipe_color_union *color)
+{
+   const int max = 1 << 24;  /* a float significand holds 24 bits */
+   return (color->i[0] >= -max && color->i[0] <= max &&
+           color->i[1] >= -max && color->i[1] <= max &&
+           color->i[2] >= -max && color->i[2] <= max &&
+           color->i[3] >= -max && color->i[3] <= max);
+}
+
+
 static enum pipe_error
 try_clear(struct svga_context *svga, 
           unsigned buffers,
@@ -52,7 +124,7 @@ try_clear(struct svga_context *svga,
    if (ret != PIPE_OK)
       return ret;
 
-   if (svga->rebind.rendertargets) {
+   if (svga->rebind.flags.rendertargets) {
       ret = svga_reemit_framebuffer_bindings(svga);
       if (ret != PIPE_OK) {
          return ret;
@@ -71,29 +143,72 @@ try_clear(struct svga_context *svga,
       if (buffers & PIPE_CLEAR_DEPTH)
          flags |= SVGA3D_CLEAR_DEPTH;
 
-      if ((svga->curr.framebuffer.zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) &&
-          (buffers & PIPE_CLEAR_STENCIL))
+      if (buffers & PIPE_CLEAR_STENCIL)
          flags |= SVGA3D_CLEAR_STENCIL;
 
       rect.w = MAX2(rect.w, fb->zsbuf->width);
       rect.h = MAX2(rect.h, fb->zsbuf->height);
    }
 
-   if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) {
+   if (!svga_have_vgpu10(svga) &&
+       !svga_rects_equal(&rect, &svga->state.hw_clear.viewport)) {
       restore_viewport = TRUE;
       ret = SVGA3D_SetViewport(svga->swc, &rect);
       if (ret != PIPE_OK)
          return ret;
    }
 
-   ret = SVGA3D_ClearRect(svga->swc, flags, uc.ui[0], (float) depth, stencil,
-                          rect.x, rect.y, rect.w, rect.h);
-   if (ret != PIPE_OK)
-      return ret;
+   if (svga_have_vgpu10(svga)) {
+      if (flags & SVGA3D_CLEAR_COLOR) {
+         unsigned i;
+
+         if (is_integer_target(fb, buffers) && !ints_fit_in_floats(color)) {
+            clear_buffers_with_quad(svga, buffers, color, depth, stencil);
+            /* We also cleared depth/stencil, so that's done */
+            flags &= ~(SVGA3D_CLEAR_DEPTH | SVGA3D_CLEAR_STENCIL);
+         }
+         else {
+            struct pipe_surface *rtv;
+
+            /* Issue VGPU10 Clear commands */
+            for (i = 0; i < fb->nr_cbufs; i++) {
+               if ((fb->cbufs[i] == NULL) ||
+                   !(buffers & (PIPE_CLEAR_COLOR0 << i)))
+                  continue;
+
+               rtv = svga_validate_surface_view(svga,
+                                                svga_surface(fb->cbufs[i]));
+               if (rtv == NULL)
+                  return PIPE_ERROR_OUT_OF_MEMORY;
+
+               ret = SVGA3D_vgpu10_ClearRenderTargetView(svga->swc,
+                                                         rtv, color->f);
+               if (ret != PIPE_OK)
+                  return ret;
+            }
+         }
+      }
+      if (flags & (SVGA3D_CLEAR_DEPTH | SVGA3D_CLEAR_STENCIL)) {
+         struct pipe_surface *dsv =
+            svga_validate_surface_view(svga, svga_surface(fb->zsbuf));
+         if (dsv == NULL)
+            return PIPE_ERROR_OUT_OF_MEMORY;
+
+         ret = SVGA3D_vgpu10_ClearDepthStencilView(svga->swc, dsv, flags,
+                                                   stencil, (float) depth);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+   }
+   else {
+      ret = SVGA3D_ClearRect(svga->swc, flags, uc.ui[0], (float) depth, stencil,
+                             rect.x, rect.y, rect.w, rect.h);
+      if (ret != PIPE_OK)
+         return ret;
+   }
 
    if (restore_viewport) {
-      memcpy(&rect, &svga->state.hw_clear.viewport, sizeof rect);
-      ret = SVGA3D_SetViewport(svga->swc, &rect);
+      ret = SVGA3D_SetViewport(svga->swc, &svga->state.hw_clear.viewport);
    }
    
    return ret;
diff --git a/src/gallium/drivers/svga/svga_pipe_constants.c b/src/gallium/drivers/svga/svga_pipe_constants.c
index c32b66d416b..8150879ea13 100644
--- a/src/gallium/drivers/svga/svga_pipe_constants.c
+++ b/src/gallium/drivers/svga/svga_pipe_constants.c
@@ -48,28 +48,46 @@ static void svga_set_constant_buffer(struct pipe_context *pipe,
                                      uint shader, uint index,
                                      struct pipe_constant_buffer *cb)
 {
+   struct svga_screen *svgascreen = svga_screen(pipe->screen);
    struct svga_context *svga = svga_context(pipe);
    struct pipe_resource *buf = cb ? cb->buffer : NULL;
-
-   if (cb && cb->user_buffer) {
-      buf = svga_user_buffer_create(pipe->screen,
-                                    (void *) cb->user_buffer,
-                                    cb->buffer_size,
-                                    PIPE_BIND_CONSTANT_BUFFER);
+   unsigned buffer_size = 0;
+
+   if (cb) {
+      buffer_size = cb->buffer_size;
+      if (cb->user_buffer) {
+         buf = svga_user_buffer_create(pipe->screen,
+                                       (void *) cb->user_buffer,
+                                       cb->buffer_size,
+                                       PIPE_BIND_CONSTANT_BUFFER);
+      }
    }
 
    assert(shader < PIPE_SHADER_TYPES);
-   assert(index == 0);
+   assert(index < Elements(svga->curr.constbufs[shader]));
+   assert(index < svgascreen->max_const_buffers);
+   (void) svgascreen;
+
+   pipe_resource_reference(&svga->curr.constbufs[shader][index].buffer, buf);
+
+   /* Make sure the constant buffer size to be updated is within the
+    * limit supported by the device.
+    */
+   svga->curr.constbufs[shader][index].buffer_size =
+      MIN2(buffer_size, SVGA_MAX_CONST_BUF_SIZE);
 
-   pipe_resource_reference(&svga->curr.cbufs[shader].buffer, buf);
-   svga->curr.cbufs[shader].buffer_size = cb ? cb->buffer_size : 0;
-   svga->curr.cbufs[shader].buffer_offset = cb ? cb->buffer_offset : 0;
-   svga->curr.cbufs[shader].user_buffer = NULL; /* not used */
+   svga->curr.constbufs[shader][index].buffer_offset = cb ? cb->buffer_offset : 0;
+   svga->curr.constbufs[shader][index].user_buffer = NULL; /* not used */
 
    if (shader == PIPE_SHADER_FRAGMENT)
       svga->dirty |= SVGA_NEW_FS_CONST_BUFFER;
-   else
+   else if (shader == PIPE_SHADER_VERTEX)
       svga->dirty |= SVGA_NEW_VS_CONST_BUFFER;
+   else
+      svga->dirty |= SVGA_NEW_GS_CONST_BUFFER;
+
+   /* update bitmask of dirty const buffers */
+   svga->state.dirty_constbufs[shader] |= (1 << index);
 
    if (cb && cb->user_buffer) {
       pipe_resource_reference(&buf, NULL);
diff --git a/src/gallium/drivers/svga/svga_pipe_depthstencil.c b/src/gallium/drivers/svga/svga_pipe_depthstencil.c
index 8db21fd7476..5ea623be4d9 100644
--- a/src/gallium/drivers/svga/svga_pipe_depthstencil.c
+++ b/src/gallium/drivers/svga/svga_pipe_depthstencil.c
@@ -23,13 +23,15 @@
  *
  **********************************************************/
 
-#include "util/u_inlines.h"
 #include "pipe/p_defines.h"
+#include "util/u_bitmask.h"
+#include "util/u_inlines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 
 #include "svga_context.h"
 #include "svga_hw_reg.h"
+#include "svga_cmd.h"
 
 
 static inline unsigned
@@ -69,10 +71,67 @@ svga_translate_stencil_op(unsigned op)
 }
 
 
+/**
+ * Define a vgpu10 depth/stencil state object for the given svga
+ * depth/stencil state, using a new id taken from svga->ds_object_id_bm.
+ */
+static void
+define_depth_stencil_state_object(struct svga_context *svga,
+                                  struct svga_depth_stencil_state *ds)
+{
+   unsigned try;
+
+   assert(svga_have_vgpu10(svga));
+
+   ds->id = util_bitmask_add(svga->ds_object_id_bm);
+
+   /* spot check that these comparison tokens are the same */
+   assert(SVGA3D_COMPARISON_NEVER == SVGA3D_CMP_NEVER);
+   assert(SVGA3D_COMPARISON_LESS == SVGA3D_CMP_LESS);
+   assert(SVGA3D_COMPARISON_NOT_EQUAL == SVGA3D_CMP_NOTEQUAL);
+   /* Loop in case command buffer is full and we need to flush and retry.
+    * If the retry fails as well, we return without reporting an error. */
+   for (try = 0; try < 2; try++) {
+      enum pipe_error ret;
+
+      /* Note: we use the ds->stencil[0].enabled value for both the front
+       * and back-face enables.  If single-side stencil is used, we'll have
+       * set the back state the same as the front state.
+       */
+      ret = SVGA3D_vgpu10_DefineDepthStencilState(svga->swc,
+                                                  ds->id,
+                                                  /* depth/Z */
+                                                  ds->zenable,
+                                                  ds->zwriteenable,
+                                                  ds->zfunc,
+                                                  /* Stencil */
+                                                  ds->stencil[0].enabled, /*f|b*/
+                                                  ds->stencil[0].enabled, /*f*/
+                                                  ds->stencil[0].enabled, /*b*/
+                                                  ds->stencil_mask,
+                                                  ds->stencil_writemask,
+                                                  /* front stencil */
+                                                  ds->stencil[0].fail,
+                                                  ds->stencil[0].zfail,
+                                                  ds->stencil[0].pass,
+                                                  ds->stencil[0].func,
+                                                  /* back stencil */
+                                                  ds->stencil[1].fail,
+                                                  ds->stencil[1].zfail,
+                                                  ds->stencil[1].pass,
+                                                  ds->stencil[1].func);
+      if (ret == PIPE_OK)
+         return;
+      svga_context_flush(svga, NULL);
+   }
+}
+
+
 static void *
 svga_create_depth_stencil_state(struct pipe_context *pipe,
 				const struct pipe_depth_stencil_alpha_state *templ)
 {
+   struct svga_context *svga = svga_context(pipe);
    struct svga_depth_stencil_state *ds = CALLOC_STRUCT( svga_depth_stencil_state );
 
    /* Don't try to figure out CW/CCW correspondence with
@@ -92,10 +151,18 @@ svga_create_depth_stencil_state(struct pipe_context *pipe,
       ds->stencil_mask      = templ->stencil[0].valuemask & 0xff;
       ds->stencil_writemask = templ->stencil[0].writemask & 0xff;
    }
+   else {
+      ds->stencil[0].func = SVGA3D_CMP_ALWAYS;
+      ds->stencil[0].fail = SVGA3D_STENCILOP_KEEP;
+      ds->stencil[0].zfail = SVGA3D_STENCILOP_KEEP;
+      ds->stencil[0].pass = SVGA3D_STENCILOP_KEEP;
+   }
 
 
    ds->stencil[1].enabled = templ->stencil[1].enabled;
    if (templ->stencil[1].enabled) {
+      assert(templ->stencil[0].enabled);
+      /* two-sided stencil */
       ds->stencil[1].func   = svga_translate_compare_func(templ->stencil[1].func);
       ds->stencil[1].fail   = svga_translate_stencil_op(templ->stencil[1].fail_op);
       ds->stencil[1].zfail  = svga_translate_stencil_op(templ->stencil[1].zfail_op);
@@ -104,6 +171,13 @@ svga_create_depth_stencil_state(struct pipe_context *pipe,
       ds->stencil_mask      = templ->stencil[1].valuemask & 0xff;
       ds->stencil_writemask = templ->stencil[1].writemask & 0xff;
    }
+   else {
+      /* back face state is same as front-face state */
+      ds->stencil[1].func = ds->stencil[0].func;
+      ds->stencil[1].fail = ds->stencil[0].fail;
+      ds->stencil[1].zfail = ds->stencil[0].zfail;
+      ds->stencil[1].pass = ds->stencil[0].pass;
+   }
 
 
    ds->zenable = templ->depth.enabled;
@@ -111,12 +185,22 @@ svga_create_depth_stencil_state(struct pipe_context *pipe,
       ds->zfunc = svga_translate_compare_func(templ->depth.func);
       ds->zwriteenable = templ->depth.writemask;
    }
+   else {
+      ds->zfunc = SVGA3D_CMP_ALWAYS;
+   }
 
    ds->alphatestenable = templ->alpha.enabled;
    if (ds->alphatestenable) {
       ds->alphafunc = svga_translate_compare_func(templ->alpha.func);
       ds->alpharef = templ->alpha.ref_value;
    }
+   else {
+      ds->alphafunc = SVGA3D_CMP_ALWAYS;
+   }
+
+   if (svga_have_vgpu10(svga)) {
+      define_depth_stencil_state_object(svga, ds);
+   }
 
    return ds;
 }
@@ -126,13 +210,43 @@ static void svga_bind_depth_stencil_state(struct pipe_context *pipe,
 {
    struct svga_context *svga = svga_context(pipe);
 
+   if (svga_have_vgpu10(svga)) {
+      /* flush any previously queued drawing before changing state */
+      svga_hwtnl_flush_retry(svga);
+   }
+
    svga->curr.depth = (const struct svga_depth_stencil_state *)depth_stencil;
-   svga->dirty |= SVGA_NEW_DEPTH_STENCIL;
+   svga->dirty |= SVGA_NEW_DEPTH_STENCIL_ALPHA;
 }
 
 static void svga_delete_depth_stencil_state(struct pipe_context *pipe,
                                             void *depth_stencil)
 {
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_depth_stencil_state *ds =
+      (struct svga_depth_stencil_state *) depth_stencil;
+   /* On vgpu10, destroy the device-side object before freeing the state. */
+   if (svga_have_vgpu10(svga)) {
+      enum pipe_error ret;
+      /* flush any previously queued drawing before destroying the state */
+      svga_hwtnl_flush_retry(svga);
+
+      assert(ds->id != SVGA3D_INVALID_ID);
+
+      ret = SVGA3D_vgpu10_DestroyDepthStencilState(svga->swc, ds->id);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_vgpu10_DestroyDepthStencilState(svga->swc, ds->id);
+         assert(ret == PIPE_OK);
+      }
+      /* don't leave hw_draw recording the id that was just destroyed */
+      if (ds->id == svga->state.hw_draw.depth_stencil_id)
+         svga->state.hw_draw.depth_stencil_id = SVGA3D_INVALID_ID;
+      /* clear the id in the bitmask it was allocated from */
+      util_bitmask_clear(svga->ds_object_id_bm, ds->id);
+      ds->id = SVGA3D_INVALID_ID;
+   }
+
    FREE(depth_stencil);
 }
 
@@ -142,6 +256,11 @@ static void svga_set_stencil_ref( struct pipe_context *pipe,
 {
    struct svga_context *svga = svga_context(pipe);
 
+   if (svga_have_vgpu10(svga)) {
+      /* flush any previously queued drawing before changing state */
+      svga_hwtnl_flush_retry(svga);
+   }
+
    svga->curr.stencil_ref = *stencil_ref;
 
    svga->dirty |= SVGA_NEW_STENCIL_REF;
@@ -151,6 +270,11 @@ static void
 svga_set_sample_mask(struct pipe_context *pipe,
                      unsigned sample_mask)
 {
+   struct svga_context *svga = svga_context(pipe);
+
+   svga->curr.sample_mask = sample_mask;
+
+   svga->dirty |= SVGA_NEW_BLEND; /* See emit_rss_vgpu10() */
 }
 
 
diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c
index 87f6b3d7126..303d4565cdb 100644
--- a/src/gallium/drivers/svga/svga_pipe_draw.c
+++ b/src/gallium/drivers/svga/svga_pipe_draw.c
@@ -27,7 +27,9 @@
 #include "util/u_format.h"
 #include "util/u_inlines.h"
 #include "util/u_prim.h"
+#include "util/u_prim_restart.h"
 #include "util/u_time.h"
+#include "util/u_upload_mgr.h"
 #include "indices/u_indices.h"
 
 #include "svga_hw_reg.h"
@@ -35,12 +37,12 @@
 #include "svga_context.h"
 #include "svga_screen.h"
 #include "svga_draw.h"
+#include "svga_shader.h"
 #include "svga_state.h"
 #include "svga_swtnl.h"
 #include "svga_debug.h"
 #include "svga_resource_buffer.h"
 
-
 static enum pipe_error
 retry_draw_range_elements( struct svga_context *svga,
                            struct pipe_resource *index_buffer,
@@ -51,26 +53,31 @@ retry_draw_range_elements( struct svga_context *svga,
                            unsigned prim,
                            unsigned start,
                            unsigned count,
+                           unsigned start_instance,
                            unsigned instance_count,
                            boolean do_retry )
 {
    enum pipe_error ret = PIPE_OK;
 
-   svga_hwtnl_set_unfilled( svga->hwtnl,
-                            svga->curr.rast->hw_unfilled );
-
-   svga_hwtnl_set_flatshade( svga->hwtnl,
-                             svga->curr.rast->templ.flatshade,
-                             svga->curr.rast->templ.flatshade_first );
+   svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);
 
    ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
    if (ret != PIPE_OK)
       goto retry;
 
+   /** determine if flatshade is to be used after svga_update_state()
+    *  in case the fragment shader is changed.
+    */
+   svga_hwtnl_set_flatshade(svga->hwtnl,
+                            svga->curr.rast->templ.flatshade ||
+                            svga->state.hw_draw.fs->uses_flat_interp,
+                            svga->curr.rast->templ.flatshade_first);
+
    ret = svga_hwtnl_draw_range_elements( svga->hwtnl,
                                          index_buffer, index_size, index_bias,
                                          min_index, max_index,
-                                         prim, start, count );
+                                         prim, start, count,
+                                         start_instance, instance_count);
    if (ret != PIPE_OK)
       goto retry;
 
@@ -85,7 +92,7 @@ retry:
                                         index_buffer, index_size, index_bias,
                                         min_index, max_index,
                                         prim, start, count,
-                                        instance_count, FALSE );
+                                        start_instance, instance_count, FALSE );
    }
 
    return ret;
@@ -94,27 +101,28 @@ retry:
 
 static enum pipe_error
 retry_draw_arrays( struct svga_context *svga,
-                   unsigned prim,
-                   unsigned start,
-                   unsigned count,
-                   unsigned instance_count,
+                   unsigned prim, unsigned start, unsigned count,
+                   unsigned start_instance, unsigned instance_count,
                    boolean do_retry )
 {
    enum pipe_error ret;
 
-   svga_hwtnl_set_unfilled( svga->hwtnl,
-                            svga->curr.rast->hw_unfilled );
-
-   svga_hwtnl_set_flatshade( svga->hwtnl,
-                             svga->curr.rast->templ.flatshade,
-                             svga->curr.rast->templ.flatshade_first );
+   svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);
 
    ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
    if (ret != PIPE_OK)
       goto retry;
 
-   ret = svga_hwtnl_draw_arrays( svga->hwtnl, prim,
-                                 start, count );
+   /** determine if flatshade is to be used after svga_update_state()
+    *  in case the fragment shader is changed.
+    */
+   svga_hwtnl_set_flatshade(svga->hwtnl,
+                            svga->curr.rast->templ.flatshade ||
+                            svga->state.hw_draw.fs->uses_flat_interp,
+                            svga->curr.rast->templ.flatshade_first);
+
+   ret = svga_hwtnl_draw_arrays(svga->hwtnl, prim, start, count,
+                                start_instance, instance_count);
    if (ret != PIPE_OK)
       goto retry;
 
@@ -125,18 +133,41 @@ retry:
    {
       svga_context_flush( svga, NULL );
 
-      return retry_draw_arrays( svga,
-                                prim,
-                                start,
-                                count,
-                                instance_count,
-                                FALSE );
+      return retry_draw_arrays(svga, prim, start, count,
+                               start_instance, instance_count,
+                               FALSE );
    }
 
    return ret;
 }
 
 
+/**
+ * Determine if we need to implement primitive restart with a fallback
+ * path which breaks the original primitive into sub-primitives at the
+ * restart indexes.
+ */
+static boolean
+need_fallback_prim_restart(const struct svga_context *svga,
+                           const struct pipe_draw_info *info)
+{
+   if (info->primitive_restart && info->indexed) {
+      if (!svga_have_vgpu10(svga))
+         return TRUE;
+      else if (!svga->state.sw.need_swtnl) {
+         if (svga->curr.ib.index_size == 1)
+            return TRUE; /* no device support for 1-byte indexes */
+         else if (svga->curr.ib.index_size == 2)
+            return info->restart_index != 0xffff;
+         else
+            return info->restart_index != 0xffffffff;
+      }
+   }
+
+   return FALSE;
+}
+
+
 static void
 svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 {
@@ -148,7 +179,8 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 
    svga->num_draw_calls++;  /* for SVGA_QUERY_DRAW_CALLS */
 
-   if (!u_trim_pipe_prim( info->mode, &count ))
+   if (u_reduced_prim(info->mode) == PIPE_PRIM_TRIANGLES &&
+       svga->curr.rast->templ.cull_face == PIPE_FACE_FRONT_AND_BACK)
       return;
 
    /*
@@ -165,6 +197,17 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       svga->dirty |= SVGA_NEW_REDUCED_PRIMITIVE;
    }
 
+   if (need_fallback_prim_restart(svga, info)) {
+      enum pipe_error r;
+      r = util_draw_vbo_without_prim_restart(pipe, &svga->curr.ib, info);
+      assert(r == PIPE_OK);
+      (void) r;
+      return;
+   }
+
+   if (!u_trim_pipe_prim( info->mode, &count ))
+      return;
+
    needed_swtnl = svga->state.sw.need_swtnl;
 
    svga_update_state_retry( svga, SVGA_STATE_NEED_SWTNL );
@@ -208,17 +251,15 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
                                           info->max_index,
                                           info->mode,
                                           info->start + offset,
-                                          info->count,
+                                          count,
+                                          info->start_instance,
                                           info->instance_count,
                                           TRUE );
       }
       else {
-         ret = retry_draw_arrays( svga,
-                                  info->mode,
-                                  info->start,
-                                  info->count,
-                                  info->instance_count,
-                                  TRUE );
+         ret = retry_draw_arrays(svga, info->mode, info->start, count,
+                                 info->start_instance, info->instance_count,
+                                 TRUE);
       }
    }
 
diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c
index 75299c50db7..4a9b3c96a54 100644
--- a/src/gallium/drivers/svga/svga_pipe_fs.c
+++ b/src/gallium/drivers/svga/svga_pipe_fs.c
@@ -31,7 +31,6 @@
 #include "draw/draw_context.h"
 
 #include "svga_context.h"
-#include "svga_tgsi.h"
 #include "svga_hw_reg.h"
 #include "svga_cmd.h"
 #include "svga_debug.h"
@@ -63,12 +62,6 @@ svga_create_fs_state(struct pipe_context *pipe,
 
    fs->draw_shader = draw_create_fragment_shader(svga->swtnl.draw, templ);
 
-   if (SVGA_DEBUG & DEBUG_TGSI || 0) {
-      debug_printf("%s id: %u, inputs: %u, outputs: %u\n",
-                   __FUNCTION__, fs->base.id,
-                   fs->base.info.num_inputs, fs->base.info.num_outputs);
-   }
-
    return fs;
 }
 
@@ -94,20 +87,30 @@ svga_delete_fs_state(struct pipe_context *pipe, void *shader)
 
    svga_hwtnl_flush_retry(svga);
 
+   assert(fs->base.parent == NULL);
+
    draw_delete_fragment_shader(svga->swtnl.draw, fs->draw_shader);
 
    for (variant = fs->base.variants; variant; variant = tmp) {
       tmp = variant->next;
 
-      ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
-      (void) ret;  /* PIPE_ERROR_ not handled yet */
-
-      /*
-       * Remove stale references to this variant to ensure a new variant on the
-       * same address will be detected as a change.
-       */
-      if (variant == svga->state.hw_draw.fs)
+      /* Check if deleting currently bound shader */
+      if (variant == svga->state.hw_draw.fs) {
+         ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL);
+            assert(ret == PIPE_OK);
+         }
          svga->state.hw_draw.fs = NULL;
+      }
+
+      ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
+         assert(ret == PIPE_OK);
+      }
    }
 
    FREE((void *)fs->base.tokens);
diff --git a/src/gallium/drivers/svga/svga_pipe_gs.c b/src/gallium/drivers/svga/svga_pipe_gs.c
index 752c8c700e0..d614e9d6c01 100644
--- a/src/gallium/drivers/svga/svga_pipe_gs.c
+++ b/src/gallium/drivers/svga/svga_pipe_gs.c
@@ -84,7 +84,7 @@ svga_delete_gs_state(struct pipe_context *pipe, void *shader)
 {
    struct svga_context *svga = svga_context(pipe);
    struct svga_geometry_shader *gs = (struct svga_geometry_shader *)shader;
-   struct svga_geometry_shader *next_gs;  
+   struct svga_geometry_shader *next_gs;
    struct svga_shader_variant *variant, *tmp;
    enum pipe_error ret;
 
@@ -96,9 +96,9 @@ svga_delete_gs_state(struct pipe_context *pipe, void *shader)
 
    /* Free the list of geometry shaders */
    while (gs) {
-      next_gs = (struct svga_geometry_shader *)gs->base.next;  
+      next_gs = (struct svga_geometry_shader *)gs->base.next;
 
-      if (gs->base.stream_output != NULL) 
+      if (gs->base.stream_output != NULL)
          svga_delete_stream_output(svga, gs->base.stream_output);
 
       draw_delete_geometry_shader(svga->swtnl.draw, gs->draw_shader);
diff --git a/src/gallium/drivers/svga/svga_pipe_misc.c b/src/gallium/drivers/svga/svga_pipe_misc.c
index 1df32a13b82..c8020da7c7a 100644
--- a/src/gallium/drivers/svga/svga_pipe_misc.c
+++ b/src/gallium/drivers/svga/svga_pipe_misc.c
@@ -27,6 +27,7 @@
 
 #include "util/u_framebuffer.h"
 #include "util/u_inlines.h"
+#include "util/u_pstipple.h"
 
 #include "svga_context.h"
 #include "svga_screen.h"
@@ -46,10 +47,37 @@ static void svga_set_scissor_states( struct pipe_context *pipe,
 }
 
 
-static void svga_set_polygon_stipple( struct pipe_context *pipe,
-                                      const struct pipe_poly_stipple *stipple )
+static void
+svga_set_polygon_stipple(struct pipe_context *pipe,
+                         const struct pipe_poly_stipple *stipple)
 {
-   /* overridden by the draw module */
+   struct svga_context *svga = svga_context(pipe);
+
+   /* release old texture */
+   pipe_resource_reference(&svga->polygon_stipple.texture, NULL);
+
+   /* release old sampler view */
+   if (svga->polygon_stipple.sampler_view) {
+      pipe->sampler_view_destroy(pipe,
+                                 &svga->polygon_stipple.sampler_view->base);
+   }
+
+   /* create new stipple texture */
+   svga->polygon_stipple.texture =
+      util_pstipple_create_stipple_texture(pipe, stipple->stipple);
+
+   /* create new sampler view */
+   svga->polygon_stipple.sampler_view =
+      (struct svga_pipe_sampler_view *)
+      util_pstipple_create_sampler_view(pipe,
+                                        svga->polygon_stipple.texture);
+
+   /* allocate sampler state, if first time */
+   if (!svga->polygon_stipple.sampler) {
+      svga->polygon_stipple.sampler = util_pstipple_create_sampler(pipe);
+   }
+
+   svga->dirty |= SVGA_NEW_STIPPLE;
 }
 
 
@@ -83,6 +111,11 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe,
    boolean propagate = FALSE;
    unsigned i;
 
+   /* make sure any pending drawing calls are flushed before changing
+    * the framebuffer state
+    */
+   svga_hwtnl_flush_retry(svga);
+
    dst->width = fb->width;
    dst->height = fb->height;
    dst->nr_cbufs = fb->nr_cbufs;
@@ -99,9 +132,6 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe,
    }
 
    if (propagate) {
-      /* make sure that drawing calls comes before propagation calls */
-      svga_hwtnl_flush_retry( svga );
-   
       for (i = 0; i < dst->nr_cbufs; i++) {
          struct pipe_surface *s = i < fb->nr_cbufs ? fb->cbufs[i] : NULL;
          if (dst->cbufs[i] && dst->cbufs[i] != s)
@@ -109,13 +139,30 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe,
       }
    }
 
-   /* XXX: Actually the virtual hardware may support rendertargets with
-    * different size, depending on the host API and driver, but since we cannot
-    * know that make no such assumption here. */
-   for(i = 0; i < fb->nr_cbufs; ++i) {
-      if (fb->zsbuf && fb->cbufs[i]) {
-         assert(fb->zsbuf->width == fb->cbufs[i]->width); 
-         assert(fb->zsbuf->height == fb->cbufs[i]->height); 
+   /* Check that all surfaces are the same size.
+    * Actually, the virtual hardware may support rendertargets with
+    * different sizes, depending on the host API and driver, so only warn.
+    */
+   {
+      int width = 0, height = 0;
+      if (fb->zsbuf) {
+         width = fb->zsbuf->width;
+         height = fb->zsbuf->height;
+      }
+      for (i = 0; i < fb->nr_cbufs; ++i) {
+         if (fb->cbufs[i]) {
+            if (width && height) {
+               if (fb->cbufs[i]->width != width ||
+                   fb->cbufs[i]->height != height) {
+                  debug_warning("Mixed-size color and depth/stencil surfaces "
+                                "may not work properly");
+               }
+            }
+            else {
+               width = fb->cbufs[i]->width;
+               height = fb->cbufs[i]->height;
+            }
+         }
       }
    }
 
diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c
index 208a2cd14bf..7081e5a1c43 100644
--- a/src/gallium/drivers/svga/svga_pipe_query.c
+++ b/src/gallium/drivers/svga/svga_pipe_query.c
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ * Copyright 2008-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -25,6 +25,8 @@
 
 #include "pipe/p_state.h"
 #include "pipe/p_context.h"
+
+#include "util/u_bitmask.h"
 #include "util/u_memory.h"
 
 #include "svga_cmd.h"
@@ -42,16 +44,26 @@ struct pipe_query {
    int dummy;
 };
 
-
 struct svga_query {
    struct pipe_query base;
    unsigned type;                  /**< PIPE_QUERY_x or SVGA_QUERY_x */
    SVGA3dQueryType svga_type;      /**< SVGA3D_QUERYTYPE_x or unused */
 
+   unsigned id;                    /** Per-context query identifier */
+
+   struct pipe_fence_handle *fence;
+
    /** For PIPE_QUERY_OCCLUSION_COUNTER / SVGA3D_QUERYTYPE_OCCLUSION */
+
+   /* For VGPU9 */
    struct svga_winsys_buffer *hwbuf;
    volatile SVGA3dQueryResult *queryResult;
-   struct pipe_fence_handle *fence;
+
+   /** For VGPU10 */
+   struct svga_winsys_gb_query *gb_query;
+   SVGA3dDXQueryFlags flags;
+   unsigned offset;                /**< offset to the gb_query memory */
+   struct pipe_query *predicate;   /** The associated query that can be used for predicate */
 
    /** For non-GPU SVGA_QUERY_x queries */
    uint64_t begin_count, end_count;
@@ -72,50 +84,641 @@ svga_get_query_result(struct pipe_context *pipe,
                       boolean wait,
                       union pipe_query_result *result);
 
+static enum pipe_error
+define_query_vgpu9(struct svga_context *svga,
+                   struct svga_query *sq)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+
+   sq->hwbuf = svga_winsys_buffer_create(svga, 1,
+                                         SVGA_BUFFER_USAGE_PINNED,
+                                         sizeof *sq->queryResult);
+   if (!sq->hwbuf)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   sq->queryResult = (SVGA3dQueryResult *)
+                     sws->buffer_map(sws, sq->hwbuf, PIPE_TRANSFER_WRITE);
+   if (!sq->queryResult) {
+      sws->buffer_destroy(sws, sq->hwbuf);
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   sq->queryResult->totalSize = sizeof *sq->queryResult;
+   sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
+
+   /* We request the buffer to be pinned and assume it is always mapped.
+    * The reason is that we don't want to wait for fences when checking the
+    * query status.
+    */
+   sws->buffer_unmap(sws, sq->hwbuf);
+
+   return PIPE_OK;
+}
+
+static enum pipe_error
+begin_query_vgpu9(struct svga_context *svga, struct svga_query *sq)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   enum pipe_error ret = PIPE_OK;
+
+   if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) {
+      /* The application doesn't care for the pending query result.
+       * We cannot let go of the existing buffer and just get a new one
+       * because its storage may be reused for other purposes and clobbered
+       * by the host when it determines the query result.  So the only
+       * option here is to wait for the existing query's result -- not a
+       * big deal, given that no sane application would do this.
+       */
+       uint64_t result;
+       svga_get_query_result(&svga->pipe, &sq->base, TRUE, (void*)&result);
+       assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING);
+   }
+
+   sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
+   sws->fence_reference(sws, &sq->fence, NULL);
+
+   ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
+   }
+   return ret;
+}
+
+static enum pipe_error
+end_query_vgpu9(struct svga_context *svga, struct svga_query *sq)
+{
+   enum pipe_error ret = PIPE_OK;
+
+   /* Set to PENDING before sending EndQuery. */
+   sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING;
+
+   ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
+   }
+   return ret;
+}
+
+static boolean
+get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq,
+                       boolean wait, uint64_t *result)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   enum pipe_error ret;
+   SVGA3dQueryState state;
+
+   if (!sq->fence) {
+      /* The query status won't be updated by the host unless
+       * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause
+       * a synchronous wait on the host.
+       */
+      ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
+      }
+      assert (ret == PIPE_OK);
+      svga_context_flush(svga, &sq->fence);
+      assert(sq->fence);
+   }
+
+   state = sq->queryResult->state;
+   if (state == SVGA3D_QUERYSTATE_PENDING) {
+      if (!wait)
+         return FALSE;
+      sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
+      state = sq->queryResult->state;
+   }
+
+   assert(state == SVGA3D_QUERYSTATE_SUCCEEDED ||
+          state == SVGA3D_QUERYSTATE_FAILED);
+
+   *result = (uint64_t)sq->queryResult->result32;
+   return TRUE;
+}
+
+
+/**
+ * VGPU10
+ *
+ * There is one query mob allocated for each context to be shared by all
+ * query types. The mob is used to hold queries' state and result. Since
+ * each query result type is of different length, to ease the query allocation
+ * management, the mob is divided into memory blocks. Each memory block
+ * will hold queries of the same type. Multiple memory blocks can be allocated
+ * for a particular query type.
+ *
+ * Currently each memory block is of 184 bytes. We support up to 128
+ * memory blocks. The query memory size is arbitrary right now.
+ * Each occlusion query takes about 8 bytes. One memory block can accommodate
+ * 23 occlusion queries. 128 of those blocks can support up to 2944 occlusion
+ * queries. That seems reasonable for now. If we think this limit is
+ * not enough, we can increase the limit or try to grow the mob at runtime.
+ * Note, SVGA device does not impose one mob per context for queries,
+ * we could allocate multiple mobs for queries; however, wddm KMD does not
+ * currently support that.
+ *
+ * Also note that the GL guest driver does not issue any of the
+ * following commands: DXMoveQuery, DXBindAllQuery & DXReadbackAllQuery.
+ */
+#define SVGA_QUERY_MEM_BLOCK_SIZE    (sizeof(SVGADXQueryResultUnion) * 2)
+#define SVGA_QUERY_MEM_SIZE          (128 * SVGA_QUERY_MEM_BLOCK_SIZE)
+
+struct svga_qmem_alloc_entry
+{
+   unsigned start_offset;               /* start offset of the memory block */
+   unsigned block_index;                /* block index of the memory block */
+   unsigned query_size;                 /* query size in this memory block */
+   unsigned nquery;                     /* number of queries allocated */
+   struct util_bitmask *alloc_mask;     /* allocation mask */
+   struct svga_qmem_alloc_entry *next;  /* next memory block */
+};
+
+
+/**
+ * Allocate a memory block from the query object memory
+ * \return -1 if out of memory, else index of the query memory block
+ */
+static int
+allocate_query_block(struct svga_context *svga)
+{
+   int index;
+   unsigned offset;
+
+   /* Find the next available query block */
+   index = util_bitmask_add(svga->gb_query_alloc_mask);
+
+   if (index == UTIL_BITMASK_INVALID_INDEX)
+      return -1;
+
+   offset = index * SVGA_QUERY_MEM_BLOCK_SIZE;
+   if (offset >= svga->gb_query_len) {
+      unsigned i;
+
+      /**
+       * All the memory blocks are allocated, lets see if there is
+       * any empty memory block around that can be freed up.
+       */
+      index = -1;
+      for (i = 0; i < SVGA_QUERY_MAX && index == -1; i++) {
+         struct svga_qmem_alloc_entry *alloc_entry;
+         struct svga_qmem_alloc_entry *prev_alloc_entry = NULL;
+
+         alloc_entry = svga->gb_query_map[i];
+         while (alloc_entry && index == -1) {
+            if (alloc_entry->nquery == 0) {
+               /* This memory block is empty, it can be recycled. */
+               if (prev_alloc_entry) {
+                  prev_alloc_entry->next = alloc_entry->next;
+               } else {
+                  svga->gb_query_map[i] = alloc_entry->next;
+               }
+               index = alloc_entry->block_index;
+            } else {
+               prev_alloc_entry = alloc_entry;
+               alloc_entry = alloc_entry->next;
+            }
+         }
+      }
+   }
+
+   return index;
+}
+
+/**
+ * Allocate a slot in the specified memory block.
+ * All slots in this memory block are of the same size.
+ *
+ * \return -1 if out of memory, else index of the query slot
+ */
+static int
+allocate_query_slot(struct svga_context *svga,
+                    struct svga_qmem_alloc_entry *alloc)
+{
+   int index;
+   unsigned offset;
+
+   /* Find the next available slot */
+   index = util_bitmask_add(alloc->alloc_mask);
+
+   if (index == UTIL_BITMASK_INVALID_INDEX)
+      return -1;
+
+   offset = index * alloc->query_size;
+   if (offset >= SVGA_QUERY_MEM_BLOCK_SIZE)
+      return -1;
+
+   alloc->nquery++;
+
+   return index;
+}
+
+/**
+ * Deallocate the specified slot in the memory block.
+ * If all slots are freed up, then deallocate the memory block
+ * as well, so it can be allocated for other query type
+ */
+static void
+deallocate_query_slot(struct svga_context *svga,
+                      struct svga_qmem_alloc_entry *alloc,
+                      unsigned index)
+{
+   assert(index != UTIL_BITMASK_INVALID_INDEX);
+
+   util_bitmask_clear(alloc->alloc_mask, index);
+   alloc->nquery--;
+
+   /**
+    * Don't worry about deallocating the empty memory block here.
+    * The empty memory block will be recycled when no more memory block
+    * can be allocated.
+    */
+}
+
+static struct svga_qmem_alloc_entry *
+allocate_query_block_entry(struct svga_context *svga,
+                           unsigned len)
+{
+   struct svga_qmem_alloc_entry *alloc_entry;
+   int block_index = -1;
+
+   block_index = allocate_query_block(svga);
+   if (block_index == -1)
+      return NULL;
+   alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry);
+   if (alloc_entry == NULL)
+      return NULL;
+
+   alloc_entry->block_index = block_index;
+   alloc_entry->start_offset = block_index * SVGA_QUERY_MEM_BLOCK_SIZE;
+   alloc_entry->nquery = 0;
+   alloc_entry->alloc_mask = util_bitmask_create();
+   alloc_entry->next = NULL;
+   alloc_entry->query_size = len;
+
+   return alloc_entry;
+}
+
+/**
+ * Allocate a memory slot for a query of the specified type.
+ * It will first search through the memory blocks that are allocated
+ * for the query type. If no memory slot is available, it will try
+ * to allocate another memory block within the query object memory for
+ * this query type.
+ */
+static int
+allocate_query(struct svga_context *svga,
+               SVGA3dQueryType type,
+               unsigned len)
+{
+   struct svga_qmem_alloc_entry *alloc_entry;
+   int slot_index = -1;
+   unsigned offset;
+
+   assert(type < SVGA_QUERY_MAX);
+
+   alloc_entry = svga->gb_query_map[type];
+
+   if (alloc_entry == NULL) {
+      /**
+       * No query memory block has been allocated for this query type,
+       * allocate one now
+       */
+      alloc_entry = allocate_query_block_entry(svga, len);
+      if (alloc_entry == NULL)
+         return -1;
+      svga->gb_query_map[type] = alloc_entry;
+   }
+
+   /* Allocate a slot within the memory block allocated for this query type */
+   slot_index = allocate_query_slot(svga, alloc_entry);
+
+   if (slot_index == -1) {
+      /* This query memory block is full, allocate another one */
+      alloc_entry = allocate_query_block_entry(svga, len);
+      if (alloc_entry == NULL)
+         return -1;
+      alloc_entry->next = svga->gb_query_map[type];
+      svga->gb_query_map[type] = alloc_entry;
+      slot_index = allocate_query_slot(svga, alloc_entry);
+   }
+
+   assert(slot_index != -1);
+   offset = slot_index * len + alloc_entry->start_offset;
+
+   return offset;
+}
+
+
+/**
+ * Deallocate memory slot allocated for the specified query
+ */
+static void
+deallocate_query(struct svga_context *svga,
+                 struct svga_query *sq)
+{
+   struct svga_qmem_alloc_entry *alloc_entry;
+   unsigned slot_index;
+   unsigned offset = sq->offset;
+
+   alloc_entry = svga->gb_query_map[sq->svga_type];
+
+   while (alloc_entry) {
+      if (offset >= alloc_entry->start_offset &&
+          offset < alloc_entry->start_offset + SVGA_QUERY_MEM_BLOCK_SIZE) {
+
+         /* The slot belongs to this memory block, deallocate it */
+         slot_index = (offset - alloc_entry->start_offset) /
+                      alloc_entry->query_size;
+         deallocate_query_slot(svga, alloc_entry, slot_index);
+         alloc_entry = NULL;
+      } else {
+         alloc_entry = alloc_entry->next;
+      }
+   }
+}
+
+
+/**
+ * Destroy the gb query object and all the related query structures
+ */
+static void
+destroy_gb_query_obj(struct svga_context *svga)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   unsigned i;
+
+   for (i = 0; i < SVGA_QUERY_MAX; i++) {
+      struct svga_qmem_alloc_entry *alloc_entry, *next;
+      alloc_entry = svga->gb_query_map[i];
+      while (alloc_entry) {
+         next = alloc_entry->next;
+         util_bitmask_destroy(alloc_entry->alloc_mask);
+         FREE(alloc_entry);
+         alloc_entry = next;
+      }
+      svga->gb_query_map[i] = NULL;
+   }
+
+   if (svga->gb_query)
+      sws->query_destroy(sws, svga->gb_query);
+   svga->gb_query = NULL;
+
+   util_bitmask_destroy(svga->gb_query_alloc_mask);
+}
+
+/**
+ * Define query and create the gb query object if it is not already created.
+ * There is only one gb query object per context which will be shared by
+ * queries of all types.
+ */
+static enum pipe_error
+define_query_vgpu10(struct svga_context *svga,
+                    struct svga_query *sq, int resultLen)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   int qlen;
+   enum pipe_error ret = PIPE_OK;
+
+   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+
+   if (svga->gb_query == NULL) {
+      /* Create a gb query object */
+      svga->gb_query = sws->query_create(sws, SVGA_QUERY_MEM_SIZE);
+      if (!svga->gb_query)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      svga->gb_query_len = SVGA_QUERY_MEM_SIZE;
+      memset (svga->gb_query_map, 0, sizeof(svga->gb_query_map));
+      svga->gb_query_alloc_mask = util_bitmask_create();
+
+      /* Bind the query object to the context */
+      if (svga->swc->query_bind(svga->swc, svga->gb_query,
+                                SVGA_QUERY_FLAG_SET) != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         svga->swc->query_bind(svga->swc, svga->gb_query,
+                               SVGA_QUERY_FLAG_SET);
+      }
+   }
+
+   sq->gb_query = svga->gb_query;
+
+   /* Allocate an integer ID for this query */
+   sq->id = util_bitmask_add(svga->query_id_bm);
+   if (sq->id == UTIL_BITMASK_INVALID_INDEX)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* Find a slot for this query in the gb object */
+   qlen = resultLen + sizeof(SVGA3dQueryState);
+   sq->offset = allocate_query(svga, sq->svga_type, qlen);
+   if (sq->offset == -1)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   SVGA_DBG(DEBUG_QUERY, "   query type=%d qid=0x%x offset=%d\n",
+            sq->svga_type, sq->id, sq->offset);
+
+   /**
+    * Send SVGA3D commands to define the query
+    */
+   ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
+   }
+   if (ret != PIPE_OK)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
+   }
+   assert(ret == PIPE_OK);
+
+   ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
+   }
+   assert(ret == PIPE_OK);
+
+   return PIPE_OK;
+}
+
+static enum pipe_error
+destroy_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
+{
+   enum pipe_error ret;
+
+   ret = SVGA3D_vgpu10_DestroyQuery(svga->swc, sq->id);
+
+   /* Deallocate the memory slot allocated for this query */
+   deallocate_query(svga, sq);
+
+   return ret;
+}
+
+
+/**
+ * Rebind queries to the context.
+ */
+static void
+rebind_vgpu10_query(struct svga_context *svga)
+{
+   if (svga->swc->query_bind(svga->swc, svga->gb_query,
+                             SVGA_QUERY_FLAG_REF) != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      svga->swc->query_bind(svga->swc, svga->gb_query,
+                            SVGA_QUERY_FLAG_REF);
+   }
+
+   svga->rebind.flags.query = FALSE;
+}
+
+
+static enum pipe_error
+begin_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   enum pipe_error ret = PIPE_OK;
+   int status = 0;
+
+   sws->fence_reference(sws, &sq->fence, NULL);
+
+   /* Initialize the query state to NEW */
+   status = sws->query_init(sws, sq->gb_query, sq->offset, SVGA3D_QUERYSTATE_NEW);
+   if (status)
+      return PIPE_ERROR;
+
+   if (svga->rebind.flags.query) {
+      rebind_vgpu10_query(svga);
+   }
+
+   /* Send the BeginQuery command to the device */
+   ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id);
+   }
+   return ret;
+}
+
+static enum pipe_error
+end_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   enum pipe_error ret = PIPE_OK;
+
+   if (svga->rebind.flags.query) {
+      rebind_vgpu10_query(svga);
+   }
+
+   ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id);
+   }
+
+   /* Finish fence is copied here from get_query_result_vgpu10. This helps
+    * with cases where svga_begin_query might be called again before
+    * svga_get_query_result, such as GL_TIME_ELAPSED.
+    */
+   if (!sq->fence) {
+      svga_context_flush(svga, &sq->fence);
+   }
+   sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
+
+   return ret;
+}
+
+static boolean
+get_query_result_vgpu10(struct svga_context *svga, struct svga_query *sq,
+                        boolean wait, void *result, int resultLen)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   SVGA3dQueryState queryState;
+
+   if (svga->rebind.flags.query) {
+      rebind_vgpu10_query(svga);
+   }
+
+   sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen);
+
+   if (queryState == SVGA3D_QUERYSTATE_PENDING) {
+      if (!wait)
+         return FALSE;
+      sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
+      sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen);
+   }
+
+   assert(queryState == SVGA3D_QUERYSTATE_SUCCEEDED ||
+          queryState == SVGA3D_QUERYSTATE_FAILED);
+
+   return TRUE;
+}
 
 static struct pipe_query *
 svga_create_query(struct pipe_context *pipe,
                   unsigned query_type,
                   unsigned index)
 {
-   struct svga_context *svga = svga_context( pipe );
-   struct svga_screen *svgascreen = svga_screen(pipe->screen);
-   struct svga_winsys_screen *sws = svgascreen->sws;
+   struct svga_context *svga = svga_context(pipe);
    struct svga_query *sq;
 
-   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+   assert(query_type < SVGA_QUERY_MAX);
 
    sq = CALLOC_STRUCT(svga_query);
    if (!sq)
-      goto no_sq;
+      goto fail;
+
+   /* Allocate an integer ID for the query */
+   sq->id = util_bitmask_add(svga->query_id_bm);
+   if (sq->id == UTIL_BITMASK_INVALID_INDEX)
+      goto fail;
+
+   SVGA_DBG(DEBUG_QUERY, "%s type=%d sq=0x%x id=%d\n", __FUNCTION__,
+            query_type, sq, sq->id);
 
    switch (query_type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
       sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSION;
+      if (svga_have_vgpu10(svga)) {
+         define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionQueryResult));
 
-      sq->hwbuf = svga_winsys_buffer_create(svga, 1,
-                                            SVGA_BUFFER_USAGE_PINNED,
-                                            sizeof *sq->queryResult);
-      if (!sq->hwbuf) {
-         debug_printf("svga: failed to alloc query object!\n");
-         goto no_hwbuf;
-      }
+         /**
+          * In OpenGL, occlusion counter query can be used in conditional
+          * rendering; however, in DX10, only OCCLUSION_PREDICATE query can
+          * be used for predication. Hence, we need to create an occlusion
+          * predicate query along with the occlusion counter query. So when
+          * the occlusion counter query is used for predication, the associated
+          * query of occlusion predicate type will be used
+          * in the SetPredication command.
+          */
+         sq->predicate = svga_create_query(pipe, PIPE_QUERY_OCCLUSION_PREDICATE, index);
 
-      sq->queryResult = (SVGA3dQueryResult *)
-         sws->buffer_map(sws, sq->hwbuf, PIPE_TRANSFER_WRITE);
-      if (!sq->queryResult) {
-         debug_printf("svga: failed to map query object!\n");
-         goto no_query_result;
+      } else {
+         define_query_vgpu9(svga, sq);
       }
-
-      sq->queryResult->totalSize = sizeof *sq->queryResult;
-      sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
-
-      /* We request the buffer to be pinned and assume it is always mapped.
-       * The reason is that we don't want to wait for fences when checking the
-       * query status.
-       */
-      sws->buffer_unmap(sws, sq->hwbuf);
+      break;
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      assert(svga_have_vgpu10(svga));
+      sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE;
+      define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionPredicateQueryResult));
+      break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+   case PIPE_QUERY_SO_STATISTICS:
+      assert(svga_have_vgpu10(svga));
+      sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS;
+      define_query_vgpu10(svga, sq,
+                          sizeof(SVGADXStreamOutStatisticsQueryResult));
+      break;
+   case PIPE_QUERY_TIMESTAMP:
+      assert(svga_have_vgpu10(svga));
+      sq->svga_type = SVGA3D_QUERYTYPE_TIMESTAMP;
+      define_query_vgpu10(svga, sq,
+                          sizeof(SVGADXTimestampQueryResult));
       break;
    case SVGA_QUERY_DRAW_CALLS:
    case SVGA_QUERY_FALLBACKS:
@@ -129,28 +732,50 @@ svga_create_query(struct pipe_context *pipe,
 
    return &sq->base;
 
-no_query_result:
-   sws->buffer_destroy(sws, sq->hwbuf);
-no_hwbuf:
+fail:
    FREE(sq);
-no_sq:
    return NULL;
 }
 
-
 static void
 svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
 {
-   struct svga_screen *svgascreen = svga_screen(pipe->screen);
-   struct svga_winsys_screen *sws = svgascreen->sws;
-   struct svga_query *sq = svga_query( q );
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   struct svga_query *sq;
 
-   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+   if (q == NULL) {
+      return destroy_gb_query_obj(svga);
+   }
+
+   sq = svga_query(q);
+
+   SVGA_DBG(DEBUG_QUERY, "%s sq=%p id=%d\n", __FUNCTION__,
+            (void *) sq, sq->id);
 
    switch (sq->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
-      sws->buffer_destroy(sws, sq->hwbuf);
-      sq->hwbuf = NULL;
+      if (svga_have_vgpu10(svga)) {
+         /* make sure to also destroy any associated predicate query */
+         if (sq->predicate)
+            svga_destroy_query(pipe, sq->predicate);
+         destroy_query_vgpu10(svga, sq);
+      } else {
+         sws->buffer_destroy(sws, sq->hwbuf);
+      }
+      sws->fence_reference(sws, &sq->fence, NULL);
+      break;
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      assert(svga_have_vgpu10(svga));
+      destroy_query_vgpu10(svga, sq);
+      sws->fence_reference(sws, &sq->fence, NULL);
+      break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+   case PIPE_QUERY_SO_STATISTICS:
+   case PIPE_QUERY_TIMESTAMP:
+      assert(svga_have_vgpu10(svga));
+      destroy_query_vgpu10(svga, sq);
       sws->fence_reference(sws, &sq->fence, NULL);
       break;
    case SVGA_QUERY_DRAW_CALLS:
@@ -162,6 +787,9 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
       assert(!"svga: unexpected query type in svga_destroy_query()");
    }
 
+   /* Free the query id */
+   util_bitmask_clear(svga->query_id_bm, sq->id);
+
    FREE(sq);
 }
 
@@ -169,13 +797,15 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
 static boolean
 svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
 {
-   struct svga_screen *svgascreen = svga_screen(pipe->screen);
-   struct svga_winsys_screen *sws = svgascreen->sws;
-   struct svga_context *svga = svga_context( pipe );
-   struct svga_query *sq = svga_query( q );
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_query *sq = svga_query(q);
    enum pipe_error ret;
 
-   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+   assert(sq);
+   assert(sq->type < SVGA_QUERY_MAX);
+
+   SVGA_DBG(DEBUG_QUERY, "%s sq=%p id=%d\n", __FUNCTION__,
+            (void *) sq, sq->id);
 
    /* Need to flush out buffered drawing commands so that they don't
     * get counted in the query results.
@@ -184,31 +814,33 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
 
    switch (sq->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
-      assert(!svga->sq);
-      if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) {
-         /* The application doesn't care for the pending query result.
-          * We cannot let go of the existing buffer and just get a new one
-          * because its storage may be reused for other purposes and clobbered
-          * by the host when it determines the query result.  So the only
-          * option here is to wait for the existing query's result -- not a
-          * big deal, given that no sane application would do this.
-          */
-         uint64_t result;
-         svga_get_query_result(pipe, q, TRUE, (void*)&result);
-         assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING);
-      }
-
-      sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
-      sws->fence_reference(sws, &sq->fence, NULL);
-
-      ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
-      if (ret != PIPE_OK) {
-         svga_context_flush(svga, NULL);
-         ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
-         assert(ret == PIPE_OK);
+      if (svga_have_vgpu10(svga)) {
+         ret = begin_query_vgpu10(svga, sq);
+         /* also need to start the associated occlusion predicate query */
+         if (sq->predicate) {
+            enum pipe_error status;
+            status = begin_query_vgpu10(svga, svga_query(sq->predicate));
+            assert(status == PIPE_OK);
+            (void) status;
+         }
+      } else {
+         ret = begin_query_vgpu9(svga, sq);
       }
-
-      svga->sq = sq;
+      assert(ret == PIPE_OK);
+      (void) ret;
+      break;
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      assert(svga_have_vgpu10(svga));
+      ret = begin_query_vgpu10(svga, sq);
+      assert(ret == PIPE_OK);
+      break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+   case PIPE_QUERY_SO_STATISTICS:
+   case PIPE_QUERY_TIMESTAMP:
+      assert(svga_have_vgpu10(svga));
+      ret = begin_query_vgpu10(svga, sq);
+      assert(ret == PIPE_OK);
       break;
    case SVGA_QUERY_DRAW_CALLS:
       sq->begin_count = svga->num_draw_calls;
@@ -222,6 +854,9 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
    default:
       assert(!"unexpected query type in svga_begin_query()");
    }
+
+   svga->sq[sq->type] = sq;
+
    return true;
 }
 
@@ -229,35 +864,57 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
 static void
 svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
 {
-   struct svga_context *svga = svga_context( pipe );
-   struct svga_query *sq = svga_query( q );
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_query *sq = svga_query(q);
    enum pipe_error ret;
 
-   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+   assert(sq);
+   assert(sq->type < SVGA_QUERY_MAX);
+
+   SVGA_DBG(DEBUG_QUERY, "%s sq=%p id=%d\n", __FUNCTION__,
+            (void *) sq, sq->id);
+
+   if (sq->type == PIPE_QUERY_TIMESTAMP && svga->sq[sq->type] != sq)
+      svga_begin_query(pipe, q);
 
    svga_hwtnl_flush_retry(svga);
 
+   assert(svga->sq[sq->type] == sq);
+
    switch (sq->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
-      assert(svga->sq == sq);
-
-      /* Set to PENDING before sending EndQuery. */
-      sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING;
-
-      ret = SVGA3D_EndQuery( svga->swc, sq->svga_type, sq->hwbuf);
-      if (ret != PIPE_OK) {
-         svga_context_flush(svga, NULL);
-         ret = SVGA3D_EndQuery( svga->swc, sq->svga_type, sq->hwbuf);
-         assert(ret == PIPE_OK);
+      if (svga_have_vgpu10(svga)) {
+         ret = end_query_vgpu10(svga, sq);
+         /* also need to end the associated occlusion predicate query */
+         if (sq->predicate) {
+            enum pipe_error status;
+            status = end_query_vgpu10(svga, svga_query(sq->predicate));
+            assert(status == PIPE_OK);
+            (void) status;
+         }
+      } else {
+         ret = end_query_vgpu9(svga, sq);
       }
-
+      assert(ret == PIPE_OK);
+      (void) ret;
       /* TODO: Delay flushing. We don't really need to flush here, just ensure
        * that there is one flush before svga_get_query_result attempts to get
        * the result.
        */
       svga_context_flush(svga, NULL);
-
-      svga->sq = NULL;
+      break;
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      assert(svga_have_vgpu10(svga));
+      ret = end_query_vgpu10(svga, sq);
+      assert(ret == PIPE_OK);
+      break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+   case PIPE_QUERY_SO_STATISTICS:
+   case PIPE_QUERY_TIMESTAMP:
+      assert(svga_have_vgpu10(svga));
+      ret = end_query_vgpu10(svga, sq);
+      assert(ret == PIPE_OK);
       break;
    case SVGA_QUERY_DRAW_CALLS:
       sq->end_count = svga->num_draw_calls;
@@ -271,6 +928,7 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
    default:
       assert(!"unexpected query type in svga_end_query()");
    }
+   svga->sq[sq->type] = NULL;
 }
 
 
@@ -280,49 +938,75 @@ svga_get_query_result(struct pipe_context *pipe,
                       boolean wait,
                       union pipe_query_result *vresult)
 {
-   struct svga_context *svga = svga_context( pipe );
-   struct svga_screen *svgascreen = svga_screen( pipe->screen );
-   struct svga_winsys_screen *sws = svgascreen->sws;
-   struct svga_query *sq = svga_query( q );
-   SVGA3dQueryState state;
-   uint64_t *result = (uint64_t *) vresult;
+   struct svga_screen *svgascreen = svga_screen(pipe->screen);
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_query *sq = svga_query(q);
+   uint64_t *result = (uint64_t *)vresult;
+   boolean ret = TRUE;
+
+   assert(sq);
 
-   SVGA_DBG(DEBUG_QUERY, "%s wait: %d\n", __FUNCTION__);
+   SVGA_DBG(DEBUG_QUERY, "%s sq=%p id=%d wait: %d\n",
+            __FUNCTION__, (void *) sq, sq->id, wait);
 
    switch (sq->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
-      /* The query status won't be updated by the host unless
-       * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause
-       * a synchronous wait on the host.
-       */
-      if (!sq->fence) {
-         enum pipe_error ret;
-
-         ret = SVGA3D_WaitForQuery( svga->swc, sq->svga_type, sq->hwbuf);
-         if (ret != PIPE_OK) {
-            svga_context_flush(svga, NULL);
-            ret = SVGA3D_WaitForQuery( svga->swc, sq->svga_type, sq->hwbuf);
-            assert(ret == PIPE_OK);
-         }
-
-         svga_context_flush(svga, &sq->fence);
-
-         assert(sq->fence);
+      if (svga_have_vgpu10(svga)) {
+         SVGADXOcclusionQueryResult occResult;
+         ret = get_query_result_vgpu10(svga, sq, wait,
+                                       (void *)&occResult, sizeof(occResult));
+         *result = (uint64_t)occResult.samplesRendered;
+      } else {
+         ret = get_query_result_vgpu9(svga, sq, wait, (uint64_t *)result);
       }
+      break;
+   case PIPE_QUERY_OCCLUSION_PREDICATE: {
+      SVGADXOcclusionPredicateQueryResult occResult;
+      assert(svga_have_vgpu10(svga));
+      ret = get_query_result_vgpu10(svga, sq, wait,
+                                    (void *)&occResult, sizeof(occResult));
+      vresult->b = occResult.anySamplesRendered != 0;
+      break;
+   }
+   case PIPE_QUERY_SO_STATISTICS: {
+      SVGADXStreamOutStatisticsQueryResult sResult;
+      struct pipe_query_data_so_statistics *pResult =
+         (struct pipe_query_data_so_statistics *)vresult;
 
-      state = sq->queryResult->state;
-      if (state == SVGA3D_QUERYSTATE_PENDING) {
-         if (!wait)
-            return FALSE;
-         sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
-         state = sq->queryResult->state;
-      }
+      assert(svga_have_vgpu10(svga));
+      ret = get_query_result_vgpu10(svga, sq, wait,
+                                    (void *)&sResult, sizeof(sResult));
+      pResult->num_primitives_written = sResult.numPrimitivesWritten;
+      pResult->primitives_storage_needed = sResult.numPrimitivesRequired;
+      break;
+   }
+   case PIPE_QUERY_TIMESTAMP: {
+      SVGADXTimestampQueryResult sResult;
+
+      assert(svga_have_vgpu10(svga));
+      ret = get_query_result_vgpu10(svga, sq, wait,
+                                    (void *)&sResult, sizeof(sResult));
+      *result = (uint64_t)sResult.timestamp;
+      break;
+   }
+   case PIPE_QUERY_PRIMITIVES_GENERATED: {
+      SVGADXStreamOutStatisticsQueryResult sResult;
 
-      assert(state == SVGA3D_QUERYSTATE_SUCCEEDED ||
-             state == SVGA3D_QUERYSTATE_FAILED);
+      assert(svga_have_vgpu10(svga));
+      ret = get_query_result_vgpu10(svga, sq, wait,
+                                    (void *)&sResult, sizeof sResult);
+      *result = (uint64_t)sResult.numPrimitivesRequired;
+      break;
+   }
+   case PIPE_QUERY_PRIMITIVES_EMITTED: {
+      SVGADXStreamOutStatisticsQueryResult sResult;
 
-      *result = (uint64_t) sq->queryResult->result32;
+      assert(svga_have_vgpu10(svga));
+      ret = get_query_result_vgpu10(svga, sq, wait,
+                                    (void *)&sResult, sizeof sResult);
+      *result = (uint64_t)sResult.numPrimitivesWritten;
       break;
+   }
    case SVGA_QUERY_DRAW_CALLS:
       /* fall-through */
    case SVGA_QUERY_FALLBACKS:
@@ -335,9 +1019,73 @@ svga_get_query_result(struct pipe_context *pipe,
       assert(!"unexpected query type in svga_get_query_result");
    }
 
-   SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, (unsigned)*result);
+   SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, (unsigned)*result);
 
-   return TRUE;
+   return ret;
+}
+
+static void
+svga_render_condition(struct pipe_context *pipe, struct pipe_query *q,
+                      boolean condition, uint mode)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   struct svga_query *sq = svga_query(q);
+   SVGA3dQueryId queryId;
+   enum pipe_error ret;
+
+   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+
+   assert(svga_have_vgpu10(svga));
+   if (sq == NULL) {
+      queryId = SVGA3D_INVALID_ID;
+   }
+   else {
+      assert(sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION ||
+             sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE);
+
+      if (sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION) {
+         assert(sq->predicate);
+         /**
+          * For conditional rendering, make sure to use the associated
+          * predicate query.
+          */
+         sq = svga_query(sq->predicate);
+      }
+      queryId = sq->id;
+
+      if ((mode == PIPE_RENDER_COND_WAIT ||
+           mode == PIPE_RENDER_COND_BY_REGION_WAIT) && sq->fence) {
+         sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
+      }
+   }
+
+   ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId,
+                                      (uint32) condition);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId,
+                                         (uint32) condition);
+   }
+}
+
+
+/* Workaround for the lack of a synchronous renderer-time query: run a
+ * TIMESTAMP query to completion.  Returns 0 if the query can't be created.
+ */
+static uint64_t
+svga_get_timestamp(struct pipe_context *pipe)
+{
+   struct pipe_query *q = svga_create_query(pipe, PIPE_QUERY_TIMESTAMP, 0);
+   union pipe_query_result result;
+
+   if (q == NULL)
+      return 0;
+   svga_begin_query(pipe, q);
+   svga_end_query(pipe, q);
+   svga_get_query_result(pipe, q, TRUE, &result);
+   svga_destroy_query(pipe, q);
+   return result.u64;
+}
 
 
@@ -349,4 +1097,6 @@ svga_init_query_functions(struct svga_context *svga)
    svga->pipe.begin_query = svga_begin_query;
    svga->pipe.end_query = svga_end_query;
    svga->pipe.get_query_result = svga_get_query_result;
+   svga->pipe.render_condition = svga_render_condition;
+   svga->pipe.get_timestamp = svga_get_timestamp;
 }
diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
index 356898a86e7..a7aadac0111 100644
--- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c
+++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
@@ -23,16 +23,18 @@
  *
  **********************************************************/
 
+#include "pipe/p_defines.h"
 #include "draw/draw_context.h"
+#include "util/u_bitmask.h"
 #include "util/u_inlines.h"
-#include "pipe/p_defines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 
+#include "svga_cmd.h"
 #include "svga_context.h"
+#include "svga_hw_reg.h"
 #include "svga_screen.h"
 
-#include "svga_hw_reg.h"
 
 /* Hardware frontwinding is always set up as SVGA3D_FRONTWINDING_CW.
  */
@@ -61,6 +63,96 @@ static SVGA3dShadeMode svga_translate_flatshade( unsigned mode )
 }
 
 
+static unsigned
+translate_fill_mode(unsigned fill)
+{
+   switch (fill) {
+   case PIPE_POLYGON_MODE_POINT:
+      return SVGA3D_FILLMODE_POINT;
+   case PIPE_POLYGON_MODE_LINE:
+      return SVGA3D_FILLMODE_LINE;
+   case PIPE_POLYGON_MODE_FILL:
+      return SVGA3D_FILLMODE_FILL;
+   default:
+      assert(!"Bad fill mode");
+      return SVGA3D_FILLMODE_FILL;
+   }
+}
+
+
+static unsigned
+translate_cull_mode(unsigned cull)
+{
+   switch (cull) {
+   case PIPE_FACE_NONE:
+      return SVGA3D_CULL_NONE;
+   case PIPE_FACE_FRONT:
+      return SVGA3D_CULL_FRONT;
+   case PIPE_FACE_BACK:
+      return SVGA3D_CULL_BACK;
+   case PIPE_FACE_FRONT_AND_BACK:
+      /* NOTE: we simply no-op polygon drawing in svga_draw_vbo() */
+      return SVGA3D_CULL_NONE;
+   default:
+      assert(!"Bad cull mode");
+      return SVGA3D_CULL_NONE;
+   }
+}
+
+
+static void
+define_rasterizer_object(struct svga_context *svga,
+                         struct svga_rasterizer_state *rast)
+{
+   unsigned fill_mode = translate_fill_mode(rast->templ.fill_front);
+   unsigned cull_mode = translate_cull_mode(rast->templ.cull_face);
+   int depth_bias = rast->templ.offset_units;
+   float slope_scaled_depth_bias =  rast->templ.offset_scale;
+   float depth_bias_clamp = 0.0; /* XXX fix me */
+   unsigned try;
+   const float line_width = rast->templ.line_width > 0.0f ?
+      rast->templ.line_width : 1.0f;
+   const uint8 line_factor = rast->templ.line_stipple_enable ?
+      rast->templ.line_stipple_factor : 0;
+   const uint16 line_pattern = rast->templ.line_stipple_enable ?
+      rast->templ.line_stipple_pattern : 0;
+
+   rast->id = util_bitmask_add(svga->rast_object_id_bm);
+
+   if (rast->templ.fill_front != rast->templ.fill_back) {
+      /* The VGPU10 device can't handle different front/back fill modes.
+       * We'll handle that with a swtnl/draw fallback.  But we need to
+       * make sure we always fill triangles in that case.
+       */
+      fill_mode = SVGA3D_FILLMODE_FILL;
+   }
+
+   for (try = 0; try < 2; try++) {
+      enum pipe_error ret =
+         SVGA3D_vgpu10_DefineRasterizerState(svga->swc,
+                                             rast->id,
+                                             fill_mode,
+                                             cull_mode,
+                                             rast->templ.front_ccw,
+                                             depth_bias,
+                                             depth_bias_clamp,
+                                             slope_scaled_depth_bias,
+                                             rast->templ.depth_clip,
+                                             rast->templ.scissor,
+                                             rast->templ.multisample,
+                                             rast->templ.line_smooth,
+                                             line_width,
+                                             rast->templ.line_stipple_enable,
+                                             line_factor,
+                                             line_pattern,
+                                             !rast->templ.flatshade_first);
+      if (ret == PIPE_OK)
+         return;
+      svga_context_flush(svga, NULL);
+   }
+}
+
+
 static void *
 svga_create_rasterizer_state(struct pipe_context *pipe,
                              const struct pipe_rasterizer_state *templ)
@@ -92,17 +184,24 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
    rast->antialiasedlineenable = templ->line_smooth;
    rast->lastpixel = templ->line_last_pixel;
    rast->pointsprite = templ->sprite_coord_enable != 0x0;
-   rast->pointsize = templ->point_size;
-   rast->hw_unfilled = PIPE_POLYGON_MODE_FILL;
+
+   if (templ->point_smooth) {
+      /* For smooth points we need to generate fragments for at least
+       * a 2x2 region.  Otherwise the quad we draw may be too small and
+       * we may generate no fragments at all.
+       */
+      rast->pointsize = MAX2(2.0f, templ->point_size);
+   }
+   else {
+      rast->pointsize = templ->point_size;
+   }
+
+   rast->hw_fillmode = PIPE_POLYGON_MODE_FILL;
 
    /* Use swtnl + decomposition implement these:
     */
-   if (templ->poly_stipple_enable) {
-      rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
-      rast->need_pipeline_tris_str = "poly stipple";
-   }
 
-   if (screen->maxLineWidth > 1.0F) {
+   if (templ->line_width <= screen->maxLineWidth) {
       /* pass line width to device */
       rast->linewidth = MAX2(1.0F, templ->line_width);
    }
@@ -129,7 +228,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
       }
    } 
 
-   if (templ->point_smooth) {
+   if (!svga_have_vgpu10(svga) && templ->point_smooth) {
       rast->need_pipeline |= SVGA_PIPELINE_FLAG_POINTS;
       rast->need_pipeline_points_str = "smooth points";
    }
@@ -231,13 +330,13 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
          rast->depthbias = templ->offset_units;
       }
 
-      rast->hw_unfilled = fill;
+      rast->hw_fillmode = fill;
    }
 
    if (rast->need_pipeline & SVGA_PIPELINE_FLAG_TRIS) {
       /* Turn off stuff which will get done in the draw module:
        */
-      rast->hw_unfilled = PIPE_POLYGON_MODE_FILL;
+      rast->hw_fillmode = PIPE_POLYGON_MODE_FILL;
       rast->slopescaledepthbias = 0;
       rast->depthbias = 0;
    }
@@ -249,6 +348,10 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
       debug_printf(" tris: %s \n", rast->need_pipeline_tris_str);
    }
 
+   if (svga_have_vgpu10(svga)) {
+      define_rasterizer_object(svga, rast);
+   }
+
    return rast;
 }
 
@@ -258,18 +361,37 @@ static void svga_bind_rasterizer_state( struct pipe_context *pipe,
    struct svga_context *svga = svga_context(pipe);
    struct svga_rasterizer_state *raster = (struct svga_rasterizer_state *)state;
 
-
-   draw_set_rasterizer_state(svga->swtnl.draw, raster ? &raster->templ : NULL,
-                             state);
    svga->curr.rast = raster;
 
    svga->dirty |= SVGA_NEW_RAST;
+
+   if (raster && raster->templ.poly_stipple_enable) {
+      svga->dirty |= SVGA_NEW_STIPPLE;
+   }
 }
 
-static void svga_delete_rasterizer_state(struct pipe_context *pipe,
-                                         void *raster)
+static void
+svga_delete_rasterizer_state(struct pipe_context *pipe, void *state)
 {
-   FREE(raster);
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_rasterizer_state *raster =
+      (struct svga_rasterizer_state *) state;
+
+   if (svga_have_vgpu10(svga)) {
+      enum pipe_error ret =
+         SVGA3D_vgpu10_DestroyRasterizerState(svga->swc, raster->id);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_vgpu10_DestroyRasterizerState(svga->swc, raster->id);
+      }
+
+      if (raster->id == svga->state.hw_draw.rasterizer_id)
+         svga->state.hw_draw.rasterizer_id = SVGA3D_INVALID_ID;
+
+      util_bitmask_clear(svga->rast_object_id_bm, raster->id);
+   }
+
+   FREE(state);
 }
 
 
diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c
index effd490dd22..bb18f5a8247 100644
--- a/src/gallium/drivers/svga/svga_pipe_sampler.c
+++ b/src/gallium/drivers/svga/svga_pipe_sampler.c
@@ -23,17 +23,19 @@
  *
  **********************************************************/
 
-#include "util/u_inlines.h"
 #include "pipe/p_defines.h"
+#include "util/u_bitmask.h"
 #include "util/u_format.h"
+#include "util/u_inlines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "tgsi/tgsi_parse.h"
 
 #include "svga_context.h"
+#include "svga_cmd.h"
+#include "svga_debug.h"
 #include "svga_resource_texture.h"
 
-#include "svga_debug.h"
 
 static inline unsigned
 translate_wrap_mode(unsigned wrap)
@@ -91,6 +93,126 @@ static inline unsigned translate_mip_filter( unsigned filter )
    }
 }
 
+
+static uint8
+translate_comparison_func(unsigned func)
+{
+   switch (func) {
+   case PIPE_FUNC_NEVER:
+      return SVGA3D_COMPARISON_NEVER;
+   case PIPE_FUNC_LESS:
+      return SVGA3D_COMPARISON_LESS;
+   case PIPE_FUNC_EQUAL:
+      return SVGA3D_COMPARISON_EQUAL;
+   case PIPE_FUNC_LEQUAL:
+      return SVGA3D_COMPARISON_LESS_EQUAL;
+   case PIPE_FUNC_GREATER:
+      return SVGA3D_COMPARISON_GREATER;
+   case PIPE_FUNC_NOTEQUAL:
+      return SVGA3D_COMPARISON_NOT_EQUAL;
+   case PIPE_FUNC_GEQUAL:
+      return SVGA3D_COMPARISON_GREATER_EQUAL;
+   case PIPE_FUNC_ALWAYS:
+      return SVGA3D_COMPARISON_ALWAYS;
+   default:
+      assert(!"Invalid comparison function");
+      return SVGA3D_COMPARISON_ALWAYS;
+   }
+}
+
+
+/**
+ * Translate filtering state to vgpu10 format.
+ */
+static SVGA3dFilter
+translate_filter_mode(unsigned img_filter,
+                      unsigned min_filter,
+                      unsigned mag_filter,
+                      boolean anisotropic,
+                      boolean compare)
+{
+   SVGA3dFilter mode = 0;
+
+   if (img_filter == PIPE_TEX_FILTER_LINEAR)
+      mode |= SVGA3D_FILTER_MIP_LINEAR;
+   if (min_filter == PIPE_TEX_FILTER_LINEAR)
+      mode |= SVGA3D_FILTER_MIN_LINEAR;
+   if (mag_filter == PIPE_TEX_FILTER_LINEAR)
+      mode |= SVGA3D_FILTER_MAG_LINEAR;
+   if (anisotropic)
+      mode |= SVGA3D_FILTER_ANISOTROPIC;
+   if (compare)
+      mode |= SVGA3D_FILTER_COMPARE;
+
+   return mode;
+}
+
+
+/**
+ * Define a vgpu10 sampler state.
+ */
+static void
+define_sampler_state_object(struct svga_context *svga,
+                            struct svga_sampler_state *ss,
+                            const struct pipe_sampler_state *ps)
+{
+   uint8_t max_aniso = (uint8_t) 255; /* XXX fix me */
+   boolean anisotropic;
+   uint8 compare_func;
+   SVGA3dFilter filter;
+   SVGA3dRGBAFloat bcolor;
+   unsigned try;
+   float min_lod, max_lod;
+
+   assert(svga_have_vgpu10(svga));
+
+   anisotropic = ss->aniso_level > 1.0f;
+
+   filter = translate_filter_mode(ps->min_mip_filter,
+                                  ps->min_img_filter,
+                                  ps->mag_img_filter,
+                                  anisotropic,
+                                  ss->compare_mode);
+
+   compare_func = translate_comparison_func(ss->compare_func);
+
+   COPY_4V(bcolor.value, ps->border_color.f);
+
+   ss->id = util_bitmask_add(svga->sampler_object_id_bm);
+
+   assert(ps->min_lod <= ps->max_lod);
+
+   if (ps->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+      /* just use the base level image */
+      min_lod = max_lod = 0.0f;
+   }
+   else {
+      min_lod = ps->min_lod;
+      max_lod = ps->max_lod;
+   }
+
+   /* Loop in case command buffer is full and we need to flush and retry */
+   for (try = 0; try < 2; try++) {
+      enum pipe_error ret =
+         SVGA3D_vgpu10_DefineSamplerState(svga->swc,
+                                          ss->id,
+                                          filter,
+                                          ss->addressu,
+                                          ss->addressv,
+                                          ss->addressw,
+                                          ss->lod_bias, /* float */
+                                          max_aniso,
+                                          compare_func,
+                                          bcolor,
+                                          min_lod,       /* float */
+                                          max_lod);      /* float */
+      if (ret == PIPE_OK)
+         return;
+      svga_context_flush(svga, NULL);
+   }
+}
+
+
 static void *
 svga_create_sampler_state(struct pipe_context *pipe,
                           const struct pipe_sampler_state *sampler)
@@ -141,6 +263,10 @@ svga_create_sampler_state(struct pipe_context *pipe,
       }
    }
 
+   if (svga_have_vgpu10(svga)) {
+      define_sampler_state_object(svga, cso, sampler);
+   }
+
    SVGA_DBG(DEBUG_VIEWS, "min %u, view(min %u, max %u) lod, mipfilter %s\n",
             cso->min_lod, cso->view_min_lod, cso->view_max_lod,
             cso->mipfilter == SVGA3D_TEX_FILTER_NONE ? "SVGA3D_TEX_FILTER_NONE" : "SOMETHING");
@@ -161,19 +287,19 @@ svga_bind_sampler_states(struct pipe_context *pipe,
    assert(shader < PIPE_SHADER_TYPES);
    assert(start + num <= PIPE_MAX_SAMPLERS);
 
-   /* we only support fragment shader samplers at this time */
-   if (shader != PIPE_SHADER_FRAGMENT)
+   /* Pre-VGPU10 only supports FS textures */
+   if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT)
       return;
 
    for (i = 0; i < num; i++)
-      svga->curr.sampler[start + i] = samplers[i];
+      svga->curr.sampler[shader][start + i] = samplers[i];
 
    /* find highest non-null sampler[] entry */
    {
-      unsigned j = MAX2(svga->curr.num_samplers, start + num);
-      while (j > 0 && svga->curr.sampler[j - 1] == NULL)
+      unsigned j = MAX2(svga->curr.num_samplers[shader], start + num);
+      while (j > 0 && svga->curr.sampler[shader][j - 1] == NULL)
          j--;
-      svga->curr.num_samplers = j;
+      svga->curr.num_samplers[shader] = j;
    }
 
    svga->dirty |= SVGA_NEW_SAMPLER;
@@ -183,6 +309,22 @@ svga_bind_sampler_states(struct pipe_context *pipe,
 static void svga_delete_sampler_state(struct pipe_context *pipe,
                                       void *sampler)
 {
+   struct svga_sampler_state *ss = (struct svga_sampler_state *) sampler;
+   struct svga_context *svga = svga_context(pipe);
+
+   if (svga_have_vgpu10(svga)) {
+      enum pipe_error ret;
+
+      svga_hwtnl_flush_retry(svga);
+
+      ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id);
+      }
+      util_bitmask_clear(svga->sampler_object_id_bm, ss->id);
+   }
+
    FREE(sampler);
 }
 
@@ -192,17 +334,21 @@ svga_create_sampler_view(struct pipe_context *pipe,
                          struct pipe_resource *texture,
                          const struct pipe_sampler_view *templ)
 {
-   struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
-
-   if (view) {
-      *view = *templ;
-      view->reference.count = 1;
-      view->texture = NULL;
-      pipe_resource_reference(&view->texture, texture);
-      view->context = pipe;
+   struct svga_pipe_sampler_view *sv = CALLOC_STRUCT(svga_pipe_sampler_view);
+
+   if (!sv) {
+      return NULL;
    }
 
-   return view;
+   sv->base = *templ;
+   sv->base.reference.count = 1;
+   sv->base.texture = NULL;
+   pipe_resource_reference(&sv->base.texture, texture);
+
+   sv->base.context = pipe;
+   sv->id = SVGA3D_INVALID_ID;
+
+   return &sv->base;
 }
 
 
@@ -210,8 +356,37 @@ static void
 svga_sampler_view_destroy(struct pipe_context *pipe,
                           struct pipe_sampler_view *view)
 {
-   pipe_resource_reference(&view->texture, NULL);
-   FREE(view);
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_pipe_sampler_view *sv = svga_pipe_sampler_view(view);
+
+   if (svga_have_vgpu10(svga) && sv->id != SVGA3D_INVALID_ID) {
+      if (view->context != pipe) {
+         /* The SVGA3D device will generate an error (and on Linux, cause
+          * us to abort) if we try to destroy a shader resource view from
+          * a context other than the one it was created with.  Skip the
+          * SVGA3D_vgpu10_DestroyShaderResourceView() and leak the sampler
+          * view for now.  This should only sometimes happen when a shared
+          * texture is deleted.
+          */
+         _debug_printf("context mismatch in %s\n", __func__);
+      }
+      else {
+         enum pipe_error ret;
+
+         svga_hwtnl_flush_retry(svga); /* XXX is this needed? */
+
+         ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id);
+         }
+         util_bitmask_clear(svga->sampler_view_id_bm, sv->id);
+      }
+   }
+
+   pipe_resource_reference(&sv->base.texture, NULL);
+
+   FREE(sv);
 }
 
 static void
@@ -227,20 +402,20 @@ svga_set_sampler_views(struct pipe_context *pipe,
    uint i;
 
    assert(shader < PIPE_SHADER_TYPES);
-   assert(start + num <= Elements(svga->curr.sampler_views));
+   assert(start + num <= Elements(svga->curr.sampler_views[shader]));
 
-   /* we only support fragment shader sampler views at this time */
-   if (shader != PIPE_SHADER_FRAGMENT)
+   /* Pre-VGPU10 only supports FS textures */
+   if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT)
       return;
 
    for (i = 0; i < num; i++) {
-      if (svga->curr.sampler_views[start + i] != views[i]) {
+      if (svga->curr.sampler_views[shader][start + i] != views[i]) {
          /* Note: we're using pipe_sampler_view_release() here to work around
           * a possible crash when the old view belongs to another context that
           * was already destroyed.
           */
-         pipe_sampler_view_release(pipe, &svga->curr.sampler_views[start + i]);
-         pipe_sampler_view_reference(&svga->curr.sampler_views[start + i],
+         pipe_sampler_view_release(pipe, &svga->curr.sampler_views[shader][start + i]);
+         pipe_sampler_view_reference(&svga->curr.sampler_views[shader][start + i],
                                      views[i]);
       }
 
@@ -256,10 +431,10 @@ svga_set_sampler_views(struct pipe_context *pipe,
 
    /* find highest non-null sampler_views[] entry */
    {
-      unsigned j = MAX2(svga->curr.num_sampler_views, start + num);
-      while (j > 0 && svga->curr.sampler_views[j - 1] == NULL)
+      unsigned j = MAX2(svga->curr.num_sampler_views[shader], start + num);
+      while (j > 0 && svga->curr.sampler_views[shader][j - 1] == NULL)
          j--;
-      svga->curr.num_sampler_views = j;
+      svga->curr.num_sampler_views[shader] = j;
    }
 
    svga->dirty |= SVGA_NEW_TEXTURE_BINDING;
diff --git a/src/gallium/drivers/svga/svga_pipe_streamout.c b/src/gallium/drivers/svga/svga_pipe_streamout.c
index 05672a95165..1da63204428 100644
--- a/src/gallium/drivers/svga/svga_pipe_streamout.c
+++ b/src/gallium/drivers/svga/svga_pipe_streamout.c
@@ -38,7 +38,7 @@ struct svga_stream_output_target {
 };
 
 /** cast wrapper */
-static INLINE struct svga_stream_output_target *
+static inline struct svga_stream_output_target *
 svga_stream_output_target(struct pipe_stream_output_target *s)
 {
    return (struct svga_stream_output_target *)s;
diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c
index faf77f3ab63..e0932a9dbc1 100644
--- a/src/gallium/drivers/svga/svga_pipe_vertex.c
+++ b/src/gallium/drivers/svga/svga_pipe_vertex.c
@@ -23,17 +23,21 @@
  *
  **********************************************************/
 
+#include "pipe/p_defines.h"
+#include "util/u_bitmask.h"
+#include "util/u_format.h"
 #include "util/u_helpers.h"
 #include "util/u_inlines.h"
-#include "pipe/p_defines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_transfer.h"
 #include "tgsi/tgsi_parse.h"
 
-#include "svga_screen.h"
-#include "svga_resource_buffer.h"
 #include "svga_context.h"
+#include "svga_cmd.h"
+#include "svga_format.h"
+#include "svga_resource_buffer.h"
+#include "svga_screen.h"
 
 
 static void svga_set_vertex_buffers(struct pipe_context *pipe,
@@ -55,25 +59,33 @@ static void svga_set_index_buffer(struct pipe_context *pipe,
 {
    struct svga_context *svga = svga_context(pipe);
 
-   if (ib) {
-      pipe_resource_reference(&svga->curr.ib.buffer, ib->buffer);
-      memcpy(&svga->curr.ib, ib, sizeof(svga->curr.ib));
-   }
-   else {
-      pipe_resource_reference(&svga->curr.ib.buffer, NULL);
-      memset(&svga->curr.ib, 0, sizeof(svga->curr.ib));
-   }
+   util_set_index_buffer(&svga->curr.ib, ib);
+}
 
-   /* TODO make this more like a state */
+
+/**
+ * Does the given vertex attrib format need range adjustment in the VS?
+ * Range adjustment scales and biases values from [0,1] to [-1,1].
+ * This lets us avoid the swtnl path.
+ */
+static boolean
+attrib_needs_range_adjustment(enum pipe_format format)
+{
+   switch (format) {
+   case PIPE_FORMAT_R8G8B8_SNORM:
+      return TRUE;
+   default:
+      return FALSE;
+   }
 }
 
 
 /**
- * Given a gallium vertex element format, return the corresponding SVGA3D
- * format.  Return SVGA3D_DECLTYPE_MAX for unsupported gallium formats.
+ * Given a gallium vertex element format, return the corresponding
+ * SVGA3dDeclType.
  */
 static SVGA3dDeclType
-translate_vertex_format(enum pipe_format format)
+translate_vertex_format_to_decltype(enum pipe_format format)
 {
    switch (format) {
    case PIPE_FORMAT_R32_FLOAT:            return SVGA3D_DECLTYPE_FLOAT1;
@@ -94,10 +106,10 @@ translate_vertex_format(enum pipe_format format)
    case PIPE_FORMAT_R16G16_FLOAT:         return SVGA3D_DECLTYPE_FLOAT16_2;
    case PIPE_FORMAT_R16G16B16A16_FLOAT:   return SVGA3D_DECLTYPE_FLOAT16_4;
 
-   /* See attrib_needs_adjustment() and attrib_needs_w_to_1() below */
+   /* See attrib_needs_range_adjustment() above and VF_W_TO_1 handling below */
    case PIPE_FORMAT_R8G8B8_SNORM:         return SVGA3D_DECLTYPE_UBYTE4N;
 
-   /* See attrib_needs_w_to_1() below */
+   /* See VF_W_TO_1 handling below */
    case PIPE_FORMAT_R16G16B16_SNORM:      return SVGA3D_DECLTYPE_SHORT4N;
    case PIPE_FORMAT_R16G16B16_UNORM:      return SVGA3D_DECLTYPE_USHORT4N;
    case PIPE_FORMAT_R8G8B8_UNORM:         return SVGA3D_DECLTYPE_UBYTE4N;
@@ -111,38 +123,121 @@ translate_vertex_format(enum pipe_format format)
 }
 
 
-/**
- * Does the given vertex attrib format need range adjustment in the VS?
- * Range adjustment scales and biases values from [0,1] to [-1,1].
- * This lets us avoid the swtnl path.
- */
-static boolean
-attrib_needs_range_adjustment(enum pipe_format format)
+static void
+define_input_element_object(struct svga_context *svga,
+                            struct svga_velems_state *velems)
 {
-   switch (format) {
-   case PIPE_FORMAT_R8G8B8_SNORM:
-      return TRUE;
-   default:
-      return FALSE;
+   SVGA3dInputElementDesc elements[PIPE_MAX_ATTRIBS];
+   enum pipe_error ret;
+   unsigned i;
+
+   assert(velems->count <= PIPE_MAX_ATTRIBS);
+   assert(svga_have_vgpu10(svga));
+
+   for (i = 0; i < velems->count; i++) {
+      const struct pipe_vertex_element *elem = velems->velem + i;
+      SVGA3dSurfaceFormat svga_format;
+      unsigned vf_flags;
+
+      svga_translate_vertex_format_vgpu10(elem->src_format,
+                                          &svga_format, &vf_flags);
+
+      velems->decl_type[i] =
+         translate_vertex_format_to_decltype(elem->src_format);
+      elements[i].inputSlot = elem->vertex_buffer_index;
+      elements[i].alignedByteOffset = elem->src_offset;
+      elements[i].format = svga_format;
+
+      if (elem->instance_divisor) {
+         elements[i].inputSlotClass = SVGA3D_INPUT_PER_INSTANCE_DATA;
+         elements[i].instanceDataStepRate = elem->instance_divisor;
+      }
+      else {
+         elements[i].inputSlotClass = SVGA3D_INPUT_PER_VERTEX_DATA;
+         elements[i].instanceDataStepRate = 0;
+      }
+      elements[i].inputRegister = i;
+
+      if (elements[i].format == SVGA3D_FORMAT_INVALID) {
+         velems->need_swvfetch = TRUE;
+      }
+
+      if (util_format_is_pure_integer(elem->src_format)) {
+         velems->attrib_is_pure_int |= (1 << i);
+      }
+
+      if (vf_flags & VF_W_TO_1) {
+         velems->adjust_attrib_w_1 |= (1 << i);
+      }
+
+      if (vf_flags & VF_U_TO_F_CAST) {
+         velems->adjust_attrib_utof |= (1 << i);
+      }
+      else if (vf_flags & VF_I_TO_F_CAST) {
+         velems->adjust_attrib_itof |= (1 << i);
+      }
+
+      if (vf_flags & VF_BGRA) {
+         velems->attrib_is_bgra |= (1 << i);
+      }
+
+      if (vf_flags & VF_PUINT_TO_SNORM) {
+         velems->attrib_puint_to_snorm |= (1 << i);
+      }
+      else if (vf_flags & VF_PUINT_TO_USCALED) {
+         velems->attrib_puint_to_uscaled |= (1 << i);
+      }
+      else if (vf_flags & VF_PUINT_TO_SSCALED) {
+         velems->attrib_puint_to_sscaled |= (1 << i);
+      }
+   }
+
+   velems->id = util_bitmask_add(svga->input_element_object_id_bm);
+
+   ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, velems->count,
+                                           velems->id, elements);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, velems->count,
+                                              velems->id, elements);
+      assert(ret == PIPE_OK);
    }
 }
 
 
 /**
- * Does the given vertex attrib format need to have the W component set
- * to one in the VS?
+ * Translate the vertex element types to SVGA3dDeclType and check
+ * for VS-based vertex attribute adjustments.
  */
-static boolean
-attrib_needs_w_to_1(enum pipe_format format)
+static void
+translate_vertex_decls(struct svga_context *svga,
+                       struct svga_velems_state *velems)
 {
-   switch (format) {
-   case PIPE_FORMAT_R8G8B8_SNORM:
-   case PIPE_FORMAT_R8G8B8_UNORM:
-   case PIPE_FORMAT_R16G16B16_SNORM:
-   case PIPE_FORMAT_R16G16B16_UNORM:
-      return TRUE;
-   default:
-      return FALSE;
+   unsigned i;
+
+   assert(!svga_have_vgpu10(svga));
+
+   for (i = 0; i < velems->count; i++) {
+      const enum pipe_format f = velems->velem[i].src_format;
+      SVGA3dSurfaceFormat svga_format;
+      unsigned vf_flags;
+
+      svga_translate_vertex_format_vgpu10(f, &svga_format, &vf_flags);
+
+      velems->decl_type[i] = translate_vertex_format_to_decltype(f);
+      if (velems->decl_type[i] == SVGA3D_DECLTYPE_MAX) {
+         /* Unsupported format - use software fetch */
+         velems->need_swvfetch = TRUE;
+      }
+
+      /* Check for VS-based adjustments */
+      if (attrib_needs_range_adjustment(f)) {
+         velems->adjust_attrib_range |= (1 << i);
+      }
+
+      if (vf_flags & VF_W_TO_1) {
+         velems->adjust_attrib_w_1 |= (1 << i);
+      }
    }
 }
 
@@ -152,53 +247,73 @@ svga_create_vertex_elements_state(struct pipe_context *pipe,
                                   unsigned count,
                                   const struct pipe_vertex_element *attribs)
 {
+   struct svga_context *svga = svga_context(pipe);
    struct svga_velems_state *velems;
+
    assert(count <= PIPE_MAX_ATTRIBS);
    velems = (struct svga_velems_state *) MALLOC(sizeof(struct svga_velems_state));
    if (velems) {
-      unsigned i;
-
       velems->count = count;
       memcpy(velems->velem, attribs, sizeof(*attribs) * count);
 
       velems->need_swvfetch = FALSE;
       velems->adjust_attrib_range = 0x0;
+      velems->attrib_is_pure_int = 0x0;
       velems->adjust_attrib_w_1 = 0x0;
-
-      /* Translate Gallium vertex format to SVGA3dDeclType */
-      for (i = 0; i < count; i++) {
-         enum pipe_format f = attribs[i].src_format;
-         velems->decl_type[i] = translate_vertex_format(f);
-         if (velems->decl_type[i] == SVGA3D_DECLTYPE_MAX) {
-            /* Unsupported format - use software fetch */
-            velems->need_swvfetch = TRUE;
-            break;
-         }
-
-         if (attrib_needs_range_adjustment(f)) {
-            velems->adjust_attrib_range |= (1 << i);
-         }
-         if (attrib_needs_w_to_1(f)) {
-            velems->adjust_attrib_w_1 |= (1 << i);
-         }
+      velems->adjust_attrib_itof = 0x0;
+      velems->adjust_attrib_utof = 0x0;
+      velems->attrib_is_bgra = 0x0;
+      velems->attrib_puint_to_snorm = 0x0;
+      velems->attrib_puint_to_uscaled = 0x0;
+      velems->attrib_puint_to_sscaled = 0x0;
+
+      if (svga_have_vgpu10(svga)) {
+         define_input_element_object(svga, velems);
+      }
+      else {
+         translate_vertex_decls(svga, velems);
       }
    }
    return velems;
 }
 
-static void svga_bind_vertex_elements_state(struct pipe_context *pipe,
-                                            void *velems)
+
+static void
+svga_bind_vertex_elements_state(struct pipe_context *pipe, void *state)
 {
    struct svga_context *svga = svga_context(pipe);
-   struct svga_velems_state *svga_velems = (struct svga_velems_state *) velems;
+   struct svga_velems_state *velems = (struct svga_velems_state *) state;
 
-   svga->curr.velems = svga_velems;
+   svga->curr.velems = velems;
    svga->dirty |= SVGA_NEW_VELEMENT;
 }
 
-static void svga_delete_vertex_elements_state(struct pipe_context *pipe,
-                                              void *velems)
+
+static void
+svga_delete_vertex_elements_state(struct pipe_context *pipe, void *state)
 {
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_velems_state *velems = (struct svga_velems_state *) state;
+
+   if (svga_have_vgpu10(svga)) {
+      enum pipe_error ret;
+
+      svga_hwtnl_flush_retry(svga);
+
+      ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, velems->id);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, velems->id);
+         assert(ret == PIPE_OK);
+      }
+
+      if (velems->id == svga->state.hw_draw.layout_id)
+         svga->state.hw_draw.layout_id = SVGA3D_INVALID_ID;
+
+      util_bitmask_clear(svga->input_element_object_id_bm, velems->id);
+      velems->id = SVGA3D_INVALID_ID;
+   }
+
    FREE(velems);
 }
 
@@ -219,5 +334,3 @@ void svga_init_vertex_functions( struct svga_context *svga )
    svga->pipe.bind_vertex_elements_state = svga_bind_vertex_elements_state;
    svga->pipe.delete_vertex_elements_state = svga_delete_vertex_elements_state;
 }
-
-
diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c
index c3ac663b4a2..630f4907895 100644
--- a/src/gallium/drivers/svga/svga_pipe_vs.c
+++ b/src/gallium/drivers/svga/svga_pipe_vs.c
@@ -32,11 +32,11 @@
 #include "tgsi/tgsi_text.h"
 
 #include "svga_context.h"
-#include "svga_tgsi.h"
 #include "svga_hw_reg.h"
 #include "svga_cmd.h"
 #include "svga_debug.h"
 #include "svga_shader.h"
+#include "svga_streamout.h"
 
 
 /**
@@ -100,6 +100,7 @@ svga_create_vs_state(struct pipe_context *pipe,
 {
    struct svga_context *svga = svga_context(pipe);
    struct svga_vertex_shader *vs = CALLOC_STRUCT(svga_vertex_shader);
+
    if (!vs)
       return NULL;
 
@@ -123,10 +124,12 @@ svga_create_vs_state(struct pipe_context *pipe,
 
    vs->base.id = svga->debug.shader_id++;
 
-   if (SVGA_DEBUG & DEBUG_TGSI || 0) {
-      debug_printf("%s id: %u, inputs: %u, outputs: %u\n",
-                   __FUNCTION__, vs->base.id,
-                   vs->base.info.num_inputs, vs->base.info.num_outputs);
+   vs->generic_outputs = svga_get_generic_outputs_mask(&vs->base.info);
+
+   /* check for any stream output declarations */
+   if (templ->stream_output.num_outputs) {
+      vs->base.stream_output = svga_create_stream_output(svga, &vs->base,
+                                                         &templ->stream_output);
    }
 
    return vs;
@@ -139,6 +142,17 @@ svga_bind_vs_state(struct pipe_context *pipe, void *shader)
    struct svga_vertex_shader *vs = (struct svga_vertex_shader *)shader;
    struct svga_context *svga = svga_context(pipe);
 
+   if (vs == svga->curr.vs)
+      return;
+
+   /* If the currently bound vertex shader has a generated geometry shader,
+    * then unbind the geometry shader before binding a new vertex shader.
+    * We need to unbind the geometry shader here because there is no
+    * pipe_shader associated with the generated geometry shader.
+    */
+   if (svga->curr.vs != NULL && svga->curr.vs->gs != NULL)
+      svga->pipe.bind_gs_state(&svga->pipe, NULL);
+
    svga->curr.vs = vs;
    svga->dirty |= SVGA_NEW_VS;
 }
@@ -154,20 +168,40 @@ svga_delete_vs_state(struct pipe_context *pipe, void *shader)
 
    svga_hwtnl_flush_retry(svga);
 
+   assert(vs->base.parent == NULL);
+
+   /* Check if there is a generated geometry shader to go with this
+    * vertex shader. If there is, then delete the geometry shader as well.
+    */
+   if (vs->gs != NULL) {
+      svga->pipe.delete_gs_state(&svga->pipe, vs->gs);
+   }
+
+   if (vs->base.stream_output != NULL)
+      svga_delete_stream_output(svga, vs->base.stream_output);
+
    draw_delete_vertex_shader(svga->swtnl.draw, vs->draw_shader);
 
    for (variant = vs->base.variants; variant; variant = tmp) {
       tmp = variant->next;
 
-      ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
-      (void) ret;  /* PIPE_ERROR_ not handled yet */
-
-      /*
-       * Remove stale references to this variant to ensure a new variant on the
-       * same address will be detected as a change.
-       */
-      if (variant == svga->state.hw_draw.vs)
+      /* Check if deleting currently bound shader */
+      if (variant == svga->state.hw_draw.vs) {
+         ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL);
+            assert(ret == PIPE_OK);
+         }
          svga->state.hw_draw.vs = NULL;
+      }
+
+      ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
+         assert(ret == PIPE_OK);
+      }
    }
 
    FREE((void *)vs->base.tokens);
diff --git a/src/gallium/drivers/svga/svga_resource.c b/src/gallium/drivers/svga/svga_resource.c
index b295b44ea37..a910ae0ca72 100644
--- a/src/gallium/drivers/svga/svga_resource.c
+++ b/src/gallium/drivers/svga/svga_resource.c
@@ -69,18 +69,21 @@ svga_can_create_resource(struct pipe_screen *screen,
    struct svga_winsys_screen *sws = svgascreen->sws;
    SVGA3dSurfaceFormat format;
    SVGA3dSize base_level_size;
-   uint32 numFaces;
    uint32 numMipLevels;
+   uint32 arraySize;
 
    if (res->target == PIPE_BUFFER) {
       format = SVGA3D_BUFFER;
       base_level_size.width = res->width0;
       base_level_size.height = 1;
       base_level_size.depth = 1;
-      numFaces = 1;
       numMipLevels = 1;
+      arraySize = 1;
 
    } else {
+      if (res->target == PIPE_TEXTURE_CUBE)
+         assert(res->array_size == 6);
+
       format = svga_translate_format(svgascreen, res->format, res->bind);
       if (format == SVGA3D_FORMAT_INVALID)
          return FALSE;
@@ -88,12 +91,12 @@ svga_can_create_resource(struct pipe_screen *screen,
       base_level_size.width = res->width0;
       base_level_size.height = res->height0;
       base_level_size.depth = res->depth0;
-      numFaces = (res->target == PIPE_TEXTURE_CUBE) ? 6 : 1;
       numMipLevels = res->last_level + 1;
+      arraySize = res->array_size;
    }
 
    return sws->surface_can_create(sws, format, base_level_size, 
-                                  numFaces, numMipLevels);
+                                  arraySize, numMipLevels);
 }
 
 
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c
index 13f85cddbd5..7ef36b367d3 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer.c
@@ -48,7 +48,8 @@
 static inline boolean
 svga_buffer_needs_hw_storage(unsigned usage)
 {
-   return usage & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER);
+   return (usage & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
+                    PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT)) != 0;
 }
 
 
@@ -87,6 +88,26 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
    transfer->usage = usage;
    transfer->box = *box;
 
+   if ((usage & PIPE_TRANSFER_READ) && sbuf->dirty) {
+      /* Only need to test for vgpu10 since only vgpu10 features (streamout,
+       * buffer copy) can modify buffers on the device.
+       */
+      if (svga_have_vgpu10(svga)) {
+         enum pipe_error ret;
+         assert(sbuf->handle);
+         ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0);
+            assert(ret == PIPE_OK);
+         }
+
+         svga_context_finish(svga);
+
+         sbuf->dirty = FALSE;
+      }
+   }
+
    if (usage & PIPE_TRANSFER_WRITE) {
       if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
          /*
@@ -343,13 +364,43 @@ svga_buffer_create(struct pipe_screen *screen,
    sbuf->b.vtbl = &svga_buffer_vtbl;
    pipe_reference_init(&sbuf->b.b.reference, 1);
    sbuf->b.b.screen = screen;
+   sbuf->bind_flags = template->bind;
+
+   if (template->bind & PIPE_BIND_CONSTANT_BUFFER) {
+      /* Constant buffers can only have the PIPE_BIND_CONSTANT_BUFFER
+       * flag set.
+       */
+      if (ss->sws->have_vgpu10) {
+         sbuf->bind_flags = PIPE_BIND_CONSTANT_BUFFER;
+
+         /* Constant buffer size needs to be in multiples of 16. */
+         sbuf->b.b.width0 = align(sbuf->b.b.width0, 16);
+      }
+   }
 
    if(svga_buffer_needs_hw_storage(template->bind)) {
+
+      /* If the buffer will be used for vertex/index/stream data, set all
+       * the flags so that the buffer will be accepted for all those uses.
+       * Note that the PIPE_BIND_ flags we get from the state tracker are
+       * just a hint about how the buffer may be used.  An OpenGL buffer
+       * object may be used for many different things.
+       */
+      if (!(template->bind & PIPE_BIND_CONSTANT_BUFFER)) {
+         /* Not a constant buffer.  The buffer may be used for vertex data,
+          * indexes or stream-out.
+          */
+         sbuf->bind_flags |= (PIPE_BIND_VERTEX_BUFFER |
+                              PIPE_BIND_INDEX_BUFFER);
+         if (ss->sws->have_vgpu10)
+            sbuf->bind_flags |= PIPE_BIND_STREAM_OUTPUT;
+      }
+
       if(svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK)
          goto error2;
    }
    else {
-      sbuf->swbuf = align_malloc(template->width0, 64);
+      sbuf->swbuf = align_malloc(sbuf->b.b.width0, 64);
       if(!sbuf->swbuf)
          goto error2;
    }
@@ -357,7 +408,7 @@ svga_buffer_create(struct pipe_screen *screen,
    debug_reference(&sbuf->b.b.reference,
                    (debug_reference_descriptor)debug_describe_resource, 0);
 
-   sbuf->size = util_resource_size(template);
+   sbuf->size = util_resource_size(&sbuf->b.b);
    ss->total_resource_bytes += sbuf->size;
 
    return &sbuf->b.b; 
@@ -391,6 +442,7 @@ svga_user_buffer_create(struct pipe_screen *screen,
    sbuf->b.b.depth0 = 1;
    sbuf->b.b.array_size = 1;
 
+   sbuf->bind_flags = bind;
    sbuf->swbuf = ptr;
    sbuf->user = TRUE;
 
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.h b/src/gallium/drivers/svga/svga_resource_buffer.h
index e838beb6661..75e12c3220c 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.h
+++ b/src/gallium/drivers/svga/svga_resource_buffer.h
@@ -65,6 +65,9 @@ struct svga_buffer
 {
    struct u_resource b;
 
+   /** This is a superset of b.b.bind */
+   unsigned bind_flags;
+
    /**
     * Regular (non DMA'able) memory.
     * 
@@ -187,6 +190,8 @@ struct svga_buffer
    struct list_head head;
 
    unsigned size;  /**< Approximate size in bytes */
+
+   boolean dirty;  /**< Need to do a readback before mapping? */
 };
 
 
diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
index 5686531f988..69e5f75e208 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
@@ -149,10 +149,22 @@ svga_buffer_create_host_surface(struct svga_screen *ss,
       sbuf->key.flags = 0;
 
       sbuf->key.format = SVGA3D_BUFFER;
-      if (sbuf->b.b.bind & PIPE_BIND_VERTEX_BUFFER)
+      if (sbuf->bind_flags & PIPE_BIND_VERTEX_BUFFER) {
          sbuf->key.flags |= SVGA3D_SURFACE_HINT_VERTEXBUFFER;
-      if (sbuf->b.b.bind & PIPE_BIND_INDEX_BUFFER)
+         sbuf->key.flags |= SVGA3D_SURFACE_BIND_VERTEX_BUFFER;
+      }
+      if (sbuf->bind_flags & PIPE_BIND_INDEX_BUFFER) {
          sbuf->key.flags |= SVGA3D_SURFACE_HINT_INDEXBUFFER;
+         sbuf->key.flags |= SVGA3D_SURFACE_BIND_INDEX_BUFFER;
+      }
+      if (sbuf->bind_flags & PIPE_BIND_CONSTANT_BUFFER)
+         sbuf->key.flags |= SVGA3D_SURFACE_BIND_CONSTANT_BUFFER;
+
+      if (sbuf->bind_flags & PIPE_BIND_STREAM_OUTPUT)
+         sbuf->key.flags |= SVGA3D_SURFACE_BIND_STREAM_OUTPUT;
+
+      if (sbuf->bind_flags & PIPE_BIND_SAMPLER_VIEW)
+         sbuf->key.flags |= SVGA3D_SURFACE_BIND_SHADER_RESOURCE;
 
       sbuf->key.size.width = sbuf->b.b.width0;
       sbuf->key.size.height = 1;
@@ -161,10 +173,12 @@ svga_buffer_create_host_surface(struct svga_screen *ss,
       sbuf->key.numFaces = 1;
       sbuf->key.numMipLevels = 1;
       sbuf->key.cachable = 1;
+      sbuf->key.arraySize = 1;
 
       SVGA_DBG(DEBUG_DMA, "surface_create for buffer sz %d\n", sbuf->b.b.width0);
 
-      sbuf->handle = svga_screen_surface_create(ss, &sbuf->key);
+      sbuf->handle = svga_screen_surface_create(ss, sbuf->b.b.bind,
+                                                sbuf->b.b.usage, &sbuf->key);
       if (!sbuf->handle)
          return PIPE_ERROR_OUT_OF_MEMORY;
 
@@ -203,8 +217,8 @@ svga_buffer_upload_gb_command(struct svga_context *svga,
 			      struct svga_buffer *sbuf)
 {
    struct svga_winsys_context *swc = svga->swc;
-   SVGA3dCmdUpdateGBImage *cmd;
-   struct svga_3d_update_gb_image *ccmd = NULL;
+   SVGA3dCmdUpdateGBImage *update_cmd;
+   struct svga_3d_update_gb_image *whole_update_cmd = NULL;
    uint32 numBoxes = sbuf->map.num_ranges;
    struct pipe_resource *dummy;
    unsigned int i;
@@ -214,68 +228,78 @@ svga_buffer_upload_gb_command(struct svga_context *svga,
 
    if (sbuf->dma.flags.discard) {
       struct svga_3d_invalidate_gb_image *cicmd = NULL;
-      SVGA3dCmdInvalidateGBImage *icmd;
+      SVGA3dCmdInvalidateGBImage *invalidate_cmd;
+      const unsigned total_commands_size =
+         sizeof(*invalidate_cmd) + numBoxes * sizeof(*whole_update_cmd);
 
       /* Allocate FIFO space for one INVALIDATE_GB_IMAGE command followed by
        * 'numBoxes' UPDATE_GB_IMAGE commands.  Allocate all at once rather
        * than with separate commands because we need to properly deal with
        * filling the command buffer.
        */
-      icmd = SVGA3D_FIFOReserve(swc,
-				SVGA_3D_CMD_INVALIDATE_GB_IMAGE,
-				sizeof *icmd + numBoxes * sizeof *ccmd,
-				2);
-      if (!icmd)
+      invalidate_cmd = SVGA3D_FIFOReserve(swc,
+                                          SVGA_3D_CMD_INVALIDATE_GB_IMAGE,
+                                          total_commands_size, 1 + numBoxes);
+      if (!invalidate_cmd)
 	 return PIPE_ERROR_OUT_OF_MEMORY;
 
-      cicmd = container_of(icmd, cicmd, body);
-      cicmd->header.size = sizeof *icmd;
-      swc->surface_relocation(swc, &icmd->image.sid, NULL, sbuf->handle,
+      cicmd = container_of(invalidate_cmd, cicmd, body);
+      cicmd->header.size = sizeof(*invalidate_cmd);
+      swc->surface_relocation(swc, &invalidate_cmd->image.sid, NULL, sbuf->handle,
                               (SVGA_RELOC_WRITE |
                                SVGA_RELOC_INTERNAL |
                                SVGA_RELOC_DMA));
-      icmd->image.face = 0;
-      icmd->image.mipmap = 0;
+      invalidate_cmd->image.face = 0;
+      invalidate_cmd->image.mipmap = 0;
 
+      /* The whole_update_cmd is a SVGA3dCmdHeader plus the
+       * SVGA3dCmdUpdateGBImage command.
+       */
+      whole_update_cmd = (struct svga_3d_update_gb_image *) &invalidate_cmd[1];
       /* initialize the first UPDATE_GB_IMAGE command */
-      ccmd = (struct svga_3d_update_gb_image *) &icmd[1];
-      ccmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE;
-      cmd = &ccmd->body;
+      whole_update_cmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE;
+      update_cmd = &whole_update_cmd->body;
 
    } else {
       /* Allocate FIFO space for 'numBoxes' UPDATE_GB_IMAGE commands */
-      cmd = SVGA3D_FIFOReserve(swc,
-			       SVGA_3D_CMD_UPDATE_GB_IMAGE,
-			       sizeof *cmd + (numBoxes - 1) * sizeof *ccmd,
-			       1);
-      if (!cmd)
+      const unsigned total_commands_size =
+         sizeof(*update_cmd) + (numBoxes - 1) * sizeof(*whole_update_cmd);
+
+      update_cmd = SVGA3D_FIFOReserve(swc,
+                                      SVGA_3D_CMD_UPDATE_GB_IMAGE,
+                                      total_commands_size, numBoxes);
+      if (!update_cmd)
 	 return PIPE_ERROR_OUT_OF_MEMORY;
 
-      ccmd = container_of(cmd, ccmd, body);
+      /* The whole_update_cmd is a SVGA3dCmdHeader plus the
+       * SVGA3dCmdUpdateGBImage command.
+       */
+      whole_update_cmd = container_of(update_cmd, whole_update_cmd, body);
    }
 
    /* Init the first UPDATE_GB_IMAGE command */
-   ccmd->header.size = sizeof *cmd;
-   swc->surface_relocation(swc, &cmd->image.sid, NULL, sbuf->handle,
+   whole_update_cmd->header.size = sizeof(*update_cmd);
+   swc->surface_relocation(swc, &update_cmd->image.sid, NULL, sbuf->handle,
 			   SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL);
-   cmd->image.face = 0;
-   cmd->image.mipmap = 0;
+   update_cmd->image.face = 0;
+   update_cmd->image.mipmap = 0;
 
    /* Save pointer to the first UPDATE_GB_IMAGE command so that we can
     * fill in the box info below.
     */
-   sbuf->dma.updates = ccmd;
+   sbuf->dma.updates = whole_update_cmd;
 
    /*
-    * Copy the relocation info, face and mipmap to all
-    * subsequent commands. NOTE: For winsyses that actually
-    * patch the image.sid member at flush time, this will fail
-    * miserably. For those we need to add as many relocations
-    * as there are copy boxes.
+    * Copy the face, mipmap, etc. info to all subsequent commands.
+    * Also do the surface relocation for each subsequent command.
     */
-
    for (i = 1; i < numBoxes; ++i) {
-      memcpy(++ccmd, sbuf->dma.updates, sizeof *ccmd);
+      whole_update_cmd++;
+      memcpy(whole_update_cmd, sbuf->dma.updates, sizeof(*whole_update_cmd));
+
+      swc->surface_relocation(swc, &whole_update_cmd->body.image.sid, NULL,
+                              sbuf->handle,
+                              SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL);
    }
 
    /* Increment reference count */
diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c
index 64fd245c0e1..90787be8073 100644
--- a/src/gallium/drivers/svga/svga_resource_texture.c
+++ b/src/gallium/drivers/svga/svga_resource_texture.c
@@ -46,12 +46,6 @@
 #include "svga_debug.h"
 
 
-/* XXX: This isn't a real hardware flag, but just a hack for kernel to
- * know about primary surfaces. Find a better way to accomplish this.
- */
-#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9)
-
-
 static void
 svga_transfer_dma_band(struct svga_context *svga,
                        struct svga_transfer *st,
@@ -59,10 +53,10 @@ svga_transfer_dma_band(struct svga_context *svga,
                        unsigned y, unsigned h, unsigned srcy,
                        SVGA3dSurfaceDMAFlags flags)
 {
-   struct svga_texture *texture = svga_texture(st->base.resource); 
+   struct svga_texture *texture = svga_texture(st->base.resource);
    SVGA3dCopyBox box;
    enum pipe_error ret;
- 
+
    assert(!st->use_direct_map);
 
    box.x = st->base.box.x;
@@ -75,28 +69,23 @@ svga_transfer_dma_band(struct svga_context *svga,
    box.srcy = srcy;
    box.srcz = 0;
 
-   if (st->base.resource->target == PIPE_TEXTURE_CUBE) {
-      st->face = st->base.box.z;
-      box.z = 0;
-   }
-   else
-      st->face = 0;
-
-   SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - (%u, %u, %u), %ubpp\n",
-                transfer == SVGA3D_WRITE_HOST_VRAM ? "to" : "from", 
-                texture->handle,
-                st->face,
-                st->base.box.x,
-                y,
-                box.z,
-                st->base.box.x + st->base.box.width,
-                y + h,
-                box.z + 1,
-                util_format_get_blocksize(texture->b.b.format) * 8 /
-                (util_format_get_blockwidth(texture->b.b.format)*util_format_get_blockheight(texture->b.b.format)));
+   SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - "
+            "(%u, %u, %u), %ubpp\n",
+            transfer == SVGA3D_WRITE_HOST_VRAM ? "to" : "from",
+            texture->handle,
+            st->slice,
+            st->base.box.x,
+            y,
+            box.z,
+            st->base.box.x + st->base.box.width,
+            y + h,
+            box.z + 1,
+            util_format_get_blocksize(texture->b.b.format) * 8 /
+            (util_format_get_blockwidth(texture->b.b.format)
+             * util_format_get_blockheight(texture->b.b.format)));
 
    ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags);
-   if(ret != PIPE_OK) {
+   if (ret != PIPE_OK) {
       svga_context_flush(svga, NULL);
       ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags);
       assert(ret == PIPE_OK);
@@ -110,7 +99,7 @@ svga_transfer_dma(struct svga_context *svga,
                   SVGA3dTransferType transfer,
                   SVGA3dSurfaceDMAFlags flags)
 {
-   struct svga_texture *texture = svga_texture(st->base.resource); 
+   struct svga_texture *texture = svga_texture(st->base.resource);
    struct svga_screen *screen = svga_screen(texture->b.b.screen);
    struct svga_winsys_screen *sws = screen->sws;
    struct pipe_fence_handle *fence = NULL;
@@ -126,14 +115,13 @@ svga_transfer_dma(struct svga_context *svga,
     */
    svga_surfaces_flush( svga );
 
-   if(!st->swbuf) {
+   if (!st->swbuf) {
       /* Do the DMA transfer in a single go */
-
       svga_transfer_dma_band(svga, st, transfer,
                              st->base.box.y, st->base.box.height, 0,
                              flags);
 
-      if(transfer == SVGA3D_READ_HOST_VRAM) {
+      if (transfer == SVGA3D_READ_HOST_VRAM) {
          svga_context_flush(svga, &fence);
          sws->fence_finish(sws, fence, 0);
          sws->fence_reference(sws, &fence, NULL);
@@ -141,10 +129,13 @@ svga_transfer_dma(struct svga_context *svga,
    }
    else {
       int y, h, srcy;
-      unsigned blockheight = util_format_get_blockheight(st->base.resource->format);
+      unsigned blockheight =
+         util_format_get_blockheight(st->base.resource->format);
+
       h = st->hw_nblocksy * blockheight;
       srcy = 0;
-      for(y = 0; y < st->base.box.height; y += h) {
+
+      for (y = 0; y < st->base.box.height; y += h) {
          unsigned offset, length;
          void *hw, *sw;
 
@@ -158,7 +149,7 @@ svga_transfer_dma(struct svga_context *svga,
          offset = y * st->base.stride / blockheight;
          length = h * st->base.stride / blockheight;
 
-         sw = (uint8_t *)st->swbuf + offset;
+         sw = (uint8_t *) st->swbuf + offset;
 
          if (transfer == SVGA3D_WRITE_HOST_VRAM) {
             unsigned usage = PIPE_TRANSFER_WRITE;
@@ -184,16 +175,15 @@ svga_transfer_dma(struct svga_context *svga,
           * Prevent the texture contents to be discarded on the next band
           * upload.
           */
-
          flags.discard = FALSE;
 
-         if(transfer == SVGA3D_READ_HOST_VRAM) {
+         if (transfer == SVGA3D_READ_HOST_VRAM) {
             svga_context_flush(svga, &fence);
             sws->fence_finish(sws, fence, 0);
 
             hw = sws->buffer_map(sws, st->hwbuf, PIPE_TRANSFER_READ);
             assert(hw);
-            if(hw) {
+            if (hw) {
                memcpy(sw, hw, length);
                sws->buffer_unmap(sws, st->hwbuf);
             }
@@ -203,19 +193,22 @@ svga_transfer_dma(struct svga_context *svga,
 }
 
 
-static boolean 
+static boolean
 svga_texture_get_handle(struct pipe_screen *screen,
-                               struct pipe_resource *texture,
-                               struct winsys_handle *whandle)
+                        struct pipe_resource *texture,
+                        struct winsys_handle *whandle)
 {
    struct svga_winsys_screen *sws = svga_winsys_screen(texture->screen);
    unsigned stride;
 
    assert(svga_texture(texture)->key.cachable == 0);
    svga_texture(texture)->key.cachable = 0;
+
    stride = util_format_get_nblocksx(texture->format, texture->width0) *
             util_format_get_blocksize(texture->format);
-   return sws->surface_get_handle(sws, svga_texture(texture)->handle, stride, whandle);
+
+   return sws->surface_get_handle(sws, svga_texture(texture)->handle,
+                                  stride, whandle);
 }
 
 
@@ -238,6 +231,7 @@ svga_texture_destroy(struct pipe_screen *screen,
 
    ss->total_resource_bytes -= tex->size;
 
+   FREE(tex->defined);
    FREE(tex->rendered_to);
    FREE(tex);
 }
@@ -274,10 +268,43 @@ need_tex_readback(struct pipe_transfer *transfer)
 }
 
 
+/* Call SVGA3D_ReadbackGBImage(); on failure, flush the context and retry
+ * once.  Returns the status of the last attempt made. */
+static enum pipe_error
+readback_image_vgpu9(struct svga_context *svga,
+                     struct svga_winsys_surface *surf,
+                     unsigned slice, unsigned level)
+{
+   enum pipe_error status =
+      SVGA3D_ReadbackGBImage(svga->swc, surf, slice, level);
+
+   if (status == PIPE_OK)
+      return status;
+   svga_context_flush(svga, NULL);
+   return SVGA3D_ReadbackGBImage(svga->swc, surf, slice, level);
+}
+
+
+/* vgpu10 readback: call SVGA3D_vgpu10_ReadbackSubResource(); on failure,
+ * flush the context and retry once.  Returns the last attempt's status. */
+static enum pipe_error
+readback_image_vgpu10(struct svga_context *svga,
+                      struct svga_winsys_surface *surf,
+                      unsigned slice, unsigned level,
+                      unsigned numMipLevels)
+{
+   /* Flat index: each slice contributes numMipLevels consecutive entries. */
+   const unsigned subResource = slice * numMipLevels + level;
+   enum pipe_error status =
+      SVGA3D_vgpu10_ReadbackSubResource(svga->swc, surf, subResource);
+
+   if (status == PIPE_OK)
+      return status;
+   svga_context_flush(svga, NULL);
+   return SVGA3D_vgpu10_ReadbackSubResource(svga->swc, surf, subResource);
+}
+
 
-/* XXX: Still implementing this as if it was a screen function, but
- * can now modify it to queue transfers on the context.
- */
 static void *
 svga_texture_transfer_map(struct pipe_context *pipe,
                           struct pipe_resource *texture,
@@ -289,6 +316,7 @@ svga_texture_transfer_map(struct pipe_context *pipe,
    struct svga_context *svga = svga_context(pipe);
    struct svga_screen *ss = svga_screen(pipe->screen);
    struct svga_winsys_screen *sws = ss->sws;
+   struct svga_texture *tex = svga_texture(texture);
    struct svga_transfer *st;
    unsigned nblocksx, nblocksy;
    boolean use_direct_map = svga_have_gb_objects(svga) &&
@@ -326,25 +354,34 @@ svga_texture_transfer_map(struct pipe_context *pipe,
    }
 
    pipe_resource_reference(&st->base.resource, texture);
+
    st->base.level = level;
    st->base.usage = usage;
    st->base.box = *box;
    st->base.stride = nblocksx*util_format_get_blocksize(texture->format);
    st->base.layer_stride = st->base.stride * nblocksy;
 
+   switch (tex->b.b.target) {
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_1D_ARRAY:
+      st->slice = st->base.box.z;
+      st->base.box.z = 0;   /* so we don't apply double offsets below */
+      break;
+   default:
+      st->slice = 0;
+      break;
+   }
+
    if (!use_direct_map) {
       /* Use a DMA buffer */
       st->hw_nblocksy = nblocksy;
 
-      st->hwbuf = svga_winsys_buffer_create(svga,
-                                            1, 
-                                            0,
-                                            st->hw_nblocksy * st->base.stride * d);
+      st->hwbuf = svga_winsys_buffer_create(svga, 1, 0,
+                                   st->hw_nblocksy * st->base.stride * d);
       while(!st->hwbuf && (st->hw_nblocksy /= 2)) {
-         st->hwbuf = svga_winsys_buffer_create(svga,
-                                               1, 
-                                               0,
-                                               st->hw_nblocksy * st->base.stride * d);
+         st->hwbuf = svga_winsys_buffer_create(svga, 1, 0,
+                                   st->hw_nblocksy * st->base.stride * d);
       }
 
       if (!st->hwbuf) {
@@ -352,8 +389,8 @@ svga_texture_transfer_map(struct pipe_context *pipe,
          return NULL;
       }
 
-      if(st->hw_nblocksy < nblocksy) {
-         /* We couldn't allocate a hardware buffer big enough for the transfer, 
+      if (st->hw_nblocksy < nblocksy) {
+         /* We couldn't allocate a hardware buffer big enough for the transfer,
           * so allocate regular malloc memory instead */
          if (0) {
             debug_printf("%s: failed to allocate %u KB of DMA, "
@@ -379,45 +416,27 @@ svga_texture_transfer_map(struct pipe_context *pipe,
       }
    } else {
       struct pipe_transfer *transfer = &st->base;
-      struct svga_texture *tex = svga_texture(transfer->resource);
       struct svga_winsys_surface *surf = tex->handle;
-      unsigned face;
-
-      assert(surf);
 
-      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
-	 face = transfer->box.z;
-      } else {
-	 face = 0;
+      if (!surf) {
+         FREE(st);
+         return NULL;
       }
 
       if (need_tex_readback(transfer)) {
-	 SVGA3dBox box;
 	 enum pipe_error ret;
 
-	 box.x = transfer->box.x;
-	 box.y = transfer->box.y;
-	 box.w = transfer->box.width;
-	 box.h = transfer->box.height;
-	 box.d = transfer->box.depth;
-	 if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
-	    box.z = 0;
-	 }
-	 else {
-	    box.z = transfer->box.z;
-	 }
-
-         (void) box;  /* not used at this time */
-
          svga_surfaces_flush(svga);
 
-	 ret = SVGA3D_ReadbackGBImage(svga->swc, surf, face, transfer->level);
+         if (svga_have_vgpu10(svga)) {
+            ret = readback_image_vgpu10(svga, surf, st->slice, transfer->level,
+                                        tex->b.b.last_level + 1);
+         } else {
+            ret = readback_image_vgpu9(svga, surf, st->slice, transfer->level);
+         }
 
-	 if (ret != PIPE_OK) {
-	    svga_context_flush(svga, NULL);
-	    ret = SVGA3D_ReadbackGBImage(svga->swc, surf, face, transfer->level);
-	    assert(ret == PIPE_OK);
-	 }
+         assert(ret == PIPE_OK);
+         (void) ret;
 
 	 svga_context_flush(svga, NULL);
 
@@ -425,7 +444,7 @@ svga_texture_transfer_map(struct pipe_context *pipe,
           * Note: if PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE were specified
           * we could potentially clear the flag for all faces/layers/mips.
           */
-         svga_clear_texture_rendered_to(tex, face, transfer->level);
+         svga_clear_texture_rendered_to(tex, st->slice, transfer->level);
       }
       else {
 	 assert(transfer->usage & PIPE_TRANSFER_WRITE);
@@ -451,17 +470,15 @@ svga_texture_transfer_map(struct pipe_context *pipe,
       return sws->buffer_map(sws, st->hwbuf, usage);
    }
    else {
-      struct svga_screen *screen = svga_screen(svga->pipe.screen);
-      SVGA3dSurfaceFormat format;
       SVGA3dSize baseLevelSize;
       struct svga_texture *tex = svga_texture(texture);
       struct svga_winsys_surface *surf = tex->handle;
       uint8_t *map;
       boolean retry;
-      unsigned face, offset, mip_width, mip_height;
-      unsigned xoffset = box->x;
-      unsigned yoffset = box->y;
-      unsigned zoffset = box->z;
+      unsigned offset, mip_width, mip_height;
+      unsigned xoffset = st->base.box.x;
+      unsigned yoffset = st->base.box.y;
+      unsigned zoffset = st->base.box.z;
 
       map = svga->swc->surface_map(svga->swc, surf, usage, &retry);
       if (map == NULL && retry) {
@@ -484,21 +501,13 @@ svga_texture_transfer_map(struct pipe_context *pipe,
       /**
        * Compute the offset to the specific texture slice in the buffer.
        */
-      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
-         face = zoffset;
-         zoffset = 0;
-      } else {
-         face = 0;
-      }
-
-      format = svga_translate_format(screen, tex->b.b.format, 0);
       baseLevelSize.width = tex->b.b.width0;
       baseLevelSize.height = tex->b.b.height0;
       baseLevelSize.depth = tex->b.b.depth0;
 
-      offset = svga3dsurface_get_image_offset(format, baseLevelSize,
+      offset = svga3dsurface_get_image_offset(tex->key.format, baseLevelSize,
                                               tex->b.b.last_level + 1, /* numMips */
-                                              face, level);
+                                              st->slice, level);
       if (level > 0) {
          assert(offset > 0);
       }
@@ -506,7 +515,8 @@ svga_texture_transfer_map(struct pipe_context *pipe,
       mip_width = u_minify(tex->b.b.width0, level);
       mip_height = u_minify(tex->b.b.height0, level);
 
-      offset += svga3dsurface_get_pixel_offset(format, mip_width, mip_height,
+      offset += svga3dsurface_get_pixel_offset(tex->key.format,
+                                               mip_width, mip_height,
                                                xoffset, yoffset, zoffset);
 
       return (void *) (map + offset);
@@ -541,9 +551,45 @@ svga_texture_surface_unmap(struct svga_context *svga,
 }
 
 
-/* XXX: Still implementing this as if it was a screen function, but
- * can now modify it to queue transfers on the context.
- */
+/* Call SVGA3D_UpdateGBImage() for 'box'; on failure, flush the context and
+ * retry once.  Returns the status of the last attempt made. */
+static enum pipe_error
+update_image_vgpu9(struct svga_context *svga,
+                   struct svga_winsys_surface *surf,
+                   const SVGA3dBox *box,
+                   unsigned slice, unsigned level)
+{
+   enum pipe_error status =
+      SVGA3D_UpdateGBImage(svga->swc, surf, box, slice, level);
+
+   if (status == PIPE_OK)
+      return status;
+   svga_context_flush(svga, NULL);
+   return SVGA3D_UpdateGBImage(svga->swc, surf, box, slice, level);
+}
+
+
+/* vgpu10 update: call SVGA3D_vgpu10_UpdateSubResource() for 'box'; on
+ * failure, flush the context and retry once.  Returns the last status. */
+static enum pipe_error
+update_image_vgpu10(struct svga_context *svga,
+                    struct svga_winsys_surface *surf,
+                    const SVGA3dBox *box,
+                    unsigned slice, unsigned level,
+                    unsigned numMipLevels)
+{
+   /* Flat index: each slice contributes numMipLevels consecutive entries. */
+   const unsigned subResource = slice * numMipLevels + level;
+   enum pipe_error status =
+      SVGA3D_vgpu10_UpdateSubResource(svga->swc, surf, box, subResource);
+
+   if (status == PIPE_OK)
+      return status;
+   svga_context_flush(svga, NULL);
+   return SVGA3D_vgpu10_UpdateSubResource(svga->swc, surf, box, subResource);
+}
+
+
 static void
 svga_texture_transfer_unmap(struct pipe_context *pipe,
 			    struct pipe_transfer *transfer)
@@ -579,26 +625,25 @@ svga_texture_transfer_unmap(struct pipe_context *pipe,
    } else if (transfer->usage & PIPE_TRANSFER_WRITE) {
       struct svga_winsys_surface *surf =
 	 svga_texture(transfer->resource)->handle;
-      unsigned face;
       SVGA3dBox box;
       enum pipe_error ret;
 
       assert(svga_have_gb_objects(svga));
 
       /* update the effected region */
-      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
-	 face = transfer->box.z;
-      } else {
-	 face = 0;
-      }
-
       box.x = transfer->box.x;
       box.y = transfer->box.y;
-      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
+      switch (tex->b.b.target) {
+      case PIPE_TEXTURE_CUBE:
+      case PIPE_TEXTURE_2D_ARRAY:
          box.z = 0;
-      }
-      else {
+         break;
+      case PIPE_TEXTURE_1D_ARRAY:
+         box.y = box.z = 0;
+         break;
+      default:
          box.z = transfer->box.z;
+         break;
       }
       box.w = transfer->box.width;
       box.h = transfer->box.height;
@@ -610,18 +655,21 @@ svga_texture_transfer_unmap(struct pipe_context *pipe,
                       box.x, box.y, box.z,
                       box.w, box.h, box.d);
 
-      ret = SVGA3D_UpdateGBImage(svga->swc, surf, &box, face, transfer->level);
-      if (ret != PIPE_OK) {
-         svga_context_flush(svga, NULL);
-         ret = SVGA3D_UpdateGBImage(svga->swc, surf, &box, face, transfer->level);
-         assert(ret == PIPE_OK);
+      if (svga_have_vgpu10(svga)) {
+         ret = update_image_vgpu10(svga, surf, &box, st->slice, transfer->level,
+                                   tex->b.b.last_level + 1);
+      } else {
+         ret = update_image_vgpu9(svga, surf, &box, st->slice, transfer->level);
       }
+
+      assert(ret == PIPE_OK);
+      (void) ret;
    }
 
    ss->texture_timestamp++;
    svga_age_texture_view(tex, transfer->level);
    if (transfer->resource->target == PIPE_TEXTURE_CUBE)
-      svga_define_texture_level(tex, transfer->box.z, transfer->level);
+      svga_define_texture_level(tex, st->slice, transfer->level);
    else
       svga_define_texture_level(tex, 0, transfer->level);
 
@@ -635,7 +683,18 @@ svga_texture_transfer_unmap(struct pipe_context *pipe,
 }
 
 
-struct u_resource_vtbl svga_texture_vtbl = 
+/**
+ * Report whether util_format_has_depth() holds for the given pipe format.
+ */
+static inline boolean
+format_has_depth(enum pipe_format format)
+{
+   /* util_format_has_depth() takes the format description, not the enum. */
+   return util_format_has_depth(util_format_description(format));
+}
+
+
+struct u_resource_vtbl svga_texture_vtbl =
 {
    svga_texture_get_handle,	      /* get_handle */
    svga_texture_destroy,	      /* resource_destroy */
@@ -651,57 +710,119 @@ svga_texture_create(struct pipe_screen *screen,
                     const struct pipe_resource *template)
 {
    struct svga_screen *svgascreen = svga_screen(screen);
-   struct svga_texture *tex = CALLOC_STRUCT(svga_texture);
+   struct svga_texture *tex;
+   unsigned bindings = template->bind;
 
-   if (!tex)
-      goto error1;
+   assert(template->last_level < SVGA_MAX_TEXTURE_LEVELS);
+   if (template->last_level >= SVGA_MAX_TEXTURE_LEVELS) {
+      return NULL;
+   }
+
+   tex = CALLOC_STRUCT(svga_texture);
+   if (!tex) {
+      return NULL;
+   }
+
+   tex->defined = CALLOC(template->depth0 * template->array_size,
+                         sizeof(tex->defined[0]));
+   if (!tex->defined) {
+      FREE(tex);
+      return NULL;
+   }
+
+   tex->rendered_to = CALLOC(template->depth0 * template->array_size,
+                             sizeof(tex->rendered_to[0]));
+   if (!tex->rendered_to) {
+      FREE(tex->defined);
+      FREE(tex);
+      return NULL;
+   }
 
    tex->b.b = *template;
    tex->b.vtbl = &svga_texture_vtbl;
    pipe_reference_init(&tex->b.b.reference, 1);
    tex->b.b.screen = screen;
 
-   assert(template->last_level < SVGA_MAX_TEXTURE_LEVELS);
-   if(template->last_level >= SVGA_MAX_TEXTURE_LEVELS)
-      goto error2;
-   
    tex->key.flags = 0;
    tex->key.size.width = template->width0;
    tex->key.size.height = template->height0;
    tex->key.size.depth = template->depth0;
+   tex->key.arraySize = 1;
+   tex->key.numFaces = 1;
+   tex->key.sampleCount = template->nr_samples;
 
-   if(template->target == PIPE_TEXTURE_CUBE) {
-      tex->key.flags |= SVGA3D_SURFACE_CUBEMAP;
-      tex->key.numFaces = 6;
-   }
-   else {
-      tex->key.numFaces = 1;
+   if (template->nr_samples > 1) {
+      tex->key.flags |= SVGA3D_SURFACE_MASKABLE_ANTIALIAS;
    }
 
-   if (template->target == PIPE_TEXTURE_3D) {
-      tex->key.flags |= SVGA3D_SURFACE_VOLUME;
+   if (svgascreen->sws->have_vgpu10) {
+      switch (template->target) {
+      case PIPE_TEXTURE_1D:
+         tex->key.flags |= SVGA3D_SURFACE_1D;
+         break;
+      case PIPE_TEXTURE_1D_ARRAY:
+         tex->key.flags |= SVGA3D_SURFACE_1D;
+         /* fall-through */
+      case PIPE_TEXTURE_2D_ARRAY:
+         tex->key.flags |= SVGA3D_SURFACE_ARRAY;
+         tex->key.arraySize = template->array_size;
+         break;
+      case PIPE_TEXTURE_3D:
+         tex->key.flags |= SVGA3D_SURFACE_VOLUME;
+         break;
+      case PIPE_TEXTURE_CUBE:
+         tex->key.flags |= (SVGA3D_SURFACE_CUBEMAP | SVGA3D_SURFACE_ARRAY);
+         tex->key.numFaces = 6;
+         break;
+      default:
+         break;
+      }
+   }
+   else {
+      switch (template->target) {
+      case PIPE_TEXTURE_3D:
+         tex->key.flags |= SVGA3D_SURFACE_VOLUME;
+         break;
+      case PIPE_TEXTURE_CUBE:
+         tex->key.flags |= SVGA3D_SURFACE_CUBEMAP;
+         tex->key.numFaces = 6;
+         break;
+      default:
+         break;
+      }
    }
 
    tex->key.cachable = 1;
 
-   if (template->bind & PIPE_BIND_SAMPLER_VIEW)
+   if (bindings & PIPE_BIND_SAMPLER_VIEW) {
       tex->key.flags |= SVGA3D_SURFACE_HINT_TEXTURE;
+      tex->key.flags |= SVGA3D_SURFACE_BIND_SHADER_RESOURCE;
+
+      if (!(bindings & PIPE_BIND_RENDER_TARGET)) {
+         /* Also check if the format is renderable */
+         if (screen->is_format_supported(screen, template->format,
+                                         template->target,
+                                         template->nr_samples,
+                                         PIPE_BIND_RENDER_TARGET)) {
+            bindings |= PIPE_BIND_RENDER_TARGET;
+         }
+      }
+   }
 
-   if (template->bind & PIPE_BIND_DISPLAY_TARGET) {
+   if (bindings & PIPE_BIND_DISPLAY_TARGET) {
       tex->key.cachable = 0;
    }
 
-   if (template->bind & PIPE_BIND_SHARED) {
+   if (bindings & PIPE_BIND_SHARED) {
       tex->key.cachable = 0;
    }
 
-   if (template->bind & (PIPE_BIND_SCANOUT |
-                         PIPE_BIND_CURSOR)) {
-      tex->key.flags |= SVGA3D_SURFACE_HINT_SCANOUT;
+   if (bindings & (PIPE_BIND_SCANOUT | PIPE_BIND_CURSOR)) {
+      tex->key.scanout = 1;
       tex->key.cachable = 0;
    }
 
-   /* 
+   /*
     * Note: Previously we never passed the
     * SVGA3D_SURFACE_HINT_RENDERTARGET hint. Mesa cannot
     * know beforehand whether a texture will be used as a rendertarget or not
@@ -712,23 +833,55 @@ svga_texture_create(struct pipe_screen *screen,
     * (XA for example) uses it accurately and certain device versions
     * relies on it in certain situations to render correctly.
     */
-   if((template->bind & PIPE_BIND_RENDER_TARGET) &&
-      !util_format_is_s3tc(template->format))
+   if ((bindings & PIPE_BIND_RENDER_TARGET) &&
+       !util_format_is_s3tc(template->format)) {
       tex->key.flags |= SVGA3D_SURFACE_HINT_RENDERTARGET;
-   
-   if(template->bind & PIPE_BIND_DEPTH_STENCIL)
+      tex->key.flags |= SVGA3D_SURFACE_BIND_RENDER_TARGET;
+   }
+
+   if (bindings & PIPE_BIND_DEPTH_STENCIL) {
       tex->key.flags |= SVGA3D_SURFACE_HINT_DEPTHSTENCIL;
-   
+      tex->key.flags |= SVGA3D_SURFACE_BIND_DEPTH_STENCIL;
+   }
+
    tex->key.numMipLevels = template->last_level + 1;
-   
-   tex->key.format = svga_translate_format(svgascreen, template->format, template->bind);
-   if(tex->key.format == SVGA3D_FORMAT_INVALID)
-      goto error2;
+
+   tex->key.format = svga_translate_format(svgascreen, template->format,
+                                           bindings);
+   if (tex->key.format == SVGA3D_FORMAT_INVALID) {
+      FREE(tex->defined);
+      FREE(tex->rendered_to);
+      FREE(tex);
+      return NULL;
+   }
+
+   /* Use typeless formats for sRGB and depth resources.  Typeless
+    * formats can be reinterpreted as other formats.  For example,
+    * SVGA3D_R8G8B8A8_UNORM_TYPELESS can be interpreted as
+    * SVGA3D_R8G8B8A8_UNORM_SRGB or SVGA3D_R8G8B8A8_UNORM.
+    */
+   if (svgascreen->sws->have_vgpu10 &&
+       (util_format_is_srgb(template->format) ||
+        format_has_depth(template->format))) {
+      SVGA3dSurfaceFormat typeless = svga_typeless_format(tex->key.format);
+      if (0) {
+         debug_printf("Convert resource type %s -> %s (bind 0x%x)\n",
+                      svga_format_name(tex->key.format),
+                      svga_format_name(typeless),
+                      bindings);
+      }
+      tex->key.format = typeless;
+   }
 
    SVGA_DBG(DEBUG_DMA, "surface_create for texture\n", tex->handle);
-   tex->handle = svga_screen_surface_create(svgascreen, &tex->key);
-   if (!tex->handle)
-       goto error2;
+   tex->handle = svga_screen_surface_create(svgascreen, bindings,
+                                            tex->b.b.usage, &tex->key);
+   if (!tex->handle) {
+      FREE(tex->defined);
+      FREE(tex->rendered_to);
+      FREE(tex);
+      return NULL;
+   }
 
    SVGA_DBG(DEBUG_DMA, "  --> got sid %p (texture)\n", tex->handle);
 
@@ -738,18 +891,7 @@ svga_texture_create(struct pipe_screen *screen,
    tex->size = util_resource_size(template);
    svgascreen->total_resource_bytes += tex->size;
 
-   tex->rendered_to = CALLOC(template->depth0 * template->array_size,
-                             sizeof(tex->rendered_to[0]));
-   if (!tex->rendered_to)
-      goto error2;
-
    return &tex->b.b;
-
-error2:
-   FREE(tex->rendered_to);
-   FREE(tex);
-error1:
-   return NULL;
 }
 
 
@@ -777,16 +919,28 @@ svga_texture_from_handle(struct pipe_screen *screen,
    if (!srf)
       return NULL;
 
-   if (svga_translate_format(svga_screen(screen), template->format, template->bind) != format) {
-      unsigned f1 = svga_translate_format(svga_screen(screen), template->format, template->bind);
+   if (svga_translate_format(svga_screen(screen), template->format,
+                             template->bind) != format) {
+      unsigned f1 = svga_translate_format(svga_screen(screen),
+                                          template->format, template->bind);
       unsigned f2 = format;
 
-      /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up */
-      if ( !( (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) ||
+      /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up.
+       */
+      if (f1 == SVGA3D_B8G8R8A8_UNORM)
+         f1 = SVGA3D_A8R8G8B8;
+      if (f1 == SVGA3D_B8G8R8X8_UNORM)
+         f1 = SVGA3D_X8R8G8B8;
+
+      if ( !( (f1 == f2) ||
+              (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) ||
+              (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_B8G8R8X8_UNORM) ||
               (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_X8R8G8B8) ||
+              (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_B8G8R8A8_UNORM) ||
               (f1 == SVGA3D_Z_D24X8 && f2 == SVGA3D_Z_D24S8) ||
               (f1 == SVGA3D_Z_DF24 && f2 == SVGA3D_Z_D24S8_INT) ) ) {
-         debug_printf("%s wrong format %u != %u\n", __FUNCTION__, f1, f2);
+         debug_printf("%s wrong format %s != %s\n", __FUNCTION__,
+                      svga_format_name(f1), svga_format_name(f2));
          return NULL;
       }
    }
@@ -795,6 +949,13 @@ svga_texture_from_handle(struct pipe_screen *screen,
    if (!tex)
       return NULL;
 
+   tex->defined = CALLOC(template->depth0 * template->array_size,
+                         sizeof(tex->defined[0]));
+   if (!tex->defined) {
+      FREE(tex);
+      return NULL;
+   }
+
    tex->b.b = *template;
    tex->b.vtbl = &svga_texture_vtbl;
    pipe_reference_init(&tex->b.b.reference, 1);
@@ -803,9 +964,11 @@ svga_texture_from_handle(struct pipe_screen *screen,
    SVGA_DBG(DEBUG_DMA, "wrap surface sid %p\n", srf);
 
    tex->key.cachable = 0;
+   tex->key.format = format;
    tex->handle = srf;
 
    tex->rendered_to = CALLOC(1, sizeof(tex->rendered_to[0]));
+   tex->imported = TRUE;
 
    return &tex->b.b;
 }
diff --git a/src/gallium/drivers/svga/svga_resource_texture.h b/src/gallium/drivers/svga/svga_resource_texture.h
index 19dadfb8828..0326907240e 100644
--- a/src/gallium/drivers/svga/svga_resource_texture.h
+++ b/src/gallium/drivers/svga/svga_resource_texture.h
@@ -51,7 +51,7 @@ struct svga_texture
 {
    struct u_resource b;
 
-   boolean defined[6][SVGA_MAX_TEXTURE_LEVELS];
+   ushort *defined;
    
    struct svga_sampler_view *cached_view;
 
@@ -77,6 +77,12 @@ struct svga_texture
     */
    struct svga_winsys_surface *handle;
 
+   /**
+    * Whether the host side surface is imported and not created by this
+    * driver.
+    */
+   boolean imported;
+
    unsigned size;  /**< Approximate size in bytes */
 
    /** array indexed by cube face or 3D/array slice, one bit per mipmap level */
@@ -91,7 +97,7 @@ struct svga_transfer
 {
    struct pipe_transfer base;
 
-   unsigned face;
+   unsigned slice;  /**< array slice or cube face */
 
    struct svga_winsys_buffer *hwbuf;
 
@@ -135,29 +141,6 @@ svga_age_texture_view(struct svga_texture *tex, unsigned level)
 }
 
 
-/**
- * Mark the given texture face/level as being defined.
- */
-static inline void
-svga_define_texture_level(struct svga_texture *tex,
-                          unsigned face,unsigned level)
-{
-   assert(face < Elements(tex->defined));
-   assert(level < Elements(tex->defined[0]));
-   tex->defined[face][level] = TRUE;
-}
-
-
-static inline bool
-svga_is_texture_level_defined(const struct svga_texture *tex,
-                              unsigned face, unsigned level)
-{
-   assert(face < Elements(tex->defined));
-   assert(level < Elements(tex->defined[0]));
-   return tex->defined[face][level];
-}
-
-
 /** For debugging, check that face and level are legal */
 static inline void
 check_face_level(const struct svga_texture *tex,
@@ -177,6 +160,27 @@ check_face_level(const struct svga_texture *tex,
 }
 
 
+/**
+ * Record face/level as defined by setting bit 'level' in tex->defined[face].
+ */
+static inline void
+svga_define_texture_level(struct svga_texture *tex,
+                          unsigned face, unsigned level)
+{
+   check_face_level(tex, face, level);   /* debug check: indices legal */
+   tex->defined[face] = tex->defined[face] | (1 << level);
+}
+
+
+static inline bool   /* true iff bit 'level' of tex->defined[face] is set */
+svga_is_texture_level_defined(const struct svga_texture *tex,
+                              unsigned face, unsigned level)
+{
+   check_face_level(tex, face, level);   /* debug check: indices legal */
+   return !!(tex->defined[face] & (1 << level));
+}
+
+
 static inline void
 svga_set_texture_rendered_to(struct svga_texture *tex,
                              unsigned face, unsigned level)
diff --git a/src/gallium/drivers/svga/svga_sampler_view.c b/src/gallium/drivers/svga/svga_sampler_view.c
index 55dc49f2d2c..ffa5bce80c0 100644
--- a/src/gallium/drivers/svga/svga_sampler_view.c
+++ b/src/gallium/drivers/svga/svga_sampler_view.c
@@ -67,7 +67,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
    assert(pt);
    assert(min_lod <= max_lod);
    assert(max_lod <= pt->last_level);
-
+   assert(!svga_have_vgpu10(svga));
 
    /* Is a view needed */
    {
@@ -143,10 +143,12 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
             pt->last_level);
 
    sv->age = tex->age;
-   sv->handle = svga_texture_view_surface(svga, tex, flags, format,
+   sv->handle = svga_texture_view_surface(svga, tex,
+                                          PIPE_BIND_SAMPLER_VIEW,
+                                          flags, format,
                                           min_lod,
                                           max_lod - min_lod + 1,
-                                          -1, -1,
+                                          -1, 1, -1,
                                           &sv->key);
 
    if (!sv->handle) {
@@ -177,6 +179,7 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *
    unsigned k;
 
    assert(svga);
+   assert(!svga_have_vgpu10(svga));
 
    if (v->handle == tex->handle)
       return;
diff --git a/src/gallium/drivers/svga/svga_sampler_view.h b/src/gallium/drivers/svga/svga_sampler_view.h
index 7f14323f84f..acd7ae0ca24 100644
--- a/src/gallium/drivers/svga/svga_sampler_view.h
+++ b/src/gallium/drivers/svga/svga_sampler_view.h
@@ -36,6 +36,7 @@ struct pipe_context;
 struct pipe_screen;
 struct svga_context;
 struct svga_winsys_surface;
+struct svga_surface;
 enum SVGA3dSurfaceFormat;
 
 
@@ -97,5 +98,8 @@ svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_
    *ptr = v;
 }
 
+boolean
+svga_check_sampler_view_resource_collision(struct svga_context *svga,
+                                           struct svga_winsys_surface *res);
 
 #endif
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index 66c3deaa9e7..f2ae40b4fad 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -34,31 +34,37 @@
 #include "svga_context.h"
 #include "svga_format.h"
 #include "svga_screen.h"
+#include "svga_tgsi.h"
 #include "svga_resource_texture.h"
 #include "svga_resource.h"
 #include "svga_debug.h"
 
 #include "svga3d_shaderdefs.h"
+#include "VGPU10ShaderTokens.h"
 
+/* NOTE: this constant may get moved into a svga3d*.h header file */
+#define SVGA3D_DX_MAX_RESOURCE_SIZE (128 * 1024 * 1024)
 
 #ifdef DEBUG
 int SVGA_DEBUG = 0;
 
 static const struct debug_named_value svga_debug_flags[] = {
-   { "dma",      DEBUG_DMA, NULL },
-   { "tgsi",     DEBUG_TGSI, NULL },
-   { "pipe",     DEBUG_PIPE, NULL },
-   { "state",    DEBUG_STATE, NULL },
-   { "screen",   DEBUG_SCREEN, NULL },
-   { "tex",      DEBUG_TEX, NULL },
-   { "swtnl",    DEBUG_SWTNL, NULL },
-   { "const",    DEBUG_CONSTS, NULL },
-   { "viewport", DEBUG_VIEWPORT, NULL },
-   { "views",    DEBUG_VIEWS, NULL },
-   { "perf",     DEBUG_PERF, NULL },
-   { "flush",    DEBUG_FLUSH, NULL },
-   { "sync",     DEBUG_SYNC, NULL },
-   { "cache",    DEBUG_CACHE, NULL },
+   { "dma",         DEBUG_DMA, NULL },
+   { "tgsi",        DEBUG_TGSI, NULL },
+   { "pipe",        DEBUG_PIPE, NULL },
+   { "state",       DEBUG_STATE, NULL },
+   { "screen",      DEBUG_SCREEN, NULL },
+   { "tex",         DEBUG_TEX, NULL },
+   { "swtnl",       DEBUG_SWTNL, NULL },
+   { "const",       DEBUG_CONSTS, NULL },
+   { "viewport",    DEBUG_VIEWPORT, NULL },
+   { "views",       DEBUG_VIEWS, NULL },
+   { "perf",        DEBUG_PERF, NULL },
+   { "flush",       DEBUG_FLUSH, NULL },
+   { "sync",        DEBUG_SYNC, NULL },
+   { "cache",       DEBUG_CACHE, NULL },
+   { "streamout",   DEBUG_STREAMOUT, NULL },
+   { "query",       DEBUG_QUERY, NULL },
    DEBUG_NAMED_VALUE_END
 };
 #endif
@@ -80,18 +86,52 @@ svga_get_name( struct pipe_screen *pscreen )
     */
    build = "build: DEBUG;";
    mutex = "mutex: " PIPE_ATOMIC ";";
-#ifdef HAVE_LLVM
-   llvm = "LLVM;";
-#endif
 #else
    build = "build: RELEASE;";
 #endif
+#ifdef HAVE_LLVM
+   llvm = "LLVM;";
+#endif
 
    util_snprintf(name, sizeof(name), "SVGA3D; %s %s %s", build, mutex, llvm);
    return name;
 }
 
 
+/** Helper for querying float-valued device cap */
+static float
+get_float_cap(struct svga_winsys_screen *sws, unsigned cap, float defaultVal)
+{
+   SVGA3dDevCapResult result;
+   if (sws->get_cap(sws, cap, &result))
+      return result.f;
+   else
+      return defaultVal;
+}
+
+
+/** Helper for querying uint-valued device cap */
+static unsigned
+get_uint_cap(struct svga_winsys_screen *sws, unsigned cap, unsigned defaultVal)
+{
+   SVGA3dDevCapResult result;
+   if (sws->get_cap(sws, cap, &result))
+      return result.u;
+   else
+      return defaultVal;
+}
+
+
+/** Helper for querying boolean-valued device cap */
+static boolean
+get_bool_cap(struct svga_winsys_screen *sws, unsigned cap, boolean defaultVal)
+{
+   SVGA3dDevCapResult result;
+   if (sws->get_cap(sws, cap, &result))
+      return result.b;
+   else
+      return defaultVal;
+}
 
 
 static float
@@ -99,7 +139,6 @@ svga_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
 {
    struct svga_screen *svgascreen = svga_screen(screen);
    struct svga_winsys_screen *sws = svgascreen->sws;
-   SVGA3dDevCapResult result;
 
    switch (param) {
    case PIPE_CAPF_MAX_LINE_WIDTH:
@@ -113,12 +152,11 @@ svga_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
       return svgascreen->maxPointSize;
 
    case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
-      if(!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY, &result))
-         return 4.0f;
-      return (float) result.u;
+      return (float) get_uint_cap(sws, SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY, 4);
 
    case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
       return 15.0;
+
    case PIPE_CAPF_GUARD_BAND_LEFT:
    case PIPE_CAPF_GUARD_BAND_TOP:
    case PIPE_CAPF_GUARD_BAND_RIGHT:
@@ -145,7 +183,12 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_TWO_SIDED_STENCIL:
       return 1;
    case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
-      return 0;
+      /*
+       * "In virtually every OpenGL implementation and hardware,
+       * GL_MAX_DUAL_SOURCE_DRAW_BUFFERS is 1"
+       * http://www.opengl.org/wiki/Blending
+       */
+      return sws->have_vgpu10 ? 1 : 0;
    case PIPE_CAP_ANISOTROPIC_FILTER:
       return 1;
    case PIPE_CAP_POINT_SPRITE:
@@ -158,6 +201,8 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
       return 1;
    case PIPE_CAP_QUERY_TIME_ELAPSED:
       return 0;
+   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+      return sws->have_vgpu10;
    case PIPE_CAP_TEXTURE_SHADOW_MAP:
       return 1;
    case PIPE_CAP_TEXTURE_SWIZZLE:
@@ -170,7 +215,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_USER_CONSTANT_BUFFERS:
       return 1;
    case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
-      return 16;
+      return 256;
 
    case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
       {
@@ -199,17 +244,20 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
       return MIN2(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS),
                   12 /* 2048x2048 */);
 
+   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+      return sws->have_vgpu10 ? SVGA3D_MAX_SURFACE_ARRAYSIZE : 0;
+
    case PIPE_CAP_BLEND_EQUATION_SEPARATE: /* req. for GL 1.5 */
       return 1;
 
    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
       return 1;
    case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
-      return 0;
+      return sws->have_vgpu10;
    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
       return 0;
    case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
-      return 1;
+      return !sws->have_vgpu10;
 
    case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
       return 1; /* The color outputs of vertex shaders are not clamped */
@@ -222,7 +270,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
       return 1; /* expected for GL_ARB_framebuffer_object */
 
    case PIPE_CAP_GLSL_FEATURE_LEVEL:
-      return 120;
+      return sws->have_vgpu10 ? 330 : 120;
 
    case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
       return 0;
@@ -230,49 +278,65 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_SM3:
       return 1;
 
-   /* Unsupported features */
-   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
-   case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
-   case PIPE_CAP_SHADER_STENCIL_EXPORT:
    case PIPE_CAP_DEPTH_CLIP_DISABLE:
-   case PIPE_CAP_SEAMLESS_CUBE_MAP:
-   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
    case PIPE_CAP_INDEP_BLEND_ENABLE:
-   case PIPE_CAP_INDEP_BLEND_FUNC:
-   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
-   case PIPE_CAP_PRIMITIVE_RESTART:
+   case PIPE_CAP_CONDITIONAL_RENDER:
+   case PIPE_CAP_QUERY_TIMESTAMP:
    case PIPE_CAP_TGSI_INSTANCEID:
    case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
-   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+   case PIPE_CAP_SEAMLESS_CUBE_MAP:
+   case PIPE_CAP_FAKE_SW_MSAA:
+      return sws->have_vgpu10;
+
+   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+      return sws->have_vgpu10 ? SVGA3D_DX_MAX_SOTARGETS : 0;
+   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+      return sws->have_vgpu10 ? 4 : 0;
+   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+      return sws->have_vgpu10 ? SVGA3D_MAX_STREAMOUT_DECLS : 0;
+   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+      return 0;
+   case PIPE_CAP_TEXTURE_MULTISAMPLE:
+      return svgascreen->ms_samples ? 1 : 0;
+
+   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+      return SVGA3D_DX_MAX_RESOURCE_SIZE;
+
    case PIPE_CAP_MIN_TEXEL_OFFSET:
+      return sws->have_vgpu10 ? VGPU10_MIN_TEXEL_FETCH_OFFSET : 0;
    case PIPE_CAP_MAX_TEXEL_OFFSET:
+      return sws->have_vgpu10 ? VGPU10_MAX_TEXEL_FETCH_OFFSET : 0;
+
    case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
    case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
-   case PIPE_CAP_CONDITIONAL_RENDER:
-   case PIPE_CAP_TEXTURE_BARRIER:
-   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
-   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
-   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+      return 0;
+
    case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+      return sws->have_vgpu10 ? 256 : 0;
    case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+      return sws->have_vgpu10 ? 1024 : 0;
+
+   case PIPE_CAP_PRIMITIVE_RESTART:
+      return 1; /* may be a sw fallback, depending on restart index */
+
+   /* Unsupported features */
+   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+   case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+   case PIPE_CAP_SHADER_STENCIL_EXPORT:
+   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+   case PIPE_CAP_INDEP_BLEND_FUNC:
+   case PIPE_CAP_TEXTURE_BARRIER:
    case PIPE_CAP_MAX_VERTEX_STREAMS:
    case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
-   case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
-   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
    case PIPE_CAP_COMPUTE:
    case PIPE_CAP_START_INSTANCE:
-   case PIPE_CAP_QUERY_TIMESTAMP:
-   case PIPE_CAP_TEXTURE_MULTISAMPLE:
    case PIPE_CAP_CUBE_MAP_ARRAY:
-   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
    case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
    case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
-   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
    case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
    case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
    case PIPE_CAP_TEXTURE_GATHER_SM5:
    case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
-   case PIPE_CAP_FAKE_SW_MSAA:
    case PIPE_CAP_TEXTURE_QUERY_LOD:
    case PIPE_CAP_SAMPLE_SHADING:
    case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
@@ -288,8 +352,10 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
       return 0;
    case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
       return 64;
+   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+   case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
    case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
-      return 1;
+      return 1;  /* need 4-byte alignment for all offsets and strides */
    case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
       return 2048;
    case PIPE_CAP_MAX_VIEWPORTS:
@@ -320,11 +386,16 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
    return 0;
 }
 
-static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param)
+
+static int
+vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader,
+                       enum pipe_shader_cap param)
 {
    struct svga_screen *svgascreen = svga_screen(screen);
    struct svga_winsys_screen *sws = svgascreen->sws;
-   SVGA3dDevCapResult result;
+   unsigned val;
+
+   assert(!sws->have_vgpu10);
 
    switch (shader)
    {
@@ -347,9 +418,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
       case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
          return 1;
       case PIPE_SHADER_CAP_MAX_TEMPS:
-         if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS, &result))
-            return 32;
-         return MIN2(result.u, SVGA3D_TEMPREG_MAX);
+         val = get_uint_cap(sws, SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS, 32);
+         return MIN2(val, SVGA3D_TEMPREG_MAX);
       case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
 	 /* 
 	  * Although PS 3.0 has some addressing abilities it can only represent
@@ -392,9 +462,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
       {
       case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
       case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
-         if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS, &result))
-            return 512;
-         return result.u;
+         return get_uint_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS,
+                             512);
       case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
       case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
          /* XXX: until we have vertex texture support */
@@ -410,9 +479,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
       case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
          return 1;
       case PIPE_SHADER_CAP_MAX_TEMPS:
-         if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS, &result))
-            return 32;
-         return MIN2(result.u, SVGA3D_TEMPREG_MAX);
+         val = get_uint_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS, 32);
+         return MIN2(val, SVGA3D_TEMPREG_MAX);
       case PIPE_SHADER_CAP_MAX_PREDS:
          return 1;
       case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
@@ -459,8 +527,102 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
 }
 
 
+static int
+vgpu10_get_shader_param(struct pipe_screen *screen, unsigned shader,
+                        enum pipe_shader_cap param)
+{
+   struct svga_screen *svgascreen = svga_screen(screen);
+   struct svga_winsys_screen *sws = svgascreen->sws;
+
+   assert(sws->have_vgpu10);
+   (void) sws;  /* silence unused var warnings in non-debug builds */
+
+   /* Only VS, GS, FS supported */
+   if (shader != PIPE_SHADER_VERTEX &&
+       shader != PIPE_SHADER_GEOMETRY &&
+       shader != PIPE_SHADER_FRAGMENT) {
+      return 0;
+   }
+
+   /* NOTE: we do not query the device for any caps/limits at this time */
+
+   /* Generally the same limits for vertex, geometry and fragment shaders */
+   switch (param) {
+   case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+      return 64 * 1024;
+   case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+      return 64;
+   case PIPE_SHADER_CAP_MAX_INPUTS:
+      if (shader == PIPE_SHADER_FRAGMENT)
+         return VGPU10_MAX_FS_INPUTS;
+      else if (shader == PIPE_SHADER_GEOMETRY)
+         return VGPU10_MAX_GS_INPUTS;
+      else
+         return VGPU10_MAX_VS_INPUTS;
+   case PIPE_SHADER_CAP_MAX_OUTPUTS:
+      if (shader == PIPE_SHADER_FRAGMENT)
+         return VGPU10_MAX_FS_OUTPUTS;
+      else if (shader == PIPE_SHADER_GEOMETRY)
+         return VGPU10_MAX_GS_OUTPUTS;
+      else
+         return VGPU10_MAX_VS_OUTPUTS;
+   case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+      return VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT * sizeof(float[4]);
+   case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+      return svgascreen->max_const_buffers;
+   case PIPE_SHADER_CAP_MAX_TEMPS:
+      return VGPU10_MAX_TEMPS;
+   case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+   case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+   case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+   case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+      return TRUE; /* XXX verify */
+   case PIPE_SHADER_CAP_MAX_PREDS:
+      return 0;
+   case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+   case PIPE_SHADER_CAP_SUBROUTINES:
+   case PIPE_SHADER_CAP_INTEGERS:
+      return TRUE;
+   case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+   case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
+      return SVGA3D_DX_MAX_SAMPLERS;
+   case PIPE_SHADER_CAP_PREFERRED_IR:
+      return PIPE_SHADER_IR_TGSI;
+   case PIPE_SHADER_CAP_DOUBLES:
+   case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+      return 0;
+   default:
+      debug_printf("Unexpected vgpu10 shader query %u\n", param);
+      return 0;
+   }
+   return 0;
+}
+
+
+static int
+svga_get_shader_param(struct pipe_screen *screen, unsigned shader,
+                      enum pipe_shader_cap param)
+{
+   struct svga_screen *svgascreen = svga_screen(screen);
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   if (sws->have_vgpu10) {
+      return vgpu10_get_shader_param(screen, shader, param);
+   }
+   else {
+      return vgpu9_get_shader_param(screen, shader, param);
+   }
+}
+
+
 /**
- * Implemnt pipe_screen::is_format_supported().
+ * Implement pipe_screen::is_format_supported().
  * \param bindings  bitmask of PIPE_BIND_x flags
  */
 static boolean
@@ -478,7 +640,12 @@ svga_is_format_supported( struct pipe_screen *screen,
    assert(bindings);
 
    if (sample_count > 1) {
-      return FALSE;
+      /* In ms_samples, if bit N is set it means that we support
+       * multisample with N+1 samples per pixel.
+       */
+      if ((ss->ms_samples & (1 << (sample_count - 1))) == 0) {
+         return FALSE;
+      }
    }
 
    svga_format = svga_translate_format(ss, format, bindings);
@@ -486,6 +653,22 @@ svga_is_format_supported( struct pipe_screen *screen,
       return FALSE;
    }
 
+   /* we don't support sRGB rendering into display targets */
+   if (util_format_is_srgb(format) && (bindings & PIPE_BIND_DISPLAY_TARGET)) {
+      return FALSE;
+   }
+
+   /*
+    * For VGPU10 vertex formats, skip querying host capabilities
+    */
+
+   if (ss->sws->have_vgpu10 && (bindings & PIPE_BIND_VERTEX_BUFFER)) {
+      SVGA3dSurfaceFormat svga_format;
+      unsigned flags;
+      svga_translate_vertex_format_vgpu10(format, &svga_format, &flags);
+      return svga_format != SVGA3D_FORMAT_INVALID;
+   }
+
    /*
     * Override host capabilities, so that we end up with the same
     * visuals for all virtual hardware implementations.
@@ -498,6 +681,12 @@ svga_is_format_supported( struct pipe_screen *screen,
       case SVGA3D_R5G6B5:
          break;
 
+      /* VGPU10 formats */
+      case SVGA3D_B8G8R8A8_UNORM:
+      case SVGA3D_B8G8R8X8_UNORM:
+      case SVGA3D_B5G6R5_UNORM:
+         break;
+
       /* Often unsupported/problematic. This means we end up with the same
        * visuals for all virtual hardware implementations.
        */
@@ -516,22 +705,32 @@ svga_is_format_supported( struct pipe_screen *screen,
 
    svga_get_format_cap(ss, svga_format, &caps);
 
+   if (bindings & PIPE_BIND_RENDER_TARGET) {
+      /* Check that the color surface is blendable, unless it's an
+       * integer format.
+       */
+      if (!svga_format_is_integer(svga_format) &&
+          (caps.value & SVGA3DFORMAT_OP_NOALPHABLEND)) {
+         return FALSE;
+      }
+   }
+
    mask.value = 0;
    if (bindings & PIPE_BIND_RENDER_TARGET) {
-      mask.offscreenRenderTarget = 1;
+      mask.value |= SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET;
    }
    if (bindings & PIPE_BIND_DEPTH_STENCIL) {
-      mask.zStencil = 1;
+      mask.value |= SVGA3DFORMAT_OP_ZSTENCIL;
    }
    if (bindings & PIPE_BIND_SAMPLER_VIEW) {
-      mask.texture = 1;
+      mask.value |= SVGA3DFORMAT_OP_TEXTURE;
    }
 
    if (target == PIPE_TEXTURE_CUBE) {
-      mask.cubeTexture = 1;
+      mask.value |= SVGA3DFORMAT_OP_CUBETEXTURE;
    }
-   if (target == PIPE_TEXTURE_3D) {
-      mask.volumeTexture = 1;
+   else if (target == PIPE_TEXTURE_3D) {
+      mask.value |= SVGA3DFORMAT_OP_VOLUMETEXTURE;
    }
 
    return (caps.value & mask.value) == mask.value;
@@ -611,8 +810,6 @@ svga_screen_create(struct svga_winsys_screen *sws)
 {
    struct svga_screen *svgascreen;
    struct pipe_screen *screen;
-   SVGA3dDevCapResult result;
-   boolean use_vs30, use_ps30;
 
 #ifdef DEBUG
    SVGA_DEBUG = debug_get_flags_option("SVGA_DEBUG", svga_debug_flags, 0 );
@@ -642,6 +839,7 @@ svga_screen_create(struct svga_winsys_screen *sws)
    screen->get_param = svga_get_param;
    screen->get_shader_param = svga_get_shader_param;
    screen->get_paramf = svga_get_paramf;
+   screen->get_timestamp = NULL;
    screen->is_format_supported = svga_is_format_supported;
    screen->context_create = svga_context_create;
    screen->fence_reference = svga_fence_reference;
@@ -657,18 +855,6 @@ svga_screen_create(struct svga_winsys_screen *sws)
       svgascreen->hw_version = SVGA3D_HWVERSION_WS65_B1;
    }
 
-   use_ps30 =
-      sws->get_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION, &result) &&
-      result.u >= SVGA3DPSVERSION_30 ? TRUE : FALSE;
-
-   use_vs30 =
-      sws->get_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION, &result) &&
-      result.u >= SVGA3DVSVERSION_30 ? TRUE : FALSE;
-
-   /* we require Shader model 3.0 or later */
-   if (!use_ps30 || !use_vs30)
-      goto error2;
-
    /*
     * The D16, D24X8, and D24S8 formats always do an implicit shadow compare
     * when sampled from, where as the DF16, DF24, and D24S8_INT do not.  So
@@ -716,46 +902,77 @@ svga_screen_create(struct svga_winsys_screen *sws)
 
    /* Query device caps
     */
-   if (!sws->get_cap(sws, SVGA3D_DEVCAP_LINE_STIPPLE, &result))
-      svgascreen->haveLineStipple = FALSE;
-   else
-      svgascreen->haveLineStipple = result.u;
+   if (sws->have_vgpu10) {
+      svgascreen->haveProvokingVertex
+         = get_bool_cap(sws, SVGA3D_DEVCAP_DX_PROVOKING_VERTEX, FALSE);
+      svgascreen->haveLineSmooth = TRUE;
+      svgascreen->maxPointSize = 80.0F;
+      svgascreen->max_color_buffers = SVGA3D_DX_MAX_RENDER_TARGETS;
+
+      /* Multisample samples per pixel */
+      svgascreen->ms_samples =
+         get_uint_cap(sws, SVGA3D_DEVCAP_MULTISAMPLE_MASKABLESAMPLES, 0);
+
+      /* Maximum number of constant buffers */
+      svgascreen->max_const_buffers =
+         get_uint_cap(sws, SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS, 1);
+      assert(svgascreen->max_const_buffers <= SVGA_MAX_CONST_BUFS);
+   }
+   else {
+      /* VGPU9 */
+      unsigned vs_ver = get_uint_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION,
+                                     SVGA3DVSVERSION_NONE);
+      unsigned fs_ver = get_uint_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION,
+                                     SVGA3DPSVERSION_NONE);
+
+      /* we require Shader model 3.0 or later */
+      if (fs_ver < SVGA3DPSVERSION_30 || vs_ver < SVGA3DVSVERSION_30) {
+         goto error2;
+      }
 
-   if (!sws->get_cap(sws, SVGA3D_DEVCAP_LINE_AA, &result))
-      svgascreen->haveLineSmooth = FALSE;
-   else
-      svgascreen->haveLineSmooth = result.u;
+      svgascreen->haveProvokingVertex = FALSE;
 
-   if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_LINE_WIDTH, &result))
-      svgascreen->maxLineWidth = 1.0F;
-   else
-      svgascreen->maxLineWidth = result.f;
+      svgascreen->haveLineSmooth =
+         get_bool_cap(sws, SVGA3D_DEVCAP_LINE_AA, FALSE);
 
-   if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_AA_LINE_WIDTH, &result))
-      svgascreen->maxLineWidthAA = 1.0F;
-   else
-      svgascreen->maxLineWidthAA = result.f;
+      svgascreen->maxPointSize =
+         get_float_cap(sws, SVGA3D_DEVCAP_MAX_POINT_SIZE, 1.0f);
+      /* Keep this to a reasonable size to avoid failures in conform/pntaa.c */
+      svgascreen->maxPointSize = MIN2(svgascreen->maxPointSize, 80.0f);
+
+      /* The SVGA3D device always supports 4 targets at this time, regardless
+       * of what querying SVGA3D_DEVCAP_MAX_RENDER_TARGETS might return.
+       */
+      svgascreen->max_color_buffers = 4;
+
+      /* Only support one constant buffer
+       */
+      svgascreen->max_const_buffers = 1;
 
-   if (0)
+      /* No multisampling */
+      svgascreen->ms_samples = 0;
+   }
+
+   /* common VGPU9 / VGPU10 caps */
+   svgascreen->haveLineStipple =
+      get_bool_cap(sws, SVGA3D_DEVCAP_LINE_STIPPLE, FALSE);
+
+   svgascreen->maxLineWidth =
+      get_float_cap(sws, SVGA3D_DEVCAP_MAX_LINE_WIDTH, 1.0f);
+
+   svgascreen->maxLineWidthAA =
+      get_float_cap(sws, SVGA3D_DEVCAP_MAX_AA_LINE_WIDTH, 1.0f);
+
+   if (0) {
+      debug_printf("svga: haveProvokingVertex %u\n",
+                   svgascreen->haveProvokingVertex);
       debug_printf("svga: haveLineStip %u  "
                    "haveLineSmooth %u  maxLineWidth %f\n",
                    svgascreen->haveLineStipple, svgascreen->haveLineSmooth,
                    svgascreen->maxLineWidth);
-
-   if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_POINT_SIZE, &result)) {
-      svgascreen->maxPointSize = 1.0F;
-   } else {
-      /* Keep this to a reasonable size to avoid failures in
-       * conform/pntaa.c:
-       */
-      svgascreen->maxPointSize = MIN2(result.f, 80.0f);
+      debug_printf("svga: maxPointSize %g\n", svgascreen->maxPointSize);
    }
 
-   /* The SVGA3D device always supports 4 targets at this time, regardless
-    * of what querying SVGA3D_DEVCAP_MAX_RENDER_TARGETS might return.
-    */
-   svgascreen->max_color_buffers = 4;
-
    pipe_mutex_init(svgascreen->tex_mutex);
    pipe_mutex_init(svgascreen->swc_mutex);
 
diff --git a/src/gallium/drivers/svga/svga_screen.h b/src/gallium/drivers/svga/svga_screen.h
index ea1e743dfe5..5581d2e1ffd 100644
--- a/src/gallium/drivers/svga/svga_screen.h
+++ b/src/gallium/drivers/svga/svga_screen.h
@@ -1,4 +1,4 @@
-/**********************************************************
+/**********************************************************
  * Copyright 2008-2009 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
@@ -48,10 +48,13 @@ struct svga_screen
    SVGA3dHardwareVersion hw_version;
 
    /** Device caps */
+   boolean haveProvokingVertex;
    boolean haveLineStipple, haveLineSmooth;
    float maxLineWidth, maxLineWidthAA;
    float maxPointSize;
    unsigned max_color_buffers;
+   unsigned max_const_buffers;
+   unsigned ms_samples;
 
    struct {
       boolean force_level_surface_view;
@@ -69,6 +72,7 @@ struct svga_screen
    /* which formats to translate depth formats into */
    struct {
      enum SVGA3dSurfaceFormat z16;
+
      /* note gallium order */
      enum SVGA3dSurfaceFormat x8z24;
      enum SVGA3dSurfaceFormat s8z24;
diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c
index f99a0b305a7..5b441295715 100644
--- a/src/gallium/drivers/svga/svga_screen_cache.c
+++ b/src/gallium/drivers/svga/svga_screen_cache.c
@@ -115,8 +115,14 @@ svga_screen_cache_lookup(struct svga_screen *svgascreen,
 
       assert(entry->handle);
 
+      /* If the key matches and the fence is signalled (the surface is no
+       * longer needed), the lookup is successful: we found a surface that
+       * can be reused.
+       * We unlink the surface from the cache entry and add the entry to
+       * the 'empty' list.
+       */
       if (memcmp(&entry->key, key, sizeof *key) == 0 &&
-         sws->fence_signalled(sws, entry->fence, 0) == 0) {
+          sws->fence_signalled(sws, entry->fence, 0) == 0) {
          unsigned surf_size;
 
          assert(sws->surface_is_flushed(sws, entry->handle));
@@ -124,10 +130,13 @@ svga_screen_cache_lookup(struct svga_screen *svgascreen,
          handle = entry->handle; /* Reference is transfered here. */
          entry->handle = NULL;
 
+         /* Remove from hash table */
          LIST_DEL(&entry->bucket_head);
 
+         /* remove from LRU list */
          LIST_DEL(&entry->head);
 
+         /* Add the cache entry (but not the surface!) to the empty list */
          LIST_ADD(&entry->head, &cache->empty);
 
          /* update the cache size */
@@ -195,7 +204,8 @@ svga_screen_cache_shrink(struct svga_screen *svgascreen,
 
 
 /**
- * Transfers a handle reference.
+ * Add a surface to the cache.  This is done when the driver deletes
+ * the surface.  Note: transfers a handle reference.
  */
 static void
 svga_screen_cache_add(struct svga_screen *svgascreen,
@@ -207,17 +217,17 @@ svga_screen_cache_add(struct svga_screen *svgascreen,
    struct svga_host_surface_cache_entry *entry = NULL;
    struct svga_winsys_surface *handle = *p_handle;
    unsigned surf_size;
-   
+
    assert(key->cachable);
 
    if (!handle)
       return;
-   
+
    surf_size = surface_size(key);
 
    *p_handle = NULL;
    pipe_mutex_lock(cache->mutex);
-   
+
    if (surf_size >= SVGA_HOST_SURFACE_CACHE_BYTES) {
       /* this surface is too large to cache, just free it */
       sws->surface_reference(sws, &handle, NULL);
@@ -245,10 +255,13 @@ svga_screen_cache_add(struct svga_screen *svgascreen,
    }
 
    if (!LIST_IS_EMPTY(&cache->empty)) {
-      /* use the first empty entry */
+      /* An empty entry has no surface associated with it.
+       * Use the first empty entry.
+       */
       entry = LIST_ENTRY(struct svga_host_surface_cache_entry,
                          cache->empty.next, head);
 
+      /* Remove from LRU list */
       LIST_DEL(&entry->head);
    }
    else if (!LIST_IS_EMPTY(&cache->unused)) {
@@ -262,12 +275,15 @@ svga_screen_cache_add(struct svga_screen *svgascreen,
 
       sws->surface_reference(sws, &entry->handle, NULL);
 
+      /* Remove from hash table */
       LIST_DEL(&entry->bucket_head);
 
+      /* Remove from LRU list */
       LIST_DEL(&entry->head);
    }
 
    if (entry) {
+      assert(entry->handle == NULL);
       entry->handle = handle;
       memcpy(&entry->key, key, sizeof entry->key);
 
@@ -304,6 +320,7 @@ svga_screen_cache_flush(struct svga_screen *svgascreen,
 
    pipe_mutex_lock(cache->mutex);
 
+   /* Loop over entries in the validated list */
    curr = cache->validated.next;
    next = curr->next;
    while (curr != &cache->validated) {
@@ -312,12 +329,15 @@ svga_screen_cache_flush(struct svga_screen *svgascreen,
       assert(entry->handle);
 
       if (sws->surface_is_flushed(sws, entry->handle)) {
+         /* remove entry from LRU list */
          LIST_DEL(&entry->head);
 
          svgascreen->sws->fence_reference(svgascreen->sws, &entry->fence, fence);
 
+         /* Add entry to the unused list */
          LIST_ADD(&entry->head, &cache->unused);
 
+         /* Add entry to the hash table bucket */
          bucket = svga_screen_cache_bucket(&entry->key);
          LIST_ADD(&entry->bucket_head, &cache->bucket[bucket]);
       }
@@ -388,9 +408,12 @@ svga_screen_cache_init(struct svga_screen *svgascreen)
  * Allocate a new host-side surface.  If the surface is marked as cachable,
  * first try re-using a surface in the cache of freed surfaces.  Otherwise,
  * allocate a new surface.
+ * \param bind_flags  bitmask of PIPE_BIND_x flags
+ * \param usage  one of PIPE_USAGE_x values
  */
 struct svga_winsys_surface *
 svga_screen_surface_create(struct svga_screen *svgascreen,
+                           unsigned bind_flags, unsigned usage,
                            struct svga_host_surface_cache_key *key)
 {
    struct svga_winsys_screen *sws = svgascreen->sws;
@@ -398,17 +421,20 @@ svga_screen_surface_create(struct svga_screen *svgascreen,
    boolean cachable = SVGA_SURFACE_CACHE_ENABLED && key->cachable;
 
    SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
-            "%s sz %dx%dx%d mips %d faces %d cachable %d\n",
+            "%s sz %dx%dx%d mips %d faces %d arraySize %d cachable %d\n",
             __FUNCTION__,
             key->size.width,
             key->size.height,
             key->size.depth,
             key->numMipLevels,
             key->numFaces,
+            key->arraySize,
             key->cachable);
 
    if (cachable) {
       if (key->format == SVGA3D_BUFFER) {
+         SVGA3dSurfaceFlags hint_flag;
+
          /* For buffers, round the buffer size up to the nearest power
           * of two to increase the probability of cache hits.  Keep
           * texture surface dimensions unchanged.
@@ -417,15 +443,33 @@ svga_screen_surface_create(struct svga_screen *svgascreen,
          while (size < key->size.width)
             size <<= 1;
          key->size.width = size;
-	 /* Since we're reusing buffers we're effectively transforming all
-	  * of them into dynamic buffers.
-	  *
-	  * It would be nice to not cache long lived static buffers. But there
-	  * is no way to detect the long lived from short lived ones yet. A
-	  * good heuristic would be buffer size.
-	  */
-	 key->flags &= ~SVGA3D_SURFACE_HINT_STATIC;
-	 key->flags |= SVGA3D_SURFACE_HINT_DYNAMIC;
+
+         /* Determine whether the buffer is static or dynamic.
+          * This is a bit of a heuristic which can be tuned as needed.
+          */
+         if (usage == PIPE_USAGE_DEFAULT ||
+             usage == PIPE_USAGE_IMMUTABLE) {
+            hint_flag = SVGA3D_SURFACE_HINT_STATIC;
+         }
+         else if (bind_flags & PIPE_BIND_INDEX_BUFFER) {
+            /* Index buffers don't change too often.  Mark them as static.
+             */
+            hint_flag = SVGA3D_SURFACE_HINT_STATIC;
+         }
+         else {
+            /* Since we're reusing buffers we're effectively transforming all
+             * of them into dynamic buffers.
+             *
+             * It would be nice to not cache long lived static buffers. But there
+             * is no way to detect the long lived from short lived ones yet. A
+             * good heuristic would be buffer size.
+             */
+            hint_flag = SVGA3D_SURFACE_HINT_DYNAMIC;
+         }
+
+         key->flags &= ~(SVGA3D_SURFACE_HINT_STATIC |
+                         SVGA3D_SURFACE_HINT_DYNAMIC);
+         key->flags |= hint_flag;
       }
 
       handle = svga_screen_cache_lookup(svgascreen, key);
@@ -436,25 +480,32 @@ svga_screen_surface_create(struct svga_screen *svgascreen,
                      key->size.width);
          else
             SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
-                     "reuse sid %p sz %dx%dx%d mips %d faces %d\n", handle,
+                     "reuse sid %p sz %dx%dx%d mips %d faces %d arraySize %d\n", handle,
                      key->size.width,
                      key->size.height,
                      key->size.depth,
                      key->numMipLevels,
-                     key->numFaces);
+                     key->numFaces,
+                     key->arraySize);
       }
    }
 
    if (!handle) {
+      unsigned usage = 0;
+
+      if (!key->cachable)
+         usage |= SVGA_SURFACE_USAGE_SHARED;
+      if (key->scanout)
+         usage |= SVGA_SURFACE_USAGE_SCANOUT;
+
       handle = sws->surface_create(sws,
                                    key->flags,
                                    key->format,
-                                   key->cachable ?
-                                   0 : SVGA_SURFACE_USAGE_SHARED,
+                                   usage,
                                    key->size,
-                                   key->numFaces,
+                                   key->numFaces * key->arraySize,
                                    key->numMipLevels,
-                                   0);
+                                   key->sampleCount);
       if (handle)
          SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
                   "  CREATE sid %p sz %dx%dx%d\n",
diff --git a/src/gallium/drivers/svga/svga_screen_cache.h b/src/gallium/drivers/svga/svga_screen_cache.h
index 56ac62b39c1..424eb2c5ae1 100644
--- a/src/gallium/drivers/svga/svga_screen_cache.h
+++ b/src/gallium/drivers/svga/svga_screen_cache.h
@@ -62,9 +62,12 @@ struct svga_host_surface_cache_key
    SVGA3dSurfaceFlags flags;
    SVGA3dSurfaceFormat format;
    SVGA3dSize size;
-   uint32_t numFaces:24;
-   uint32_t numMipLevels:7;
+   uint32_t numFaces:3;
+   uint32_t arraySize:16;
+   uint32_t numMipLevels:6;
    uint32_t cachable:1;         /* False if this is a shared surface */
+   uint32_t sampleCount:5;
+   uint32_t scanout:1;
 };
 
 
@@ -137,6 +140,7 @@ svga_screen_cache_init(struct svga_screen *svgascreen);
 
 struct svga_winsys_surface *
 svga_screen_surface_create(struct svga_screen *svgascreen,
+                           unsigned bind_flags, unsigned usage,
                            struct svga_host_surface_cache_key *key);
 
 void
diff --git a/src/gallium/drivers/svga/svga_shader.c b/src/gallium/drivers/svga/svga_shader.c
index 46efa07df2d..d46e7ebbc38 100644
--- a/src/gallium/drivers/svga/svga_shader.c
+++ b/src/gallium/drivers/svga/svga_shader.c
@@ -27,14 +27,318 @@
 #include "util/u_memory.h"
 #include "svga_context.h"
 #include "svga_cmd.h"
+#include "svga_format.h"
 #include "svga_shader.h"
 
 
+/**
+ * This bit isn't really used anywhere.  It only serves to help
+ * generate a unique "signature" for the vertex shader output bitmask.
+ * Shader input/output signatures are used to resolve shader linking
+ * issues.
+ */
+#define FOG_GENERIC_BIT (((uint64_t) 1) << 63)
+
+
+/**
+ * Use the shader info to generate a bitmask indicating which generic
+ * inputs are used by the shader.  A set bit indicates that GENERIC[i]
+ * is used.
+ */
+uint64_t
+svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
+{
+   unsigned i;
+   uint64_t mask = 0x0;
+
+   for (i = 0; i < info->num_inputs; i++) {
+      if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
+         unsigned j = info->input_semantic_index[i];
+         assert(j < sizeof(mask) * 8);
+         mask |= ((uint64_t) 1) << j;
+      }
+   }
+
+   return mask;
+}
+
+
+/**
+ * Scan shader info to return a bitmask of written outputs.
+ */
+uint64_t
+svga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
+{
+   unsigned i;
+   uint64_t mask = 0x0;
+
+   for (i = 0; i < info->num_outputs; i++) {
+      switch (info->output_semantic_name[i]) {
+      case TGSI_SEMANTIC_GENERIC:
+         {
+            unsigned j = info->output_semantic_index[i];
+            assert(j < sizeof(mask) * 8);
+            mask |= ((uint64_t) 1) << j;
+         }
+         break;
+      case TGSI_SEMANTIC_FOG:
+         mask |= FOG_GENERIC_BIT;
+         break;
+      }
+   }
+
+   return mask;
+}
+
+
+
+/**
+ * Given a mask of used generic variables (as returned by the above functions)
+ * fill in a table which maps those indexes to small integers.
+ * This table is used by the remap_generic_index() function in
+ * svga_tgsi_decl_sm30.c
+ * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
+ * GENERIC[3] are used.  The remap_table will contain:
+ *   table[1] = 1;   (texcoord[0] is reserved, so numbering starts at 1)
+ *   table[3] = 2;
+ * The remaining entries are set to -1; svga_remap_generic_index() later
+ * assigns them the next unused index on demand (in this example, 3).
+ */
+void
+svga_remap_generics(uint64_t generics_mask,
+                    int8_t remap_table[MAX_GENERIC_VARYING])
+{
+   /* Note texcoord[0] is reserved so start at 1 */
+   unsigned count = 1, i;
+
+   for (i = 0; i < MAX_GENERIC_VARYING; i++) {
+      remap_table[i] = -1;
+   }
+
+   /* for each bit set in generics_mask, lowest bit first */
+   while (generics_mask) {
+      unsigned index = ffsll(generics_mask) - 1;
+      remap_table[index] = count++;
+      generics_mask &= ~((uint64_t) 1 << index);
+   }
+}
+
+
+/**
+ * Use the generic remap table to map a TGSI generic varying variable
+ * index to a small integer.  If the remapping table doesn't have a
+ * valid value for the given index (the table entry is -1) it means
+ * the fragment shader doesn't use that VS output.  Just allocate
+ * the next free value in that case.  Alternatively, we could cull
+ * VS instructions that write to that register, or replace the register
+ * with a dummy temp register.
+ * XXX TODO: we should do one of the latter as it would save precious
+ * texcoord registers.
+ */
+int
+svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
+                         int generic_index)
+{
+   assert(generic_index < MAX_GENERIC_VARYING);
+
+   if (generic_index >= MAX_GENERIC_VARYING) {
+      /* just don't return a random/garbage value */
+      generic_index = MAX_GENERIC_VARYING - 1;
+   }
+
+   if (remap_table[generic_index] == -1) {
+      /* This is a VS output that has no matching PS input.  Find a
+       * free index.
+       */
+      int i, max = 0;
+      for (i = 0; i < MAX_GENERIC_VARYING; i++) {
+         max = MAX2(max, remap_table[i]);
+      }
+      remap_table[generic_index] = max + 1;
+   }
+
+   return remap_table[generic_index];
+}
+
+
+/**
+ * Initialize the shader-neutral fields of svga_compile_key from context
+ * state.  This is basically the texture-related state.
+ */
+void
+svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
+                            struct svga_compile_key *key)
+{
+   unsigned i, idx = 0;
+
+   assert(shader < Elements(svga->curr.num_sampler_views));
+
+   for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) {
+      struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
+      if (view) {
+         assert(svga->curr.sampler[shader][i]);
+         assert(view->texture);
+         assert(view->texture->target < (1 << 4)); /* texture_target:4 */
+
+         key->tex[i].texture_target = view->texture->target;
+
+         /* 1D/2D array textures with one slice are treated as non-arrays
+          * by the SVGA3D device.  Convert the texture type here so that
+          * we emit the right TEX/SAMPLE instruction in the shader.
+          */
+         if (view->texture->array_size == 1) {
+            if (view->texture->target == PIPE_TEXTURE_1D_ARRAY) {
+               key->tex[i].texture_target = PIPE_TEXTURE_1D;
+            }
+            else if (view->texture->target == PIPE_TEXTURE_2D_ARRAY) {
+               key->tex[i].texture_target = PIPE_TEXTURE_2D;
+            }
+         }
+
+         key->tex[i].texture_msaa = view->texture->nr_samples > 1;
+         if (!svga->curr.sampler[shader][i]->normalized_coords) {
+            assert(idx < (1 << 5));  /* width_height_idx:5 bitfield */
+            key->tex[i].width_height_idx = idx++;
+            key->tex[i].unnormalized = TRUE;
+            ++key->num_unnormalized_coords;
+         }
+
+         key->tex[i].swizzle_r = view->swizzle_r;
+         key->tex[i].swizzle_g = view->swizzle_g;
+         key->tex[i].swizzle_b = view->swizzle_b;
+         key->tex[i].swizzle_a = view->swizzle_a;
+
+         key->tex[i].return_type = svga_get_texture_datatype(view->format);
+      }
+   }
+   key->num_textures = svga->curr.num_sampler_views[shader];
+}
+
+
+/** Search for a compiled shader variant with the same compile key */
+struct svga_shader_variant *
+svga_search_shader_key(const struct svga_shader *shader,
+                       const struct svga_compile_key *key)
+{
+   struct svga_shader_variant *variant = shader->variants;
+
+   assert(key);
+
+   for ( ; variant; variant = variant->next) {
+      if (svga_compile_keys_equal(key, &variant->key))
+         return variant;
+   }
+   return NULL;
+}
+
+/** Search for a shader with the same token key */
+struct svga_shader *
+svga_search_shader_token_key(struct svga_shader *pshader,
+                             const struct svga_token_key *key)
+{
+   struct svga_shader *shader = pshader;
+
+   assert(key);
+
+   for ( ; shader; shader = shader->next) {
+      if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
+         return shader;
+   }
+   return NULL;
+}
+
+/**
+ * Helper function to define a gb shader for non-vgpu10 device
+ */
+static enum pipe_error
+define_gb_shader_vgpu9(struct svga_context *svga,
+                       SVGA3dShaderType type,
+                       struct svga_shader_variant *variant,
+                       unsigned codeLen)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   enum pipe_error ret;
+
+   /**
+    * Create gb memory for the shader and upload the shader code.
+    * Kernel module will allocate an id for the shader and issue
+    * the DefineGBShader command.
+    */
+   variant->gb_shader = sws->shader_create(sws, type,
+                                           variant->tokens, codeLen);
+
+   if (!variant->gb_shader)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);
+
+   return ret;
+}
+
+/**
+ * Helper function to define a gb shader for vgpu10 device
+ */
+static enum pipe_error
+define_gb_shader_vgpu10(struct svga_context *svga,
+                        SVGA3dShaderType type,
+                        struct svga_shader_variant *variant,
+                        unsigned codeLen)
+{
+   struct svga_winsys_context *swc = svga->swc;
+   enum pipe_error ret;
+
+   /**
+    * Shaders in VGPU10 enabled device reside in the device COTable.
+    * SVGA driver will allocate an integer ID for the shader and
+    * issue DXDefineShader and DXBindShader commands.
+    */
+   variant->id = util_bitmask_add(svga->shader_id_bm);
+   if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   /* Create gb memory for the shader and upload the shader code */
+   variant->gb_shader = swc->shader_create(swc,
+                                           variant->id, type,
+                                           variant->tokens, codeLen);
+
+   if (!variant->gb_shader) {
+      /* Free the shader ID */
+      assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
+      goto fail_no_allocation;
+   }
+
+   /**
+    * Since we don't want to do any flush within state emission to avoid
+    * partial state in a command buffer, it's important to make sure that
+    * there is enough room to send both the DXDefineShader & DXBindShader
+    * commands in the same command buffer. So let's send both
+    * commands in one command reservation. If it fails, we'll undo
+    * the shader creation and return an error.
+    */
+   ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
+                                           variant->id, type, codeLen);
+
+   if (ret != PIPE_OK)
+      goto fail;
+
+   return PIPE_OK;
+
+fail:
+   swc->shader_destroy(swc, variant->gb_shader);
+   variant->gb_shader = NULL;
+
+fail_no_allocation:
+   util_bitmask_clear(svga->shader_id_bm, variant->id);
+   variant->id = UTIL_BITMASK_INVALID_INDEX;
+
+   return PIPE_ERROR_OUT_OF_MEMORY;
+}
 
 /**
  * Issue the SVGA3D commands to define a new shader.
- * \param result  contains the shader tokens, etc.  The result->id field will
- *                be set here.
+ * \param variant  contains the shader tokens, etc.  The variant->id field
+ *                 will be set here.
  */
 enum pipe_error
 svga_define_shader(struct svga_context *svga,
@@ -42,27 +346,17 @@ svga_define_shader(struct svga_context *svga,
                    struct svga_shader_variant *variant)
 {
    unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
+   enum pipe_error ret;
 
-   if (svga_have_gb_objects(svga)) {
-      struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
-      enum pipe_error ret;
-
-      variant->gb_shader = sws->shader_create(sws, type,
-                                              variant->tokens, codeLen);
-      if (!variant->gb_shader)
-         return PIPE_ERROR_OUT_OF_MEMORY;
-
-      ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);
-      if (ret != PIPE_OK) {
-         sws->shader_destroy(sws, variant->gb_shader);
-         variant->gb_shader = NULL;
-      }
+   variant->id = UTIL_BITMASK_INVALID_INDEX;
 
-      return ret;
+   if (svga_have_gb_objects(svga)) {
+      if (svga_have_vgpu10(svga))
+         return define_gb_shader_vgpu10(svga, type, variant, codeLen);
+      else
+         return define_gb_shader_vgpu9(svga, type, variant, codeLen);
    }
    else {
-      enum pipe_error ret;
-
       /* Allocate an integer ID for the shader */
       variant->id = util_bitmask_add(svga->shader_id_bm);
       if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
@@ -80,14 +374,45 @@ svga_define_shader(struct svga_context *svga,
          assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
          util_bitmask_clear(svga->shader_id_bm, variant->id);
          variant->id = UTIL_BITMASK_INVALID_INDEX;
-         return ret;
       }
    }
 
-   return PIPE_OK;
+   return ret;
 }
 
 
+/**
+ * Issue the SVGA3D commands to set/bind a shader.
+ * \param variant  the shader variant to bind (may be NULL).
+ */
+enum pipe_error
+svga_set_shader(struct svga_context *svga,
+                SVGA3dShaderType type,
+                struct svga_shader_variant *variant)
+{
+   enum pipe_error ret;
+   unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;
+
+   assert(type == SVGA3D_SHADERTYPE_VS ||
+          type == SVGA3D_SHADERTYPE_GS ||
+          type == SVGA3D_SHADERTYPE_PS);
+
+   if (svga_have_gb_objects(svga)) {
+      struct svga_winsys_gb_shader *gbshader =
+         variant ? variant->gb_shader : NULL;
+
+      if (svga_have_vgpu10(svga))
+         ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
+      else
+         ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);
+   }
+   else {
+      ret = SVGA3D_SetShader(svga->swc, type, id);
+   }
+
+   return ret;
+}
+
 
 enum pipe_error
 svga_destroy_shader_variant(struct svga_context *svga,
@@ -96,32 +421,92 @@ svga_destroy_shader_variant(struct svga_context *svga,
 {
    enum pipe_error ret = PIPE_OK;
 
-   if (svga_have_gb_objects(svga)) {
-      struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
-
-      sws->shader_destroy(sws, variant->gb_shader);
+   if (svga_have_gb_objects(svga) && variant->gb_shader) {
+      if (svga_have_vgpu10(svga)) {
+         struct svga_winsys_context *swc = svga->swc;
+         swc->shader_destroy(swc, variant->gb_shader);
+         ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
+         if (ret != PIPE_OK) {
+            /* flush and try again */
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
+         }
+         util_bitmask_clear(svga->shader_id_bm, variant->id);
+      }
+      else {
+         struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+         sws->shader_destroy(sws, variant->gb_shader);
+      }
       variant->gb_shader = NULL;
-      goto end;
    }
-
-   /* first try */
-   if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
-      ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
-
-      if (ret != PIPE_OK) {
-         /* flush and try again */
-         svga_context_flush(svga, NULL);
-
+   else {
+      if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
          ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
-         assert(ret == PIPE_OK);
+         if (ret != PIPE_OK) {
+            /* flush and try again */
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
+            assert(ret == PIPE_OK);
+         }
+         util_bitmask_clear(svga->shader_id_bm, variant->id);
       }
-
-      util_bitmask_clear(svga->shader_id_bm, variant->id);
    }
 
-end:
    FREE((unsigned *)variant->tokens);
    FREE(variant);
 
    return ret;
 }
+
+/*
+ * Rebind shaders.
+ * Called at the beginning of every new command buffer to ensure that
+ * shaders are properly paged-in. Instead of sending the SetShader
+ * command, this function sends a private allocation command to
+ * page in a shader. This avoids emitting redundant state to the device
+ * just to page in a resource.
+ */
+enum pipe_error
+svga_rebind_shaders(struct svga_context *svga)
+{
+   struct svga_winsys_context *swc = svga->swc;
+   struct svga_hw_draw_state *hw = &svga->state.hw_draw;
+   enum pipe_error ret;
+
+   assert(svga_have_vgpu10(svga));
+
+   /**
+    * If the underlying winsys layer does not need resource rebinding,
+    * just clear the rebind flags and return.
+    */
+   if (swc->resource_rebind == NULL) {
+      svga->rebind.flags.vs = 0;
+      svga->rebind.flags.gs = 0;
+      svga->rebind.flags.fs = 0;
+
+      return PIPE_OK;
+   }
+
+   if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
+      ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+   svga->rebind.flags.vs = 0;
+
+   if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
+      ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+   svga->rebind.flags.gs = 0;
+
+   if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
+      ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+   svga->rebind.flags.fs = 0;
+
+   return PIPE_OK;
+}
diff --git a/src/gallium/drivers/svga/svga_shader.h b/src/gallium/drivers/svga/svga_shader.h
index 5102159b96a..b0800c1ecad 100644
--- a/src/gallium/drivers/svga/svga_shader.h
+++ b/src/gallium/drivers/svga/svga_shader.h
@@ -27,19 +27,259 @@
 #define SVGA_SHADER_H
 
 #include "svga3d_reg.h"
+#include "svga_context.h"
+#include "svga_streamout.h"
 
-struct svga_shader_variant;
+
+/**
+ * We use a 64-bit mask to keep track of the generic indexes.
+ * This is the maximum semantic index for a TGSI GENERIC[i] register.
+ */
+#define MAX_GENERIC_VARYING 64
+
+
+struct svga_context;
+
+
+struct svga_compile_key
+{
+   /* vertex shader only */
+   struct {
+      uint64_t fs_generic_inputs;
+      unsigned passthrough:1;
+      unsigned need_prescale:1;
+      unsigned undo_viewport:1;
+      unsigned allow_psiz:1;
+      /** The following are all 32-bit bitmasks (per VS input) */
+      unsigned adjust_attrib_range;
+      unsigned attrib_is_pure_int;
+      unsigned adjust_attrib_w_1;
+      unsigned adjust_attrib_itof;
+      unsigned adjust_attrib_utof;
+      unsigned attrib_is_bgra;
+      unsigned attrib_puint_to_snorm;
+      unsigned attrib_puint_to_uscaled;
+      unsigned attrib_puint_to_sscaled;
+   } vs;
+
+   /* geometry shader only */
+   struct {
+      uint64_t vs_generic_outputs;
+      unsigned need_prescale:1;
+      unsigned writes_psize:1;
+      unsigned wide_point:1;
+   } gs;
+
+   /* fragment shader only */
+   struct {
+      uint64_t vs_generic_outputs;
+      uint64_t gs_generic_outputs;
+      unsigned light_twoside:1;
+      unsigned front_ccw:1;
+      unsigned white_fragments:1;
+      unsigned flatshade:1;
+      unsigned pstipple:1;
+      unsigned alpha_func:4;  /**< SVGA3D_CMP_x */
+      unsigned write_color0_to_n_cbufs:4;
+      unsigned aa_point:1;
+      int aa_point_coord_index;
+      float alpha_ref;
+   } fs;
+
+   /* any shader type */
+   int8_t generic_remap_table[MAX_GENERIC_VARYING];
+   unsigned num_textures:8;
+   unsigned num_unnormalized_coords:8;
+   unsigned clip_plane_enable:PIPE_MAX_CLIP_PLANES;
+   unsigned sprite_origin_lower_left:1;
+   unsigned sprite_coord_enable;
+   struct {
+      unsigned compare_mode:1;
+      unsigned compare_func:3;
+      unsigned unnormalized:1;
+      unsigned width_height_idx:5; /**< texture unit */
+      unsigned texture_target:4;   /**< PIPE_TEXTURE_x */
+      unsigned texture_msaa:1;    /**< A multisample texture? */
+      unsigned sprite_texgen:1;
+      unsigned swizzle_r:3;
+      unsigned swizzle_g:3;
+      unsigned swizzle_b:3;
+      unsigned swizzle_a:3;
+      unsigned return_type:3;  /**< TGSI_RETURN_TYPE_x */
+   } tex[PIPE_MAX_SAMPLERS];
+   /* Note: svga_compile_keys_equal() depends on the variable-size
+    * tex[] array being at the end of this structure.
+    */
+};
+
+/* A key for a variant of token string of a shader */
+struct svga_token_key {
+   struct {
+      unsigned sprite_coord_enable:24;
+      unsigned sprite_origin_upper_left:1;
+      unsigned point_pos_stream_out:1;
+      unsigned writes_psize:1;
+      unsigned aa_point:1;
+   } gs;
+};
+
+/**
+ * A single TGSI shader may be compiled into different variants of
+ * SVGA3D shaders depending on the compile key.  Each user shader
+ * will have a linked list of these variants.
+ */
+struct svga_shader_variant
+{
+   const struct svga_shader *shader;
+
+   /** Parameters used to generate this variant */
+   struct svga_compile_key key;
+
+   /* Compiled shader tokens:
+    */
+   const unsigned *tokens;
+   unsigned nr_tokens;
+
+   /** Per-context shader identifier used with SVGA_3D_CMD_SHADER_DEFINE,
+    * SVGA_3D_CMD_SET_SHADER and SVGA_3D_CMD_SHADER_DESTROY.
+    */
+   unsigned id;
+
+   /** Start of extra constants (number of float[4] constants) */
+   unsigned extra_const_start;
+
+   /* GB object buffer containing the bytecode */
+   struct svga_winsys_gb_shader *gb_shader;
+
+   boolean uses_flat_interp;   /** TRUE if flat interpolation qualifier is
+                                *  applied to any of the varyings.
+                                */
+
+   /** For FS-based polygon stipple */
+   unsigned pstipple_sampler_unit;
+
+   /** Next variant */
+   struct svga_shader_variant *next;
+};
+
+
+struct svga_shader
+{
+   const struct tgsi_token *tokens;
+   struct svga_token_key token_key;     /* token key for the token string */
+   struct tgsi_shader_info info;
+
+   /* List of shaders with tokens derived from the same token string */
+   struct svga_shader *next;
+   struct svga_shader *parent;   /* shader with the original token string */
+
+   struct svga_stream_output *stream_output;
+
+   /** Head of linked list of compiled variants */
+   struct svga_shader_variant *variants;
+
+   unsigned id;  /**< for debugging only */
+};
+
+
+struct svga_fragment_shader
+{
+   struct svga_shader base;
+
+   struct draw_fragment_shader *draw_shader;
+
+   /** Mask of which generic varying variables are read by this shader */
+   uint64_t generic_inputs;
+
+   /** Table mapping original TGSI generic indexes to low integers */
+   int8_t generic_remap_table[MAX_GENERIC_VARYING];
+};
+
+
+struct svga_vertex_shader
+{
+   struct svga_shader base;
+
+   struct draw_vertex_shader *draw_shader;
+
+   /** Mask of which generic varying variables are written by this shader */
+   uint64_t generic_outputs;
+
+   /** Generated geometry shader that goes with this vertex shader */
+   struct svga_geometry_shader *gs;
+};
+
+
+struct svga_geometry_shader
+{
+   struct svga_shader base;
+
+   struct draw_geometry_shader *draw_shader;
+
+   /** Table mapping original TGSI generic indexes to low integers */
+   int8_t generic_remap_table[MAX_GENERIC_VARYING];
+   uint64_t generic_outputs;
+
+   unsigned aa_point_coord_index; /* generic index for aa point coord */
+
+   unsigned wide_point:1;      /* set if the shader emulates wide point */
+};
+
+
+static inline boolean
+svga_compile_keys_equal(const struct svga_compile_key *a,
+                        const struct svga_compile_key *b)
+{
+   unsigned key_size =
+      (const char *) &a->tex[a->num_textures] - (const char *) a;
+
+   return memcmp(a, b, key_size) == 0;
+}
+
+
+uint64_t
+svga_get_generic_inputs_mask(const struct tgsi_shader_info *info);
+
+uint64_t
+svga_get_generic_outputs_mask(const struct tgsi_shader_info *info);
+
+void
+svga_remap_generics(uint64_t generics_mask,
+                    int8_t remap_table[MAX_GENERIC_VARYING]);
+
+int
+svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
+                         int generic_index);
+
+void
+svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
+                            struct svga_compile_key *key);
+
+struct svga_shader_variant *
+svga_search_shader_key(const struct svga_shader *shader,
+                       const struct svga_compile_key *key);
+
+struct svga_shader *
+svga_search_shader_token_key(struct svga_shader *shader,
+                             const struct svga_token_key *key);
 
 enum pipe_error
 svga_define_shader(struct svga_context *svga,
                    SVGA3dShaderType type,
                    struct svga_shader_variant *variant);
 
+enum pipe_error
+svga_set_shader(struct svga_context *svga,
+                SVGA3dShaderType type,
+                struct svga_shader_variant *variant);
+
 enum pipe_error
 svga_destroy_shader_variant(struct svga_context *svga,
                             SVGA3dShaderType type,
                             struct svga_shader_variant *variant);
 
+enum pipe_error
+svga_rebind_shaders(struct svga_context *svga);
 
 /**
  * Check if a shader's bytecode exceeds the device limits.
@@ -62,4 +302,40 @@ svga_shader_too_large(const struct svga_context *svga,
 }
 
 
+/**
+ * Convert from PIPE_SHADER_* to SVGA3D_SHADERTYPE_*
+ */
+static inline SVGA3dShaderType
+svga_shader_type(unsigned shader)
+{
+   switch (shader) {
+   case PIPE_SHADER_VERTEX:
+      return SVGA3D_SHADERTYPE_VS;
+   case PIPE_SHADER_GEOMETRY:
+      return SVGA3D_SHADERTYPE_GS;
+   case PIPE_SHADER_FRAGMENT:
+      return SVGA3D_SHADERTYPE_PS;
+   default:
+      assert(!"Invalid shader type");
+      return SVGA3D_SHADERTYPE_VS;
+   }
+}
+
+
+/** Does the current VS have stream output? */
+static inline boolean
+svga_have_vs_streamout(const struct svga_context *svga)
+{
+   return svga->curr.vs != NULL && svga->curr.vs->base.stream_output != NULL;
+}
+
+
+/** Does the current GS have stream output? */
+static inline boolean
+svga_have_gs_streamout(const struct svga_context *svga)
+{
+   return svga->curr.gs != NULL && svga->curr.gs->base.stream_output != NULL;
+}
+
+
 #endif /* SVGA_SHADER_H */
diff --git a/src/gallium/drivers/svga/svga_state.c b/src/gallium/drivers/svga/svga_state.c
index b0bc867f63a..37d16dc9afe 100644
--- a/src/gallium/drivers/svga/svga_state.c
+++ b/src/gallium/drivers/svga/svga_state.c
@@ -23,6 +23,7 @@
  *
  **********************************************************/
 
+#include "util/u_bitmask.h"
 #include "util/u_debug.h"
 #include "pipe/p_defines.h"
 #include "util/u_memory.h"
@@ -63,14 +64,19 @@ static const struct svga_tracked_state *hw_clear_state[] =
  */
 static const struct svga_tracked_state *hw_draw_state[] =
 {
+   &svga_need_tgsi_transform,
    &svga_hw_fs,
+   &svga_hw_gs,
    &svga_hw_vs,
    &svga_hw_rss,
-   &svga_hw_tss,
-   &svga_hw_tss_binding,
+   &svga_hw_sampler,           /* VGPU10 */
+   &svga_hw_sampler_bindings,  /* VGPU10 */
+   &svga_hw_tss,               /* pre-VGPU10 */
+   &svga_hw_tss_binding,       /* pre-VGPU10 */
    &svga_hw_clip_planes,
    &svga_hw_vdecl,
    &svga_hw_fs_constants,
+   &svga_hw_gs_constants,
    &svga_hw_vs_constants,
    NULL
 };
@@ -255,23 +261,55 @@ do {                                            \
  */
 enum pipe_error svga_emit_initial_state( struct svga_context *svga )
 {
-   SVGA3dRenderState *rs;
-   unsigned count = 0;
-   const unsigned COUNT = 2;
-   enum pipe_error ret;
-
-   ret = SVGA3D_BeginSetRenderState( svga->swc, &rs, COUNT );
-   if (ret != PIPE_OK)
+   if (svga_have_vgpu10(svga)) {
+      SVGA3dRasterizerStateId id = util_bitmask_add(svga->rast_object_id_bm);
+      enum pipe_error ret;
+
+      /* XXX preliminary code */
+      ret = SVGA3D_vgpu10_DefineRasterizerState(svga->swc,
+                                             id,
+                                             SVGA3D_FILLMODE_FILL,
+                                             SVGA3D_CULL_NONE,
+                                             1, /* frontCounterClockwise */
+                                             0, /* depthBias */
+                                             0.0f, /* depthBiasClamp */
+                                             0.0f, /* slopeScaledDepthBiasClamp */
+                                             0, /* depthClampEnable */
+                                             0, /* scissorEnable */
+                                             0, /* multisampleEnable */
+                                             0, /* aalineEnable */
+                                             1.0f, /* lineWidth */
+                                             0, /* lineStippleEnable */
+                                             0, /* lineStippleFactor */
+                                             0, /* lineStipplePattern */
+                                             0); /* provokingVertexLast */
+
+      assert(ret == PIPE_OK);
+      if (ret != PIPE_OK)  /* don't bind an undefined state object */
+         return ret;
+      ret = SVGA3D_vgpu10_SetRasterizerState(svga->swc, id);
       return ret;
+   }
+   else {
+      SVGA3dRenderState *rs;
+      unsigned count = 0;
+      const unsigned COUNT = 2;
+      enum pipe_error ret;
 
-   /* Always use D3D style coordinate space as this is the only one
-    * which is implemented on all backends.
-    */
-   EMIT_RS(rs, count, SVGA3D_RS_COORDINATETYPE, SVGA3D_COORDINATE_LEFTHANDED );
-   EMIT_RS(rs, count, SVGA3D_RS_FRONTWINDING, SVGA3D_FRONTWINDING_CW );
-   
-   assert( COUNT == count );
-   SVGA_FIFOCommitAll( svga->swc );
+      ret = SVGA3D_BeginSetRenderState( svga->swc, &rs, COUNT );
+      if (ret != PIPE_OK)
+         return ret;
 
-   return PIPE_OK;
+      /* Always use D3D style coordinate space as this is the only one
+       * which is implemented on all backends.
+       */
+      EMIT_RS(rs, count, SVGA3D_RS_COORDINATETYPE,
+              SVGA3D_COORDINATE_LEFTHANDED );
+      EMIT_RS(rs, count, SVGA3D_RS_FRONTWINDING, SVGA3D_FRONTWINDING_CW );
+
+      assert( COUNT == count );
+      SVGA_FIFOCommitAll( svga->swc );
+
+      return PIPE_OK;
+   }
 }
diff --git a/src/gallium/drivers/svga/svga_state.h b/src/gallium/drivers/svga/svga_state.h
index 3325626a4d1..04b20e161fe 100644
--- a/src/gallium/drivers/svga/svga_state.h
+++ b/src/gallium/drivers/svga/svga_state.h
@@ -57,14 +57,20 @@ extern struct svga_tracked_state svga_hw_framebuffer;
 
 /* HW_DRAW
  */
+extern struct svga_tracked_state svga_need_tgsi_transform;
 extern struct svga_tracked_state svga_hw_vs;
 extern struct svga_tracked_state svga_hw_fs;
+extern struct svga_tracked_state svga_hw_gs;
 extern struct svga_tracked_state svga_hw_rss;
+extern struct svga_tracked_state svga_hw_pstipple;
+extern struct svga_tracked_state svga_hw_sampler;
+extern struct svga_tracked_state svga_hw_sampler_bindings;
 extern struct svga_tracked_state svga_hw_tss;
 extern struct svga_tracked_state svga_hw_tss_binding;
 extern struct svga_tracked_state svga_hw_clip_planes;
 extern struct svga_tracked_state svga_hw_vdecl;
 extern struct svga_tracked_state svga_hw_fs_constants;
+extern struct svga_tracked_state svga_hw_gs_constants;
 extern struct svga_tracked_state svga_hw_vs_constants;
 
 /* SWTNL_DRAW
@@ -93,10 +99,14 @@ enum pipe_error svga_emit_initial_state( struct svga_context *svga );
 
 enum pipe_error svga_reemit_framebuffer_bindings( struct svga_context *svga );
 
+enum pipe_error svga_rebind_framebuffer_bindings( struct svga_context *svga );
+
 enum pipe_error svga_reemit_tss_bindings( struct svga_context *svga );
 
 enum pipe_error svga_reemit_vs_bindings(struct svga_context *svga);
 
 enum pipe_error svga_reemit_fs_bindings(struct svga_context *svga);
 
+enum pipe_error svga_reemit_gs_bindings(struct svga_context *svga);
+
 #endif
diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c
index 1e1fbb099c6..e0bbe013c02 100644
--- a/src/gallium/drivers/svga/svga_state_constants.c
+++ b/src/gallium/drivers/svga/svga_state_constants.c
@@ -1,3 +1,4 @@
+
 /**********************************************************
  * Copyright 2008-2009 VMware, Inc.  All rights reserved.
  *
@@ -23,9 +24,11 @@
  *
  **********************************************************/
 
+#include "util/u_format.h"
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
 #include "pipe/p_defines.h"
+#include "util/u_upload_mgr.h"
 
 #include "svga_screen.h"
 #include "svga_context.h"
@@ -34,6 +37,7 @@
 #include "svga_tgsi.h"
 #include "svga_debug.h"
 #include "svga_resource_buffer.h"
+#include "svga_shader.h"
 
 #include "svga_hw_reg.h"
 
@@ -52,65 +56,135 @@
 /** Guest-backed surface constant buffers must be this size */
 #define GB_CONSTBUF_SIZE (SVGA3D_CONSTREG_MAX)
 
+
 /**
- * Convert from PIPE_SHADER_* to SVGA3D_SHADERTYPE_*
+ * Emit any extra shader-type-independent shader constants into the buffer
+ * pointed to by 'dest'.
+ * \return number of float[4] constants put into the 'dest' buffer
  */
 static unsigned
-svga_shader_type(unsigned shader)
+svga_get_extra_constants_common(struct svga_context *svga,
+                                const struct svga_shader_variant *variant,
+                                unsigned shader, float *dest)
 {
-   switch (shader) {
-   case PIPE_SHADER_VERTEX:
-      return SVGA3D_SHADERTYPE_VS;
-   case PIPE_SHADER_FRAGMENT:
-      return SVGA3D_SHADERTYPE_PS;
-   default:
-      assert(!"Unexpected shader type");
-      return SVGA3D_SHADERTYPE_VS;
+   uint32_t *dest_u;  /* integer view of 'dest', refreshed before each use */
+   unsigned i;
+   unsigned count = 0;
+
+   for (i = 0; i < variant->key.num_textures; i++) {
+      struct pipe_sampler_view *sv = svga->curr.sampler_views[shader][i];
+      if (sv) {
+         struct pipe_resource *tex = sv->texture;
+         /* Scaling factors needed for handling unnormalized texture coordinates
+          * for texture rectangles.
+          */
+         if (variant->key.tex[i].unnormalized) {
+            /* debug/sanity check */
+            assert(variant->key.tex[i].width_height_idx == count);
+
+            *dest++ = 1.0 / (float)tex->width0;
+            *dest++ = 1.0 / (float)tex->height0;
+            *dest++ = 1.0;
+            *dest++ = 1.0;
+
+            count++;
+         }
+
+         /* Store the sizes for texture buffers.  Both kinds of entries share
+          * one slot sequence, so write at the shared 'dest' cursor. */
+         if (tex->target == PIPE_BUFFER) {
+            unsigned bytes_per_element = util_format_get_blocksize(sv->format);
+            dest_u = (uint32_t *) dest;
+            dest_u[0] = tex->width0 / bytes_per_element;
+            dest_u[1] = dest_u[2] = dest_u[3] = 1;
+            dest += 4;
+
+            count++;
+         }
+      }
    }
+
+   return count;
 }
 
 
 /**
  * Emit any extra fragment shader constants into the buffer pointed
  * to by 'dest'.
- * In particular, these would be the scaling factors needed for handling
- * unnormalized texture coordinates for texture rectangles.
  * \return number of float[4] constants put into the dest buffer
  */
 static unsigned
 svga_get_extra_fs_constants(struct svga_context *svga, float *dest)
 {
    const struct svga_shader_variant *variant = svga->state.hw_draw.fs;
-   const struct svga_fs_compile_key *key = &variant->key.fkey;
    unsigned count = 0;
 
-   /* SVGA_NEW_VS_VARIANT
-    */
-   if (key->num_unnormalized_coords) {
-      unsigned i;
+   count += svga_get_extra_constants_common(svga, variant,
+                                            PIPE_SHADER_FRAGMENT, dest);
 
-      for (i = 0; i < key->num_textures; i++) {
-         if (key->tex[i].unnormalized) {
-            struct pipe_resource *tex = svga->curr.sampler_views[i]->texture;
+   assert(count <= MAX_EXTRA_CONSTS);
 
-            /* debug/sanity check */
-            assert(key->tex[i].width_height_idx == count);
+   return count;
+}
 
-            *dest++ = 1.0 / (float)tex->width0;
-            *dest++ = 1.0 / (float)tex->height0;
-            *dest++ = 1.0;
-            *dest++ = 1.0;
+/**
+ * Emit extra constants needed for prescale computation into the
+ * buffer pointed to by '*dest'. The updated buffer pointer
+ * will be returned in 'dest'.
+ */
+static unsigned
+svga_get_prescale_constants(struct svga_context *svga, float **dest)
+{
+   memcpy(*dest, svga->state.hw_clear.prescale.scale, 4 * sizeof(float));
+   *dest += 4;
 
-            count++;
-         }
-      }
-   }
+   memcpy(*dest, svga->state.hw_clear.prescale.translate, 4 * sizeof(float));
+   *dest += 4;
 
-   assert(count <= MAX_EXTRA_CONSTS);
+   return 2;
+}
 
-   return count;
+/**
+ * Emit extra constants needed for point sprite emulation.
+ */
+static unsigned
+svga_get_pt_sprite_constants(struct svga_context *svga, float **dest)
+{
+   struct svga_screen *screen = svga_screen(svga->pipe.screen);
+   float *dst = *dest;
+
+   dst[0] = 1.0 / (svga->curr.viewport.scale[0] * 2);
+   dst[1] = 1.0 / (svga->curr.viewport.scale[1] * 2);
+   dst[2] = svga->curr.rast->pointsize;
+   dst[3] = screen->maxPointSize;
+   *dest = *dest + 4;
+   return 1;
 }
 
+/**
+ * Emit user-defined clip plane coefficients into the buffer pointed to
+ * by '*dest'. The updated buffer pointer will be returned in 'dest'.
+ */
+static unsigned
+svga_get_clip_plane_constants(struct svga_context *svga,
+                              const struct svga_shader_variant *variant,
+                              float **dest)
+{
+   unsigned count = 0;
+
+   /* SVGA_NEW_CLIP */
+   if (svga_have_vgpu10(svga)) {
+      /* append user-defined clip plane coefficients onto constant buffer */
+      unsigned clip_planes = variant->key.clip_plane_enable;
+      while (clip_planes) {
+         int i = u_bit_scan(&clip_planes);
+         COPY_4V(*dest, svga->curr.clip.ucp[i]);
+         *dest += 4;
+         count += 1;
+      }
+   }
+   return count;
+}
 
 /**
  * Emit any extra vertex shader constants into the buffer pointed
@@ -124,26 +198,71 @@ static unsigned
 svga_get_extra_vs_constants(struct svga_context *svga, float *dest)
 {
    const struct svga_shader_variant *variant = svga->state.hw_draw.vs;
-   const struct svga_vs_compile_key *key = &variant->key.vkey;
    unsigned count = 0;
 
    /* SVGA_NEW_VS_VARIANT
     */
-   if (key->need_prescale) {
-      memcpy(dest, svga->state.hw_clear.prescale.scale, 4 * sizeof(float));
-      dest += 4;
+   if (variant->key.vs.need_prescale) {
+      count += svga_get_prescale_constants(svga, &dest);
+   }
 
-      memcpy(dest, svga->state.hw_clear.prescale.translate, 4 * sizeof(float));
+   if (variant->key.vs.undo_viewport) {
+      /* Used to convert window coords back to NDC coords */
+      dest[0] = 1.0f / svga->curr.viewport.scale[0];
+      dest[1] = 1.0f / svga->curr.viewport.scale[1];
+      dest[2] = -svga->curr.viewport.translate[0];
+      dest[3] = -svga->curr.viewport.translate[1];
       dest += 4;
-
-      count = 2;
+      count += 1;
    }
 
+   /* SVGA_NEW_CLIP */
+   count += svga_get_clip_plane_constants(svga, variant, &dest);
+
+   /* common constants */
+   count += svga_get_extra_constants_common(svga, variant,
+                                            PIPE_SHADER_VERTEX, dest);
+
    assert(count <= MAX_EXTRA_CONSTS);
 
    return count;
 }
 
+/**
+ * Emit any extra geometry shader constants into the buffer pointed
+ * to by 'dest'.
+ */
+static unsigned
+svga_get_extra_gs_constants(struct svga_context *svga, float *dest)
+{
+   const struct svga_shader_variant *variant = svga->state.hw_draw.gs;
+   unsigned count = 0;
+
+   /* SVGA_NEW_GS_VARIANT
+    */
+
+   /* Constants for point sprite
+    * These are used in the transformed gs that supports point sprite.
+    * They need to be added before the prescale constants.
+    */
+   if (variant->key.gs.wide_point) {
+      count += svga_get_pt_sprite_constants(svga, &dest);
+   }
+
+   if (variant->key.gs.need_prescale) {
+      count += svga_get_prescale_constants(svga, &dest);
+   }
+
+   /* SVGA_NEW_CLIP */
+   count += svga_get_clip_plane_constants(svga, variant, &dest);
+
+   /* common constants */
+   count += svga_get_extra_constants_common(svga, variant,
+                                            PIPE_SHADER_GEOMETRY, dest);
+
+   assert(count <= MAX_EXTRA_CONSTS);
+   return count;
+}
 
 /**
  * Check and emit one shader constant register.
@@ -159,6 +278,7 @@ emit_const(struct svga_context *svga, unsigned shader, unsigned i,
 
    assert(shader < PIPE_SHADER_TYPES);
    assert(i < SVGA3D_CONSTREG_MAX);
+   assert(!svga_have_vgpu10(svga));
 
    if (memcmp(svga->state.hw_draw.cb[shader][i], value,
               4 * sizeof(float)) != 0) {
@@ -202,6 +322,10 @@ emit_const_range(struct svga_context *svga,
    unsigned i, j;
    enum pipe_error ret;
 
+   assert(shader == PIPE_SHADER_VERTEX ||
+          shader == PIPE_SHADER_FRAGMENT);
+   assert(!svga_have_vgpu10(svga));
+
 #ifdef DEBUG
    if (offset + count > SVGA3D_CONSTREG_MAX) {
       debug_printf("svga: too many constants (offset %u + count %u = %u (max = %u))\n",
@@ -307,10 +431,12 @@ emit_const_range(struct svga_context *svga,
 
 /**
  * Emit all the constants in a constant buffer for a shader stage.
+ * On VGPU10, emit_consts_vgpu10 is used instead.
  */
 static enum pipe_error
-emit_consts(struct svga_context *svga, unsigned shader)
+emit_consts_vgpu9(struct svga_context *svga, unsigned shader)
 {
+   const struct pipe_constant_buffer *cbuf;
    struct svga_screen *ss = svga_screen(svga->pipe.screen);
    struct pipe_transfer *transfer = NULL;
    unsigned count;
@@ -320,53 +446,284 @@ emit_consts(struct svga_context *svga, unsigned shader)
    const unsigned offset = 0;
 
    assert(shader < PIPE_SHADER_TYPES);
+   assert(!svga_have_vgpu10(svga));
+   /* Only one constant buffer per shader is supported before VGPU10.
+    * This is only an approximate check against that.
+    */
+   assert(svga->curr.constbufs[shader][1].buffer == NULL);
 
-   if (svga->curr.cbufs[shader].buffer == NULL)
-      goto done;
+   cbuf = &svga->curr.constbufs[shader][0];
 
-   data = (const float (*)[4])pipe_buffer_map(&svga->pipe,
-                                              svga->curr.cbufs[shader].buffer,
-                                              PIPE_TRANSFER_READ,
-					      &transfer);
-   if (data == NULL) {
-      ret = PIPE_ERROR_OUT_OF_MEMORY;
-      goto done;
-   }
+   if (svga->curr.constbufs[shader][0].buffer) {
+      /* emit user-provided constants */
+      data = (const float (*)[4])
+         pipe_buffer_map(&svga->pipe, svga->curr.constbufs[shader][0].buffer,
+                         PIPE_TRANSFER_READ, &transfer);
+      if (data == NULL) {
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
 
-   /* sanity check */
-   assert(svga->curr.cbufs[shader].buffer->width0 >=
-          svga->curr.cbufs[shader].buffer_size);
+      /* sanity check */
+      assert(cbuf->buffer->width0 >=
+             cbuf->buffer_size);
 
-   /* Use/apply the constant buffer size and offsets here */
-   count = svga->curr.cbufs[shader].buffer_size / (4 * sizeof(float));
-   data += svga->curr.cbufs[shader].buffer_offset / (4 * sizeof(float));
+      /* Use/apply the constant buffer size and offsets here */
+      count = cbuf->buffer_size / (4 * sizeof(float));
+      data += cbuf->buffer_offset / (4 * sizeof(float));
+
+      if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
+         ret = emit_const_range( svga, shader, offset, count, data );
+      }
+      else {
+         for (i = 0; i < count; i++) {
+            ret = emit_const( svga, shader, offset + i, data[i] );
+            if (ret != PIPE_OK) {
+               break;
+            }
+         }
+      }
+
+      pipe_buffer_unmap(&svga->pipe, transfer);
 
-   if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
-      ret = emit_const_range( svga, shader, offset, count, data );
       if (ret != PIPE_OK) {
-         goto done;
+         return ret;
       }
-   } else {
-      for (i = 0; i < count; i++) {
-         ret = emit_const( svga, shader, offset + i, data[i] );
-         if (ret != PIPE_OK) {
-            goto done;
+   }
+
+   /* emit extra shader constants */
+   {
+      const struct svga_shader_variant *variant = NULL;
+      unsigned offset;
+      float extras[MAX_EXTRA_CONSTS][4];
+      unsigned count, i;
+
+      switch (shader) {
+      case PIPE_SHADER_VERTEX:
+         variant = svga->state.hw_draw.vs;
+         count = svga_get_extra_vs_constants(svga, (float *) extras);
+         break;
+      case PIPE_SHADER_FRAGMENT:
+         variant = svga->state.hw_draw.fs;
+         count = svga_get_extra_fs_constants(svga, (float *) extras);
+         break;
+      default:
+         assert(!"Unexpected shader type");
+         count = 0;
+      }
+
+      assert(variant);
+      offset = variant->shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
+      assert(count <= Elements(extras));
+
+      if (count > 0) {
+         if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
+            ret = emit_const_range(svga, shader, offset, count,
+                                   (const float (*) [4])extras);
+         }
+         else {
+            for (i = 0; i < count; i++) {
+               ret = emit_const(svga, shader, offset + i, extras[i]);
+               if (ret != PIPE_OK)
+                  return ret;
+            }
          }
       }
    }
 
-done:
-   if (data)
-      pipe_buffer_unmap(&svga->pipe, transfer);
+   return ret;
+}
+
+
+
+static enum pipe_error
+emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader)
+{
+   const struct pipe_constant_buffer *cbuf;
+   struct pipe_resource *dst_buffer = NULL;
+   enum pipe_error ret = PIPE_OK;
+   struct pipe_transfer *src_transfer;
+   struct svga_winsys_surface *dst_handle;
+   float extras[MAX_EXTRA_CONSTS][4];
+   unsigned extra_count, extra_size, extra_offset;
+   unsigned new_buf_size;
+   void *src_map = NULL, *dst_map;
+   unsigned offset;
+   const struct svga_shader_variant *variant;
+
+   assert(shader == PIPE_SHADER_VERTEX ||
+          shader == PIPE_SHADER_GEOMETRY ||
+          shader == PIPE_SHADER_FRAGMENT);
+
+   cbuf = &svga->curr.constbufs[shader][0];
+
+   switch (shader) {
+   case PIPE_SHADER_VERTEX:
+      variant = svga->state.hw_draw.vs;
+      extra_count = svga_get_extra_vs_constants(svga, (float *) extras);
+      break;
+   case PIPE_SHADER_FRAGMENT:
+      variant = svga->state.hw_draw.fs;
+      extra_count = svga_get_extra_fs_constants(svga, (float *) extras);
+      break;
+   case PIPE_SHADER_GEOMETRY:
+      variant = svga->state.hw_draw.gs;
+      extra_count = svga_get_extra_gs_constants(svga, (float *) extras);
+      break;
+   default:
+      assert(!"Unexpected shader type");
+      /* Don't return an error code since we don't want to keep re-trying
+       * this function and getting stuck in an infinite loop.
+       */
+      return PIPE_OK;
+   }
+
+   assert(variant);
+
+   /* Compute extra constants size and offset in bytes */
+   extra_size = extra_count * 4 * sizeof(float);
+   extra_offset = 4 * sizeof(float) * variant->extra_const_start;
+
+   if (cbuf->buffer_size + extra_size == 0)
+      return PIPE_OK;  /* nothing to do */
+
+   /* Typically, the cbuf->buffer here is a user-space buffer so mapping
+    * it is really cheap.  If we ever get real HW buffers for constants
+    * we should avoid mapping and instead use a ResourceCopy command.
+    */
+   if (cbuf->buffer_size > 0) {
+      src_map = pipe_buffer_map_range(&svga->pipe, cbuf->buffer,
+                                      cbuf->buffer_offset, cbuf->buffer_size,
+                                      PIPE_TRANSFER_READ, &src_transfer);
+      assert(src_map);
+      if (!src_map) {
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+   }
+
+   /* The new/dest buffer's size must be large enough to hold the original,
+    * user-specified constants, plus the extra constants.
+    * The size of the original constant buffer _should_ agree with what the
+    * shader is expecting, but it might not (it's not enforced anywhere by
+    * gallium).
+    */
+   new_buf_size = MAX2(cbuf->buffer_size, extra_offset) + extra_size;
+
+   /* According to the DX10 spec, the constant buffer size must be
+    * in multiples of 16.
+    */
+   new_buf_size = align(new_buf_size, 16);
+
+   ret = u_upload_alloc(svga->const0_upload, 0, new_buf_size, &offset,
+                        &dst_buffer, &dst_map);
+   if (ret != PIPE_OK || !dst_map) {
+      if (src_map)
+         pipe_buffer_unmap(&svga->pipe, src_transfer);
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   if (src_map) {
+      memcpy(dst_map, src_map, cbuf->buffer_size);
+      pipe_buffer_unmap(&svga->pipe, src_transfer);
+   }
+
+   if (extra_size) {
+      assert(extra_offset + extra_size <= new_buf_size);
+      memcpy((char *) dst_map + extra_offset, extras, extra_size);
+   }
+   u_upload_unmap(svga->const0_upload);
+
+   /* Issue the SetSingleConstantBuffer command */
+   dst_handle = svga_buffer_handle(svga, dst_buffer);
+   if (!dst_handle) {
+      pipe_resource_reference(&dst_buffer, NULL);
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   assert(new_buf_size % 16 == 0);
+   ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
+                                               0, /* index */
+                                               svga_shader_type(shader),
+                                               dst_handle,
+                                               offset,
+                                               new_buf_size);
+
+   if (ret != PIPE_OK) {
+      pipe_resource_reference(&dst_buffer, NULL);
+      return ret;
+   }
+
+   /* Save this const buffer until it's replaced in the future.
+    * Otherwise, all references to the buffer will go away after the
+    * command buffer is submitted, it'll get recycled and we will have
+    * incorrect constant buffer bindings.
+    */
+   pipe_resource_reference(&svga->state.hw_draw.constbuf[shader], dst_buffer);
+
+   svga->state.hw_draw.default_constbuf_size[shader] = new_buf_size;
+
+   pipe_resource_reference(&dst_buffer, NULL);
 
    return ret;
 }
 
 
+static enum pipe_error
+emit_consts_vgpu10(struct svga_context *svga, unsigned shader)
+{
+   enum pipe_error ret;
+   unsigned dirty_constbufs;
+   unsigned enabled_constbufs;
+
+   /* Emit 0th constant buffer (with extra constants) */
+   ret = emit_constbuf_vgpu10(svga, shader);
+   if (ret != PIPE_OK) {
+      return ret;
+   }
+
+   enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] | 1u;
+
+   /* Emit other constant buffers (UBOs) */
+   dirty_constbufs = svga->state.dirty_constbufs[shader] & ~1u;
+
+   while (dirty_constbufs) {
+      unsigned index = u_bit_scan(&dirty_constbufs);
+      unsigned offset = svga->curr.constbufs[shader][index].buffer_offset;
+      unsigned size = svga->curr.constbufs[shader][index].buffer_size;
+      struct svga_buffer *buffer =
+         svga_buffer(svga->curr.constbufs[shader][index].buffer);
+      struct svga_winsys_surface *handle;
+
+      if (buffer) {
+         handle = svga_buffer_handle(svga, &buffer->b.b);
+         enabled_constbufs |= 1 << index;
+      }
+      else {
+         handle = NULL;
+         enabled_constbufs &= ~(1 << index);
+         assert(offset == 0);
+         assert(size == 0);
+      }
+
+      assert(size % 16 == 0);
+      ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
+                                                  index,
+                                                  svga_shader_type(shader),
+                                                  handle,
+                                                  offset,
+                                                  size);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+
+   svga->state.hw_draw.enabled_constbufs[shader] = enabled_constbufs;
+   svga->state.dirty_constbufs[shader] = 0;
+
+   return ret;
+}
+
 static enum pipe_error
 emit_fs_consts(struct svga_context *svga, unsigned dirty)
 {
-   struct svga_screen *ss = svga_screen(svga->pipe.screen);
    const struct svga_shader_variant *variant = svga->state.hw_draw.fs;
    enum pipe_error ret = PIPE_OK;
 
@@ -377,28 +734,11 @@ emit_fs_consts(struct svga_context *svga, unsigned dirty)
 
    /* SVGA_NEW_FS_CONST_BUFFER
     */
-   ret = emit_consts( svga, PIPE_SHADER_FRAGMENT );
-   if (ret != PIPE_OK)
-      return ret;
-
-   /* emit extra shader constants */
-   {
-      unsigned offset = variant->shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
-      float extras[MAX_EXTRA_CONSTS][4];
-      unsigned count, i;
-
-      count = svga_get_extra_fs_constants(svga, (float *) extras);
-
-      if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
-         ret = emit_const_range(svga, PIPE_SHADER_FRAGMENT, offset, count,
-                                (const float (*) [4])extras);
-      } else {
-         for (i = 0; i < count; i++) {
-            ret = emit_const(svga, PIPE_SHADER_FRAGMENT, offset + i, extras[i]);
-            if (ret != PIPE_OK)
-               return ret;
-         }
-      }
+   if (svga_have_vgpu10(svga)) {
+      ret = emit_consts_vgpu10(svga, PIPE_SHADER_FRAGMENT);
+   }
+   else {
+      ret = emit_consts_vgpu9(svga, PIPE_SHADER_FRAGMENT);
    }
 
    return ret;
@@ -419,7 +759,6 @@ struct svga_tracked_state svga_hw_fs_constants =
 static enum pipe_error
 emit_vs_consts(struct svga_context *svga, unsigned dirty)
 {
-   struct svga_screen *ss = svga_screen(svga->pipe.screen);
    const struct svga_shader_variant *variant = svga->state.hw_draw.vs;
    enum pipe_error ret = PIPE_OK;
 
@@ -430,29 +769,11 @@ emit_vs_consts(struct svga_context *svga, unsigned dirty)
 
    /* SVGA_NEW_VS_CONST_BUFFER
     */
-   ret = emit_consts( svga, PIPE_SHADER_VERTEX );
-   if (ret != PIPE_OK)
-      return ret;
-
-   /* emit extra shader constants */
-   {
-      unsigned offset = variant->shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
-      float extras[MAX_EXTRA_CONSTS][4];
-      unsigned count, i;
-
-      count = svga_get_extra_vs_constants(svga, (float *) extras);
-      assert(count <= Elements(extras));
-
-      if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
-         ret = emit_const_range(svga, PIPE_SHADER_VERTEX, offset, count,
-                                (const float (*) [4]) extras);
-      } else {
-         for (i = 0; i < count; i++) {
-            ret = emit_const(svga, PIPE_SHADER_VERTEX, offset + i, extras[i]);
-            if (ret != PIPE_OK)
-               return ret;
-         }
-      }
+   if (svga_have_vgpu10(svga)) {
+      ret = emit_consts_vgpu10(svga, PIPE_SHADER_VERTEX);
+   }
+   else {
+      ret = emit_consts_vgpu9(svga, PIPE_SHADER_VERTEX);
    }
 
    return ret;
@@ -467,3 +788,42 @@ struct svga_tracked_state svga_hw_vs_constants =
     SVGA_NEW_VS_VARIANT),
    emit_vs_consts
 };
+
+
+static enum pipe_error
+emit_gs_consts(struct svga_context *svga, unsigned dirty)
+{
+   const struct svga_shader_variant *variant = svga->state.hw_draw.gs;
+   enum pipe_error ret = PIPE_OK;
+
+   /* SVGA_NEW_GS_VARIANT
+    */
+   if (variant == NULL)
+      return PIPE_OK;
+
+   /* SVGA_NEW_GS_CONST_BUFFER
+    */
+   if (svga_have_vgpu10(svga)) {
+      /**
+       * If only the rasterizer state has changed and the current geometry
+       * shader does not emit wide points, then there is no reason to
+       * re-emit the GS constants, so skip it.
+       */
+      if (dirty == SVGA_NEW_RAST && !variant->key.gs.wide_point)
+         return PIPE_OK;
+
+      ret = emit_consts_vgpu10(svga, PIPE_SHADER_GEOMETRY);
+   }
+
+   return ret;
+}
+
+
+struct svga_tracked_state svga_hw_gs_constants =
+{
+   "hw gs params",
+   (SVGA_NEW_GS_CONST_BUFFER |
+    SVGA_NEW_RAST |
+    SVGA_NEW_GS_VARIANT),
+   emit_gs_consts
+};
diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c
index 1c174da3130..9abacc9fa20 100644
--- a/src/gallium/drivers/svga/svga_state_framebuffer.c
+++ b/src/gallium/drivers/svga/svga_state_framebuffer.c
@@ -26,12 +26,14 @@
 #include "util/u_inlines.h"
 #include "pipe/p_defines.h"
 #include "util/u_math.h"
+#include "util/u_format.h"
 
 #include "svga_context.h"
 #include "svga_state.h"
 #include "svga_cmd.h"
 #include "svga_debug.h"
 #include "svga_screen.h"
+#include "svga_surface.h"
 
 
 /*
@@ -46,30 +48,26 @@
 #define MAX_RT_PER_BATCH 8
 
 
-/***********************************************************************
- * Hardware state update
- */
-
 
 static enum pipe_error
-emit_framebuffer( struct svga_context *svga,
-                  unsigned dirty )
+emit_fb_vgpu9(struct svga_context *svga)
 {
    struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
    const struct pipe_framebuffer_state *curr = &svga->curr.framebuffer;
    struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
-   boolean reemit = svga->rebind.rendertargets;
+   boolean reemit = svga->rebind.flags.rendertargets;
    unsigned i;
    enum pipe_error ret;
 
+   assert(!svga_have_vgpu10(svga));
+
    /*
     * We need to reemit non-null surface bindings, even when they are not
     * dirty, to ensure that the resources are paged in.
     */
 
    for (i = 0; i < svgascreen->max_color_buffers; i++) {
-      if (curr->cbufs[i] != hw->cbufs[i] ||
-          (reemit && hw->cbufs[i])) {
+      if ((curr->cbufs[i] != hw->cbufs[i]) || (reemit && hw->cbufs[i])) {
          if (svga->curr.nr_fbs++ > MAX_RT_PER_BATCH)
             return PIPE_ERROR_OUT_OF_MEMORY;
 
@@ -82,14 +80,13 @@ emit_framebuffer( struct svga_context *svga,
       }
    }
 
-   if (curr->zsbuf != hw->zsbuf ||
-       (reemit && hw->zsbuf)) {
+   if ((curr->zsbuf != hw->zsbuf) || (reemit && hw->zsbuf)) {
       ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_DEPTH, curr->zsbuf);
       if (ret != PIPE_OK)
          return ret;
 
       if (curr->zsbuf &&
-          curr->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) {
+          util_format_is_depth_and_stencil(curr->zsbuf->format)) {
          ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL,
                                       curr->zsbuf);
          if (ret != PIPE_OK)
@@ -104,8 +101,6 @@ emit_framebuffer( struct svga_context *svga,
       pipe_surface_reference(&hw->zsbuf, curr->zsbuf);
    }
 
-   svga->rebind.rendertargets = FALSE;
-
    return PIPE_OK;
 }
 
@@ -118,15 +113,15 @@ emit_framebuffer( struct svga_context *svga,
  * Called at the beginning of every new command buffer to ensure that
  * non-dirty rendertargets are properly paged-in.
  */
-enum pipe_error
-svga_reemit_framebuffer_bindings(struct svga_context *svga)
+static enum pipe_error
+svga_reemit_framebuffer_bindings_vgpu9(struct svga_context *svga)
 {
    struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
    struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
    unsigned i;
    enum pipe_error ret;
 
-   assert(svga->rebind.rendertargets);
+   assert(!svga_have_vgpu10(svga));
 
    for (i = 0; i < svgascreen->max_color_buffers; i++) {
       if (hw->cbufs[i]) {
@@ -145,7 +140,7 @@ svga_reemit_framebuffer_bindings(struct svga_context *svga)
       }
 
       if (hw->zsbuf &&
-          hw->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) {
+          util_format_is_depth_and_stencil(hw->zsbuf->format)) {
          ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, hw->zsbuf);
          if (ret != PIPE_OK) {
             return ret;
@@ -159,7 +154,161 @@ svga_reemit_framebuffer_bindings(struct svga_context *svga)
       }
    }
 
-   svga->rebind.rendertargets = FALSE;
+   return PIPE_OK;
+}
+
+
+
+static enum pipe_error
+emit_fb_vgpu10(struct svga_context *svga)
+{
+   const struct svga_screen *ss = svga_screen(svga->pipe.screen);
+   struct pipe_surface *rtv[SVGA3D_MAX_RENDER_TARGETS];
+   struct pipe_surface *dsv;
+   struct pipe_framebuffer_state *curr = &svga->curr.framebuffer;
+   struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
+   const unsigned num_color = MAX2(curr->nr_cbufs, hw->nr_cbufs);
+   unsigned i;
+   enum pipe_error ret;
+
+   assert(svga_have_vgpu10(svga));
+
+   /* Setup render targets array.  Note that we loop over the max of the
+    * number of previously bound buffers and the new buffers to unbind
+    * any previously bound buffers when the new number of buffers is less
+    * than the old number of buffers.
+    */
+   for (i = 0; i < num_color; i++) {
+      if (curr->cbufs[i]) {
+         rtv[i] = svga_validate_surface_view(svga,
+                                             svga_surface(curr->cbufs[i]));
+         if (rtv[i] == NULL) {
+            return PIPE_ERROR_OUT_OF_MEMORY;
+         }
+
+         assert(svga_surface(rtv[i])->view_id != SVGA3D_INVALID_ID);
+      }
+      else {
+         rtv[i] = NULL;
+      }
+   }
+
+   /* Setup depth stencil view */
+   if (curr->zsbuf) {
+      dsv = svga_validate_surface_view(svga, svga_surface(curr->zsbuf));
+      if (dsv == NULL) {
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+   }
+   else {
+      dsv = NULL;
+   }
+
+   ret = SVGA3D_vgpu10_SetRenderTargets(svga->swc, num_color, rtv, dsv);
+   if (ret != PIPE_OK)
+      return ret;
+
+   for (i = 0; i < ss->max_color_buffers; i++) {
+      if (hw->cbufs[i] != curr->cbufs[i]) {
+         /* propagate the backed view surface before unbinding it */
+         if (hw->cbufs[i] && svga_surface(hw->cbufs[i])->backed) {
+            svga_propagate_surface(svga,
+                                   &svga_surface(hw->cbufs[i])->backed->base);
+         }
+         pipe_surface_reference(&hw->cbufs[i], curr->cbufs[i]);
+      }
+   }
+   hw->nr_cbufs = curr->nr_cbufs;
+
+   if (hw->zsbuf != curr->zsbuf) {
+      /* propagate the backed view surface before unbinding it */
+      if (hw->zsbuf && svga_surface(hw->zsbuf)->backed) {
+         svga_propagate_surface(svga, &svga_surface(hw->zsbuf)->backed->base);
+      }
+      pipe_surface_reference(&hw->zsbuf, curr->zsbuf);
+   }
+
+   return ret;
+}
+
+
+static enum pipe_error
+emit_framebuffer(struct svga_context *svga, unsigned dirty)
+{
+   if (svga_have_vgpu10(svga)) {
+      return emit_fb_vgpu10(svga);
+   }
+   else {
+      return emit_fb_vgpu9(svga);
+   }
+}
+
+
+/*
+ * Rebind rendertargets.
+ *
+ * Similar to emit_framebuffer, but without any state checking/update.
+ *
+ * Called at the beginning of every new command buffer to ensure that
+ * non-dirty rendertargets are properly paged-in.
+ */
+enum pipe_error
+svga_reemit_framebuffer_bindings(struct svga_context *svga)
+{
+   enum pipe_error ret;
+
+   assert(svga->rebind.flags.rendertargets);
+
+   if (svga_have_vgpu10(svga)) {
+      ret = emit_fb_vgpu10(svga);
+   }
+   else {
+      ret = svga_reemit_framebuffer_bindings_vgpu9(svga);
+   }
+
+   svga->rebind.flags.rendertargets = FALSE;
+
+   return ret;
+}
+
+
+/*
+ * Send a private allocation command to page in the rendertarget resources.
+ */
+enum pipe_error
+svga_rebind_framebuffer_bindings(struct svga_context *svga)
+{
+   const struct svga_screen *ss = svga_screen(svga->pipe.screen);
+   struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
+   unsigned i;
+   enum pipe_error ret;
+
+   assert(svga_have_vgpu10(svga));
+
+   if (!svga->rebind.flags.rendertargets)
+      return PIPE_OK;
+
+   for (i = 0; i < ss->max_color_buffers; i++) {
+      if (hw->cbufs[i]) {
+         ret = svga->swc->resource_rebind(svga->swc,
+                                          svga_surface(hw->cbufs[i])->handle,
+                                          NULL,
+                                          SVGA_RELOC_WRITE);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+   }
+
+   if (hw->zsbuf) {
+      ret = svga->swc->resource_rebind(svga->swc,
+                                       svga_surface(hw->zsbuf)->handle,
+                                       NULL,
+                                       SVGA_RELOC_WRITE);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+
+   svga->rebind.flags.rendertargets = 0;
 
    return PIPE_OK;
 }
@@ -202,6 +351,7 @@ emit_viewport( struct svga_context *svga,
    float fy = flip * viewport->scale[1] * -1.0f + viewport->translate[1];
    float fw =        viewport->scale[0] * 2.0f;
    float fh = flip * viewport->scale[1] * 2.0f;
+   boolean emit_vgpu10_viewport = FALSE;
 
    memset( &prescale, 0, sizeof(prescale) );
 
@@ -225,7 +375,16 @@ emit_viewport( struct svga_context *svga,
    prescale.translate[1] = 0;
    prescale.translate[2] = 0;
    prescale.translate[3] = 0;
-   prescale.enabled = TRUE;
+
+   /* Enable prescale to adjust vertex positions to match
+    * VGPU10 convention only if rasterization is enabled.
+    */
+   if (svga->curr.rast->templ.rasterizer_discard) {
+      degenerate = TRUE;
+      goto out;
+   } else {
+      prescale.enabled = TRUE;
+   }
 
    if (fw < 0) {
       prescale.scale[0] *= -1.0f;
@@ -235,7 +394,14 @@ emit_viewport( struct svga_context *svga,
    }
 
    if (fh < 0.0) {
-      prescale.translate[1] = fh - 1.0f + fy * 2.0f;
+      if (svga_have_vgpu10(svga)) {
+         /* floating point viewport params below */
+         prescale.translate[1] = fh + fy * 2.0f;
+      }
+      else {
+         /* integer viewport params below */
+         prescale.translate[1] = fh - 1.0f + fy * 2.0f;
+      }
       fh = -fh;
       fy -= fh;
       prescale.scale[1] = -1.0f;
@@ -321,19 +487,31 @@ emit_viewport( struct svga_context *svga,
       float adjust_x = 0.0;
       float adjust_y = 0.0;
 
-      switch (svga->curr.reduced_prim) {
-      case PIPE_PRIM_POINTS:
-         adjust_x = -0.375;
-         adjust_y = -0.75;
-         break;
-      case PIPE_PRIM_LINES:
-         adjust_x = -0.5;
-         adjust_y = 0;
-         break;
-      case PIPE_PRIM_TRIANGLES:
-         adjust_x = -0.5;
-         adjust_y = -0.5;
-         break;
+      if (svga_have_vgpu10(svga)) {
+         /* Normally, we don't have to do any sub-pixel coordinate
+          * adjustments for VGPU10.  But when we draw wide points with
+          * a GS we need an X adjustment in order to be conformant.
+          */
+         if (svga->curr.reduced_prim == PIPE_PRIM_POINTS &&
+             svga->curr.rast->pointsize > 1.0f) {
+            adjust_x = 0.5;
+         }
+      }
+      else {
+         switch (svga->curr.reduced_prim) {
+         case PIPE_PRIM_POINTS:
+            adjust_x = -0.375;
+            adjust_y = -0.75;
+            break;
+         case PIPE_PRIM_LINES:
+            adjust_x = -0.5;
+            adjust_y = 0;
+            break;
+         case PIPE_PRIM_TRIANGLES:
+            adjust_x = -0.5;
+            adjust_y = -0.5;
+            break;
+         }
       }
 
       if (invertY)
@@ -360,6 +538,17 @@ emit_viewport( struct svga_context *svga,
       prescale.scale[2] = -prescale.scale[2];
    }
 
+   /* If zmin is less than 0, clamp zmin to 0 and adjust the prescale.
+    * zmin can be set to -1 when viewport->scale[2] is set to 1 and
+    * viewport->translate[2] is set to 0 in the blit code.
+    */
+   if (range_min < 0.0f) {
+      range_min = -0.5f * viewport->scale[2] + 0.5f + viewport->translate[2];
+      range_max = 0.5f * viewport->scale[2] + 0.5f + viewport->translate[2];
+      prescale.scale[2] *= 2.0f;
+      prescale.translate[2] -= 0.5f;
+   }
+
    if (prescale.enabled) {
       float H[2];
       float J[2];
@@ -428,21 +617,49 @@ out:
       prescale.enabled = FALSE;
    }
 
-   if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) {
-      ret = SVGA3D_SetViewport(svga->swc, &rect);
-      if(ret != PIPE_OK)
-         return ret;
+   if (!svga_rects_equal(&rect, &svga->state.hw_clear.viewport)) {
+      if (svga_have_vgpu10(svga)) {
+         emit_vgpu10_viewport = TRUE;
+      }
+      else {
+         ret = SVGA3D_SetViewport(svga->swc, &rect);
+         if (ret != PIPE_OK)
+            return ret;
 
-      memcpy(&svga->state.hw_clear.viewport, &rect, sizeof(rect));
-      assert(sizeof(rect) == sizeof(svga->state.hw_clear.viewport));
+         svga->state.hw_clear.viewport = rect;
+      }
    }
 
    if (svga->state.hw_clear.depthrange.zmin != range_min ||
-       svga->state.hw_clear.depthrange.zmax != range_max) {
-      ret = SVGA3D_SetZRange(svga->swc, range_min, range_max );
-      if(ret != PIPE_OK)
+       svga->state.hw_clear.depthrange.zmax != range_max)
+   {
+      if (svga_have_vgpu10(svga)) {
+         emit_vgpu10_viewport = TRUE;
+      }
+      else {
+         ret = SVGA3D_SetZRange(svga->swc, range_min, range_max );
+         if (ret != PIPE_OK)
+            return ret;
+
+         svga->state.hw_clear.depthrange.zmin = range_min;
+         svga->state.hw_clear.depthrange.zmax = range_max;
+      }
+   }
+
+   if (emit_vgpu10_viewport) {
+      SVGA3dViewport vp;
+      vp.x = (float) rect.x;
+      vp.y = (float) rect.y;
+      vp.width = (float) rect.w;
+      vp.height = (float) rect.h;
+      vp.minDepth = range_min;
+      vp.maxDepth = range_max;
+      ret = SVGA3D_vgpu10_SetViewports(svga->swc, 1, &vp);
+      if (ret != PIPE_OK)
          return ret;
 
+      svga->state.hw_clear.viewport = rect;
+
       svga->state.hw_clear.depthrange.zmin = range_min;
       svga->state.hw_clear.depthrange.zmax = range_max;
    }
@@ -475,14 +692,27 @@ emit_scissor_rect( struct svga_context *svga,
                    unsigned dirty )
 {
    const struct pipe_scissor_state *scissor = &svga->curr.scissor;
-   SVGA3dRect rect;
 
-   rect.x = scissor->minx;
-   rect.y = scissor->miny;
-   rect.w = scissor->maxx - scissor->minx; /* + 1 ?? */
-   rect.h = scissor->maxy - scissor->miny; /* + 1 ?? */
+   if (svga_have_vgpu10(svga)) {
+      SVGASignedRect rect;
+
+      rect.left = scissor->minx;
+      rect.top = scissor->miny;
+      rect.right = scissor->maxx;
+      rect.bottom = scissor->maxy;
+
+      return SVGA3D_vgpu10_SetScissorRects(svga->swc, 1, &rect);
+   }
+   else {
+      SVGA3dRect rect;
 
-   return SVGA3D_SetScissorRect(svga->swc, &rect);
+      rect.x = scissor->minx;
+      rect.y = scissor->miny;
+      rect.w = scissor->maxx - scissor->minx; /* + 1 ?? */
+      rect.h = scissor->maxy - scissor->miny; /* + 1 ?? */
+
+      return SVGA3D_SetScissorRect(svga->swc, &rect);
+   }
 }
 
 
@@ -527,9 +757,15 @@ emit_clip_planes( struct svga_context *svga,
       plane[2] = 2.0f * c;
       plane[3] = d - c;
 
-      ret = SVGA3D_SetClipPlane(svga->swc, i, plane);
-      if(ret != PIPE_OK)
-         return ret;
+      if (svga_have_vgpu10(svga)) {
+         //debug_printf("XXX emit DX10 clip plane\n");
+         ret = PIPE_OK;
+      }
+      else {
+         ret = SVGA3D_SetClipPlane(svga->swc, i, plane);
+         if (ret != PIPE_OK)
+            return ret;
+      }
    }
 
    return PIPE_OK;
diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
index 8cdce742b3b..849319d3cf3 100644
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -36,43 +36,12 @@
 #include "svga_shader.h"
 #include "svga_resource_texture.h"
 #include "svga_tgsi.h"
+#include "svga_format.h"
 
 #include "svga_hw_reg.h"
 
 
-static inline int
-compare_fs_keys(const struct svga_fs_compile_key *a,
-                const struct svga_fs_compile_key *b)
-{
-   unsigned keysize_a = svga_fs_key_size( a );
-   unsigned keysize_b = svga_fs_key_size( b );
-
-   if (keysize_a != keysize_b) {
-      return (int)(keysize_a - keysize_b);
-   }
-   return memcmp( a, b, keysize_a );
-}
-
-
-/** Search for a fragment shader variant */
-static struct svga_shader_variant *
-search_fs_key(const struct svga_fragment_shader *fs,
-              const struct svga_fs_compile_key *key)
-{
-   struct svga_shader_variant *variant = fs->base.variants;
-
-   assert(key);
-
-   for ( ; variant; variant = variant->next) {
-      if (compare_fs_keys( key, &variant->key.fkey ) == 0)
-         return variant;
-   }
-   
-   return NULL;
-}
-
-
 /**
  * If we fail to compile a fragment shader (because it uses too many
  * registers, for example) we'll use a dummy/fallback shader that
@@ -111,13 +80,29 @@ get_dummy_fragment_shader(void)
 }
 
 
+static struct svga_shader_variant *
+translate_fragment_program(struct svga_context *svga,
+                           const struct svga_fragment_shader *fs,
+                           const struct svga_compile_key *key)
+{
+   if (svga_have_vgpu10(svga)) {
+      return svga_tgsi_vgpu10_translate(svga, &fs->base, key,
+                                        PIPE_SHADER_FRAGMENT);
+   }
+   else {
+      return svga_tgsi_vgpu9_translate(&fs->base, key, PIPE_SHADER_FRAGMENT);
+   }
+}
+
+
 /**
  * Replace the given shader's instruction with a simple constant-color
  * shader.  We use this when normal shader translation fails.
  */
 static struct svga_shader_variant *
-get_compiled_dummy_shader(struct svga_fragment_shader *fs,
-                          const struct svga_fs_compile_key *key)
+get_compiled_dummy_shader(struct svga_context *svga,
+                          struct svga_fragment_shader *fs,
+                          const struct svga_compile_key *key)
 {
    const struct tgsi_token *dummy = get_dummy_fragment_shader();
    struct svga_shader_variant *variant;
@@ -129,7 +114,7 @@ get_compiled_dummy_shader(struct svga_fragment_shader *fs,
    FREE((void *) fs->base.tokens);
    fs->base.tokens = dummy;
 
-   variant = svga_translate_fragment_program(fs, key);
+   variant = translate_fragment_program(svga, fs, key);
    return variant;
 }
 
@@ -140,17 +125,17 @@ get_compiled_dummy_shader(struct svga_fragment_shader *fs,
 static enum pipe_error
 compile_fs(struct svga_context *svga,
            struct svga_fragment_shader *fs,
-           const struct svga_fs_compile_key *key,
+           const struct svga_compile_key *key,
            struct svga_shader_variant **out_variant)
 {
    struct svga_shader_variant *variant;
    enum pipe_error ret = PIPE_ERROR;
 
-   variant = svga_translate_fragment_program( fs, key );
+   variant = translate_fragment_program(svga, fs, key);
    if (variant == NULL) {
       debug_printf("Failed to compile fragment shader,"
                    " using dummy shader instead.\n");
-      variant = get_compiled_dummy_shader(fs, key);
+      variant = get_compiled_dummy_shader(svga, fs, key);
       if (!variant) {
          ret = PIPE_ERROR;
          goto fail;
@@ -159,10 +144,11 @@ compile_fs(struct svga_context *svga,
 
    if (svga_shader_too_large(svga, variant)) {
       /* too big, use dummy shader */
-      debug_printf("Shader too large (%lu bytes),"
+      debug_printf("Shader too large (%u bytes),"
                    " using dummy shader instead.\n",
-                   (unsigned long ) variant->nr_tokens * sizeof(variant->tokens[0]));
-      variant = get_compiled_dummy_shader(fs, key);
+                   (unsigned) (variant->nr_tokens
+                               * sizeof(variant->tokens[0])));
+      variant = get_compiled_dummy_shader(svga, fs, key);
       if (!variant) {
          ret = PIPE_ERROR;
          goto fail;
@@ -197,23 +183,45 @@ fail:
 static enum pipe_error
 make_fs_key(const struct svga_context *svga,
             struct svga_fragment_shader *fs,
-            struct svga_fs_compile_key *key)
+            struct svga_compile_key *key)
 {
+   const unsigned shader = PIPE_SHADER_FRAGMENT;
    unsigned i;
-   int idx = 0;
 
    memset(key, 0, sizeof *key);
 
+   memcpy(key->generic_remap_table, fs->generic_remap_table,
+          sizeof(fs->generic_remap_table));
+
+   /* SVGA_NEW_GS, SVGA_NEW_VS
+    */
+   if (svga->curr.gs) {
+      key->fs.gs_generic_outputs = svga->curr.gs->generic_outputs;
+   } else {
+      key->fs.vs_generic_outputs = svga->curr.vs->generic_outputs;
+   }
+
    /* Only need fragment shader fixup for twoside lighting if doing
     * hwtnl.  Otherwise the draw module does the whole job for us.
     *
     * SVGA_NEW_SWTNL
     */
    if (!svga->state.sw.need_swtnl) {
-      /* SVGA_NEW_RAST
+      /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE
        */
-      key->light_twoside = svga->curr.rast->templ.light_twoside;
-      key->front_ccw = svga->curr.rast->templ.front_ccw;
+      key->fs.light_twoside = svga->curr.rast->templ.light_twoside;
+      key->fs.front_ccw = svga->curr.rast->templ.front_ccw;
+      key->fs.pstipple = (svga->curr.rast->templ.poly_stipple_enable &&
+                          svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES);
+      key->fs.aa_point = (svga->curr.rast->templ.point_smooth &&
+                          svga->curr.reduced_prim == PIPE_PRIM_POINTS &&
+                          (svga->curr.rast->pointsize > 1.0 ||
+                           svga->curr.vs->base.info.writes_psize));
+      if (key->fs.aa_point) {
+         assert(svga->curr.gs != NULL);
+         assert(svga->curr.gs->aa_point_coord_index != -1);
+         key->fs.aa_point_coord_index = svga->curr.gs->aa_point_coord_index;
+      }
    }
 
    /* The blend workaround for simulating logicop xor behaviour
@@ -231,7 +239,7 @@ make_fs_key(const struct svga_context *svga,
     * SVGA_NEW_BLEND
     */
    if (svga->curr.blend->need_white_fragments) {
-      key->white_fragments = 1;
+      key->fs.white_fragments = 1;
    }
 
 #ifdef DEBUG
@@ -241,22 +249,23 @@ make_fs_key(const struct svga_context *svga,
     */
    {
       static boolean warned = FALSE;
-      unsigned i, n = MAX2(svga->curr.num_sampler_views,
-                           svga->curr.num_samplers);
+      unsigned i, n = MAX2(svga->curr.num_sampler_views[shader],
+                           svga->curr.num_samplers[shader]);
       /* Only warn once to prevent too much debug output */
       if (!warned) {
-         if (svga->curr.num_sampler_views != svga->curr.num_samplers) {
+         if (svga->curr.num_sampler_views[shader] !=
+             svga->curr.num_samplers[shader]) {
             debug_printf("svga: mismatched number of sampler views (%u) "
                          "vs. samplers (%u)\n",
-                         svga->curr.num_sampler_views,
-                         svga->curr.num_samplers);
+                         svga->curr.num_sampler_views[shader],
+                         svga->curr.num_samplers[shader]);
          }
          for (i = 0; i < n; i++) {
-            if ((svga->curr.sampler_views[i] == NULL) !=
-                (svga->curr.sampler[i] == NULL))
+            if ((svga->curr.sampler_views[shader][i] == NULL) !=
+                (svga->curr.sampler[shader][i] == NULL))
                debug_printf("sampler_view[%u] = %p but sampler[%u] = %p\n",
-                            i, svga->curr.sampler_views[i],
-                            i, svga->curr.sampler[i]);
+                            i, svga->curr.sampler_views[shader][i],
+                            i, svga->curr.sampler[shader][i]);
          }
          warned = TRUE;
       }
@@ -268,68 +277,62 @@ make_fs_key(const struct svga_context *svga,
     *
     * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER
     */
-   for (i = 0; i < svga->curr.num_sampler_views; i++) {
-      if (svga->curr.sampler_views[i] && svga->curr.sampler[i]) {
-         assert(svga->curr.sampler_views[i]->texture);
-         key->tex[i].texture_target = svga->curr.sampler_views[i]->texture->target;
-         if (!svga->curr.sampler[i]->normalized_coords) {
-            key->tex[i].width_height_idx = idx++;
-            key->tex[i].unnormalized = TRUE;
-            ++key->num_unnormalized_coords;
-         }
-
-         key->tex[i].swizzle_r = svga->curr.sampler_views[i]->swizzle_r;
-         key->tex[i].swizzle_g = svga->curr.sampler_views[i]->swizzle_g;
-         key->tex[i].swizzle_b = svga->curr.sampler_views[i]->swizzle_b;
-         key->tex[i].swizzle_a = svga->curr.sampler_views[i]->swizzle_a;
-      }
-   }
-   key->num_textures = svga->curr.num_sampler_views;
-
-   idx = 0;
-   for (i = 0; i < svga->curr.num_samplers; ++i) {
-      if (svga->curr.sampler_views[i] && svga->curr.sampler[i]) {
-         struct pipe_resource *tex = svga->curr.sampler_views[i]->texture;
-         struct svga_texture *stex = svga_texture(tex);
-         SVGA3dSurfaceFormat format = stex->key.format;
-
-         if (format == SVGA3D_Z_D16 ||
-             format == SVGA3D_Z_D24X8 ||
-             format == SVGA3D_Z_D24S8) {
-            /* If we're sampling from a SVGA3D_Z_D16, SVGA3D_Z_D24X8,
-             * or SVGA3D_Z_D24S8 surface, we'll automatically get
-             * shadow comparison.  But we only get LEQUAL mode.
-             * Set TEX_COMPARE_NONE here so we don't emit the extra FS
-             * code for shadow comparison.
-             */
-            key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE;
-            key->tex[i].compare_func = PIPE_FUNC_NEVER;
-            /* These depth formats _only_ support comparison mode and
-             * not ordinary sampling so warn if the later is expected.
-             */
-            if (svga->curr.sampler[i]->compare_mode !=
-                PIPE_TEX_COMPARE_R_TO_TEXTURE) {
-               debug_warn_once("Unsupported shadow compare mode");
-            }                   
-            /* The only supported comparison mode is LEQUAL */
-            if (svga->curr.sampler[i]->compare_func != PIPE_FUNC_LEQUAL) {
-               debug_warn_once("Unsupported shadow compare function");
+   svga_init_shader_key_common(svga, shader, key);
+
+   for (i = 0; i < svga->curr.num_samplers[shader]; ++i) {
+      struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
+      const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
+      if (view) {
+         struct pipe_resource *tex = view->texture;
+         if (tex->target != PIPE_BUFFER) {
+            struct svga_texture *stex = svga_texture(tex);
+            SVGA3dSurfaceFormat format = stex->key.format;
+
+            if (!svga_have_vgpu10(svga) &&
+                (format == SVGA3D_Z_D16 ||
+                 format == SVGA3D_Z_D24X8 ||
+                 format == SVGA3D_Z_D24S8)) {
+               /* If we're sampling from a SVGA3D_Z_D16, SVGA3D_Z_D24X8,
+                * or SVGA3D_Z_D24S8 surface, we'll automatically get
+                * shadow comparison.  But we only get LEQUAL mode.
+                * Set TEX_COMPARE_NONE here so we don't emit the extra FS
+                * code for shadow comparison.
+                */
+               key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE;
+               key->tex[i].compare_func = PIPE_FUNC_NEVER;
+               /* These depth formats _only_ support comparison mode and
+                * not ordinary sampling so warn if the latter is expected.
+                */
+               if (sampler->compare_mode != PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+                  debug_warn_once("Unsupported shadow compare mode");
+               }
+               /* The shader translation code can emit code to
+                * handle ALWAYS and NEVER compare functions
+                */
+               else if (sampler->compare_func == PIPE_FUNC_ALWAYS ||
+                        sampler->compare_func == PIPE_FUNC_NEVER) {
+                  key->tex[i].compare_mode = sampler->compare_mode;
+                  key->tex[i].compare_func = sampler->compare_func;
+               }
+               else if (sampler->compare_func != PIPE_FUNC_LEQUAL) {
+                  debug_warn_once("Unsupported shadow compare function");
+               }
+            }
+            else {
+               /* For other texture formats, just use the compare func/mode
+                * as-is.  Should be no-ops for color textures.  For depth
+                * textures, we do not get automatic depth compare.  We have
+                * to do it ourselves in the shader.  And we don't get PCF.
+                */
+               key->tex[i].compare_mode = sampler->compare_mode;
+               key->tex[i].compare_func = sampler->compare_func;
             }
-         }
-         else {
-            /* For other texture formats, just use the compare func/mode
-             * as-is.  Should be no-ops for color textures.  For depth
-             * textures, we do not get automatic depth compare.  We have
-             * to do it ourselves in the shader.  And we don't get PCF.
-             */
-            key->tex[i].compare_mode = svga->curr.sampler[i]->compare_mode;
-            key->tex[i].compare_func = svga->curr.sampler[i]->compare_func;
          }
       }
    }
 
    /* sprite coord gen state */
-   for (i = 0; i < svga->curr.num_samplers; ++i) {
+   for (i = 0; i < svga->curr.num_samplers[shader]; ++i) {
       key->tex[i].sprite_texgen =
          svga->curr.rast->templ.sprite_coord_enable & (1 << i);
    }
@@ -337,10 +340,25 @@ make_fs_key(const struct svga_context *svga,
    key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode
                                     == PIPE_SPRITE_COORD_LOWER_LEFT);
 
+   key->fs.flatshade = svga->curr.rast->templ.flatshade;
+
+   /* SVGA_NEW_DEPTH_STENCIL_ALPHA */
+   if (svga_have_vgpu10(svga)) {
+      /* Alpha testing is not supported in integer-valued render targets. */
+      if (svga_has_any_integer_cbufs(svga)) {
+         key->fs.alpha_func = SVGA3D_CMP_ALWAYS;
+         key->fs.alpha_ref = 0;
+      }
+      else {
+         key->fs.alpha_func = svga->curr.depth->alphafunc;
+         key->fs.alpha_ref = svga->curr.depth->alpharef;
+      }
+   }
+
    /* SVGA_NEW_FRAME_BUFFER */
    if (fs->base.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) {
       /* Replicate color0 output to N colorbuffers */
-      key->write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs;
+      key->fs.write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs;
    }
 
    return PIPE_OK;
@@ -355,18 +373,32 @@ svga_reemit_fs_bindings(struct svga_context *svga)
 {
    enum pipe_error ret;
 
-   assert(svga->rebind.fs);
+   assert(svga->rebind.flags.fs);
    assert(svga_have_gb_objects(svga));
 
    if (!svga->state.hw_draw.fs)
       return PIPE_OK;
 
-   ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS,
-                            svga->state.hw_draw.fs->gb_shader);
+   if (!svga_need_to_rebind_resources(svga)) {
+      ret =  svga->swc->resource_rebind(svga->swc, NULL,
+                                        svga->state.hw_draw.fs->gb_shader,
+                                        SVGA_RELOC_READ);
+      goto out;
+   }
+
+   if (svga_have_vgpu10(svga))
+      ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS,
+                                    svga->state.hw_draw.fs->gb_shader,
+                                    svga->state.hw_draw.fs->id);
+   else
+      ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS,
+                               svga->state.hw_draw.fs->gb_shader);
+
+ out:
    if (ret != PIPE_OK)
       return ret;
 
-   svga->rebind.fs = FALSE;
+   svga->rebind.flags.fs = FALSE;
    return PIPE_OK;
 }
 
@@ -378,7 +410,7 @@ emit_hw_fs(struct svga_context *svga, unsigned dirty)
    struct svga_shader_variant *variant = NULL;
    enum pipe_error ret = PIPE_OK;
    struct svga_fragment_shader *fs = svga->curr.fs;
-   struct svga_fs_compile_key key;
+   struct svga_compile_key key;
 
    /* SVGA_NEW_BLEND
     * SVGA_NEW_TEXTURE_BINDING
@@ -386,14 +418,16 @@ emit_hw_fs(struct svga_context *svga, unsigned dirty)
     * SVGA_NEW_NEED_SWTNL
     * SVGA_NEW_SAMPLER
     * SVGA_NEW_FRAME_BUFFER
+    * SVGA_NEW_DEPTH_STENCIL_ALPHA
+    * SVGA_NEW_VS
     */
-   ret = make_fs_key( svga, fs, &key );
+   ret = make_fs_key(svga, fs, &key);
    if (ret != PIPE_OK)
       return ret;
 
-   variant = search_fs_key( fs, &key );
+   variant = svga_search_shader_key(&fs->base, &key);
    if (!variant) {
-      ret = compile_fs( svga, fs, &key, &variant );
+      ret = compile_fs(svga, fs, &key, &variant);
       if (ret != PIPE_OK)
          return ret;
    }
@@ -401,22 +435,14 @@ emit_hw_fs(struct svga_context *svga, unsigned dirty)
    assert(variant);
 
    if (variant != svga->state.hw_draw.fs) {
-      if (svga_have_gb_objects(svga)) {
-         ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS,
-                                  variant->gb_shader);
-         if (ret != PIPE_OK)
-            return ret;
+      ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, variant);
+      if (ret != PIPE_OK)
+         return ret;
 
-         svga->rebind.fs = FALSE;
-      }
-      else {
-         ret = SVGA3D_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS, variant->id);
-         if (ret != PIPE_OK)
-            return ret;
-      }
+      svga->rebind.flags.fs = FALSE;
 
       svga->dirty |= SVGA_NEW_FS_VARIANT;
-      svga->state.hw_draw.fs = variant;      
+      svga->state.hw_draw.fs = variant;
    }
 
    return PIPE_OK;
@@ -426,11 +452,15 @@ struct svga_tracked_state svga_hw_fs =
 {
    "fragment shader (hwtnl)",
    (SVGA_NEW_FS |
+    SVGA_NEW_GS |
+    SVGA_NEW_VS |
     SVGA_NEW_TEXTURE_BINDING |
     SVGA_NEW_NEED_SWTNL |
     SVGA_NEW_RAST |
+    SVGA_NEW_REDUCED_PRIMITIVE |
     SVGA_NEW_SAMPLER |
     SVGA_NEW_FRAME_BUFFER |
+    SVGA_NEW_DEPTH_STENCIL_ALPHA |
     SVGA_NEW_BLEND),
    emit_hw_fs
 };
diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c
index cac39d62fd1..429241e64e2 100644
--- a/src/gallium/drivers/svga/svga_state_need_swtnl.c
+++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c
@@ -26,6 +26,7 @@
 #include "util/u_inlines.h"
 #include "pipe/p_state.h"
 #include "svga_context.h"
+#include "svga_shader.h"
 #include "svga_state.h"
 #include "svga_debug.h"
 #include "svga_hw_reg.h"
@@ -91,7 +92,7 @@ update_need_pipeline(struct svga_context *svga, unsigned dirty)
       unsigned generic_inputs =
          svga->curr.fs ? svga->curr.fs->generic_inputs : 0;
 
-      if (sprite_coord_gen &&
+      if (!svga_have_vgpu10(svga) && sprite_coord_gen &&
           (generic_inputs & ~sprite_coord_gen)) {
          /* The fragment shader is using some generic inputs that are
           * not being replaced by auto-generated point/sprite coords (and
diff --git a/src/gallium/drivers/svga/svga_state_rss.c b/src/gallium/drivers/svga/svga_state_rss.c
index ebb98373e2b..d43894d71b1 100644
--- a/src/gallium/drivers/svga/svga_state_rss.c
+++ b/src/gallium/drivers/svga/svga_state_rss.c
@@ -23,16 +23,20 @@
  *
  **********************************************************/
 
+#include "pipe/p_defines.h"
+#include "util/u_bitmask.h"
 #include "util/u_format.h"
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
-#include "pipe/p_defines.h"
 #include "util/u_math.h"
+#include "util/u_memory.h"
 
 #include "svga_context.h"
 #include "svga_screen.h"
 #include "svga_state.h"
 #include "svga_cmd.h"
+#include "svga_format.h"
+#include "svga_shader.h"
 
 
 struct rs_queue {
@@ -77,7 +81,7 @@ svga_queue_rs( struct rs_queue *q,
  * the "to" state.
  */
 static enum pipe_error
-emit_rss(struct svga_context *svga, unsigned dirty)
+emit_rss_vgpu9(struct svga_context *svga, unsigned dirty)
 {
    struct svga_screen *screen = svga_screen(svga->pipe.screen);
    struct rs_queue queue;
@@ -85,7 +89,7 @@ emit_rss(struct svga_context *svga, unsigned dirty)
 
    queue.rs_count = 0;
 
-   if (dirty & SVGA_NEW_BLEND) {
+   if (dirty & (SVGA_NEW_BLEND | SVGA_NEW_BLEND_COLOR)) {
       const struct svga_blend_state *curr = svga->curr.blend;
 
       EMIT_RS( svga, curr->rt[0].writemask, COLORWRITEENABLE, fail );
@@ -119,7 +123,7 @@ emit_rss(struct svga_context *svga, unsigned dirty)
       EMIT_RS( svga, color, BLENDCOLOR, fail );
    }
 
-   if (dirty & (SVGA_NEW_DEPTH_STENCIL | SVGA_NEW_RAST)) {
+   if (dirty & (SVGA_NEW_DEPTH_STENCIL_ALPHA | SVGA_NEW_RAST)) {
       const struct svga_depth_stencil_state *curr = svga->curr.depth; 
       const struct svga_rasterizer_state *rast = svga->curr.rast; 
 
@@ -300,6 +304,151 @@ fail:
    return PIPE_ERROR_OUT_OF_MEMORY;
 }
 
+/**
+ * Return the non-culling rasterizer state object used when drawing point
+ * sprites.  It is created on first use and cached in the context.
+ */
+static struct svga_rasterizer_state *
+get_no_cull_rasterizer_state(struct svga_context *svga)
+{
+   const struct svga_rasterizer_state *cur = svga->curr.rast;
+   const unsigned int slot = cur->templ.point_smooth;
+   struct pipe_rasterizer_state templ;
+
+   if (svga->rasterizer_no_cull[slot])
+      return svga->rasterizer_no_cull[slot];
+
+   memset(&templ, 0, sizeof(templ));
+   templ.flatshade = 1;
+   templ.front_ccw = 1;
+   templ.point_smooth = cur->templ.point_smooth;
+
+   /* half_pixel_center, bottom_edge_rule and clip_halfz are constant for a
+    * context, so every rasterizer state shares them.  If GL_ARB_clip_control
+    * is ever implemented, the clip_halfz field would have to be observed.
+    */
+   templ.half_pixel_center = cur->templ.half_pixel_center;
+   templ.bottom_edge_rule = cur->templ.bottom_edge_rule;
+   templ.clip_halfz = cur->templ.clip_halfz;
+
+   svga->rasterizer_no_cull[slot] =
+      svga->pipe.create_rasterizer_state(&svga->pipe, &templ);
+   return svga->rasterizer_no_cull[slot];
+}
+
+/**
+ * Bind vgpu10 blend, depth/stencil and rasterizer state for the given dirty
+ * bits, skipping values already recorded in svga->state.hw_draw.  Returns
+ * the first failing command's error, or PIPE_OK.
+ */
+static enum pipe_error
+emit_rss_vgpu10(struct svga_context *svga, unsigned dirty)
+{
+   enum pipe_error ret = PIPE_OK;
+
+   svga_hwtnl_flush_retry(svga);
+
+   if (dirty & (SVGA_NEW_BLEND | SVGA_NEW_BLEND_COLOR)) {
+      const struct svga_blend_state *curr;
+      float blend_factor[4];
+
+      if (svga_has_any_integer_cbufs(svga)) {
+         /* Blending is not supported in integer-valued render targets. */
+         curr = svga->noop_blend;
+         blend_factor[0] = blend_factor[1] =
+         blend_factor[2] = blend_factor[3] = 0;
+      }
+      else {
+         curr = svga->curr.blend;
+
+         if (curr->blend_color_alpha) {
+            /* Every channel of the factor takes the alpha component. */
+            blend_factor[0] = blend_factor[1] = blend_factor[2] =
+            blend_factor[3] = svga->curr.blend_color.color[3];
+         }
+         else {
+            blend_factor[0] = svga->curr.blend_color.color[0];
+            blend_factor[1] = svga->curr.blend_color.color[1];
+            blend_factor[2] = svga->curr.blend_color.color[2];
+            blend_factor[3] = svga->curr.blend_color.color[3];
+         }
+      }
+
+      /* Set/bind the blend state object */
+      if (svga->state.hw_draw.blend_id != curr->id ||
+          svga->state.hw_draw.blend_factor[0] != blend_factor[0] ||
+          svga->state.hw_draw.blend_factor[1] != blend_factor[1] ||
+          svga->state.hw_draw.blend_factor[2] != blend_factor[2] ||
+          svga->state.hw_draw.blend_factor[3] != blend_factor[3] ||
+          svga->state.hw_draw.blend_sample_mask != svga->curr.sample_mask) {
+         ret = SVGA3D_vgpu10_SetBlendState(svga->swc, curr->id,
+                                           blend_factor,
+                                           svga->curr.sample_mask);
+         if (ret != PIPE_OK)
+            return ret;
+
+         svga->state.hw_draw.blend_id = curr->id;
+         svga->state.hw_draw.blend_factor[0] = blend_factor[0];
+         svga->state.hw_draw.blend_factor[1] = blend_factor[1];
+         svga->state.hw_draw.blend_factor[2] = blend_factor[2];
+         svga->state.hw_draw.blend_factor[3] = blend_factor[3];
+         svga->state.hw_draw.blend_sample_mask = svga->curr.sample_mask;
+      }
+   }
+
+   if (dirty & (SVGA_NEW_DEPTH_STENCIL_ALPHA | SVGA_NEW_STENCIL_REF)) {
+      const struct svga_depth_stencil_state *curr = svga->curr.depth;
+      unsigned curr_ref = svga->curr.stencil_ref.ref_value[0];
+
+      if (curr->id != svga->state.hw_draw.depth_stencil_id ||
+          curr_ref != svga->state.hw_draw.stencil_ref) {
+         /* Set/bind the depth/stencil state object */
+         ret = SVGA3D_vgpu10_SetDepthStencilState(svga->swc, curr->id,
+                                                  curr_ref);
+         if (ret != PIPE_OK)
+            return ret;
+
+         svga->state.hw_draw.depth_stencil_id = curr->id;
+         svga->state.hw_draw.stencil_ref = curr_ref;
+      }
+   }
+
+   if (dirty & (SVGA_NEW_REDUCED_PRIMITIVE | SVGA_NEW_RAST)) {
+      const struct svga_rasterizer_state *rast = svga->curr.rast;
+
+      if (svga->curr.reduced_prim == PIPE_PRIM_POINTS &&
+          svga->curr.gs && svga->curr.gs->wide_point) {
+         /* Point sprites need a non-culling rasterizer state object;
+          * creating it can fail, so do not dereference a NULL result.
+          */
+         rast = get_no_cull_rasterizer_state(svga);
+         if (!rast)
+            return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+
+      if (svga->state.hw_draw.rasterizer_id != rast->id) {
+         /* Set/bind the rasterizer state object */
+         ret = SVGA3D_vgpu10_SetRasterizerState(svga->swc, rast->id);
+         if (ret != PIPE_OK)
+            return ret;
+         svga->state.hw_draw.rasterizer_id = rast->id;
+      }
+   }
+   return PIPE_OK;
+}
+
+
+/** Emit render state through the vgpu10 or vgpu9 path. */
+static enum pipe_error
+emit_rss(struct svga_context *svga, unsigned dirty)
+{
+   /* svga_have_vgpu10() selects which emitter handles this context. */
+   if (!svga_have_vgpu10(svga))
+      return emit_rss_vgpu9(svga, dirty);
+
+   return emit_rss_vgpu10(svga, dirty);
+}
+
 
 struct svga_tracked_state svga_hw_rss = 
 {
@@ -307,11 +456,12 @@ struct svga_tracked_state svga_hw_rss =
 
    (SVGA_NEW_BLEND |
     SVGA_NEW_BLEND_COLOR |
-    SVGA_NEW_DEPTH_STENCIL |
+    SVGA_NEW_DEPTH_STENCIL_ALPHA |
     SVGA_NEW_STENCIL_REF |
     SVGA_NEW_RAST |
     SVGA_NEW_FRAME_BUFFER |
-    SVGA_NEW_NEED_PIPELINE),
+    SVGA_NEW_NEED_PIPELINE |
+    SVGA_NEW_REDUCED_PRIMITIVE),
 
    emit_rss
 };
diff --git a/src/gallium/drivers/svga/svga_state_sampler.c b/src/gallium/drivers/svga/svga_state_sampler.c
index e350f540335..1c6913e4a26 100644
--- a/src/gallium/drivers/svga/svga_state_sampler.c
+++ b/src/gallium/drivers/svga/svga_state_sampler.c
@@ -45,7 +45,7 @@
 
 
 /** Get resource handle for a texture or buffer */
-static INLINE struct svga_winsys_surface *
+static inline struct svga_winsys_surface *
 svga_resource_handle(struct pipe_resource *res)
 {
    if (res->target == PIPE_BUFFER) {
@@ -111,7 +111,7 @@ svga_validate_pipe_sampler_view(struct svga_context *svga,
 
       if (texture->target == PIPE_BUFFER) {
          viewDesc.buffer.firstElement = sv->base.u.buf.first_element;
-         viewDesc.buffer.numElements = (sv->base.u.buf.last_element - 
+         viewDesc.buffer.numElements = (sv->base.u.buf.last_element -
                                         sv->base.u.buf.first_element + 1);
       }
       else {
@@ -122,13 +122,13 @@ svga_validate_pipe_sampler_view(struct svga_context *svga,
       }
 
       /* arraySize in viewDesc specifies the number of array slices in a
-       * texture array. For 3D texture, last_layer in 
+       * texture array. For 3D texture, last_layer in
        * pipe_sampler_view specifies the last slice of the texture
        * which is different from the last slice in a texture array,
        * hence we need to set arraySize to 1 explicitly.
        */
-      viewDesc.tex.arraySize = 
-         (texture->target == PIPE_TEXTURE_3D || 
+      viewDesc.tex.arraySize =
+         (texture->target == PIPE_TEXTURE_3D ||
           texture->target == PIPE_BUFFER) ? 1 :
             (sv->base.u.tex.last_layer - sv->base.u.tex.first_layer + 1);
 
diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c
index 41334bd7cb9..a13980d0e13 100644
--- a/src/gallium/drivers/svga/svga_state_tss.c
+++ b/src/gallium/drivers/svga/svga_state_tss.c
@@ -31,24 +31,28 @@
 #include "svga_sampler_view.h"
 #include "svga_winsys.h"
 #include "svga_context.h"
+#include "svga_shader.h"
 #include "svga_state.h"
 #include "svga_cmd.h"
 
 
+/**
+ * Called when tearing down a context to free resources and samplers.
+ */
 void svga_cleanup_tss_binding(struct svga_context *svga)
 {
+   const unsigned shader = PIPE_SHADER_FRAGMENT;
    unsigned i;
-   unsigned count = MAX2( svga->curr.num_sampler_views,
-                          svga->state.hw_draw.num_views );
 
-   for (i = 0; i < count; i++) {
+   for (i = 0; i < Elements(svga->state.hw_draw.views); i++) {
       struct svga_hw_view_state *view = &svga->state.hw_draw.views[i];
-
-      svga_sampler_view_reference(&view->v, NULL);
-      pipe_sampler_view_release(&svga->pipe, &svga->curr.sampler_views[i]);
-      pipe_resource_reference( &view->texture, NULL );
-
-      view->dirty = 1;
+      if (view) {
+         svga_sampler_view_reference(&view->v, NULL);
+         pipe_sampler_view_release(&svga->pipe,
+                                   &svga->curr.sampler_views[shader][i]);
+         pipe_resource_reference(&view->texture, NULL);
+         view->dirty = TRUE;
+      }
    }
 }
 
@@ -63,73 +67,113 @@ struct bind_queue {
 };
 
 
+/**
+ * Refresh the hardware view for a single texture unit and, if it must be
+ * (re)sent to the device, append it to the bind queue.
+ *
+ * \param unit    texture unit index recorded in the queue entry
+ * \param s       sampler state, may be NULL
+ * \param sv      sampler view, may be NULL
+ * \param view    hardware view slot to update
+ * \param reemit  queue non-null bindings even when they are not dirty
+ * \param queue   receives the binding to emit
+ */
+static void
+emit_tex_binding_unit(struct svga_context *svga,
+                      unsigned unit,
+                      const struct svga_sampler_state *s,
+                      const struct pipe_sampler_view *sv,
+                      struct svga_hw_view_state *view,
+                      boolean reemit,
+                      struct bind_queue *queue)
+{
+   struct pipe_resource *tex = NULL;
+   unsigned lod_lo = 0, lod_hi = 0;
+
+   /* Without both a view and a sampler the unit is unbound (lod 0..0). */
+   if (sv && s) {
+      tex = sv->texture;
+      if (s->mipfilter != SVGA3D_TEX_FILTER_NONE) {
+         const unsigned top = MIN2(sv->u.tex.last_level,
+                                   sv->texture->last_level);
+         lod_lo = MAX2(0, (s->view_min_lod + sv->u.tex.first_level));
+         lod_lo = MIN2(lod_lo, top);
+         lod_hi = MIN2(s->view_max_lod + sv->u.tex.first_level, top);
+      }
+      else {
+         /* just use the base level image */
+         lod_lo = lod_hi = sv->u.tex.first_level;
+      }
+   }
+
+   if (view->texture != tex ||
+       view->min_lod != lod_lo || view->max_lod != lod_hi) {
+      /* Binding changed: drop the old view and take a new one. */
+      svga_sampler_view_reference(&view->v, NULL);
+      pipe_resource_reference( &view->texture, tex );
+
+      view->dirty = TRUE;
+      view->min_lod = lod_lo;
+      view->max_lod = lod_hi;
+
+      if (tex)
+         view->v = svga_get_tex_sampler_view(&svga->pipe, tex,
+                                             lod_lo, lod_hi);
+   }
+
+   /* Non-null bindings are re-sent on reemit even when clean, so that the
+    * resources get paged in.
+    */
+   if (view->dirty || (reemit && view->v)) {
+      const unsigned slot = queue->bind_count++;
+      queue->bind[slot].unit = unit;
+      queue->bind[slot].view = view;
+   }
+
+   /* Validate views that are bound but not dirty. */
+   if (!view->dirty && view->v)
+      svga_validate_sampler_view(svga, view->v);
+}
+
+
 static enum pipe_error
 update_tss_binding(struct svga_context *svga, 
                    unsigned dirty )
 {
-   boolean reemit = svga->rebind.texture_samplers;
+   const unsigned shader = PIPE_SHADER_FRAGMENT;
+   boolean reemit = svga->rebind.flags.texture_samplers;
    unsigned i;
-   unsigned count = MAX2( svga->curr.num_sampler_views,
+   unsigned count = MAX2( svga->curr.num_sampler_views[shader],
                           svga->state.hw_draw.num_views );
-   unsigned min_lod;
-   unsigned max_lod;
 
    struct bind_queue queue;
 
+   if (svga_have_vgpu10(svga))
+      return PIPE_OK;
+
    queue.bind_count = 0;
    
    for (i = 0; i < count; i++) {
-      const struct svga_sampler_state *s = svga->curr.sampler[i];
-      struct svga_hw_view_state *view = &svga->state.hw_draw.views[i];
-      struct pipe_resource *texture = NULL;
-      struct pipe_sampler_view *sv = svga->curr.sampler_views[i];
-
-      /* get min max lod */
-      if (sv && s) {
-         min_lod = MAX2(0, (s->view_min_lod + sv->u.tex.first_level));
-         max_lod = MIN2(s->view_max_lod + sv->u.tex.first_level,
-                        sv->texture->last_level);
-         texture = sv->texture;
-      } else {
-         min_lod = 0;
-         max_lod = 0;
-      }
-
-      if (view->texture != texture ||
-          view->min_lod != min_lod ||
-          view->max_lod != max_lod) {
-
-         svga_sampler_view_reference(&view->v, NULL);
-         pipe_resource_reference( &view->texture, texture );
-
-         view->dirty = TRUE;
-         view->min_lod = min_lod;
-         view->max_lod = max_lod;
-
-         if (texture)
-            view->v = svga_get_tex_sampler_view(&svga->pipe, 
-                                                texture, 
-                                                min_lod,
-                                                max_lod);
-      }
-
-      /*
-       * We need to reemit non-null texture bindings, even when they are not
-       * dirty, to ensure that the resources are paged in.
-       */
-
-      if (view->dirty ||
-          (reemit && view->v)) {
-         queue.bind[queue.bind_count].unit = i;
-         queue.bind[queue.bind_count].view = view;
-         queue.bind_count++;
-      } 
-      if (!view->dirty && view->v) {
-         svga_validate_sampler_view(svga, view->v);
-      }
+      emit_tex_binding_unit(svga, i,
+                            svga->curr.sampler[shader][i],
+                            svga->curr.sampler_views[shader][i],
+                            &svga->state.hw_draw.views[i],
+                            reemit,
+                            &queue);
    }
 
-   svga->state.hw_draw.num_views = svga->curr.num_sampler_views;
+   svga->state.hw_draw.num_views = svga->curr.num_sampler_views[shader];
+
+   /* Polygon stipple */
+   if (svga->curr.rast->templ.poly_stipple_enable) {
+      const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+      emit_tex_binding_unit(svga, unit,
+                            svga->polygon_stipple.sampler,
+                            &svga->polygon_stipple.sampler_view->base,
+                            &svga->state.hw_draw.views[unit],
+                            reemit,
+                            &queue);
+   }
 
    if (queue.bind_count) {
       SVGA3dTextureState *ts;
@@ -163,7 +207,7 @@ update_tss_binding(struct svga_context *svga,
       SVGA_FIFOCommitAll( svga->swc );
    }
 
-   svga->rebind.texture_samplers = FALSE;
+   svga->rebind.flags.texture_samplers = FALSE;
 
    return PIPE_OK;
 
@@ -187,7 +231,8 @@ svga_reemit_tss_bindings(struct svga_context *svga)
    enum pipe_error ret;
    struct bind_queue queue;
 
-   assert(svga->rebind.texture_samplers);
+   assert(!svga_have_vgpu10(svga));
+   assert(svga->rebind.flags.texture_samplers);
 
    queue.bind_count = 0;
 
@@ -201,6 +246,18 @@ svga_reemit_tss_bindings(struct svga_context *svga)
       }
    }
 
+   /* Polygon stipple */
+   if (svga->curr.rast->templ.poly_stipple_enable) {
+      const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+      struct svga_hw_view_state *view = &svga->state.hw_draw.views[unit];
+
+      if (view->v) {
+         queue.bind[queue.bind_count].unit = unit;
+         queue.bind[queue.bind_count].view = view;
+         queue.bind_count++;
+      }
+   }
+
    if (queue.bind_count) {
       SVGA3dTextureState *ts;
 
@@ -229,7 +286,7 @@ svga_reemit_tss_bindings(struct svga_context *svga)
       SVGA_FIFOCommitAll(svga->swc);
    }
 
-   svga->rebind.texture_samplers = FALSE;
+   svga->rebind.flags.texture_samplers = FALSE;
 
    return PIPE_OK;
 }
@@ -238,6 +295,7 @@ svga_reemit_tss_bindings(struct svga_context *svga)
 struct svga_tracked_state svga_hw_tss_binding = {
    "texture binding emit",
    SVGA_NEW_TEXTURE_BINDING |
+   SVGA_NEW_STIPPLE |
    SVGA_NEW_SAMPLER,
    update_tss_binding
 };
@@ -252,78 +310,98 @@ struct ts_queue {
 };
 
 
-#define EMIT_TS(svga, unit, val, token, fail)                           \
+/** Append one (unit, state name, value) entry to a texture state queue. */
+static inline void
+svga_queue_tss( struct ts_queue *q, unsigned unit,
+                unsigned tss, unsigned value )
+{
+   const unsigned slot = q->ts_count;
+   assert(slot < sizeof(q->ts)/sizeof(q->ts[0]));
+   q->ts[slot].stage = unit;
+   q->ts[slot].name = tss;
+   q->ts[slot].value = value;
+   q->ts_count = slot + 1;
+}
+
+
+#define EMIT_TS(svga, unit, val, token)                                 \
 do {                                                                    \
    assert(unit < Elements(svga->state.hw_draw.ts));                     \
    assert(SVGA3D_TS_##token < Elements(svga->state.hw_draw.ts[unit]));  \
    if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) {        \
-      svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val );           \
+      svga_queue_tss( queue, unit, SVGA3D_TS_##token, val );            \
       svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val;            \
    }                                                                    \
 } while (0)
 
-#define EMIT_TS_FLOAT(svga, unit, fvalue, token, fail)                  \
+#define EMIT_TS_FLOAT(svga, unit, fvalue, token)                        \
 do {                                                                    \
    unsigned val = fui(fvalue);                                          \
    assert(unit < Elements(svga->state.hw_draw.ts));                     \
    assert(SVGA3D_TS_##token < Elements(svga->state.hw_draw.ts[unit]));  \
    if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) {        \
-      svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val );           \
+      svga_queue_tss( queue, unit, SVGA3D_TS_##token, val );            \
       svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val;            \
    }                                                                    \
 } while (0)
 
 
-static inline void 
-svga_queue_tss( struct ts_queue *q,
-                unsigned unit,
-                unsigned tss,
-                unsigned value )
+/**
+ * Emit texture sampler state (tss) for one texture unit.
+ */
+static void
+emit_tss_unit(struct svga_context *svga, unsigned unit,
+              const struct svga_sampler_state *state,
+              struct ts_queue *queue)
 {
-   assert(q->ts_count < sizeof(q->ts)/sizeof(q->ts[0]));
-   q->ts[q->ts_count].stage = unit;
-   q->ts[q->ts_count].name = tss;
-   q->ts[q->ts_count].value = value;
-   q->ts_count++;
+   EMIT_TS(svga, unit, state->mipfilter, MIPFILTER);
+   EMIT_TS(svga, unit, state->min_lod, TEXTURE_MIPMAP_LEVEL);
+   EMIT_TS(svga, unit, state->magfilter, MAGFILTER);
+   EMIT_TS(svga, unit, state->minfilter, MINFILTER);
+   EMIT_TS(svga, unit, state->aniso_level, TEXTURE_ANISOTROPIC_LEVEL);
+   EMIT_TS_FLOAT(svga, unit, state->lod_bias, TEXTURE_LOD_BIAS);
+   EMIT_TS(svga, unit, state->addressu, ADDRESSU);
+   EMIT_TS(svga, unit, state->addressw, ADDRESSW);
+   EMIT_TS(svga, unit, state->bordercolor, BORDERCOLOR);
+   // TEXCOORDINDEX -- hopefully not needed
+
+   if (svga->curr.tex_flags.flag_1d & (1 << unit))
+      EMIT_TS(svga, unit, SVGA3D_TEX_ADDRESS_WRAP, ADDRESSV);
+   else
+      EMIT_TS(svga, unit, state->addressv, ADDRESSV);
+
+   if (svga->curr.tex_flags.flag_srgb & (1 << unit))
+      EMIT_TS_FLOAT(svga, unit, 2.2f, GAMMA);
+   else
+      EMIT_TS_FLOAT(svga, unit, 1.0f, GAMMA);
 }
 
-
 static enum pipe_error
 update_tss(struct svga_context *svga, 
            unsigned dirty )
 {
+   const unsigned shader = PIPE_SHADER_FRAGMENT;
    unsigned i;
    struct ts_queue queue;
 
-   queue.ts_count = 0;
-   for (i = 0; i < svga->curr.num_samplers; i++) {
-      if (svga->curr.sampler[i]) {
-         const struct svga_sampler_state *curr = svga->curr.sampler[i];
-
-         EMIT_TS(svga, i, curr->mipfilter, MIPFILTER, fail);
-         EMIT_TS(svga, i, curr->min_lod, TEXTURE_MIPMAP_LEVEL, fail);
-         EMIT_TS(svga, i, curr->magfilter, MAGFILTER, fail);
-         EMIT_TS(svga, i, curr->minfilter, MINFILTER, fail);
-         EMIT_TS(svga, i, curr->aniso_level, TEXTURE_ANISOTROPIC_LEVEL, fail);
-         EMIT_TS_FLOAT(svga, i, curr->lod_bias, TEXTURE_LOD_BIAS, fail);
-         EMIT_TS(svga, i, curr->addressu, ADDRESSU, fail);
-         EMIT_TS(svga, i, curr->addressw, ADDRESSW, fail);
-         EMIT_TS(svga, i, curr->bordercolor, BORDERCOLOR, fail);
-         // TEXCOORDINDEX -- hopefully not needed
-
-         if (svga->curr.tex_flags.flag_1d & (1 << i)) {
-            EMIT_TS(svga, i, SVGA3D_TEX_ADDRESS_WRAP, ADDRESSV, fail);
-         }
-         else
-            EMIT_TS(svga, i, curr->addressv, ADDRESSV, fail);
-
-         if (svga->curr.tex_flags.flag_srgb & (1 << i))
-            EMIT_TS_FLOAT(svga, i, 2.2f, GAMMA, fail);
-         else
-            EMIT_TS_FLOAT(svga, i, 1.0f, GAMMA, fail);
+   if (svga_have_vgpu10(svga))
+      return PIPE_OK;
 
+   queue.ts_count = 0;
+   for (i = 0; i < svga->curr.num_samplers[shader]; i++) {
+      if (svga->curr.sampler[shader][i]) {
+         const struct svga_sampler_state *curr = svga->curr.sampler[shader][i];
+         emit_tss_unit(svga, i, curr, &queue);
       }
    }
+
+   /* polygon stipple sampler */
+   if (svga->curr.rast->templ.poly_stipple_enable) {
+      emit_tss_unit(svga,
+                    svga->state.hw_draw.fs->pstipple_sampler_unit,
+                    svga->polygon_stipple.sampler,
+                    &queue);
+   }
  
    if (queue.ts_count) {
       SVGA3dTextureState *ts;
@@ -357,6 +435,7 @@ fail:
 struct svga_tracked_state svga_hw_tss = {
    "texture state emit",
    (SVGA_NEW_SAMPLER |
+    SVGA_NEW_STIPPLE |
     SVGA_NEW_TEXTURE_FLAGS),
    update_tss
 };
diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c
index a33eda38366..e1b6a1c2a44 100644
--- a/src/gallium/drivers/svga/svga_state_vdecl.c
+++ b/src/gallium/drivers/svga/svga_state_vdecl.c
@@ -33,6 +33,7 @@
 #include "svga_draw.h"
 #include "svga_tgsi.h"
 #include "svga_screen.h"
+#include "svga_shader.h"
 #include "svga_resource_buffer.h"
 #include "svga_hw_reg.h"
 
@@ -42,16 +43,14 @@ static enum pipe_error
 emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty)
 {
    const struct pipe_vertex_element *ve = svga->curr.velems->velem;
+   SVGA3dVertexDecl decls[SVGA3D_INPUTREG_MAX];
+   unsigned buffer_indexes[SVGA3D_INPUTREG_MAX];
    unsigned i;
    unsigned neg_bias = 0;
 
    assert(svga->curr.velems->count >=
           svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]);
 
-   /* specify number of vertex element declarations to come */
-   svga_hwtnl_reset_vdecl( svga->hwtnl,
-                           svga->curr.velems->count );
-
    /**
     * We can't set the VDECL offset to something negative, so we
     * must calculate a common negative additional index bias, and modify
@@ -70,15 +69,16 @@ emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty)
    for (i = 0; i < svga->curr.velems->count; i++) {
       const struct pipe_vertex_buffer *vb =
          &svga->curr.vb[ve[i].vertex_buffer_index];
-      const struct svga_buffer *buffer;
+      struct svga_buffer *buffer;
       unsigned int offset = vb->buffer_offset + ve[i].src_offset;
+      unsigned tmp_neg_bias = 0;
 
       if (!vb->buffer)
          continue;
 
       buffer = svga_buffer(vb->buffer);
       if (buffer->uploaded.start > offset) {
-         unsigned tmp_neg_bias = buffer->uploaded.start - offset;
+         tmp_neg_bias = buffer->uploaded.start - offset;
          if (vb->stride)
             tmp_neg_bias = (tmp_neg_bias + vb->stride - 1) / vb->stride;
          neg_bias = MAX2(neg_bias, tmp_neg_bias);
@@ -89,8 +89,7 @@ emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty)
       const struct pipe_vertex_buffer *vb =
          &svga->curr.vb[ve[i].vertex_buffer_index];
       unsigned usage, index;
-      const struct svga_buffer *buffer;
-      SVGA3dVertexDecl decl;
+      struct svga_buffer *buffer;
 
       if (!vb->buffer)
          continue;
@@ -100,29 +99,37 @@ emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty)
 
       /* SVGA_NEW_VELEMENT
        */
-      decl.identity.type = svga->curr.velems->decl_type[i];
-      decl.identity.method = SVGA3D_DECLMETHOD_DEFAULT;
-      decl.identity.usage = usage;
-      decl.identity.usageIndex = index;
-      decl.array.stride = vb->stride;
+      decls[i].identity.type = svga->curr.velems->decl_type[i];
+      decls[i].identity.method = SVGA3D_DECLMETHOD_DEFAULT;
+      decls[i].identity.usage = usage;
+      decls[i].identity.usageIndex = index;
+      decls[i].array.stride = vb->stride;
 
       /* Compensate for partially uploaded vbo, and
        * for the negative index bias.
        */
-      decl.array.offset = (vb->buffer_offset
+      decls[i].array.offset = (vb->buffer_offset
                            + ve[i].src_offset
 			   + neg_bias * vb->stride
 			   - buffer->uploaded.start);
 
-      assert(decl.array.offset >= 0);
+      assert(decls[i].array.offset >= 0);
+
+      buffer_indexes[i] = ve[i].vertex_buffer_index;
 
-      svga_hwtnl_vdecl( svga->hwtnl,
-                        i,
-                        &decl,
-                        buffer->uploaded.buffer ? buffer->uploaded.buffer :
-                        vb->buffer );
+      assert(!buffer->uploaded.buffer);
    }
 
+   svga_hwtnl_vertex_decls(svga->hwtnl,
+                           svga->curr.velems->count,
+                           decls,
+                           buffer_indexes,
+                           svga->curr.velems->id);
+
+   svga_hwtnl_vertex_buffers(svga->hwtnl,
+                             svga->curr.num_vertex_buffers,
+                             svga->curr.vb);
+
    svga_hwtnl_set_index_bias( svga->hwtnl, -(int) neg_bias );
    return PIPE_OK;
 }
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index c2a0f1ee6b1..45dcb5d0f76 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -25,7 +25,6 @@
 
 #include "util/u_inlines.h"
 #include "pipe/p_defines.h"
-#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_bitmask.h"
@@ -41,33 +40,6 @@
 #include "svga_hw_reg.h"
 
 
-static inline int
-compare_vs_keys(const struct svga_vs_compile_key *a,
-                const struct svga_vs_compile_key *b)
-{
-   unsigned keysize = svga_vs_key_size( a );
-   return memcmp( a, b, keysize );
-}
-
-
-/** Search for a vertex shader variant */
-static struct svga_shader_variant *
-search_vs_key(const struct svga_vertex_shader *vs,
-              const struct svga_vs_compile_key *key)
-{
-   struct svga_shader_variant *variant = vs->base.variants;
-
-   assert(key);
-
-   for ( ; variant; variant = variant->next) {
-      if (compare_vs_keys( key, &variant->key.vkey ) == 0)
-         return variant;
-   }
-   
-   return NULL;
-}
-
-
 /**
  * If we fail to compile a vertex shader we'll use a dummy/fallback shader
  * that simply emits a (0,0,0,1) vertex position.
@@ -99,13 +71,29 @@ get_dummy_vertex_shader(void)
 }
 
 
+/** Translate a vertex shader with the translator matching the device. */
+static struct svga_shader_variant *
+translate_vertex_program(struct svga_context *svga,
+                         const struct svga_vertex_shader *vs,
+                         const struct svga_compile_key *key)
+{
+   const unsigned stage = PIPE_SHADER_VERTEX;
+
+   if (!svga_have_vgpu10(svga))
+      return svga_tgsi_vgpu9_translate(&vs->base, key, stage);
+
+   return svga_tgsi_vgpu10_translate(svga, &vs->base, key, stage);
+}
+
+
 /**
  * Replace the given shader's instruction with a simple / dummy shader.
  * We use this when normal shader translation fails.
  */
 static struct svga_shader_variant *
-get_compiled_dummy_vertex_shader(struct svga_vertex_shader *vs,
-                                 const struct svga_vs_compile_key *key)
+get_compiled_dummy_vertex_shader(struct svga_context *svga,
+                                 struct svga_vertex_shader *vs,
+                                 const struct svga_compile_key *key)
 {
    const struct tgsi_token *dummy = get_dummy_vertex_shader();
    struct svga_shader_variant *variant;
@@ -117,7 +105,7 @@ get_compiled_dummy_vertex_shader(struct svga_vertex_shader *vs,
    FREE((void *) vs->base.tokens);
    vs->base.tokens = dummy;
 
-   variant = svga_translate_vertex_program(vs, key);
+   variant = translate_vertex_program(svga, vs, key);
    return variant;
 }
 
@@ -128,16 +116,17 @@ get_compiled_dummy_vertex_shader(struct svga_vertex_shader *vs,
 static enum pipe_error
 compile_vs(struct svga_context *svga,
            struct svga_vertex_shader *vs,
-           const struct svga_vs_compile_key *key,
+           const struct svga_compile_key *key,
            struct svga_shader_variant **out_variant)
 {
    struct svga_shader_variant *variant;
    enum pipe_error ret = PIPE_ERROR;
 
-   variant = svga_translate_vertex_program( vs, key );
+   variant = translate_vertex_program(svga, vs, key);
    if (variant == NULL) {
-      /* some problem during translation, try the dummy shader */
-      variant = get_compiled_dummy_vertex_shader(vs, key);
+      debug_printf("Failed to compile vertex shader,"
+                   " using dummy shader instead.\n");
+      variant = get_compiled_dummy_vertex_shader(svga, vs, key);
       if (!variant) {
          ret = PIPE_ERROR;
          goto fail;
@@ -146,11 +135,11 @@ compile_vs(struct svga_context *svga,
 
    if (svga_shader_too_large(svga, variant)) {
       /* too big, use dummy shader */
-      debug_printf("Shader too large (%lu bytes),"
+      debug_printf("Shader too large (%u bytes),"
                    " using dummy shader instead.\n",
-                   (unsigned long ) variant->nr_tokens
-                   * sizeof(variant->tokens[0]));
-      variant = get_compiled_dummy_vertex_shader(vs, key);
+                   (unsigned) (variant->nr_tokens
+                               * sizeof(variant->tokens[0])));
+      variant = get_compiled_dummy_vertex_shader(svga, vs, key);
       if (!variant) {
          ret = PIPE_ERROR;
          goto fail;
@@ -163,10 +152,6 @@ compile_vs(struct svga_context *svga,
 
    *out_variant = variant;
 
-   /* insert variants at head of linked list */
-   variant->next = vs->base.variants;
-   vs->base.variants = variant;
-
    return PIPE_OK;
 
 fail:
@@ -179,18 +164,44 @@ fail:
 /* SVGA_NEW_PRESCALE, SVGA_NEW_RAST, SVGA_NEW_FS
  */
 static void
-make_vs_key(struct svga_context *svga, struct svga_vs_compile_key *key)
+make_vs_key(struct svga_context *svga, struct svga_compile_key *key)
 {
+   const unsigned shader = PIPE_SHADER_VERTEX;
+
    memset(key, 0, sizeof *key);
-   key->need_prescale = svga->state.hw_clear.prescale.enabled;
-   key->allow_psiz = svga->curr.rast->templ.point_size_per_vertex;
+
+   if (svga->state.sw.need_swtnl && svga_have_vgpu10(svga)) {
+      /* Set both of these flags, to match compile_passthrough_vs() */
+      key->vs.passthrough = 1;
+      key->vs.undo_viewport = 1;
+      return;
+   }
+
+   key->vs.need_prescale = svga->state.hw_clear.prescale.enabled &&
+                           (svga->curr.gs == NULL);
+   key->vs.allow_psiz = svga->curr.rast->templ.point_size_per_vertex;
 
    /* SVGA_NEW_FS */
-   key->fs_generic_inputs = svga->curr.fs->generic_inputs;
+   key->vs.fs_generic_inputs = svga->curr.fs->generic_inputs;
+
+   svga_remap_generics(key->vs.fs_generic_inputs, key->generic_remap_table);
 
    /* SVGA_NEW_VELEMENT */
-   key->adjust_attrib_range = svga->curr.velems->adjust_attrib_range;
-   key->adjust_attrib_w_1 = svga->curr.velems->adjust_attrib_w_1;
+   key->vs.adjust_attrib_range = svga->curr.velems->adjust_attrib_range;
+   key->vs.adjust_attrib_w_1 = svga->curr.velems->adjust_attrib_w_1;
+   key->vs.attrib_is_pure_int = svga->curr.velems->attrib_is_pure_int;
+   key->vs.adjust_attrib_itof = svga->curr.velems->adjust_attrib_itof;
+   key->vs.adjust_attrib_utof = svga->curr.velems->adjust_attrib_utof;
+   key->vs.attrib_is_bgra = svga->curr.velems->attrib_is_bgra;
+   key->vs.attrib_puint_to_snorm = svga->curr.velems->attrib_puint_to_snorm;
+   key->vs.attrib_puint_to_uscaled = svga->curr.velems->attrib_puint_to_uscaled;
+   key->vs.attrib_puint_to_sscaled = svga->curr.velems->attrib_puint_to_sscaled;
+
+   /* SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER */
+   svga_init_shader_key_common(svga, shader, key);
+
+   /* SVGA_NEW_RAST */
+   key->clip_plane_enable = svga->curr.rast->templ.clip_plane_enable;
 }
 
 
@@ -201,63 +212,196 @@ enum pipe_error
 svga_reemit_vs_bindings(struct svga_context *svga)
 {
    enum pipe_error ret;
-   struct svga_winsys_gb_shader *gbshader =
-      svga->state.hw_draw.vs ? svga->state.hw_draw.vs->gb_shader : NULL;
+   struct svga_winsys_gb_shader *gbshader = NULL;
+   SVGA3dShaderId shaderId = SVGA3D_INVALID_ID;
 
-   assert(svga->rebind.vs);
+   assert(svga->rebind.flags.vs);
    assert(svga_have_gb_objects(svga));
 
-   ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_VS, gbshader);
+   if (svga->state.hw_draw.vs) {
+      gbshader = svga->state.hw_draw.vs->gb_shader;
+      shaderId = svga->state.hw_draw.vs->id;
+   }
+
+   if (!svga_need_to_rebind_resources(svga)) {
+      ret =  svga->swc->resource_rebind(svga->swc, NULL, gbshader,
+                                        SVGA_RELOC_READ);
+      goto out;
+   }
+
+   if (svga_have_vgpu10(svga))
+      ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_VS,
+                                    gbshader, shaderId);
+   else
+      ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_VS, gbshader);
+
+ out:
    if (ret != PIPE_OK)
       return ret;
 
-   svga->rebind.vs = FALSE;
+   svga->rebind.flags.vs = FALSE;
    return PIPE_OK;
 }
 
 
+/**
+ * Build and compile a pass-through vertex shader for use with the 'draw'
+ * module (VGPU10 only).  The real VS has already been run by 'draw', so
+ * this shader only copies position plus the COLOR/GENERIC/FOG inputs
+ * consumed by the fragment shader 'fs'.  On success the new variant is
+ * stored in *out_variant; on failure an error code is returned instead.
+ */
 static enum pipe_error
-emit_hw_vs(struct svga_context *svga, unsigned dirty)
+compile_passthrough_vs(struct svga_context *svga,
+                       struct svga_vertex_shader *vs,
+                       struct svga_fragment_shader *fs,
+                       struct svga_shader_variant **out_variant)
 {
    struct svga_shader_variant *variant = NULL;
+   unsigned num_inputs;
+   unsigned i;
+   unsigned num_elements;
+   struct svga_vertex_shader new_vs;
+   struct ureg_src src[PIPE_MAX_SHADER_INPUTS];
+   struct ureg_dst dst[PIPE_MAX_SHADER_OUTPUTS];
+   struct ureg_program *ureg;
+   unsigned num_tokens;
+   struct svga_compile_key key;
+   enum pipe_error ret;
+
+   assert(svga_have_vgpu10(svga));
+   assert(fs);
+
+   num_inputs = fs->base.info.num_inputs;
+
+   ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!ureg)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* draw will always add position */
+   dst[0] = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
+   src[0] = ureg_DECL_vs_input(ureg, 0);
+   num_elements = 1;
+
+   /**
+    * swtnl backend redefines the input layout based on the
+    * fragment shader's inputs. So we only need to passthrough
+    * those inputs that will be consumed by the fragment shader.
+    * Note: DX10 requires the number of vertex elements
+    * specified in the input layout to be no less than the
+    * number of inputs to the vertex shader.
+    */
+   for (i = 0; i < num_inputs; i++) {
+      switch (fs->base.info.input_semantic_name[i]) {
+      case TGSI_SEMANTIC_COLOR:
+      case TGSI_SEMANTIC_GENERIC:
+      case TGSI_SEMANTIC_FOG:
+         dst[num_elements] = ureg_DECL_output(ureg,
+                                fs->base.info.input_semantic_name[i],
+                                fs->base.info.input_semantic_index[i]);
+         src[num_elements] = ureg_DECL_vs_input(ureg, num_elements);
+         num_elements++;
+         break;
+      default:
+         break;
+      }
+   }
+
+   for (i = 0; i < num_elements; i++) {
+      ureg_MOV(ureg, dst[i], src[i]);
+   }
+
+   ureg_END(ureg);
+
+   memset(&new_vs, 0, sizeof(new_vs));
+   new_vs.base.tokens = ureg_get_tokens(ureg, &num_tokens);
+   tgsi_scan_shader(new_vs.base.tokens, &new_vs.base.info);
+
+   memset(&key, 0, sizeof(key));
+   key.vs.undo_viewport = 1;
+
+   ret = compile_vs(svga, &new_vs, &key, &variant);
+   /* Release the temporary TGSI on every path, including compile failure */
+   ureg_free_tokens(new_vs.base.tokens);
+   ureg_destroy(ureg);
+   if (ret != PIPE_OK)
+      return ret;
+
+   /* Overwrite the variant key to indicate it's a pass-through VS */
+   memset(&variant->key, 0, sizeof(variant->key));
+   variant->key.vs.passthrough = 1;
+   variant->key.vs.undo_viewport = 1;
+
+   *out_variant = variant;
+
+   return PIPE_OK;
+}
+
+
+static enum pipe_error
+emit_hw_vs(struct svga_context *svga, unsigned dirty)
+{
+   struct svga_shader_variant *variant;
+   struct svga_vertex_shader *vs = svga->curr.vs;
+   struct svga_fragment_shader *fs = svga->curr.fs;
    enum pipe_error ret = PIPE_OK;
+   struct svga_compile_key key;
+
+   /* If there is an active geometry shader, and it has stream output
+    * defined, then we will skip the stream output from the vertex shader
+    */
+   if (!svga_have_gs_streamout(svga)) {
+      /* No GS stream out */
+      if (svga_have_vs_streamout(svga)) {
+         /* Set VS stream out */
+         svga_set_stream_output(svga, vs->base.stream_output);
+      }
+      else {
+         /* turn off stream out */
+         svga_set_stream_output(svga, NULL);
+      }
+   }
 
    /* SVGA_NEW_NEED_SWTNL */
-   if (!svga->state.sw.need_swtnl) {
-      struct svga_vertex_shader *vs = svga->curr.vs;
-      struct svga_vs_compile_key key;
+   if (svga->state.sw.need_swtnl && !svga_have_vgpu10(svga)) {
+      /* No vertex shader is needed */
+      variant = NULL;
+   }
+   else {
+      make_vs_key(svga, &key);
 
-      make_vs_key( svga, &key );
+      /* See if we already have a VS variant that matches the key */
+      variant = svga_search_shader_key(&vs->base, &key);
 
-      variant = search_vs_key( vs, &key );
       if (!variant) {
-         ret = compile_vs( svga, vs, &key, &variant );
+         /* Create VS variant now */
+         if (key.vs.passthrough) {
+            ret = compile_passthrough_vs(svga, vs, fs, &variant);
+         }
+         else {
+            ret = compile_vs(svga, vs, &key, &variant);
+         }
          if (ret != PIPE_OK)
             return ret;
-      }
 
-      assert(variant);
+         /* insert the new variant at head of linked list */
+         assert(variant);
+         variant->next = vs->base.variants;
+         vs->base.variants = variant;
+      }
    }
 
    if (variant != svga->state.hw_draw.vs) {
-      if (svga_have_gb_objects(svga)) {
-         struct svga_winsys_gb_shader *gbshader =
-            variant ? variant->gb_shader : NULL;
-         ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_VS, gbshader);
-         if (ret != PIPE_OK)
-            return ret;
-
-         svga->rebind.vs = FALSE;
-      }
-      else {
-         unsigned id = variant ? variant->id : SVGA_ID_INVALID;
-         ret = SVGA3D_SetShader(svga->swc, SVGA3D_SHADERTYPE_VS, id);
+      /* Bind the new variant */
+      if (variant) {
+         ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, variant);
          if (ret != PIPE_OK)
             return ret;
+         svga->rebind.flags.vs = FALSE;
       }
 
       svga->dirty |= SVGA_NEW_VS_VARIANT;
-      svga->state.hw_draw.vs = variant;      
+      svga->state.hw_draw.vs = variant;
    }
 
    return PIPE_OK;
@@ -268,6 +412,9 @@ struct svga_tracked_state svga_hw_vs =
    "vertex shader (hwtnl)",
    (SVGA_NEW_VS |
     SVGA_NEW_FS |
+    SVGA_NEW_TEXTURE_BINDING |
+    SVGA_NEW_SAMPLER |
+    SVGA_NEW_RAST |
     SVGA_NEW_PRESCALE |
     SVGA_NEW_VELEMENT |
     SVGA_NEW_NEED_SWTNL),
diff --git a/src/gallium/drivers/svga/svga_surface.c b/src/gallium/drivers/svga/svga_surface.c
index 85d01546007..79981dcf5ff 100644
--- a/src/gallium/drivers/svga/svga_surface.c
+++ b/src/gallium/drivers/svga/svga_surface.c
@@ -29,6 +29,7 @@
 #include "pipe/p_defines.h"
 #include "util/u_inlines.h"
 #include "os/os_thread.h"
+#include "util/u_bitmask.h"
 #include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
@@ -36,19 +37,21 @@
 #include "svga_format.h"
 #include "svga_screen.h"
 #include "svga_context.h"
+#include "svga_sampler_view.h"
 #include "svga_resource_texture.h"
 #include "svga_surface.h"
 #include "svga_debug.h"
 
+static void svga_mark_surface_dirty(struct pipe_surface *surf);
 
 void
 svga_texture_copy_handle(struct svga_context *svga,
                          struct svga_winsys_surface *src_handle,
                          unsigned src_x, unsigned src_y, unsigned src_z,
-                         unsigned src_level, unsigned src_face,
+                         unsigned src_level, unsigned src_layer,
                          struct svga_winsys_surface *dst_handle,
                          unsigned dst_x, unsigned dst_y, unsigned dst_z,
-                         unsigned dst_level, unsigned dst_face,
+                         unsigned dst_level, unsigned dst_layer,
                          unsigned width, unsigned height, unsigned depth)
 {
    struct svga_surface dst, src;
@@ -59,12 +62,12 @@ svga_texture_copy_handle(struct svga_context *svga,
 
    src.handle = src_handle;
    src.real_level = src_level;
-   src.real_face = src_face;
+   src.real_layer = src_layer;
    src.real_zslice = 0;
 
    dst.handle = dst_handle;
    dst.real_level = dst_level;
-   dst.real_face = dst_face;
+   dst.real_layer = dst_layer;
    dst.real_zslice = 0;
 
    box.x = dst_x;
@@ -103,11 +106,13 @@ svga_texture_copy_handle(struct svga_context *svga,
 struct svga_winsys_surface *
 svga_texture_view_surface(struct svga_context *svga,
                           struct svga_texture *tex,
+                          unsigned bind_flags,
                           SVGA3dSurfaceFlags flags,
                           SVGA3dSurfaceFormat format,
                           unsigned start_mip,
                           unsigned num_mip,
-                          int face_pick,
+                          int layer_pick,
+                          unsigned num_layers,
                           int zslice_pick,
                           struct svga_host_surface_cache_key *key) /* OUT */
 {
@@ -117,8 +122,8 @@ svga_texture_view_surface(struct svga_context *svga,
    unsigned z_offset = 0;
 
    SVGA_DBG(DEBUG_PERF, 
-            "svga: Create surface view: face %d zslice %d mips %d..%d\n",
-            face_pick, zslice_pick, start_mip, start_mip+num_mip-1);
+            "svga: Create surface view: layer %d zslice %d mips %d..%d\n",
+            layer_pick, zslice_pick, start_mip, start_mip+num_mip-1);
 
    key->flags = flags;
    key->format = format;
@@ -127,12 +132,20 @@ svga_texture_view_surface(struct svga_context *svga,
    key->size.height = u_minify(tex->b.b.height0, start_mip);
    key->size.depth = zslice_pick < 0 ? u_minify(tex->b.b.depth0, start_mip) : 1;
    key->cachable = 1;
+   key->arraySize = 1;
+   key->numFaces = 1;
+   key->sampleCount = tex->b.b.nr_samples;
+
+   if (key->sampleCount > 1) {
+      key->flags |= SVGA3D_SURFACE_MASKABLE_ANTIALIAS;
+   }
    
-   if (tex->b.b.target == PIPE_TEXTURE_CUBE && face_pick < 0) {
+   if (tex->b.b.target == PIPE_TEXTURE_CUBE && layer_pick < 0) {
       key->flags |= SVGA3D_SURFACE_CUBEMAP;
       key->numFaces = 6;
-   } else {
-      key->numFaces = 1;
+   } else if (tex->b.b.target == PIPE_TEXTURE_1D_ARRAY ||
+              tex->b.b.target == PIPE_TEXTURE_2D_ARRAY) {
+      key->arraySize = num_layers;
    }
 
    if (key->format == SVGA3D_FORMAT_INVALID) {
@@ -141,7 +154,7 @@ svga_texture_view_surface(struct svga_context *svga,
    }
 
    SVGA_DBG(DEBUG_DMA, "surface_create for texture view\n");
-   handle = svga_screen_surface_create(ss, key);
+   handle = svga_screen_surface_create(ss, bind_flags, PIPE_USAGE_DEFAULT, key);
    if (!handle) {
       key->cachable = 0;
       return NULL;
@@ -149,15 +162,15 @@ svga_texture_view_surface(struct svga_context *svga,
 
    SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture view)\n", handle);
 
-   if (face_pick < 0)
-      face_pick = 0;
+   if (layer_pick < 0)
+      layer_pick = 0;
 
    if (zslice_pick >= 0)
       z_offset = zslice_pick;
 
    for (i = 0; i < key->numMipLevels; i++) {
-      for (j = 0; j < key->numFaces; j++) {
-         if (svga_is_texture_level_defined(tex, j + face_pick, i + start_mip)) {
+      for (j = 0; j < key->numFaces * key->arraySize; j++) {
+         if (svga_is_texture_level_defined(tex, j + layer_pick, i + start_mip)) {
             unsigned depth = (zslice_pick < 0 ?
                               u_minify(tex->b.b.depth0, i + start_mip) :
                               1);
@@ -166,7 +179,7 @@ svga_texture_view_surface(struct svga_context *svga,
                                      tex->handle, 
                                      0, 0, z_offset, 
                                      i + start_mip, 
-                                     j + face_pick,
+                                     j + layer_pick,
                                      handle, 0, 0, 0, i, j,
                                      u_minify(tex->b.b.width0, i + start_mip),
                                      u_minify(tex->b.b.height0, i + start_mip),
@@ -179,33 +192,43 @@ svga_texture_view_surface(struct svga_context *svga,
 }
 
 
+/**
+ * A helper function to create a surface view.
+ * The view boolean flag specifies whether svga_texture_view_surface()
+ * will be called to create a cloned surface and resource for the view.
+ */
 static struct pipe_surface *
-svga_create_surface(struct pipe_context *pipe,
-                    struct pipe_resource *pt,
-                    const struct pipe_surface *surf_tmpl)
+svga_create_surface_view(struct pipe_context *pipe,
+                         struct pipe_resource *pt,
+                         const struct pipe_surface *surf_tmpl,
+                         boolean view)
 {
    struct svga_context *svga = svga_context(pipe);
    struct svga_texture *tex = svga_texture(pt);
    struct pipe_screen *screen = pipe->screen;
    struct svga_screen *ss = svga_screen(screen);
    struct svga_surface *s;
-   unsigned face, zslice;
-   boolean view = FALSE;
-   SVGA3dSurfaceFlags flags;
+   unsigned layer, zslice, bind;
+   unsigned nlayers = 1;
+   SVGA3dSurfaceFlags flags = 0;
    SVGA3dSurfaceFormat format;
 
-   assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
-
    s = CALLOC_STRUCT(svga_surface);
    if (!s)
       return NULL;
 
    if (pt->target == PIPE_TEXTURE_CUBE) {
-      face = surf_tmpl->u.tex.first_layer;
+      layer = surf_tmpl->u.tex.first_layer;
       zslice = 0;
    }
+   else if (pt->target == PIPE_TEXTURE_1D_ARRAY ||
+            pt->target == PIPE_TEXTURE_2D_ARRAY) {
+      layer = surf_tmpl->u.tex.first_layer;
+      zslice = 0;
+      nlayers = surf_tmpl->u.tex.last_layer - surf_tmpl->u.tex.first_layer + 1;
+   }
    else {
-      face = 0;
+      layer = 0;
       zslice = surf_tmpl->u.tex.first_layer;
    }
 
@@ -218,25 +241,93 @@ svga_create_surface(struct pipe_context *pipe,
    s->base.u.tex.level = surf_tmpl->u.tex.level;
    s->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer;
    s->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+   s->view_id = SVGA3D_INVALID_ID;
+
+   s->backed = NULL;
 
    if (util_format_is_depth_or_stencil(surf_tmpl->format)) {
-      flags = SVGA3D_SURFACE_HINT_DEPTHSTENCIL;
+      flags = SVGA3D_SURFACE_HINT_DEPTHSTENCIL |
+              SVGA3D_SURFACE_BIND_DEPTH_STENCIL;
+      bind = PIPE_BIND_DEPTH_STENCIL;
    }
    else {
-      flags = SVGA3D_SURFACE_HINT_RENDERTARGET;
+      flags = SVGA3D_SURFACE_HINT_RENDERTARGET |
+              SVGA3D_SURFACE_BIND_RENDER_TARGET;
+      bind = PIPE_BIND_RENDER_TARGET;
    }
 
-   format = svga_translate_format(ss, surf_tmpl->format, 0);
+   if (tex->imported)
+      format = tex->key.format;
+   else
+      format = svga_translate_format(ss, surf_tmpl->format, bind);
+
    assert(format != SVGA3D_FORMAT_INVALID);
 
-   if (svga_screen(screen)->debug.force_surface_view)
-      view = TRUE;
+   if (view) {
+      SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u layer %u z %u, %p\n",
+               pt, surf_tmpl->u.tex.level, layer, zslice, s);
+
+      if (svga_have_vgpu10(svga)) {
+         switch (pt->target) {
+         case PIPE_TEXTURE_1D:
+            flags |= SVGA3D_SURFACE_1D;
+            break;
+         case PIPE_TEXTURE_1D_ARRAY:
+            flags |= SVGA3D_SURFACE_1D | SVGA3D_SURFACE_ARRAY;
+            break;
+         case PIPE_TEXTURE_2D_ARRAY:
+            flags |= SVGA3D_SURFACE_ARRAY;
+            break;
+         case PIPE_TEXTURE_3D:
+            flags |= SVGA3D_SURFACE_VOLUME;
+            break;
+         case PIPE_TEXTURE_CUBE:
+            if (nlayers == 6)
+               flags |= SVGA3D_SURFACE_CUBEMAP;
+            break;
+         default:
+            break;
+         }
+      }
 
-   /* Currently only used for compressed textures */
-   if (format != svga_translate_format(ss, surf_tmpl->format, 0)) {
-      view = TRUE;
+      /* When we clone the surface view resource, use the format used in
+       * the creation of the original resource.
+       */
+      s->handle = svga_texture_view_surface(svga, tex, bind, flags, tex->key.format,
+                                            surf_tmpl->u.tex.level, 1,
+                                            layer, nlayers, zslice, &s->key);
+      s->key.format = format;
+      s->real_layer = 0;
+      s->real_level = 0;
+      s->real_zslice = 0;
+   } else {
+      SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, layer %u, z %u, %p\n",
+               pt, surf_tmpl->u.tex.level, layer, zslice, s);
+
+      memset(&s->key, 0, sizeof s->key);
+      s->key.format = format;
+      s->handle = tex->handle;
+      s->real_layer = layer;
+      s->real_zslice = zslice;
+      s->real_level = surf_tmpl->u.tex.level;
    }
 
+   return &s->base;
+}
+
+
+static struct pipe_surface *
+svga_create_surface(struct pipe_context *pipe,
+                    struct pipe_resource *pt,
+                    const struct pipe_surface *surf_tmpl)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct pipe_screen *screen = pipe->screen;
+   boolean view = FALSE;
+
+   if (svga_screen(screen)->debug.force_surface_view)
+      view = TRUE;
+
    if (surf_tmpl->u.tex.level != 0 &&
        svga_screen(screen)->debug.force_level_surface_view)
       view = TRUE;
@@ -244,47 +335,170 @@ svga_create_surface(struct pipe_context *pipe,
    if (pt->target == PIPE_TEXTURE_3D)
       view = TRUE;
 
-   if (svga_screen(screen)->debug.no_surface_view)
+   if (svga_have_vgpu10(svga) || svga_screen(screen)->debug.no_surface_view)
       view = FALSE;
 
-   if (view) {
-      SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n",
-               pt, surf_tmpl->u.tex.level, face, zslice, s);
+   return svga_create_surface_view(pipe, pt, surf_tmpl, view);
+}
 
-      s->handle = svga_texture_view_surface(svga, tex, flags, format,
-                                            surf_tmpl->u.tex.level,
-                                            1, face, zslice, &s->key);
-      s->real_face = 0;
-      s->real_level = 0;
-      s->real_zslice = 0;
-   } else {
-      SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, face %u, z %u, %p\n",
-               pt, surf_tmpl->u.tex.level, face, zslice, s);
 
-      memset(&s->key, 0, sizeof s->key);
-      s->handle = tex->handle;
-      s->real_face = face;
-      s->real_zslice = zslice;
-      s->real_level = surf_tmpl->u.tex.level;
+/**
+ * Clone the surface view and its associated resource.
+ */
+static struct svga_surface *
+create_backed_surface_view(struct svga_context *svga, struct svga_surface *s)
+{
+   struct svga_surface *bs = s->backed;
+
+   if (bs == NULL) {
+      struct svga_texture *tex = svga_texture(s->base.texture);
+      struct pipe_surface *backed_view;
+
+      backed_view = svga_create_surface_view(&svga->pipe,
+                                             &tex->b.b,
+                                             &s->base,
+                                             TRUE);
+
+      bs = svga_surface(backed_view);
+      s->backed = bs;
    }
 
+   svga_mark_surface_dirty(&bs->base);
+
+   return bs;
+}
+
+/**
+ * Create a DX RenderTarget/DepthStencil View for the given surface,
+ * if needed.  Returns the surface (possibly its backed clone) owning it.
+ */
+struct pipe_surface *
+svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s)
+{
+   enum pipe_error ret = PIPE_OK;
+   int try;
+
+   assert(svga_have_vgpu10(svga));
+
+   /**
+    * DX spec explicitly specifies that no resource can be bound to a render
+    * target view and a shader resource view simultaneously.
+    * So first check if the resource bound to this surface view collides with
+    * a sampler view. If so, then we will clone this surface view and its
+    * associated resource. We will then use the cloned surface view for
+    * render target.
+    */
+   if (svga_check_sampler_view_resource_collision(svga, s->handle)) {
+      SVGA_DBG(DEBUG_VIEWS,
+               "same resource used in shaderResource and renderTarget %p\n",
+               s->handle);
+      s = create_backed_surface_view(svga, s);
+   }
+
+   if (s->view_id == SVGA3D_INVALID_ID) {
+      SVGA3dResourceType resType;
+      SVGA3dRenderTargetViewDesc desc;
+
+      desc.tex.mipSlice = s->real_level;
+      desc.tex.firstArraySlice = s->real_layer + s->real_zslice;
+      desc.tex.arraySize =
+         s->base.u.tex.last_layer - s->base.u.tex.first_layer + 1;
+
+      s->view_id = util_bitmask_add(svga->surface_view_id_bm);
+
+      switch (s->base.texture->target) {
+      case PIPE_TEXTURE_1D:
+      case PIPE_TEXTURE_1D_ARRAY:
+         resType = SVGA3D_RESOURCE_TEXTURE1D;
+         break;
+      case PIPE_TEXTURE_RECT:
+      case PIPE_TEXTURE_2D:
+      case PIPE_TEXTURE_2D_ARRAY:
+      case PIPE_TEXTURE_CUBE:
+         /* drawing to cube map is treated as drawing to 2D array */
+         resType = SVGA3D_RESOURCE_TEXTURE2D;
+         break;
+      case PIPE_TEXTURE_3D:
+         resType = SVGA3D_RESOURCE_TEXTURE3D;
+         break;
+      default:
+         assert(!"Unexpected texture target");
+         resType = SVGA3D_RESOURCE_TEXTURE2D;
+      }
+
+      for (try = 0; try < 2; try++) {
+         if (util_format_is_depth_or_stencil(s->base.format)) {
+            ret = SVGA3D_vgpu10_DefineDepthStencilView(svga->swc,
+                                                       s->view_id,
+                                                       s->handle,
+                                                       s->key.format,
+                                                       resType,
+                                                       &desc);
+         }
+         else {
+            ret = SVGA3D_vgpu10_DefineRenderTargetView(svga->swc,
+                                                       s->view_id,
+                                                       s->handle,
+                                                       s->key.format,
+                                                       resType,
+                                                       &desc);
+         }
+         if (ret == PIPE_OK)
+            break;
+         svga_context_flush(svga, NULL);
+      }
+
+      assert(ret == PIPE_OK);
+      if (ret != PIPE_OK) {
+         util_bitmask_clear(svga->surface_view_id_bm, s->view_id);
+         s->view_id = SVGA3D_INVALID_ID;
+      }
+   }
    return &s->base;
 }
 
 
+
 static void
 svga_surface_destroy(struct pipe_context *pipe,
                      struct pipe_surface *surf)
 {
+   struct svga_context *svga = svga_context(pipe);
    struct svga_surface *s = svga_surface(surf);
    struct svga_texture *t = svga_texture(surf->texture);
    struct svga_screen *ss = svga_screen(surf->texture->screen);
+   enum pipe_error ret = PIPE_OK;
+
+   /* Destroy the backed view surface if it exists */
+   if (s->backed) {
+      svga_surface_destroy(pipe, &s->backed->base);
+      s->backed = NULL;
+   }
 
    if (s->handle != t->handle) {
       SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle);
       svga_screen_surface_destroy(ss, &s->key, &s->handle);
    }
 
+   if (s->view_id != SVGA3D_INVALID_ID) {
+      unsigned try;
+
+      assert(svga_have_vgpu10(svga));
+      for (try = 0; try < 2; try++) {
+         if (util_format_is_depth_or_stencil(s->base.format)) {
+            ret = SVGA3D_vgpu10_DestroyDepthStencilView(svga->swc, s->view_id);
+         }
+         else {
+            ret = SVGA3D_vgpu10_DestroyRenderTargetView(svga->swc, s->view_id);
+         }
+         if (ret == PIPE_OK)
+            break;
+         svga_context_flush(svga, NULL);
+      }
+      assert(ret == PIPE_OK);
+      util_bitmask_clear(svga->surface_view_id_bm, s->view_id);
+   }
+
    pipe_resource_reference(&surf->texture, NULL);
    FREE(surf);
 }
@@ -302,11 +516,8 @@ svga_mark_surface_dirty(struct pipe_surface *surf)
 
       if (s->handle == tex->handle) {
          /* hmm so 3d textures always have all their slices marked ? */
-         if (surf->texture->target == PIPE_TEXTURE_CUBE)
-            svga_define_texture_level(tex, surf->u.tex.first_layer,
-                                      surf->u.tex.level);
-         else
-            svga_define_texture_level(tex, 0, surf->u.tex.level);
+         svga_define_texture_level(tex, surf->u.tex.first_layer,
+                                   surf->u.tex.level);
       }
       else {
          /* this will happen later in svga_propagate_surface */
@@ -345,18 +556,26 @@ svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf)
    struct svga_surface *s = svga_surface(surf);
    struct svga_texture *tex = svga_texture(surf->texture);
    struct svga_screen *ss = svga_screen(surf->texture->screen);
-   unsigned zslice, face;
+   unsigned zslice, layer;
+   unsigned nlayers = 1;
+   unsigned i;
 
    if (!s->dirty)
       return;
 
    if (surf->texture->target == PIPE_TEXTURE_CUBE) {
       zslice = 0;
-      face = surf->u.tex.first_layer;
+      layer = surf->u.tex.first_layer;
+   }
+   else if (surf->texture->target == PIPE_TEXTURE_1D_ARRAY ||
+            surf->texture->target == PIPE_TEXTURE_2D_ARRAY) {
+      zslice = 0;
+      layer = surf->u.tex.first_layer;
+      nlayers = surf->u.tex.last_layer - surf->u.tex.first_layer + 1;
    }
    else {
       zslice = surf->u.tex.first_layer;
-      face = 0;
+      layer = 0;
    }
 
    s->dirty = FALSE;
@@ -367,12 +586,14 @@ svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf)
       SVGA_DBG(DEBUG_VIEWS,
                "svga: Surface propagate: tex %p, level %u, from %p\n",
                tex, surf->u.tex.level, surf);
-      svga_texture_copy_handle(svga,
-                               s->handle, 0, 0, 0, s->real_level, s->real_face,
-                               tex->handle, 0, 0, zslice, surf->u.tex.level, face,
-                               u_minify(tex->b.b.width0, surf->u.tex.level),
-                               u_minify(tex->b.b.height0, surf->u.tex.level), 1);
-      svga_define_texture_level(tex, face, surf->u.tex.level);
+      for (i = 0; i < nlayers; i++) {
+         svga_texture_copy_handle(svga,
+                                  s->handle, 0, 0, 0, s->real_level, s->real_layer + i,
+                                  tex->handle, 0, 0, zslice, surf->u.tex.level, layer + i,
+                                  u_minify(tex->b.b.width0, surf->u.tex.level),
+                                  u_minify(tex->b.b.height0, surf->u.tex.level), 1);
+         svga_define_texture_level(tex, layer + i, surf->u.tex.level);
+      }
    }
 }
 
@@ -390,10 +611,76 @@ svga_surface_needs_propagation(const struct pipe_surface *surf)
 }
 
 
+static void
+svga_get_sample_position(struct pipe_context *context,
+                         unsigned sample_count, unsigned sample_index,
+                         float *pos_out)
+{
+   /* We can't actually query the device to learn the sample positions.
+    * These were grabbed from nvidia's driver.
+    */
+   static const float pos1[1][2] = {
+      { 0.5, 0.5 }
+   };
+   static const float pos4[4][2] = {
+      { 0.375000, 0.125000 },
+      { 0.875000, 0.375000 },
+      { 0.125000, 0.625000 },
+      { 0.625000, 0.875000 }
+   };
+   static const float pos8[8][2] = {
+      { 0.562500, 0.312500 },
+      { 0.437500, 0.687500 },
+      { 0.812500, 0.562500 },
+      { 0.312500, 0.187500 },
+      { 0.187500, 0.812500 },
+      { 0.062500, 0.437500 },
+      { 0.687500, 0.937500 },
+      { 0.937500, 0.062500 }
+   };
+   static const float pos16[16][2] = {
+      { 0.187500, 0.062500 },
+      { 0.437500, 0.187500 },
+      { 0.062500, 0.312500 },
+      { 0.312500, 0.437500 },
+      { 0.687500, 0.062500 },
+      { 0.937500, 0.187500 },
+      { 0.562500, 0.312500 },
+      { 0.812500, 0.437500 },
+      { 0.187500, 0.562500 },
+      { 0.437500, 0.687500 },
+      { 0.062500, 0.812500 },
+      { 0.312500, 0.937500 },
+      { 0.687500, 0.562500 },
+      { 0.937500, 0.687500 },
+      { 0.562500, 0.812500 },
+      { 0.812500, 0.937500 }
+   };
+   const float (*positions)[2];
+
+   switch (sample_count) {
+   case 4:
+      positions = pos4;
+      break;
+   case 8:
+      positions = pos8;
+      break;
+   case 16:
+      positions = pos16;
+      break;
+   default:
+      positions = pos1;
+   }
+
+   pos_out[0] = positions[sample_index][0];
+   pos_out[1] = positions[sample_index][1];
+}
+
 
 void
 svga_init_surface_functions(struct svga_context *svga)
 {
    svga->pipe.create_surface = svga_create_surface;
    svga->pipe.surface_destroy = svga_surface_destroy;
+   svga->pipe.get_sample_position = svga_get_sample_position;
 }
diff --git a/src/gallium/drivers/svga/svga_surface.h b/src/gallium/drivers/svga/svga_surface.h
index 2fa72a1c8f0..0e5794b0b38 100644
--- a/src/gallium/drivers/svga/svga_surface.h
+++ b/src/gallium/drivers/svga/svga_surface.h
@@ -47,11 +47,15 @@ struct svga_surface
    struct svga_host_surface_cache_key key;
    struct svga_winsys_surface *handle;
 
-   unsigned real_face;
+   unsigned real_layer;
    unsigned real_level;
    unsigned real_zslice;
 
    boolean dirty;
+
+   /* VGPU10 */
+   SVGA3dRenderTargetViewId view_id;
+   struct svga_surface *backed;
 };
 
 
@@ -64,11 +68,13 @@ svga_surface_needs_propagation(const struct pipe_surface *surf);
 struct svga_winsys_surface *
 svga_texture_view_surface(struct svga_context *svga,
                           struct svga_texture *tex,
+                          unsigned bind_flags,
                           SVGA3dSurfaceFlags flags,
                           SVGA3dSurfaceFormat format,
                           unsigned start_mip,
                           unsigned num_mip,
-                          int face_pick,
+                          int layer_pick,
+                          unsigned num_layers,
                           int zslice_pick,
                           struct svga_host_surface_cache_key *key); /* OUT */
 
@@ -99,4 +105,8 @@ svga_surface_const(const struct pipe_surface *surface)
    return (const struct svga_surface *)surface;
 }
 
+struct pipe_surface *
+svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s);
+
+
 #endif
diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c
index ded8bcbd536..4bdb21a9842 100644
--- a/src/gallium/drivers/svga/svga_swtnl_backend.c
+++ b/src/gallium/drivers/svga/svga_swtnl_backend.c
@@ -40,6 +40,7 @@
 #include "svga_reg.h"
 #include "svga3d_reg.h"
 #include "svga_draw.h"
+#include "svga_shader.h"
 #include "svga_swtnl_private.h"
 
 
@@ -129,9 +130,12 @@ svga_vbuf_render_map_vertices( struct vbuf_render *render )
                                          PIPE_TRANSFER_DISCARD_RANGE |
                                          PIPE_TRANSFER_UNSYNCHRONIZED,
                                          &svga_render->vbuf_transfer);
-      if (ptr)
+      if (ptr) {
+         svga_render->vbuf_ptr = ptr;
          return ptr + svga_render->vbuf_offset;
+      }
       else {
+         svga_render->vbuf_ptr = NULL;
          svga_render->vbuf_transfer = NULL;
          return NULL;
       }
@@ -154,6 +158,18 @@ svga_vbuf_render_unmap_vertices( struct vbuf_render *render,
 
    offset = svga_render->vbuf_offset + svga_render->vertex_size * min_index;
    length = svga_render->vertex_size * (max_index + 1 - min_index);
+
+   if (0) {
+      /* dump vertex data */
+      const float *f = (const float *) ((char *) svga_render->vbuf_ptr +
+                                        svga_render->vbuf_offset);
+      unsigned i;
+      debug_printf("swtnl vertex data:\n");
+      for (i = 0; i < length / 4; i += 4) {
+         debug_printf("%u: %f %f %f %f\n", i, f[i], f[i+1], f[i+2], f[i+3]);
+      }
+   }
+
    pipe_buffer_flush_mapped_range(&svga->pipe,
 				  svga_render->vbuf_transfer,
 				  offset, length);
@@ -178,6 +194,7 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render )
    SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS];
    enum pipe_error ret;
    unsigned i;
+   static const unsigned zero[PIPE_MAX_ATTRIBS] = {0};
 
    /* if the vdecl or vbuf hasn't changed do nothing */
    if (!svga->swtnl.new_vdecl)
@@ -192,18 +209,27 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render )
       ret = svga_hwtnl_flush(svga->hwtnl);
       /* if we hit this path we might become synced with hw */
       svga->swtnl.new_vbuf = TRUE;
-      assert(ret == 0);
+      assert(ret == PIPE_OK);
    }
 
-   svga_hwtnl_reset_vdecl(svga->hwtnl, svga_render->vdecl_count);
-
    for (i = 0; i < svga_render->vdecl_count; i++) {
       vdecl[i].array.offset += svga_render->vdecl_offset;
+   }
 
-      svga_hwtnl_vdecl( svga->hwtnl,
-                        i,
-                        &vdecl[i],
-                        svga_render->vbuf );
+   svga_hwtnl_vertex_decls(svga->hwtnl,
+                           svga_render->vdecl_count,
+                           vdecl,
+                           zero,
+                           svga_render->layout_id);
+
+   /* Specify the vertex buffer (there's only ever one) */
+   {
+      struct pipe_vertex_buffer vb;
+      vb.buffer = svga_render->vbuf;
+      vb.buffer_offset = svga_render->vdecl_offset;
+      vb.stride = vdecl[0].array.stride;
+      vb.user_buffer = NULL;
+      svga_hwtnl_vertex_buffers(svga->hwtnl, 1, &vb);
    }
 
    /* We have already taken care of flatshading, so let the hwtnl
@@ -211,15 +237,15 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render )
     */
    if (svga->state.sw.need_pipeline) {
       svga_hwtnl_set_flatshade(svga->hwtnl, FALSE, FALSE);
-      svga_hwtnl_set_unfilled(svga->hwtnl, PIPE_POLYGON_MODE_FILL);
+      svga_hwtnl_set_fillmode(svga->hwtnl, PIPE_POLYGON_MODE_FILL);
    }
    else {
       svga_hwtnl_set_flatshade( svga->hwtnl,
-                                svga->curr.rast->templ.flatshade,
+                                svga->curr.rast->templ.flatshade ||
+                                svga->state.hw_draw.fs->uses_flat_interp,
                                 svga->curr.rast->templ.flatshade_first );
 
-      svga_hwtnl_set_unfilled( svga->hwtnl,
-                               svga->curr.rast->hw_unfilled );
+      svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);
    }
 
    svga->swtnl.new_vdecl = FALSE;
@@ -227,13 +253,15 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render )
 
 static void
 svga_vbuf_render_draw_arrays( struct vbuf_render *render,
-                              unsigned start,
-                              uint nr )
+                              unsigned start, uint nr )
 {
    struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
    struct svga_context *svga = svga_render->svga;
    unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size;
    enum pipe_error ret = PIPE_OK;
+   /* instancing will already have been resolved at this point by 'draw' */
+   const unsigned start_instance = 0;
+   const unsigned instance_count = 1;
 
    /* off to hardware */
    svga_vbuf_submit_state(svga_render);
@@ -244,10 +272,13 @@ svga_vbuf_render_draw_arrays( struct vbuf_render *render,
     */
    svga_update_state_retry( svga, SVGA_STATE_HW_DRAW );
 
-   ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr);
+   ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr,
+                                start_instance, instance_count);
    if (ret != PIPE_OK) {
       svga_context_flush(svga, NULL);
-      ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr);
+      ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim,
+                                   start + bias, nr,
+                                   start_instance, instance_count);
       svga->swtnl.new_vbuf = TRUE;
       assert(ret == PIPE_OK);
    }
@@ -265,6 +296,9 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render,
    int bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size;
    boolean ret;
    size_t size = 2 * nr_indices;
+   /* instancing will already have been resolved at this point by 'draw' */
+   const unsigned start_instance = 0;
+   const unsigned instance_count = 1;
 
    assert(( svga_render->vbuf_offset - svga_render->vdecl_offset) % svga_render->vertex_size == 0);
    
@@ -299,7 +333,8 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render,
                                         svga_render->min_index,
                                         svga_render->max_index,
                                         svga_render->prim,
-                                        svga_render->ibuf_offset / 2, nr_indices);
+                                        svga_render->ibuf_offset / 2, nr_indices,
+                                        start_instance, instance_count);
    if(ret != PIPE_OK) {
       svga_context_flush(svga, NULL);
       ret = svga_hwtnl_draw_range_elements(svga->hwtnl,
@@ -309,7 +344,9 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render,
                                            svga_render->min_index,
                                            svga_render->max_index,
                                            svga_render->prim,
-                                           svga_render->ibuf_offset / 2, nr_indices);
+                                           svga_render->ibuf_offset / 2,
+                                           nr_indices,
+                                           start_instance, instance_count);
       svga->swtnl.new_vbuf = TRUE;
       assert(ret == PIPE_OK);
    }
@@ -349,6 +386,7 @@ svga_vbuf_render_create( struct svga_context *svga )
    svga_render->vbuf_size = 0;
    svga_render->ibuf_alloc_size = 4*1024;
    svga_render->vbuf_alloc_size = 64*1024;
+   svga_render->layout_id = SVGA3D_INVALID_ID;
    svga_render->base.max_vertex_buffer_bytes = 64*1024/10;
    svga_render->base.max_indices = 65536;
    svga_render->base.get_vertex_info = svga_vbuf_render_get_vertex_info;
diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c
index 832249523cc..6a8e857cee8 100644
--- a/src/gallium/drivers/svga/svga_swtnl_draw.c
+++ b/src/gallium/drivers/svga/svga_swtnl_draw.c
@@ -42,9 +42,9 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
 {
    struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = { 0 };
    struct pipe_transfer *ib_transfer = NULL;
-   struct pipe_transfer *cb_transfer = NULL;
+   struct pipe_transfer *cb_transfer[SVGA_MAX_CONST_BUFS] = { 0 };
    struct draw_context *draw = svga->swtnl.draw;
-   unsigned i;
+   unsigned i, old_num_vertex_buffers;
    const void *map;
    enum pipe_error ret;
 
@@ -76,6 +76,7 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
          draw_set_mapped_vertex_buffer(draw, i, map, ~0);
       }
    }
+   old_num_vertex_buffers = svga->curr.num_vertex_buffers;
 
    /* Map index buffer, if present */
    map = NULL;
@@ -88,16 +89,21 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
                        svga->curr.ib.index_size, ~0);
    }
 
-   if (svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer) {
+   /* Map constant buffers */
+   for (i = 0; i < Elements(svga->curr.constbufs[PIPE_SHADER_VERTEX]); ++i) {
+      if (svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer == NULL) {
+         continue;
+      }
+
       map = pipe_buffer_map(&svga->pipe,
-                            svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer,
+                            svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer,
                             PIPE_TRANSFER_READ,
-			    &cb_transfer);
+                            &cb_transfer[i]);
       assert(map);
       draw_set_mapped_constant_buffer(
-         draw, PIPE_SHADER_VERTEX, 0,
+         draw, PIPE_SHADER_VERTEX, i,
          map,
-         svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer->width0);
+         svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer->width0);
    }
 
    draw_vbo(draw, info);
@@ -105,8 +111,8 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
    draw_flush(svga->swtnl.draw);
 
    /* Ensure the draw module didn't touch this */
-   assert(i == svga->curr.num_vertex_buffers);
-   
+   assert(old_num_vertex_buffers == svga->curr.num_vertex_buffers);
+
    /*
     * unmap vertex/index buffers
     */
@@ -122,8 +128,10 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
       draw_set_indexes(draw, NULL, 0, 0);
    }
 
-   if (svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer) {
-      pipe_buffer_unmap(&svga->pipe, cb_transfer);
+   for (i = 0; i < Elements(svga->curr.constbufs[PIPE_SHADER_VERTEX]); ++i) {
+      if (svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer) {
+         pipe_buffer_unmap(&svga->pipe, cb_transfer[i]);
+      }
    }
 
    /* Now safe to remove the need_swtnl flag in any update_state call */
@@ -167,9 +175,6 @@ boolean svga_init_swtnl( struct svga_context *svga )
    if (!screen->haveLineSmooth)
       draw_install_aaline_stage(svga->swtnl.draw, &svga->pipe);
 
-   /* always install polygon stipple stage */
-   draw_install_pstipple_stage(svga->swtnl.draw, &svga->pipe);
-
    /* enable/disable line stipple stage depending on device caps */
    draw_enable_line_stipple(svga->swtnl.draw, !screen->haveLineStipple);
 
diff --git a/src/gallium/drivers/svga/svga_swtnl_private.h b/src/gallium/drivers/svga/svga_swtnl_private.h
index e2106e1e8e6..0a226abca88 100644
--- a/src/gallium/drivers/svga/svga_swtnl_private.h
+++ b/src/gallium/drivers/svga/svga_swtnl_private.h
@@ -43,6 +43,8 @@ struct svga_vbuf_render {
 
    unsigned vertex_size;
 
+   SVGA3dElementLayoutId layout_id; /**< current element layout id */
+
    unsigned prim;
 
    struct pipe_resource *vbuf;
@@ -50,6 +52,8 @@ struct svga_vbuf_render {
    struct pipe_transfer *vbuf_transfer;
    struct pipe_transfer *ibuf_transfer;
 
+   void *vbuf_ptr;
+
    /* current size of buffer */
    size_t vbuf_size;
    size_t ibuf_size;
diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c
index e62698e11f4..79dc0bf580c 100644
--- a/src/gallium/drivers/svga/svga_swtnl_state.c
+++ b/src/gallium/drivers/svga/svga_swtnl_state.c
@@ -25,10 +25,13 @@
 
 #include "draw/draw_context.h"
 #include "draw/draw_vbuf.h"
+#include "util/u_bitmask.h"
 #include "util/u_inlines.h"
 #include "pipe/p_state.h"
 
+#include "svga_cmd.h"
 #include "svga_context.h"
+#include "svga_shader.h"
 #include "svga_swtnl.h"
 #include "svga_state.h"
 #include "svga_tgsi.h"
@@ -51,30 +54,37 @@ static void set_draw_viewport( struct svga_context *svga )
    float adjx = 0.0f;
    float adjy = 0.0f;
 
-   switch (svga->curr.reduced_prim) {
-   case PIPE_PRIM_POINTS:
-      adjx = SVGA_POINT_ADJ_X;
-      adjy = SVGA_POINT_ADJ_Y;
-      break;
-   case PIPE_PRIM_LINES:
-      /* XXX: This is to compensate for the fact that wide lines are
-       * going to be drawn with triangles, but we're not catching all
-       * cases where that will happen.
-       */
-      if (svga->curr.rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES)
-      {
-         adjx = SVGA_LINE_ADJ_X + 0.175f;
-         adjy = SVGA_LINE_ADJ_Y - 0.175f;
+   if (svga_have_vgpu10(svga)) {
+      if (svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES) {
+         adjy = 0.25;
       }
-      else {
-         adjx = SVGA_LINE_ADJ_X;
-         adjy = SVGA_LINE_ADJ_Y;
+   }
+   else {
+      switch (svga->curr.reduced_prim) {
+      case PIPE_PRIM_POINTS:
+         adjx = SVGA_POINT_ADJ_X;
+         adjy = SVGA_POINT_ADJ_Y;
+         break;
+      case PIPE_PRIM_LINES:
+         /* XXX: This is to compensate for the fact that wide lines are
+          * going to be drawn with triangles, but we're not catching all
+          * cases where that will happen.
+          */
+         if (svga->curr.rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES)
+         {
+            adjx = SVGA_LINE_ADJ_X + 0.175f;
+            adjy = SVGA_LINE_ADJ_Y - 0.175f;
+         }
+         else {
+            adjx = SVGA_LINE_ADJ_X;
+            adjy = SVGA_LINE_ADJ_Y;
+         }
+         break;
+      case PIPE_PRIM_TRIANGLES:
+         adjx += SVGA_TRIANGLE_ADJ_X;
+         adjy += SVGA_TRIANGLE_ADJ_Y;
+         break;
       }
-      break;
-   case PIPE_PRIM_TRIANGLES:
-      adjx += SVGA_TRIANGLE_ADJ_X;
-      adjy += SVGA_TRIANGLE_ADJ_Y;
-      break;
    }
 
    vp.translate[0] += adjx;
@@ -150,6 +160,59 @@ struct svga_tracked_state svga_update_swtnl_draw =
 };
 
 
+static SVGA3dSurfaceFormat
+translate_vertex_format(SVGA3dDeclType format)
+{  /* Map a FLOATn vertex decl type to the n-component 32-bit float format. */
+   switch (format) {
+   case SVGA3D_DECLTYPE_FLOAT1:
+      return SVGA3D_R32_FLOAT;
+   case SVGA3D_DECLTYPE_FLOAT2:
+      return SVGA3D_R32G32_FLOAT;
+   case SVGA3D_DECLTYPE_FLOAT3:
+      return SVGA3D_R32G32B32_FLOAT;
+   case SVGA3D_DECLTYPE_FLOAT4:
+      return SVGA3D_R32G32B32A32_FLOAT;
+   default:  /* only FLOAT1..FLOAT4 are handled here */
+      assert(!"Unexpected format in translate_vertex_format()");
+      return SVGA3D_R32G32B32A32_FLOAT;  /* fallback value after the assert */
+   }
+}
+
+
+static SVGA3dElementLayoutId
+svga_vdecl_to_input_element(struct svga_context *svga,
+                            const SVGA3dVertexDecl *vdecl, unsigned num_decls)
+{
+   SVGA3dElementLayoutId id;
+   SVGA3dInputElementDesc elements[PIPE_MAX_ATTRIBS];
+   enum pipe_error ret;
+   unsigned i;
+   /* Builds a VGPU10 element layout from 'vdecl'; returns its new id. */
+   assert(num_decls <= PIPE_MAX_ATTRIBS);
+   assert(svga_have_vgpu10(svga));
+   /* One input element per declaration, all sourced from vertex buffer 0. */
+   for (i = 0; i < num_decls; i++) {
+      elements[i].inputSlot = 0; /* vertex buffer index */
+      elements[i].alignedByteOffset = vdecl[i].array.offset;
+      elements[i].format = translate_vertex_format(vdecl[i].identity.type);
+      elements[i].inputSlotClass = SVGA3D_INPUT_PER_VERTEX_DATA;
+      elements[i].instanceDataStepRate = 0;
+      elements[i].inputRegister = i;
+   }
+   /* NOTE(review): util_bitmask_add() result is not checked for failure. */
+   id = util_bitmask_add(svga->input_element_object_id_bm);
+   /* If defining the layout fails, flush the context and retry once. */
+   ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, num_decls, id, elements);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, num_decls, id, elements);
+      assert(ret == PIPE_OK);
+   }
+
+   return id;
+}
+
+
 enum pipe_error
 svga_swtnl_update_vdecl( struct svga_context *svga )
 {
@@ -164,16 +227,19 @@ svga_swtnl_update_vdecl( struct svga_context *svga )
    int nr_decls = 0;
    int src;
    unsigned i;
+   int any_change;
 
    memset(vinfo, 0, sizeof(*vinfo));
    memset(vdecl, 0, sizeof(vdecl));
 
    draw_prepare_shader_outputs(draw);
+
    /* always add position */
    src = draw_find_shader_output(draw, TGSI_SEMANTIC_POSITION, 0);
    draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src);
    vinfo->attrib[0].emit = EMIT_4F;
    vdecl[0].array.offset = offset;
+   vdecl[0].identity.method = SVGA3D_DECLMETHOD_DEFAULT;
    vdecl[0].identity.type = SVGA3D_DECLTYPE_FLOAT4;
    vdecl[0].identity.usage = SVGA3D_DECLUSAGE_POSITIONT;
    vdecl[0].identity.usageIndex = 0;
@@ -225,16 +291,69 @@ svga_swtnl_update_vdecl( struct svga_context *svga )
    draw_compute_vertex_size(vinfo);
 
    svga_render->vdecl_count = nr_decls;
-   for (i = 0; i < svga_render->vdecl_count; i++)
+   for (i = 0; i < svga_render->vdecl_count; i++) {
       vdecl[i].array.stride = offset;
+   }
 
-   if (memcmp(svga_render->vdecl, vdecl, sizeof(vdecl)) == 0)
-      return PIPE_OK;
+   /* Non-zero when the new decls differ from the cached copy. */
+   any_change = memcmp(svga_render->vdecl, vdecl, sizeof(vdecl));
+
+   if (svga_have_vgpu10(svga)) {
+      enum pipe_error ret;
+
+      /* Unchanged decls and an existing layout object: nothing to do. */
+      if (!any_change && svga_render->layout_id != SVGA3D_INVALID_ID) {
+         return PIPE_OK;
+      }
+
+      if (svga_render->layout_id != SVGA3D_INVALID_ID) {
+         /* destroy old */
+         ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc,
+                                                  svga_render->layout_id);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc,
+                                                     svga_render->layout_id);
+            assert(ret == PIPE_OK);
+         }
+
+         /**
+          * reset current layout id state after the element layout is
+          * destroyed, so that if a new layout has the same layout id, we
+          * will know to re-issue the SetInputLayout command.
+          */
+         if (svga->state.hw_draw.layout_id == svga_render->layout_id)
+            svga->state.hw_draw.layout_id = SVGA3D_INVALID_ID;
+
+         util_bitmask_clear(svga->input_element_object_id_bm,
+                            svga_render->layout_id);
+      }
+
+      svga_render->layout_id =
+         svga_vdecl_to_input_element(svga, vdecl, nr_decls);
+
+      /* bind new */
+      if (svga->state.hw_draw.layout_id != svga_render->layout_id) {
+         ret = SVGA3D_vgpu10_SetInputLayout(svga->swc, svga_render->layout_id);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_vgpu10_SetInputLayout(svga->swc,
+                                               svga_render->layout_id);
+            assert(ret == PIPE_OK);
+         }
+
+         svga->state.hw_draw.layout_id = svga_render->layout_id;
+      }
+   }
+   else {
+      if (!any_change)
+         return PIPE_OK;
+   }
 
    memcpy(svga_render->vdecl, vdecl, sizeof(vdecl));
    svga->swtnl.new_vdecl = TRUE;
 
    return PIPE_OK;
 }
 
 
diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c
index 2e2ff5e4673..9a6fb465ccb 100644
--- a/src/gallium/drivers/svga/svga_tgsi.c
+++ b/src/gallium/drivers/svga/svga_tgsi.c
@@ -37,6 +37,7 @@
 #include "svgadump/svga_shader_dump.h"
 
 #include "svga_context.h"
+#include "svga_shader.h"
 #include "svga_tgsi.h"
 #include "svga_tgsi_emit.h"
 #include "svga_debug.h"
@@ -165,97 +166,6 @@ svga_shader_emit_header(struct svga_shader_emitter *emit)
 }
 
 
-/**
- * Use the shader info to generate a bitmask indicating which generic
- * inputs are used by the shader.  A set bit indicates that GENERIC[i]
- * is used.
- */
-unsigned
-svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
-{
-   unsigned i, mask = 0x0;
-
-   for (i = 0; i < info->num_inputs; i++) {
-      if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
-         unsigned j = info->input_semantic_index[i];
-         assert(j < sizeof(mask) * 8);
-         mask |= 1 << j;
-      }
-   }
-
-   return mask;
-}
-
-
-/**
- * Given a mask of used generic variables (as returned by the above functions)
- * fill in a table which maps those indexes to small integers.
- * This table is used by the remap_generic_index() function in
- * svga_tgsi_decl_sm30.c
- * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
- * GENERIC[3] are used.  The remap_table will contain:
- *   table[1] = 0;
- *   table[3] = 1;
- * The remaining table entries will be filled in with the next unused
- * generic index (in this example, 2).
- */
-void
-svga_remap_generics(unsigned generics_mask,
-                    int8_t remap_table[MAX_GENERIC_VARYING])
-{
-   /* Note texcoord[0] is reserved so start at 1 */
-   unsigned count = 1, i;
-
-   for (i = 0; i < MAX_GENERIC_VARYING; i++) {
-      remap_table[i] = -1;
-   }
-
-   /* for each bit set in generic_mask */
-   while (generics_mask) {
-      unsigned index = ffs(generics_mask) - 1;
-      remap_table[index] = count++;
-      generics_mask &= ~(1 << index);
-   }
-}
-
-
-/**
- * Use the generic remap table to map a TGSI generic varying variable
- * index to a small integer.  If the remapping table doesn't have a
- * valid value for the given index (the table entry is -1) it means
- * the fragment shader doesn't use that VS output.  Just allocate
- * the next free value in that case.  Alternately, we could cull
- * VS instructions that write to register, or replace the register
- * with a dummy temp register.
- * XXX TODO: we should do one of the later as it would save precious
- * texcoord registers.
- */
-int
-svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
-                         int generic_index)
-{
-   assert(generic_index < MAX_GENERIC_VARYING);
-
-   if (generic_index >= MAX_GENERIC_VARYING) {
-      /* just don't return a random/garbage value */
-      generic_index = MAX_GENERIC_VARYING - 1;
-   }
-
-   if (remap_table[generic_index] == -1) {
-      /* This is a VS output that has no matching PS input.  Find a
-       * free index.
-       */
-      int i, max = 0;
-      for (i = 0; i < MAX_GENERIC_VARYING; i++) {
-         max = MAX2(max, remap_table[i]);
-      }
-      remap_table[generic_index] = max + 1;
-   }
-
-   return remap_table[generic_index];
-}
-
-
 /**
  * Parse TGSI shader and translate to SVGA/DX9 serialized
  * representation.
@@ -264,9 +174,9 @@ svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
  * can be dynamically grown.  Once we've finished and know how large
  * it is, it will be copied to a hardware buffer for upload.
  */
-static struct svga_shader_variant *
-svga_tgsi_translate(const struct svga_shader *shader,
-                    const struct svga_compile_key *key, unsigned unit)
+struct svga_shader_variant *
+svga_tgsi_vgpu9_translate(const struct svga_shader *shader,
+                          const struct svga_compile_key *key, unsigned unit)
 {
    struct svga_shader_variant *variant = NULL;
    struct svga_shader_emitter emit;
@@ -288,10 +198,10 @@ svga_tgsi_translate(const struct svga_shader *shader,
    emit.imm_start = emit.info.file_max[TGSI_FILE_CONSTANT] + 1;
 
    if (unit == PIPE_SHADER_FRAGMENT)
-      emit.imm_start += key->fkey.num_unnormalized_coords;
+      emit.imm_start += key->num_unnormalized_coords;
 
    if (unit == PIPE_SHADER_VERTEX) {
-      emit.imm_start += key->vkey.need_prescale ? 2 : 0;
+      emit.imm_start += key->vs.need_prescale ? 2 : 0;
    }
 
    emit.nr_hw_float_const =
@@ -327,7 +237,11 @@ svga_tgsi_translate(const struct svga_shader *shader,
    memcpy(&variant->key, key, sizeof(*key));
    variant->id = UTIL_BITMASK_INVALID_INDEX;
 
-   if (SVGA_DEBUG & DEBUG_TGSI) {
+   variant->pstipple_sampler_unit = emit.pstipple_sampler_unit;
+
+#if 0
+   if (!svga_shader_verify(variant->tokens, variant->nr_tokens) ||
+       SVGA_DEBUG & DEBUG_TGSI) {
       debug_printf("#####################################\n");
       debug_printf("Shader %u below\n", shader->id);
       tgsi_dump(shader->tokens, 0);
@@ -337,6 +251,7 @@ svga_tgsi_translate(const struct svga_shader *shader,
       }
       debug_printf("#####################################\n");
    }
+#endif
 
    return variant;
 
@@ -345,39 +260,3 @@ svga_tgsi_translate(const struct svga_shader *shader,
    FREE(emit.buf);
    return NULL;
 }
-
-
-struct svga_shader_variant *
-svga_translate_fragment_program(const struct svga_fragment_shader *fs,
-                                const struct svga_fs_compile_key *fkey)
-{
-   struct svga_compile_key key;
-
-   memset(&key, 0, sizeof(key));
-
-   memcpy(&key.fkey, fkey, sizeof *fkey);
-
-   memcpy(key.generic_remap_table, fs->generic_remap_table,
-          sizeof(fs->generic_remap_table));
-
-   return svga_tgsi_translate(&fs->base, &key, PIPE_SHADER_FRAGMENT);
-}
-
-
-struct svga_shader_variant *
-svga_translate_vertex_program(const struct svga_vertex_shader *vs,
-                              const struct svga_vs_compile_key *vkey)
-{
-   struct svga_compile_key key;
-
-   memset(&key, 0, sizeof(key));
-
-   memcpy(&key.vkey, vkey, sizeof *vkey);
-
-   /* Note: we could alternately store the remap table in the vkey but
-    * that would make it larger.  We just regenerate it here instead.
-    */
-   svga_remap_generics(vkey->fs_generic_inputs, key.generic_remap_table);
-
-   return svga_tgsi_translate(&vs->base, &key, PIPE_SHADER_VERTEX);
-}
diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h
index 5c47a4ad39f..207a3f0a845 100644
--- a/src/gallium/drivers/svga/svga_tgsi.h
+++ b/src/gallium/drivers/svga/svga_tgsi.h
@@ -26,94 +26,16 @@
 #ifndef SVGA_TGSI_H
 #define SVGA_TGSI_H
 
-#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+#include "svga3d_reg.h"
 
-#include "svga_hw_reg.h"
 
+#define MAX_VGPU10_ADDR_REGS 2
 
-/**
- * We use a 32-bit mask to keep track of the generic indexes.
- */
-#define MAX_GENERIC_VARYING 32
-
-
-struct svga_fragment_shader;
-struct svga_vertex_shader;
+struct svga_compile_key;
+struct svga_context;
 struct svga_shader;
-struct tgsi_shader_info;
-struct tgsi_token;
-
-
-struct svga_vs_compile_key
-{
-   unsigned fs_generic_inputs;
-   unsigned need_prescale:1;
-   unsigned allow_psiz:1;
-   unsigned adjust_attrib_range:16;
-   unsigned adjust_attrib_w_1:16;
-};
-
-struct svga_fs_compile_key
-{
-   unsigned light_twoside:1;
-   unsigned front_ccw:1;
-   unsigned white_fragments:1;
-   unsigned write_color0_to_n_cbufs:3;
-   unsigned num_textures:8;
-   unsigned num_unnormalized_coords:8;
-   unsigned sprite_origin_lower_left:1;
-   struct {
-      unsigned compare_mode:1;
-      unsigned compare_func:3;
-      unsigned unnormalized:1;
-      unsigned width_height_idx:7;
-      unsigned texture_target:8;
-      unsigned sprite_texgen:1;
-      unsigned swizzle_r:3;
-      unsigned swizzle_g:3;
-      unsigned swizzle_b:3;
-      unsigned swizzle_a:3;
-   } tex[PIPE_MAX_SAMPLERS];
-};
-
-/**
- * Key/index for identifying shader variants.
- */
-struct svga_compile_key {
-   struct svga_vs_compile_key vkey;
-   struct svga_fs_compile_key fkey;
-   int8_t generic_remap_table[MAX_GENERIC_VARYING];
-};
-
-
-/**
- * A single TGSI shader may be compiled into different variants of
- * SVGA3D shaders depending on the compile key.  Each user shader
- * will have a linked list of these variants.
- */
-struct svga_shader_variant
-{
-   const struct svga_shader *shader;
-
-   /** Parameters used to generate this variant */
-   struct svga_compile_key key;
-
-   /* Compiled shader tokens:
-    */
-   const unsigned *tokens;
-   unsigned nr_tokens;
-
-   /** Per-context shader identifier used with SVGA_3D_CMD_SHADER_DEFINE,
-    * SVGA_3D_CMD_SET_SHADER and SVGA_3D_CMD_SHADER_DESTROY.
-    */
-   unsigned id;
-   
-   /* GB object buffer containing the bytecode */
-   struct svga_winsys_gb_shader *gb_shader;
-
-   /** Next variant */
-   struct svga_shader_variant *next;
-};
+struct svga_shader_variant;
 
 
 /* TGSI doesn't provide use with VS input semantics (they're actually
@@ -140,37 +62,16 @@ static inline void svga_generate_vdecl_semantics( unsigned idx,
 
 
-static inline unsigned svga_vs_key_size( const struct svga_vs_compile_key *key )
-{
-   return sizeof *key;
-}
-
-static inline unsigned svga_fs_key_size( const struct svga_fs_compile_key *key )
-{
-   return (const char *)&key->tex[key->num_textures] - (const char *)key;
-}
-
 struct svga_shader_variant *
-svga_translate_fragment_program( const struct svga_fragment_shader *fs,
-                                 const struct svga_fs_compile_key *fkey );
+svga_tgsi_vgpu9_translate(const struct svga_shader *shader,
+                          const struct svga_compile_key *key, unsigned unit);
 
 struct svga_shader_variant *
-svga_translate_vertex_program( const struct svga_vertex_shader *fs,
-                               const struct svga_vs_compile_key *vkey );
-
-
-unsigned
-svga_get_generic_inputs_mask(const struct tgsi_shader_info *info);
-
-unsigned
-svga_get_generic_outputs_mask(const struct tgsi_shader_info *info);
-
-void
-svga_remap_generics(unsigned generics_mask,
-                    int8_t remap_table[MAX_GENERIC_VARYING]);
+svga_tgsi_vgpu10_translate(struct svga_context *svga,
+                           const struct svga_shader *shader,
+                           const struct svga_compile_key *key,
+                           unsigned unit);
 
-int
-svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
-                         int generic_index);
+boolean svga_shader_verify(const uint32_t *tokens, unsigned nr_tokens);
 
 #endif
diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
index 42d6f489bc5..ca4009b9e38 100644
--- a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
+++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
@@ -216,7 +216,7 @@ ps30_input(struct svga_shader_emitter *emit,
 
       return emit_decl( emit, reg, 0, 0 );
    }
-   else if (emit->key.fkey.light_twoside &&
+   else if (emit->key.fs.light_twoside &&
             (semantic.Name == TGSI_SEMANTIC_COLOR)) {
 
       if (!translate_vs_ps_semantic( emit, semantic, &usage, &index ))
@@ -285,9 +285,9 @@ ps30_input(struct svga_shader_emitter *emit,
          return FALSE;
 
       if (semantic.Name == TGSI_SEMANTIC_GENERIC &&
-          emit->key.fkey.sprite_origin_lower_left &&
+          emit->key.sprite_origin_lower_left &&
           index >= 1 &&
-          emit->key.fkey.tex[index - 1].sprite_texgen) {
+          emit->key.tex[index - 1].sprite_texgen) {
          /* This is a sprite texture coord with lower-left origin.
           * We need to invert the texture T coordinate since the SVGA3D
           * device only supports an upper-left origin.
@@ -329,7 +329,7 @@ ps30_output(struct svga_shader_emitter *emit,
    switch (semantic.Name) {
    case TGSI_SEMANTIC_COLOR:
       if (emit->unit == PIPE_SHADER_FRAGMENT) {
-         if (emit->key.fkey.white_fragments) {
+         if (emit->key.fs.white_fragments) {
             /* Used for XOR logicop mode */
             emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
                                                   emit->nr_hw_temp++ );
@@ -337,14 +337,14 @@ ps30_output(struct svga_shader_emitter *emit,
             emit->true_color_output[idx] = dst_register(SVGA3DREG_COLOROUT, 
                                                         semantic.Index);
          }
-         else if (emit->key.fkey.write_color0_to_n_cbufs) {
+         else if (emit->key.fs.write_color0_to_n_cbufs) {
             /* We'll write color output [0] to all render targets.
              * Prepare all the output registers here, but only when the
              * semantic.Index == 0 so we don't do this more than once.
              */
             if (semantic.Index == 0) {
                unsigned i;
-               for (i = 0; i < emit->key.fkey.write_color0_to_n_cbufs; i++) {
+               for (i = 0; i < emit->key.fs.write_color0_to_n_cbufs; i++) {
                   emit->output_map[idx+i] = dst_register(SVGA3DREG_TEMP,
                                                      emit->nr_hw_temp++);
                   emit->temp_color_output[i] = emit->output_map[idx+i];
@@ -487,7 +487,7 @@ vs30_output(struct svga_shader_emitter *emit,
       /* This has the effect of not declaring psiz (below) and not 
        * emitting the final MOV to true_psiz in the postamble.
        */
-      if (!emit->key.vkey.allow_psiz)
+      if (!emit->key.vs.allow_psiz)
          return TRUE;
 
       emit->true_psiz = dcl.dst;
@@ -517,7 +517,7 @@ vs30_output(struct svga_shader_emitter *emit,
 static ubyte
 svga_tgsi_sampler_type(const struct svga_shader_emitter *emit, int idx)
 {
-   switch (emit->key.fkey.tex[idx].texture_target) {
+   switch (emit->key.tex[idx].texture_target) {
    case PIPE_TEXTURE_1D:
       return SVGA3DSAMP_2D;
    case PIPE_TEXTURE_2D:
diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h
index 1a1dac23507..0b82483ab2e 100644
--- a/src/gallium/drivers/svga/svga_tgsi_emit.h
+++ b/src/gallium/drivers/svga/svga_tgsi_emit.h
@@ -28,6 +28,7 @@
 
 #include "tgsi/tgsi_scan.h"
 #include "svga_hw_reg.h"
+#include "svga_shader.h"
 #include "svga_tgsi.h"
 #include "svga3d_shaderdefs.h"
 
@@ -130,6 +131,8 @@ struct svga_shader_emitter
    struct svga_arl_consts arl_consts[12];
    int num_arl_consts;
    int current_arl;
+
+   unsigned pstipple_sampler_unit;
 };
 
 
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index bac956066a5..00c91a4fa61 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -29,6 +29,7 @@
 #include "tgsi/tgsi_parse.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
+#include "util/u_pstipple.h"
 
 #include "svga_tgsi_emit.h"
 #include "svga_context.h"
@@ -862,7 +863,7 @@ create_common_immediate( struct svga_shader_emitter *emit )
    idx++;
 
    /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */
-   if (emit->key.vkey.adjust_attrib_range) {
+   if (emit->key.vs.adjust_attrib_range) {
       if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
                            idx, 2.0f, 0.0f, 0.0f, 0.0f ))
          return FALSE;
@@ -1015,7 +1016,7 @@ get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
    struct src_register reg;
 
    /* the width/height indexes start right after constants */
-   idx = emit->key.fkey.tex[sampler_num].width_height_idx +
+   idx = emit->key.tex[sampler_num].width_height_idx +
          emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
 
    reg = src_register( SVGA3DREG_CONST, idx );
@@ -1723,7 +1724,7 @@ emit_tex2(struct svga_shader_emitter *emit,
    texcoord = translate_src_register( emit, &insn->Src[0] );
    sampler = translate_src_register( emit, &insn->Src[1] );
 
-   if (emit->key.fkey.tex[sampler.base.num].unnormalized ||
+   if (emit->key.tex[sampler.base.num].unnormalized ||
        emit->dynamic_branching_level > 0)
       tmp = get_temp( emit );
 
@@ -1755,7 +1756,7 @@ emit_tex2(struct svga_shader_emitter *emit,
 
    /* Explicit normalization of texcoords:
     */
-   if (emit->key.fkey.tex[sampler.base.num].unnormalized) {
+   if (emit->key.tex[sampler.base.num].unnormalized) {
       struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
 
       /* MUL  tmp, SRC0, WH */
@@ -1891,14 +1892,14 @@ emit_tex(struct svga_shader_emitter *emit,
    const unsigned unit = src1.base.num;
 
    /* check for shadow samplers */
-   boolean compare = (emit->key.fkey.tex[unit].compare_mode ==
+   boolean compare = (emit->key.tex[unit].compare_mode ==
                       PIPE_TEX_COMPARE_R_TO_TEXTURE);
 
    /* texture swizzle */
-   boolean swizzle = (emit->key.fkey.tex[unit].swizzle_r != PIPE_SWIZZLE_RED ||
-                      emit->key.fkey.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN ||
-                      emit->key.fkey.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE ||
-                      emit->key.fkey.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA);
+   boolean swizzle = (emit->key.tex[unit].swizzle_r != PIPE_SWIZZLE_RED ||
+                      emit->key.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN ||
+                      emit->key.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE ||
+                      emit->key.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA);
 
    boolean saturate = insn->Instruction.Saturate;
 
@@ -1965,7 +1966,7 @@ emit_tex(struct svga_shader_emitter *emit,
 
          /* Compare texture sample value against R component of texcoord */
          if (!emit_select(emit,
-                          emit->key.fkey.tex[unit].compare_func,
+                          emit->key.tex[unit].compare_func,
                           writemask( dst2, TGSI_WRITEMASK_XYZ ),
                           r_coord,
                           tex_src_x))
@@ -1991,10 +1992,10 @@ emit_tex(struct svga_shader_emitter *emit,
       /* swizzle from tex_result to dst (handles saturation too, if any) */
       emit_tex_swizzle(emit,
                        dst, src(tex_result),
-                       emit->key.fkey.tex[unit].swizzle_r,
-                       emit->key.fkey.tex[unit].swizzle_g,
-                       emit->key.fkey.tex[unit].swizzle_b,
-                       emit->key.fkey.tex[unit].swizzle_a);
+                       emit->key.tex[unit].swizzle_r,
+                       emit->key.tex[unit].swizzle_g,
+                       emit->key.tex[unit].swizzle_b,
+                       emit->key.tex[unit].swizzle_a);
    }
 
    return TRUE;
@@ -3113,7 +3114,7 @@ make_immediate(struct svga_shader_emitter *emit,
 static boolean
 emit_vs_preamble(struct svga_shader_emitter *emit)
 {
-   if (!emit->key.vkey.need_prescale) {
+   if (!emit->key.vs.need_prescale) {
       if (!make_immediate( emit, 0, 0, .5, .5,
                            &emit->imm_0055))
          return FALSE;
@@ -3190,7 +3191,7 @@ emit_ps_postamble(struct svga_shader_emitter *emit)
           * logicop workaround.
           */
          if (emit->unit == PIPE_SHADER_FRAGMENT &&
-             emit->key.fkey.white_fragments) {
+             emit->key.fs.white_fragments) {
             struct src_register one = get_one_immediate(emit);
 
             if (!submit_op1( emit,
@@ -3200,7 +3201,7 @@ emit_ps_postamble(struct svga_shader_emitter *emit)
                return FALSE;
          }
          else if (emit->unit == PIPE_SHADER_FRAGMENT &&
-                  i < emit->key.fkey.write_color0_to_n_cbufs) {
+                  i < emit->key.fs.write_color0_to_n_cbufs) {
             /* Write temp color output [0] to true output [i] */
             if (!submit_op1(emit, inst_token(SVGA3DOP_MOV),
                             emit->true_color_output[i],
@@ -3244,7 +3245,7 @@ emit_vs_postamble(struct svga_shader_emitter *emit)
    /* Need to perform various manipulations on vertex position to cope
     * with the different GL and D3D clip spaces.
     */
-   if (emit->key.vkey.need_prescale) {
+   if (emit->key.vs.need_prescale) {
       SVGA3dShaderDestToken temp_pos = emit->temp_pos;
       SVGA3dShaderDestToken depth = emit->depth_pos;
       SVGA3dShaderDestToken pos = emit->true_pos;
@@ -3372,7 +3373,7 @@ emit_light_twoside(struct svga_shader_emitter *emit)
 
    if_token = inst_token( SVGA3DOP_IFC );
 
-   if (emit->key.fkey.front_ccw)
+   if (emit->key.fs.front_ccw)
       if_token.control = SVGA3DOPCOMP_LT;
    else
       if_token.control = SVGA3DOPCOMP_GT;
@@ -3423,7 +3424,7 @@ emit_frontface(struct svga_shader_emitter *emit)
    temp = dst_register( SVGA3DREG_TEMP,
                         emit->nr_hw_temp++ );
 
-   if (emit->key.fkey.front_ccw) {
+   if (emit->key.fs.front_ccw) {
       pass = get_zero_immediate(emit);
       fail = get_one_immediate(emit);
    } else {
@@ -3494,8 +3495,8 @@ emit_inverted_texcoords(struct svga_shader_emitter *emit)
 static boolean
 emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
 {
-   unsigned adjust_mask = (emit->key.vkey.adjust_attrib_range |
-                           emit->key.vkey.adjust_attrib_w_1);
+   unsigned adjust_mask = (emit->key.vs.adjust_attrib_range |
+                           emit->key.vs.adjust_attrib_w_1);
  
    while (adjust_mask) {
       /* Adjust vertex attrib range and/or set W component = 1 */
@@ -3506,7 +3507,7 @@ emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
       tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp);
       emit->nr_hw_temp++;
 
-      if (emit->key.vkey.adjust_attrib_range & (1 << index)) {
+      if (emit->key.vs.adjust_attrib_range & (1 << index)) {
          /* The vertex input/attribute is supposed to be a signed value in
           * the range [-1,1] but we actually fetched/converted it to the
           * range [0,1].  This most likely happens when the app specifies a
@@ -3558,7 +3559,7 @@ emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
             return FALSE;
       }
 
-      if (emit->key.vkey.adjust_attrib_w_1 & (1 << index)) {
+      if (emit->key.vs.adjust_attrib_w_1 & (1 << index)) {
          /* move 1 into W position of tmp */
          if (!submit_op1(emit,
                          inst_token(SVGA3DOP_MOV),
@@ -3588,10 +3589,10 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
    unsigned i;
 
    if (emit->unit == PIPE_SHADER_FRAGMENT) {
-      if (emit->key.fkey.light_twoside)
+      if (emit->key.fs.light_twoside)
          return TRUE;
 
-      if (emit->key.fkey.white_fragments)
+      if (emit->key.fs.white_fragments)
          return TRUE;
 
       if (emit->emit_frontface)
@@ -3606,16 +3607,16 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
          return TRUE;
 
       /* look for any PIPE_SWIZZLE_ZERO/ONE terms */
-      for (i = 0; i < emit->key.fkey.num_textures; i++) {
-         if (emit->key.fkey.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA ||
-             emit->key.fkey.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA ||
-             emit->key.fkey.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA ||
-             emit->key.fkey.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA)
+      for (i = 0; i < emit->key.num_textures; i++) {
+         if (emit->key.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA ||
+             emit->key.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA ||
+             emit->key.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA ||
+             emit->key.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA)
             return TRUE;
       }
 
-      for (i = 0; i < emit->key.fkey.num_textures; i++) {
-         if (emit->key.fkey.tex[i].compare_mode
+      for (i = 0; i < emit->key.num_textures; i++) {
+         if (emit->key.tex[i].compare_mode
              == PIPE_TEX_COMPARE_R_TO_TEXTURE)
             return TRUE;
       }
@@ -3623,8 +3624,8 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
    else if (emit->unit == PIPE_SHADER_VERTEX) {
       if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
          return TRUE;
-      if (emit->key.vkey.adjust_attrib_range ||
-          emit->key.vkey.adjust_attrib_w_1)
+      if (emit->key.vs.adjust_attrib_range ||
+          emit->key.vs.adjust_attrib_w_1)
          return TRUE;
    }
 
@@ -3772,7 +3773,7 @@ svga_shader_emit_helpers(struct svga_shader_emitter *emit)
       if (!emit_ps_preamble( emit ))
          return FALSE;
 
-      if (emit->key.fkey.light_twoside) {
+      if (emit->key.fs.light_twoside) {
          if (!emit_light_twoside( emit ))
             return FALSE;
       }
@@ -3787,14 +3788,14 @@ svga_shader_emit_helpers(struct svga_shader_emitter *emit)
    }
    else {
       assert(emit->unit == PIPE_SHADER_VERTEX);
-      if (emit->key.vkey.adjust_attrib_range ||
-          emit->key.vkey.adjust_attrib_w_1) {
+      /* Either mask alone requires the vertex attribute fix-up pass. */
+      if (emit->key.vs.adjust_attrib_range ||
+          emit->key.vs.adjust_attrib_w_1) {
          if (!emit_adjusted_vertex_attribs(emit))
             return FALSE;
       }
    }
 
-
    return TRUE;
 }
 
@@ -3808,10 +3809,30 @@ svga_shader_emit_instructions(struct svga_shader_emitter *emit,
                               const struct tgsi_token *tokens)
 {
    struct tgsi_parse_context parse;
+   const struct tgsi_token *new_tokens = NULL;
    boolean ret = TRUE;
    boolean helpers_emitted = FALSE;
    unsigned line_nr = 0;
 
+   if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.pstipple) {
+      unsigned unit;
+
+      new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0);
+
+      if (new_tokens) {
+         /* Setup texture state for stipple */
+         emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D;
+         emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
+         emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
+         emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
+         emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
+
+         emit->pstipple_sampler_unit = unit;
+
+         tokens = new_tokens;
+      }
+   }
+
    tgsi_parse_init( &parse, tokens );
    emit->internal_imm_count = 0;
 
@@ -3878,5 +3899,9 @@ svga_shader_emit_instructions(struct svga_shader_emitter *emit,
 
 done:
    tgsi_parse_free( &parse );
+   if (new_tokens) {
+      tgsi_free_tokens(new_tokens);
+   }
+
    return ret;
 }
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index 60928d7a790..e4f027b9567 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -281,7 +281,7 @@ free_emitter(struct svga_shader_emitter_v10 *emit)
    FREE(emit);
 }
 
-static INLINE boolean
+static inline boolean
 reserve(struct svga_shader_emitter_v10 *emit,
         unsigned nr_dwords)
 {
@@ -1459,7 +1459,7 @@ absolute_src(const struct tgsi_full_src_register *reg)
 
 
 /** Return the named swizzle term from the src register */
-static INLINE unsigned
+static inline unsigned
 get_swizzle(const struct tgsi_full_src_register *reg, unsigned term)
 {
    switch (term) {
-- 
cgit v1.2.3


From 9390cb84593bda516e8c1521c87a08475574d1be Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Tue, 1 Sep 2015 22:00:24 -0700
Subject: i965/fs: Handle MRF destinations in lower_integer_multiplication().

The lowered code reads from the destination, which isn't possible from
message registers.

Fixes the following dEQP tests on SNB:

    dEQP-GLES3.functional.shaders.precision.int.highp_mul_fragment
    dEQP-GLES3.functional.shaders.precision.int.mediump_mul_fragment
    dEQP-GLES3.functional.shaders.precision.int.lowp_mul_fragment

Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
Tested-by:  Mark Janes <mark.a.janes@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 269914d64a8..e28eb5db026 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3192,7 +3192,8 @@ fs_visitor::lower_integer_multiplication()
              * schedule multi-component multiplications much better.
              */
 
-            if (inst->conditional_mod && inst->dst.is_null()) {
+            fs_reg orig_dst = inst->dst;
+            if (orig_dst.is_null() || orig_dst.file == MRF) {
                inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
                                   inst->dst.type);
             }
@@ -3258,10 +3259,9 @@ fs_visitor::lower_integer_multiplication()
 
             ibld.ADD(dst, low, high);
 
-            if (inst->conditional_mod) {
-               fs_reg null(retype(ibld.null_reg_f(), inst->dst.type));
+            if (inst->conditional_mod || orig_dst.file == MRF) {
                set_condmod(inst->conditional_mod,
-                           ibld.MOV(null, inst->dst));
+                           ibld.MOV(orig_dst, inst->dst));
             }
          }
 
-- 
cgit v1.2.3


From b2fd41ce465e16a178d51000b843b5228640b670 Mon Sep 17 00:00:00 2001
From: Charmaine Lee <charmainel@vmware.com>
Date: Thu, 13 Aug 2015 15:08:22 -0700
Subject: svga: fix backed surface view regression

Commit b9ba8492 removes an unneeded pipe_surface_release() from
st_render_texture() and exposes a bug in the backed surface view
creation.  Currently a backed surface view for a conflicted surface view
is created at framebuffer emit time. But if shader sampler views are changed
but framebuffer surface views remain unchanged, emit_framebuffer() will not
be called and conflicted surface views will not be detected.

To fix this, also check for conflicted surface views when setting sampler
views. If there are any conflicted surface views, enable the
framebuffer dirty bit so that the framebuffer emit code has a chance to
create a backed surface view for the conflicted surface view.

Fix cinebench-r11-test regression.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/svga/svga_pipe_sampler.c  | 28 ++++++++++++++++++++++++++-
 src/gallium/drivers/svga/svga_sampler_view.h  |  4 ++--
 src/gallium/drivers/svga/svga_state_sampler.c | 17 ++++++++--------
 src/gallium/drivers/svga/svga_surface.c       | 14 +++++++++-----
 4 files changed, 46 insertions(+), 17 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c
index bb18f5a8247..ab84ed39602 100644
--- a/src/gallium/drivers/svga/svga_pipe_sampler.c
+++ b/src/gallium/drivers/svga/svga_pipe_sampler.c
@@ -35,6 +35,8 @@
 #include "svga_cmd.h"
 #include "svga_debug.h"
 #include "svga_resource_texture.h"
+#include "svga_surface.h"
+#include "svga_sampler_view.h"
 
 
 static inline unsigned
@@ -445,7 +447,31 @@ svga_set_sampler_views(struct pipe_context *pipe,
       svga->dirty |= SVGA_NEW_TEXTURE_FLAGS;
       svga->curr.tex_flags.flag_1d = flag_1d;
       svga->curr.tex_flags.flag_srgb = flag_srgb;
-   }  
+   }
+
+   /* Check if any of the sampler view resources collide with the framebuffer
+    * color buffers or depth stencil resource. If so, enable the NEW_FRAME_BUFFER
+    * dirty bit so that emit_framebuffer can be invoked to create backed view
+    * for the conflicted surface view.
+    */
+   for (i = 0; i < svga->curr.framebuffer.nr_cbufs; i++) {
+      struct svga_surface *s = svga_surface(svga->curr.framebuffer.cbufs[i]);
+      if (s) {
+         if (svga_check_sampler_view_resource_collision(svga, s->handle, shader)) {
+            svga->dirty |= SVGA_NEW_FRAME_BUFFER;
+            break;
+         }
+      }
+   }
+
+   if (svga->curr.framebuffer.zsbuf) {
+      struct svga_surface *s = svga_surface(svga->curr.framebuffer.zsbuf);
+      if (s) {
+         if (svga_check_sampler_view_resource_collision(svga, s->handle, shader)) {
+            svga->dirty |= SVGA_NEW_FRAME_BUFFER;
+         }
+      }
+   }
 }
 
 
diff --git a/src/gallium/drivers/svga/svga_sampler_view.h b/src/gallium/drivers/svga/svga_sampler_view.h
index acd7ae0ca24..4ca7fb781a9 100644
--- a/src/gallium/drivers/svga/svga_sampler_view.h
+++ b/src/gallium/drivers/svga/svga_sampler_view.h
@@ -100,6 +100,6 @@ svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_
 
 boolean
 svga_check_sampler_view_resource_collision(struct svga_context *svga,
-                                           struct svga_winsys_surface *res);
-
+                                           struct svga_winsys_surface *res,
+                                           unsigned shader);
 #endif
diff --git a/src/gallium/drivers/svga/svga_state_sampler.c b/src/gallium/drivers/svga/svga_state_sampler.c
index 1c6913e4a26..611d2c6102f 100644
--- a/src/gallium/drivers/svga/svga_state_sampler.c
+++ b/src/gallium/drivers/svga/svga_state_sampler.c
@@ -63,23 +63,22 @@ svga_resource_handle(struct pipe_resource *res)
  */
 boolean
 svga_check_sampler_view_resource_collision(struct svga_context *svga,
-                                           struct svga_winsys_surface *res)
+                                           struct svga_winsys_surface *res,
+                                           unsigned shader)
 {
    struct pipe_screen *screen = svga->pipe.screen;
-   unsigned shader, i;
+   unsigned i;
 
    if (svga_screen(screen)->debug.no_surface_view) {
       return FALSE;
    }
 
-   for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
-      for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) {
-         struct svga_pipe_sampler_view *sv =
-            svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]);
+   for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) {
+      struct svga_pipe_sampler_view *sv =
+         svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]);
 
-         if (sv && res == svga_resource_handle(sv->base.texture)) {
-            return TRUE;
-         }
+      if (sv && res == svga_resource_handle(sv->base.texture)) {
+         return TRUE;
       }
    }
 
diff --git a/src/gallium/drivers/svga/svga_surface.c b/src/gallium/drivers/svga/svga_surface.c
index 79981dcf5ff..4b0f9417c32 100644
--- a/src/gallium/drivers/svga/svga_surface.c
+++ b/src/gallium/drivers/svga/svga_surface.c
@@ -377,6 +377,7 @@ svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s)
 {
    enum pipe_error ret = PIPE_OK;
    int try;
+   unsigned shader;
 
    assert(svga_have_vgpu10(svga));
 
@@ -388,11 +389,14 @@ svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s)
     * associated resource. We will then use the cloned surface view for
     * render target.
     */
-   if (svga_check_sampler_view_resource_collision(svga, s->handle)) {
-      SVGA_DBG(DEBUG_VIEWS,
-               "same resource used in shaderResource and renderTarget 0x%x\n",
-               s->handle);
-      s = create_backed_surface_view(svga, s);
+   for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
+      if (svga_check_sampler_view_resource_collision(svga, s->handle, shader)) {
+         SVGA_DBG(DEBUG_VIEWS,
+                  "same resource used in shaderResource and renderTarget 0x%x\n",
+                  s->handle);
+         s = create_backed_surface_view(svga, s);
+         break;
+      }
    }
 
    if (s->view_id == SVGA3D_INVALID_ID) {
-- 
cgit v1.2.3


From 5a5e5e39592292a8ffa861e6f9b21934af428b9b Mon Sep 17 00:00:00 2001
From: Charmaine Lee <charmainel@vmware.com>
Date: Fri, 21 Aug 2015 10:36:24 -0700
Subject: svga: increment texture age when surface is to be marked as dirty

Commit b9ba8492 removes an unneeded pipe_surface_release() from
st_render_texture(). This implies a surface can now be reused for a
render buffer. Currently, when we render to a texture, we mark the
surface as dirty. But in svga_mark_surface_dirty(), if the surface
is already marked as dirty, it does not increment the texture age.
Any view to this texture might not be updated properly then.

With this patch, the texture age is incremented regardless of whether
the surface is already marked as dirty or not.

Fix bug 1499181.

Reviewed-by: Sinclair Yeh <syeh@vmware.com>
---
 src/gallium/drivers/svga/svga_surface.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_surface.c b/src/gallium/drivers/svga/svga_surface.c
index 4b0f9417c32..477a938ccdd 100644
--- a/src/gallium/drivers/svga/svga_surface.c
+++ b/src/gallium/drivers/svga/svga_surface.c
@@ -512,10 +512,9 @@ static void
 svga_mark_surface_dirty(struct pipe_surface *surf)
 {
    struct svga_surface *s = svga_surface(surf);
+   struct svga_texture *tex = svga_texture(surf->texture);
 
    if (!s->dirty) {
-      struct svga_texture *tex = svga_texture(surf->texture);
-
       s->dirty = TRUE;
 
       if (s->handle == tex->handle) {
@@ -526,12 +525,12 @@ svga_mark_surface_dirty(struct pipe_surface *surf)
       else {
          /* this will happen later in svga_propagate_surface */
       }
-
-      /* Increment the view_age and texture age for this surface's mipmap
-       * level so that any sampler views into the texture are re-validated too.
-       */
-      svga_age_texture_view(tex, surf->u.tex.level);
    }
+
+   /* Increment the view_age and texture age for this surface's mipmap
+    * level so that any sampler views into the texture are re-validated too.
+    */
+   svga_age_texture_view(tex, surf->u.tex.level);
 }
 
 
-- 
cgit v1.2.3


From 4a9480b64aed6986095c391ebba6b91212fb9277 Mon Sep 17 00:00:00 2001
From: Charmaine Lee <charmainel@vmware.com>
Date: Fri, 21 Aug 2015 11:41:26 -0700
Subject: svga: fix referencing a NULL framebuffer cbuf

Check for a valid framebuffer cbuf pointer before accessing its
associated surface.

Fix piglit test fbo-drawbuffers-none.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/svga/svga_pipe_sampler.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c
index ab84ed39602..60e2d44ace4 100644
--- a/src/gallium/drivers/svga/svga_pipe_sampler.c
+++ b/src/gallium/drivers/svga/svga_pipe_sampler.c
@@ -455,8 +455,8 @@ svga_set_sampler_views(struct pipe_context *pipe,
     * for the conflicted surface view.
     */
    for (i = 0; i < svga->curr.framebuffer.nr_cbufs; i++) {
-      struct svga_surface *s = svga_surface(svga->curr.framebuffer.cbufs[i]);
-      if (s) {
+      if (svga->curr.framebuffer.cbufs[i]) {
+         struct svga_surface *s = svga_surface(svga->curr.framebuffer.cbufs[i]);
          if (svga_check_sampler_view_resource_collision(svga, s->handle, shader)) {
             svga->dirty |= SVGA_NEW_FRAME_BUFFER;
             break;
-- 
cgit v1.2.3


From 4bdd5e09c3a1f97840dbbfb9e9a3c74bb0143968 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Mon, 25 May 2015 12:23:05 -0700
Subject: meta: Save/restore compute shaders

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
---
 src/mesa/drivers/common/meta.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index bde544ef490..e27489d6195 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -599,7 +599,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       /* Save the shader state from ctx->Shader (instead of ctx->_Shader) so
        * that we don't have to worry about the current pipeline state.
        */
-      for (i = 0; i <= MESA_SHADER_FRAGMENT; i++) {
+      for (i = 0; i < MESA_SHADER_STAGES; i++) {
          _mesa_reference_shader_program(ctx, &save->Shader[i],
                                         ctx->Shader.CurrentProgram[i]);
       }
@@ -949,7 +949,9 @@ _mesa_meta_end(struct gl_context *ctx)
          GL_TESS_EVALUATION_SHADER,
          GL_GEOMETRY_SHADER,
          GL_FRAGMENT_SHADER,
+         GL_COMPUTE_SHADER,
       };
+      STATIC_ASSERT(MESA_SHADER_STAGES == ARRAY_SIZE(targets));
 
       bool any_shader;
 
@@ -975,7 +977,7 @@ _mesa_meta_end(struct gl_context *ctx)
       }
 
       any_shader = false;
-      for (i = 0; i <= MESA_SHADER_FRAGMENT; i++) {
+      for (i = 0; i < MESA_SHADER_STAGES; i++) {
          /* It is safe to call _mesa_use_shader_program even if the extension
           * necessary for that program state is not supported.  In that case,
           * the saved program object must be NULL and the currently bound
-- 
cgit v1.2.3


From 06ada493fbc22e99867e14f26f1a511b343e3759 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Tue, 23 Sep 2014 16:46:39 -0700
Subject: i965/cs: Setup push constant data for uniforms

brw_upload_cs_push_constants was based on gen6_upload_push_constants.

v2:
 * Add FINISHME comments about more efficient ways to push uniforms

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
---
 src/mesa/drivers/dri/i965/brw_context.h      |   2 +-
 src/mesa/drivers/dri/i965/brw_cs.cpp         | 132 ++++++++++++++++++++++++++-
 src/mesa/drivers/dri/i965/brw_defines.h      |   6 ++
 src/mesa/drivers/dri/i965/brw_state.h        |   1 +
 src/mesa/drivers/dri/i965/brw_state_upload.c |   2 +
 5 files changed, 138 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index b52bca77460..02e7bb4f8e7 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1517,7 +1517,7 @@ struct brw_context
 
    int num_atoms[BRW_NUM_PIPELINES];
    const struct brw_tracked_state render_atoms[60];
-   const struct brw_tracked_state compute_atoms[4];
+   const struct brw_tracked_state compute_atoms[5];
 
    /* If (INTEL_DEBUG & DEBUG_BATCH) */
    struct {
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp
index cd7e0942277..8197e6a69dd 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -331,6 +331,10 @@ brw_upload_cs_state(struct brw_context *brw)
                                             prog_data->binding_table.size_bytes,
                                             32, &stage_state->bind_bo_offset);
 
+   unsigned push_constant_data_size =
+      prog_data->nr_params * sizeof(gl_constant_value);
+   unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
+   unsigned push_constant_regs = reg_aligned_constant_size / 32;
    unsigned threads = get_cs_thread_count(cs_prog_data);
 
    uint32_t dwords = brw->gen < 8 ? 8 : 9;
@@ -363,12 +367,41 @@ brw_upload_cs_state(struct brw_context *brw)
 
    OUT_BATCH(0);
    const uint32_t vfe_urb_allocation = brw->gen >= 8 ? 2 : 0;
-   OUT_BATCH(SET_FIELD(vfe_urb_allocation, MEDIA_VFE_STATE_URB_ALLOC));
+
+   /* We are uploading duplicated copies of push constant uniforms for each
+    * thread. Although the local id data needs to vary per thread, it won't
+    * change for other uniform data. Unfortunately this duplication is
+    * required for gen7. As of Haswell, this duplication can be avoided, but
+    * this older mechanism with duplicated data continues to work.
+    *
+    * FINISHME: As of Haswell, we could make use of the
+    * INTERFACE_DESCRIPTOR_DATA "Cross-Thread Constant Data Read Length" field
+    * to only store one copy of uniform data.
+    *
+    * FINISHME: Broadwell adds a new alternative "Indirect Payload Storage"
+    * which is described in the GPGPU_WALKER command and in the Broadwell PRM
+    * Volume 7: 3D Media GPGPU, under Media GPGPU Pipeline => Mode of
+    * Operations => GPGPU Mode => Indirect Payload Storage.
+    *
+    * Note: The constant data is built in brw_upload_cs_push_constants below.
+    */
+   const uint32_t vfe_curbe_allocation = push_constant_regs * threads;
+   OUT_BATCH(SET_FIELD(vfe_urb_allocation, MEDIA_VFE_STATE_URB_ALLOC) |
+             SET_FIELD(vfe_curbe_allocation, MEDIA_VFE_STATE_CURBE_ALLOC));
    OUT_BATCH(0);
    OUT_BATCH(0);
    OUT_BATCH(0);
    ADVANCE_BATCH();
 
+   if (reg_aligned_constant_size > 0) {
+      BEGIN_BATCH(4);
+      OUT_BATCH(MEDIA_CURBE_LOAD << 16 | (4 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(reg_aligned_constant_size * threads);
+      OUT_BATCH(stage_state->push_const_offset);
+      ADVANCE_BATCH();
+   }
+
    /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
    memcpy(bind, stage_state->surf_offset,
           prog_data->binding_table.size_bytes);
@@ -382,7 +415,7 @@ brw_upload_cs_state(struct brw_context *brw)
    desc[dw++] = 0;
    desc[dw++] = 0;
    desc[dw++] = stage_state->bind_bo_offset;
-   desc[dw++] = 0;
+   desc[dw++] = SET_FIELD(push_constant_regs, MEDIA_CURBE_READ_LENGTH);
    const uint32_t media_threads =
       brw->gen >= 8 ?
       SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :
@@ -404,8 +437,99 @@ const struct brw_tracked_state brw_cs_state = {
    /* explicit initialisers aren't valid C++, comment
     * them for documentation purposes */
    /* .dirty = */{
-      /* .mesa = */ 0,
-      /* .brw = */  BRW_NEW_CS_PROG_DATA,
+      /* .mesa = */ _NEW_PROGRAM_CONSTANTS,
+      /* .brw = */  BRW_NEW_CS_PROG_DATA |
+                    BRW_NEW_PUSH_CONSTANT_ALLOCATION,
    },
    /* .emit = */ brw_upload_cs_state
 };
+
+
+/**
+ * Creates a region containing the push constants for the CS on gen7+.
+ *
+ * Push constants are constant values (such as GLSL uniforms) that are
+ * pre-loaded into a shader stage's register space at thread spawn time.
+ *
+ * For other stages, see brw_curbe.c:brw_upload_constant_buffer for the
+ * equivalent gen4/5 code and gen6_vs_state.c:gen6_upload_push_constants for
+ * gen6+.
+ */
+static void
+brw_upload_cs_push_constants(struct brw_context *brw,
+                             const struct gl_program *prog,
+                             const struct brw_cs_prog_data *cs_prog_data,
+                             struct brw_stage_state *stage_state,
+                             enum aub_state_struct_type type)
+{
+   struct gl_context *ctx = &brw->ctx;
+   const struct brw_stage_prog_data *prog_data =
+      (brw_stage_prog_data*) cs_prog_data;
+
+   /* Updates the ParameterValues[i] pointers for all parameters of the
+    * basic type of PROGRAM_STATE_VAR.
+    */
+   /* XXX: Should this happen somewhere before to get our state flag set? */
+   _mesa_load_state_parameters(ctx, prog->Parameters);
+
+   if (prog_data->nr_params == 0) {
+      stage_state->push_const_size = 0;
+   } else {
+      gl_constant_value *param;
+      unsigned i, t;
+
+      const unsigned push_constant_data_size =
+         prog_data->nr_params * sizeof(gl_constant_value);
+      const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
+      const unsigned param_aligned_count =
+         reg_aligned_constant_size / sizeof(*param);
+
+      unsigned threads = get_cs_thread_count(cs_prog_data);
+
+      param = (gl_constant_value*)
+         brw_state_batch(brw, type,
+                         reg_aligned_constant_size * threads,
+                         32, &stage_state->push_const_offset);
+      assert(param);
+
+      STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
+
+      /* _NEW_PROGRAM_CONSTANTS */
+      for (t = 0; t < threads; t++) {
+         for (i = 0; i < prog_data->nr_params; i++) {
+            param[t * param_aligned_count + i] = *prog_data->param[i];
+         }
+      }
+
+      stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;
+   }
+}
+
+
+static void
+gen7_upload_cs_push_constants(struct brw_context *brw)
+{
+   struct brw_stage_state *stage_state = &brw->cs.base;
+
+   /* BRW_NEW_COMPUTE_PROGRAM */
+   const struct brw_compute_program *cp =
+      (struct brw_compute_program *) brw->compute_program;
+
+   if (cp) {
+      /* CACHE_NEW_CS_PROG */
+      struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
+
+      brw_upload_cs_push_constants(brw, &cp->program.Base, cs_prog_data,
+                                   stage_state, AUB_TRACE_WM_CONSTANTS);
+   }
+}
+
+
+const struct brw_tracked_state gen7_cs_push_constants = {
+   /* .dirty = */{
+      /* .mesa  = */ _NEW_PROGRAM_CONSTANTS,
+      /* .brw   = */ BRW_NEW_COMPUTE_PROGRAM |
+                     BRW_NEW_PUSH_CONSTANT_ALLOCATION,
+   },
+   /* .emit = */ gen7_upload_cs_push_constants,
+};
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 07fe1983ef2..e6f2a2e0f2c 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -2658,7 +2658,13 @@ enum brw_wm_barycentric_interp_mode {
 # define MEDIA_VFE_STATE_CURBE_ALLOC_SHIFT      0
 # define MEDIA_VFE_STATE_CURBE_ALLOC_MASK       INTEL_MASK(15, 0)
 
+#define MEDIA_CURBE_LOAD                        0x7001
 #define MEDIA_INTERFACE_DESCRIPTOR_LOAD         0x7002
+/* GEN7 DW4, GEN8+ DW5 */
+# define MEDIA_CURBE_READ_LENGTH_SHIFT          16
+# define MEDIA_CURBE_READ_LENGTH_MASK           INTEL_MASK(31, 16)
+# define MEDIA_CURBE_READ_OFFSET_SHIFT          0
+# define MEDIA_CURBE_READ_OFFSET_MASK           INTEL_MASK(15, 0)
 /* GEN7 DW5, GEN8+ DW6 */
 # define MEDIA_GPGPU_THREAD_COUNT_SHIFT         0
 # define MEDIA_GPGPU_THREAD_COUNT_MASK          INTEL_MASK(7, 0)
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 78a1f874b4e..7ae9a5c33b3 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -99,6 +99,7 @@ extern const struct brw_tracked_state brw_indices;
 extern const struct brw_tracked_state brw_vertices;
 extern const struct brw_tracked_state brw_index_buffer;
 extern const struct brw_tracked_state brw_cs_state;
+extern const struct brw_tracked_state gen7_cs_push_constants;
 extern const struct brw_tracked_state gen6_binding_table_pointers;
 extern const struct brw_tracked_state gen6_blend_state;
 extern const struct brw_tracked_state gen6_cc_state_pointers;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 9de42ce8503..b2ca9c2c0e7 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -258,6 +258,7 @@ static const struct brw_tracked_state *gen7_compute_atoms[] =
 {
    &brw_state_base_address,
    &brw_cs_image_surfaces,
+   &gen7_cs_push_constants,
    &brw_cs_abo_surfaces,
    &brw_cs_state,
 };
@@ -348,6 +349,7 @@ static const struct brw_tracked_state *gen8_compute_atoms[] =
 {
    &gen8_state_base_address,
    &brw_cs_image_surfaces,
+   &gen7_cs_push_constants,
    &brw_cs_abo_surfaces,
    &brw_cs_state,
 };
-- 
cgit v1.2.3


From 8fafb0a67faa548cb16e122e214912a17835e369 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 18 Aug 2015 17:41:30 -0700
Subject: mesa: Fix warning about static being in the wrong place

Because the compiler already has enough things to complain about.

    grep -rl 'const static' src/ | while read f
    do
        sed --in-place -e 's/const static/static const/g' $f
    done

brw_eu_emit.c: In function 'brw_reg_type_to_hw_type':
brw_eu_emit.c:98:7: warning: 'static' is not at beginning of declaration [-Wold-style-declaration]
       const static int imm_hw_types[] = {
       ^
brw_eu_emit.c:120:7: warning: 'static' is not at beginning of declaration [-Wold-style-declaration]
       const static int hw_types[] = {
       ^

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/egl/wayland/wayland-drm/wayland-drm.c    | 2 +-
 src/gallium/drivers/i915/i915_fpc_optimize.c | 2 +-
 src/glsl/builtin_types.cpp                   | 2 +-
 src/glsl/ir_reader.cpp                       | 2 +-
 src/mesa/drivers/dri/i965/brw_eu_emit.c      | 4 ++--
 5 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/egl/wayland/wayland-drm/wayland-drm.c b/src/egl/wayland/wayland-drm/wayland-drm.c
index e9c6e0a6507..e00d9be6a91 100644
--- a/src/egl/wayland/wayland-drm/wayland-drm.c
+++ b/src/egl/wayland/wayland-drm/wayland-drm.c
@@ -197,7 +197,7 @@ drm_authenticate(struct wl_client *client,
 		wl_resource_post_event(resource, WL_DRM_AUTHENTICATED);
 }
 
-const static struct wl_drm_interface drm_interface = {
+static const struct wl_drm_interface drm_interface = {
 	drm_authenticate,
 	drm_create_buffer,
         drm_create_planar_buffer,
diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c
index 83bb64918d4..a2b6d272939 100644
--- a/src/gallium/drivers/i915/i915_fpc_optimize.c
+++ b/src/gallium/drivers/i915/i915_fpc_optimize.c
@@ -69,7 +69,7 @@ static boolean same_src_reg(struct i915_full_src_register *d1, struct i915_full_
            d1->Register.Negate == d2->Register.Negate);
 }
 
-const static struct {
+static const struct {
    boolean is_texture;
    boolean commutes;
    unsigned neutral_element;
diff --git a/src/glsl/builtin_types.cpp b/src/glsl/builtin_types.cpp
index 0d0d71d56df..0aedbb3546a 100644
--- a/src/glsl/builtin_types.cpp
+++ b/src/glsl/builtin_types.cpp
@@ -127,7 +127,7 @@ static const struct glsl_struct_field gl_FogParameters_fields[] = {
 #define T(TYPE, MIN_GL, MIN_ES) \
    { glsl_type::TYPE##_type, MIN_GL, MIN_ES },
 
-const static struct builtin_type_versions {
+static const struct builtin_type_versions {
    const glsl_type *const type;
    int min_gl;
    int min_es;
diff --git a/src/glsl/ir_reader.cpp b/src/glsl/ir_reader.cpp
index 469837f5e4c..979653304cc 100644
--- a/src/glsl/ir_reader.cpp
+++ b/src/glsl/ir_reader.cpp
@@ -26,7 +26,7 @@
 #include "glsl_types.h"
 #include "s_expression.h"
 
-const static bool debug = false;
+static const bool debug = false;
 
 namespace {
 
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 4d397622fc1..637fd074ff1 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -95,7 +95,7 @@ brw_reg_type_to_hw_type(const struct brw_device_info *devinfo,
                         enum brw_reg_type type, unsigned file)
 {
    if (file == BRW_IMMEDIATE_VALUE) {
-      const static int imm_hw_types[] = {
+      static const int imm_hw_types[] = {
          [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
          [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
          [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
@@ -117,7 +117,7 @@ brw_reg_type_to_hw_type(const struct brw_device_info *devinfo,
       return imm_hw_types[type];
    } else {
       /* Non-immediate registers */
-      const static int hw_types[] = {
+      static const int hw_types[] = {
          [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
          [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
          [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
-- 
cgit v1.2.3


From 4ff9e599cb05d2e33e3c642ba2ded9afbe76d552 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 19 Aug 2015 13:36:22 -0700
Subject: linker: Silence GCC unused parameter warnings

linker.cpp:320:55: warning: unused parameter 'ir' [-Wunused-parameter]
    virtual ir_visitor_status visit_leave(ir_function *ir)
                                                       ^
linker.cpp:327:53: warning: unused parameter 'ir' [-Wunused-parameter]
    virtual ir_visitor_status visit_leave(ir_return *ir)
                                                     ^
linker.cpp:333:49: warning: unused parameter 'ir' [-Wunused-parameter]
    virtual ir_visitor_status visit_enter(ir_if *ir)
                                                 ^
linker.cpp:339:49: warning: unused parameter 'ir' [-Wunused-parameter]
    virtual ir_visitor_status visit_leave(ir_if *ir)
                                                 ^
linker.cpp:345:51: warning: unused parameter 'ir' [-Wunused-parameter]
    virtual ir_visitor_status visit_enter(ir_loop *ir)
                                                   ^
linker.cpp:351:51: warning: unused parameter 'ir' [-Wunused-parameter]
    virtual ir_visitor_status visit_leave(ir_loop *ir)
                                                   ^
linker.cpp:2824:53: warning: unused parameter 'ctx' [-Wunused-parameter]
 link_calculate_subroutine_compat(struct gl_context *ctx, struct gl_shader_program *prog)
                                                     ^
linker.cpp:2854:47: warning: unused parameter 'ctx' [-Wunused-parameter]
 check_subroutine_resources(struct gl_context *ctx, struct gl_shader_program *prog)
                                               ^
linker.cpp:3368:49: warning: unused parameter 'ctx' [-Wunused-parameter]
 link_assign_subroutine_types(struct gl_context *ctx,
                                                 ^

Also make link_assign_subroutine_types static since it is only called
from this file.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/glsl/linker.cpp | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

(limited to 'src')

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 47f7d2574dd..bc5e55b4b21 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -317,38 +317,38 @@ public:
       return visit_continue;
    }
 
-   virtual ir_visitor_status visit_leave(ir_function *ir)
+   virtual ir_visitor_status visit_leave(ir_function *)
    {
       in_main = false;
       after_return = false;
       return visit_continue;
    }
 
-   virtual ir_visitor_status visit_leave(ir_return *ir)
+   virtual ir_visitor_status visit_leave(ir_return *)
    {
       after_return = true;
       return visit_continue;
    }
 
-   virtual ir_visitor_status visit_enter(ir_if *ir)
+   virtual ir_visitor_status visit_enter(ir_if *)
    {
       ++control_flow;
       return visit_continue;
    }
 
-   virtual ir_visitor_status visit_leave(ir_if *ir)
+   virtual ir_visitor_status visit_leave(ir_if *)
    {
       --control_flow;
       return visit_continue;
    }
 
-   virtual ir_visitor_status visit_enter(ir_loop *ir)
+   virtual ir_visitor_status visit_enter(ir_loop *)
    {
       ++control_flow;
       return visit_continue;
    }
 
-   virtual ir_visitor_status visit_leave(ir_loop *ir)
+   virtual ir_visitor_status visit_leave(ir_loop *)
    {
       --control_flow;
       return visit_continue;
@@ -2821,7 +2821,7 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
 }
 
 static void
-link_calculate_subroutine_compat(struct gl_context *ctx, struct gl_shader_program *prog)
+link_calculate_subroutine_compat(struct gl_shader_program *prog)
 {
    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
       struct gl_shader *sh = prog->_LinkedShaders[i];
@@ -2851,7 +2851,7 @@ link_calculate_subroutine_compat(struct gl_context *ctx, struct gl_shader_progra
 }
 
 static void
-check_subroutine_resources(struct gl_context *ctx, struct gl_shader_program *prog)
+check_subroutine_resources(struct gl_shader_program *prog)
 {
    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
       struct gl_shader *sh = prog->_LinkedShaders[i];
@@ -3364,9 +3364,8 @@ validate_sampler_array_indexing(struct gl_context *ctx,
    return true;
 }
 
-void
-link_assign_subroutine_types(struct gl_context *ctx,
-                             struct gl_shader_program *prog)
+static void
+link_assign_subroutine_types(struct gl_shader_program *prog)
 {
    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
       gl_shader *sh = prog->_LinkedShaders[i];
@@ -3588,7 +3587,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
    }
 
    check_explicit_uniform_locations(ctx, prog);
-   link_assign_subroutine_types(ctx, prog);
+   link_assign_subroutine_types(prog);
 
    if (!prog->LinkStatus)
       goto done;
@@ -3848,9 +3847,9 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
    link_assign_atomic_counter_resources(ctx, prog);
    store_fragdepth_layout(prog);
 
-   link_calculate_subroutine_compat(ctx, prog);
+   link_calculate_subroutine_compat(prog);
    check_resources(ctx, prog);
-   check_subroutine_resources(ctx, prog);
+   check_subroutine_resources(prog);
    check_image_resources(ctx, prog);
    link_check_atomic_counter_resources(ctx, prog);
 
-- 
cgit v1.2.3


From 97ce8bd437a138c102fcf466d03101c0437fa6ea Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 19 Aug 2015 13:54:21 -0700
Subject: i965: Make gen8_upload_state_base_address static

All of the other state upload functions are static because the only use
is in the brw_tracked_state structure.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
---
 src/mesa/drivers/dri/i965/brw_state.h       | 4 ----
 src/mesa/drivers/dri/i965/gen8_misc_state.c | 3 ++-
 2 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 7ae9a5c33b3..b6f565702cb 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -178,10 +178,6 @@ void brw_upload_invariant_state(struct brw_context *brw);
 uint32_t
 brw_depthbuffer_format(struct brw_context *brw);
 
-/* gen8_misc_state.c */
-void gen8_upload_state_base_address(struct brw_context *brw);
-
-
 /***********************************************************************
  * brw_state.c
  */
diff --git a/src/mesa/drivers/dri/i965/gen8_misc_state.c b/src/mesa/drivers/dri/i965/gen8_misc_state.c
index b20038eaacf..a46b252277e 100644
--- a/src/mesa/drivers/dri/i965/gen8_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_misc_state.c
@@ -29,7 +29,8 @@
 /**
  * Define the base addresses which some state is referenced from.
  */
-void gen8_upload_state_base_address(struct brw_context *brw)
+static void
+gen8_upload_state_base_address(struct brw_context *brw)
 {
    uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
    int pkt_len = brw->gen >= 9 ? 19 : 16;
-- 
cgit v1.2.3


From 1e4d3d25fff65b2508ae12b4b4d6b64057dabe95 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 19 Aug 2015 14:25:48 -0700
Subject: i965: Make gen7_enable_hw_binding_tables static

All of the other state upload functions are static because the only use
is in the brw_tracked_state structure.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
---
 src/mesa/drivers/dri/i965/brw_binding_tables.c | 2 +-
 src/mesa/drivers/dri/i965/brw_state.h          | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c
index b188fc7de57..508f1f019ae 100644
--- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
+++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
@@ -311,7 +311,7 @@ gen7_disable_hw_binding_tables(struct brw_context *brw)
 /**
  * Enable hardware binding tables and set up the binding table pool.
  */
-void
+static void
 gen7_enable_hw_binding_tables(struct brw_context *brw)
 {
    if (!brw->use_resource_streamer)
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index b6f565702cb..97b81f08b2e 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -374,7 +374,6 @@ void gen7_update_binding_table_from_array(struct brw_context *brw,
                                           gl_shader_stage stage,
                                           const uint32_t* binding_table,
                                           int num_surfaces);
-void gen7_enable_hw_binding_tables(struct brw_context *brw);
 void gen7_disable_hw_binding_tables(struct brw_context *brw);
 void gen7_reset_hw_bt_pool_offsets(struct brw_context *brw);
 
-- 
cgit v1.2.3


From 8ba3b7661b4985194a01f565d2ae27db153bb12c Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 26 Aug 2015 13:55:54 +0100
Subject: mesa: Remove target parameter from _mesa_handle_bind_buffer_gen

main/bufferobj.c: In function '_mesa_handle_bind_buffer_gen':
main/bufferobj.c:915:37: warning: unused parameter 'target' [-Wunused-parameter]
                              GLenum target,
                                     ^

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/mesa/main/bufferobj.c | 7 +++----
 src/mesa/main/bufferobj.h | 1 -
 src/mesa/main/varray.c    | 3 +--
 3 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index e17b41ce55c..243c47b23fc 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -912,7 +912,6 @@ _mesa_free_buffer_objects( struct gl_context *ctx )
 
 bool
 _mesa_handle_bind_buffer_gen(struct gl_context *ctx,
-                             GLenum target,
                              GLuint buffer,
                              struct gl_buffer_object **buf_handle,
                              const char *caller)
@@ -975,7 +974,7 @@ bind_buffer_object(struct gl_context *ctx, GLenum target, GLuint buffer)
    else {
       /* non-default buffer object */
       newBufObj = _mesa_lookup_bufferobj(ctx, buffer);
-      if (!_mesa_handle_bind_buffer_gen(ctx, target, buffer,
+      if (!_mesa_handle_bind_buffer_gen(ctx, buffer,
                                         &newBufObj, "glBindBuffer"))
          return;
    }
@@ -4234,7 +4233,7 @@ _mesa_BindBufferRange(GLenum target, GLuint index,
    } else {
       bufObj = _mesa_lookup_bufferobj(ctx, buffer);
    }
-   if (!_mesa_handle_bind_buffer_gen(ctx, target, buffer,
+   if (!_mesa_handle_bind_buffer_gen(ctx, buffer,
                                      &bufObj, "glBindBufferRange"))
       return;
 
@@ -4286,7 +4285,7 @@ _mesa_BindBufferBase(GLenum target, GLuint index, GLuint buffer)
    } else {
       bufObj = _mesa_lookup_bufferobj(ctx, buffer);
    }
-   if (!_mesa_handle_bind_buffer_gen(ctx, target, buffer,
+   if (!_mesa_handle_bind_buffer_gen(ctx, buffer,
                                      &bufObj, "glBindBufferBase"))
       return;
 
diff --git a/src/mesa/main/bufferobj.h b/src/mesa/main/bufferobj.h
index b5d73aec072..b7154c5d889 100644
--- a/src/mesa/main/bufferobj.h
+++ b/src/mesa/main/bufferobj.h
@@ -74,7 +74,6 @@ _mesa_free_buffer_objects(struct gl_context *ctx);
 
 extern bool
 _mesa_handle_bind_buffer_gen(struct gl_context *ctx,
-                             GLenum target,
                              GLuint buffer,
                              struct gl_buffer_object **buf_handle,
                              const char *caller);
diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c
index 3bab9850588..7a1dddc9ccf 100644
--- a/src/mesa/main/varray.c
+++ b/src/mesa/main/varray.c
@@ -1698,8 +1698,7 @@ vertex_array_vertex_buffer(struct gl_context *ctx, struct gl_vertex_array_object
        * Otherwise, we fall back to the same compat profile behavior as other
        * object references (automatically gen it).
        */
-      if (!_mesa_handle_bind_buffer_gen(ctx, GL_ARRAY_BUFFER, buffer,
-                                        &vbo, func))
+      if (!_mesa_handle_bind_buffer_gen(ctx, buffer, &vbo, func))
          return;
    } else {
       /* The ARB_vertex_attrib_binding spec says:
-- 
cgit v1.2.3


From 882aab00abb226c103b8c6fe514247334b4d2d04 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 26 Aug 2015 13:50:04 +0100
Subject: mesa: Silence unused parameter warnings in bufferobj.c

main/bufferobj.c: In function 'count_buffer_size':
main/bufferobj.c:520:26: warning: unused parameter 'key' [-Wunused-parameter]
 count_buffer_size(GLuint key, void *data, void *userData)
                          ^
main/bufferobj.c: In function 'flush_mapped_buffer_range_fallback':
main/bufferobj.c:740:56: warning: unused parameter 'index' [-Wunused-parameter]
                                    gl_map_buffer_index index)
                                                        ^

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/mesa/main/bufferobj.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src')

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 243c47b23fc..7c6c70ac1f8 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -523,6 +523,7 @@ count_buffer_size(GLuint key, void *data, void *userData)
       (const struct gl_buffer_object *) data;
    GLuint *total = (GLuint *) userData;
 
+   (void) key;
    *total = *total + bufObj->Size;
 }
 
@@ -742,6 +743,7 @@ flush_mapped_buffer_range_fallback(struct gl_context *ctx,
    (void) offset;
    (void) length;
    (void) obj;
+   (void) index;
    /* no-op */
 }
 
-- 
cgit v1.2.3


From a6976f09727014730f45ec27c714c6a8140e074a Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 31 Aug 2015 18:30:48 -0700
Subject: mesa: Pass the type to _mesa_uniform_matrix as a glsl_base_type

This matches _mesa_uniform, and it enables the bug fix in the next
patch.

v2: s/type/basicType/ in the assert in _mesa_uniform_matrix.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au> [v1]
Cc: Dave Airlie <airlied@redhat.com>
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
---
 src/mesa/main/uniform_query.cpp | 10 +++---
 src/mesa/main/uniforms.c        | 72 ++++++++++++++++++++---------------------
 src/mesa/main/uniforms.h        |  2 +-
 3 files changed, 42 insertions(+), 42 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp
index 10266189259..fc2b5f57758 100644
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -873,7 +873,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
 		     GLuint cols, GLuint rows,
                      GLint location, GLsizei count,
                      GLboolean transpose,
-                     const GLvoid *values, GLenum type)
+                     const GLvoid *values, enum glsl_base_type basicType)
 {
    unsigned offset;
    unsigned vectors;
@@ -892,8 +892,8 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
       return;
    }
 
-   assert(type == GL_FLOAT || type == GL_DOUBLE);
-   size_mul = type == GL_DOUBLE ? 2 : 1;
+   assert(basicType == GLSL_TYPE_FLOAT || basicType == GLSL_TYPE_DOUBLE);
+   size_mul = basicType == GLSL_TYPE_DOUBLE ? 2 : 1;
 
    assert(!uni->type->is_sampler());
    vectors = uni->type->matrix_columns;
@@ -948,7 +948,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
    if (!transpose) {
       memcpy(&uni->storage[elements * offset], values,
 	     sizeof(uni->storage[0]) * elements * count * size_mul);
-   } else if (type == GL_FLOAT) {
+   } else if (basicType == GLSL_TYPE_FLOAT) {
       /* Copy and transpose the matrix.
        */
       const float *src = (const float *)values;
@@ -965,7 +965,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
 	 src += elements;
       }
    } else {
-      assert(type == GL_DOUBLE);
+      assert(basicType == GLSL_TYPE_DOUBLE);
       const double *src = (const double *)values;
       double *dst = (double *)&uni->storage[elements * offset].f;
 
diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index 10819e2e21a..973b877befb 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -553,7 +553,7 @@ _mesa_UniformMatrix2fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 2, location, count, transpose, value, GL_FLOAT);
+			2, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -562,7 +562,7 @@ _mesa_UniformMatrix3fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 3, location, count, transpose, value, GL_FLOAT);
+			3, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -571,7 +571,7 @@ _mesa_UniformMatrix4fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 4, location, count, transpose, value, GL_FLOAT);
+			4, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 /** Same as above with direct state access **/
@@ -683,7 +683,7 @@ _mesa_ProgramUniformMatrix2fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2fv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -694,7 +694,7 @@ _mesa_ProgramUniformMatrix3fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3fv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -705,7 +705,7 @@ _mesa_ProgramUniformMatrix4fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4fv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 
@@ -718,7 +718,7 @@ _mesa_UniformMatrix2x3fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 3, location, count, transpose, value, GL_FLOAT);
+			2, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -727,7 +727,7 @@ _mesa_UniformMatrix3x2fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 2, location, count, transpose, value, GL_FLOAT);
+			3, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -736,7 +736,7 @@ _mesa_UniformMatrix2x4fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 4, location, count, transpose, value, GL_FLOAT);
+			2, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -745,7 +745,7 @@ _mesa_UniformMatrix4x2fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 2, location, count, transpose, value, GL_FLOAT);
+			4, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -754,7 +754,7 @@ _mesa_UniformMatrix3x4fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 4, location, count, transpose, value, GL_FLOAT);
+			3, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -763,7 +763,7 @@ _mesa_UniformMatrix4x3fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 3, location, count, transpose, value, GL_FLOAT);
+			4, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 /** Same as above with direct state access **/
@@ -776,7 +776,7 @@ _mesa_ProgramUniformMatrix2x3fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2x3fv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -787,7 +787,7 @@ _mesa_ProgramUniformMatrix3x2fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3x2fv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -798,7 +798,7 @@ _mesa_ProgramUniformMatrix2x4fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2x4fv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -809,7 +809,7 @@ _mesa_ProgramUniformMatrix4x2fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4x2fv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -820,7 +820,7 @@ _mesa_ProgramUniformMatrix3x4fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3x4fv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -831,7 +831,7 @@ _mesa_ProgramUniformMatrix4x3fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4x3fv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 
@@ -1303,7 +1303,7 @@ _mesa_UniformMatrix2dv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 2, location, count, transpose, value, GL_DOUBLE);
+			2, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1312,7 +1312,7 @@ _mesa_UniformMatrix3dv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 3, location, count, transpose, value, GL_DOUBLE);
+			3, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1321,7 +1321,7 @@ _mesa_UniformMatrix4dv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 4, location, count, transpose, value, GL_DOUBLE);
+			4, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1330,7 +1330,7 @@ _mesa_UniformMatrix2x3dv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 3, location, count, transpose, value, GL_DOUBLE);
+			2, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1339,7 +1339,7 @@ _mesa_UniformMatrix3x2dv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 2, location, count, transpose, value, GL_DOUBLE);
+			3, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1348,7 +1348,7 @@ _mesa_UniformMatrix2x4dv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 4, location, count, transpose, value, GL_DOUBLE);
+			2, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1357,7 +1357,7 @@ _mesa_UniformMatrix4x2dv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 2, location, count, transpose, value, GL_DOUBLE);
+			4, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1366,7 +1366,7 @@ _mesa_UniformMatrix3x4dv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 4, location, count, transpose, value, GL_DOUBLE);
+			3, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1375,7 +1375,7 @@ _mesa_UniformMatrix4x3dv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 3, location, count, transpose, value, GL_DOUBLE);
+			4, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1481,7 +1481,7 @@ _mesa_ProgramUniformMatrix2dv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2dv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1492,7 +1492,7 @@ _mesa_ProgramUniformMatrix3dv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3dv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1503,7 +1503,7 @@ _mesa_ProgramUniformMatrix4dv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4dv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1514,7 +1514,7 @@ _mesa_ProgramUniformMatrix2x3dv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2x3dv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1525,7 +1525,7 @@ _mesa_ProgramUniformMatrix3x2dv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3x2dv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1536,7 +1536,7 @@ _mesa_ProgramUniformMatrix2x4dv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2x4dv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1547,7 +1547,7 @@ _mesa_ProgramUniformMatrix4x2dv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4x2dv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1558,7 +1558,7 @@ _mesa_ProgramUniformMatrix3x4dv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3x4dv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1569,5 +1569,5 @@ _mesa_ProgramUniformMatrix4x3dv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4x3dv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h
index e62eaa53ccc..c3c9c1e7dd8 100644
--- a/src/mesa/main/uniforms.h
+++ b/src/mesa/main/uniforms.h
@@ -355,7 +355,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
 		     GLuint cols, GLuint rows,
                      GLint location, GLsizei count,
                      GLboolean transpose,
-                     const GLvoid *values, GLenum type);
+                     const GLvoid *values, enum glsl_base_type basicType);
 
 void
 _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
-- 
cgit v1.2.3


From 7237c937af3b495191bee2f7240901e3a9daf1fb Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 31 Aug 2015 18:44:42 -0700
Subject: mesa: Don't allow wrong type setters for matrix uniforms

Previously we would allow glUniformMatrix4fv on a dmat4 and
glUniformMatrix4dv on a mat4.  Both are illegal.  The latter also
overwrites the storage for the mat4 and causes bad things to happen.

Should fix the (new) arb_gpu_shader_fp64-wrong-type-setter piglit test.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
Cc: Dave Airlie <airlied@redhat.com>
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
---
 src/mesa/main/uniform_query.cpp | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'src')

diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp
index fc2b5f57758..0bee59455a3 100644
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -919,6 +919,31 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
       }
    }
 
+   /* Section 2.11.7 (Uniform Variables) of the OpenGL 4.2 Core Profile spec
+    * says:
+    *
+    *     "If any of the following conditions occur, an INVALID_OPERATION
+    *     error is generated by the Uniform* commands, and no uniform values
+    *     are changed:
+    *
+    *     ...
+    *
+    *     - if the uniform declared in the shader is not of type boolean and
+    *       the type indicated in the name of the Uniform* command used does
+    *       not match the type of the uniform"
+    *
+    * There are no Boolean matrix types, so we do not need to allow
+    * GLSL_TYPE_BOOL here (as _mesa_uniform does).
+    */
+   if (uni->type->base_type != basicType) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glUniformMatrix%ux%u(\"%s\"@%d is %s, not %s)",
+                  cols, rows, uni->name, location,
+                  glsl_type_name(uni->type->base_type),
+                  glsl_type_name(basicType));
+      return;
+   }
+
    if (unlikely(ctx->_Shader->Flags & GLSL_UNIFORMS)) {
       log_uniform(values, uni->type->base_type, components, vectors, count,
 		  bool(transpose), shProg, location, uni);
-- 
cgit v1.2.3


From 6e3730452152ca74ad3b51bf1dd4b69b20dbc019 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 1 Sep 2015 17:42:31 -0700
Subject: i965/meta: Fix typo in comment

Trivial.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
index aa6df16eb04..cbbb919c6ee 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
@@ -202,7 +202,7 @@ setup_bounding_rect(GLuint prog, const struct blit_dims *dims)
 
 /**
  * Setup uniforms telling the destination width, height and the offset. These
- * are needed to unnoormalize the input coordinates and to correctly translate
+ * are needed to unnormalize the input coordinates and to correctly translate
  * between destination and source that may have differing offsets.
  */
 static void
-- 
cgit v1.2.3


From c3294ca5a13cf3f0eb3d9907a46ff8ce4bc2963b Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Wed, 2 Sep 2015 10:42:57 -0700
Subject: glsl: Handle attribute aliasing in attribute storage limit check.

In various versions of OpenGL and GLSL, it's possible to declare
multiple VS input variables with aliasing attribute locations.

So, when computing the storage requirements for vertex attributes,
we can't simply add up the sizes.  Instead, we need to look at the
enabled slots.

This patch begins tracking which attributes are double types that
are larger than 128-bits (i.e. take up two vec4 slots).  We then
count normal attributes once, and count the double-size attributes
a second time.

Fixes deQP functional.attribute_location.bind_aliasing.max_cond_* tests
on i965, which regressed with commit ad208d975a6d3aebe14f7c2c16039ee20.

No Piglit changes on llvmpipe (which actually supports dvecs).

Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
Tested-by: Mark Janes <mark.a.janes@intel.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/linker.cpp | 64 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 36 insertions(+), 28 deletions(-)

(limited to 'src')

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index bc5e55b4b21..01554bc2ad7 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2339,6 +2339,7 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
     */
    unsigned used_locations = (max_index >= 32)
       ? ~0 : ~((1 << max_index) - 1);
+   unsigned double_storage_locations = 0;
 
    assert((target_index == MESA_SHADER_VERTEX)
 	  || (target_index == MESA_SHADER_FRAGMENT));
@@ -2452,34 +2453,6 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
 
       const unsigned slots = var->type->count_attribute_slots();
 
-      /* From GL4.5 core spec, section 11.1.1 (Vertex Attributes):
-       *
-       * "A program with more than the value of MAX_VERTEX_ATTRIBS active
-       * attribute variables may fail to link, unless device-dependent
-       * optimizations are able to make the program fit within available
-       * hardware resources. For the purposes of this test, attribute variables
-       * of the type dvec3, dvec4, dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3,
-       * and dmat4 may count as consuming twice as many attributes as equivalent
-       * single-precision types. While these types use the same number of
-       * generic attributes as their single-precision equivalents,
-       * implementations are permitted to consume two single-precision vectors
-       * of internal storage for each three- or four-component double-precision
-       * vector."
-       * Until someone has a good reason in Mesa, enforce that now.
-       */
-      if (target_index == MESA_SHADER_VERTEX) {
-	 total_attribs_size += slots;
-	 if (var->type->without_array() == glsl_type::dvec3_type ||
-	     var->type->without_array() == glsl_type::dvec4_type ||
-	     var->type->without_array() == glsl_type::dmat2x3_type ||
-	     var->type->without_array() == glsl_type::dmat2x4_type ||
-	     var->type->without_array() == glsl_type::dmat3_type ||
-	     var->type->without_array() == glsl_type::dmat3x4_type ||
-	     var->type->without_array() == glsl_type::dmat4x3_type ||
-	     var->type->without_array() == glsl_type::dmat4_type)
-	    total_attribs_size += slots;
-      }
-
       /* If the variable is not a built-in and has a location statically
        * assigned in the shader (presumably via a layout qualifier), make sure
        * that it doesn't collide with other assigned locations.  Otherwise,
@@ -2594,6 +2567,38 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
 	    }
 
 	    used_locations |= (use_mask << attr);
+
+            /* From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes):
+             *
+             * "A program with more than the value of MAX_VERTEX_ATTRIBS
+             *  active attribute variables may fail to link, unless
+             *  device-dependent optimizations are able to make the program
+             *  fit within available hardware resources. For the purposes
+             *  of this test, attribute variables of the type dvec3, dvec4,
+             *  dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, and dmat4 may
+             *  count as consuming twice as many attributes as equivalent
+             *  single-precision types. While these types use the same number
+             *  of generic attributes as their single-precision equivalents,
+             *  implementations are permitted to consume two single-precision
+             *  vectors of internal storage for each three- or four-component
+             *  double-precision vector."
+             *
+             * Mark this attribute slot as taking up twice as much space
+             * so we can count it properly against limits.  According to
+             * issue (3) of the GL_ARB_vertex_attrib_64bit behavior, this
+             * is optional behavior, but it seems preferable.
+             */
+            const glsl_type *type = var->type->without_array();
+            if (type == glsl_type::dvec3_type ||
+                type == glsl_type::dvec4_type ||
+                type == glsl_type::dmat2x3_type ||
+                type == glsl_type::dmat2x4_type ||
+                type == glsl_type::dmat3_type ||
+                type == glsl_type::dmat3x4_type ||
+                type == glsl_type::dmat4x3_type ||
+                type == glsl_type::dmat4_type) {
+               double_storage_locations |= (use_mask << attr);
+            }
 	 }
 
 	 continue;
@@ -2605,6 +2610,9 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
    }
 
    if (target_index == MESA_SHADER_VERTEX) {
+      unsigned total_attribs_size =
+         _mesa_bitcount(used_locations & ((1 << max_index) - 1)) +
+         _mesa_bitcount(double_storage_locations);
       if (total_attribs_size > max_index) {
 	 linker_error(prog,
 		      "attempt to use %d vertex attribute slots only %d available ",
-- 
cgit v1.2.3


From 30e84530a097278c7cf01c0491dba5866510c4c5 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 3 Sep 2015 00:55:40 -0700
Subject: glsl: Remove unused total_attribs_size variable.

Accidentally left behind by my previous patch.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/linker.cpp | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 01554bc2ad7..59e3e9c38a6 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2390,7 +2390,6 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
    } to_assign[16];
 
    unsigned num_attr = 0;
-   unsigned total_attribs_size = 0;
 
    foreach_in_list(ir_instruction, node, sh->ir) {
       ir_variable *const var = node->as_variable();
-- 
cgit v1.2.3


From ce181aea6cb5353181add7b5aca3c0b196a9b513 Mon Sep 17 00:00:00 2001
From: Neil Roberts <neil@linux.intel.com>
Date: Wed, 2 Sep 2015 11:29:16 +0100
Subject: mesa/pbo: Handle zero width, height or depth when validating access

It's legal to call glTexSubImage with zero values for the width,
height or depth. Previously this was breaking the PBO access
validation because it tries to work out the last pixel accessed by
getting the pixel at height-1 and depth-1 which would end up with
bogus values.

This was causing GL errors to be generated during the Piglit
texsubimage test, although the test was passing anyway.

v2: Also check for width == 0. Don't validate the start pointer if any
    of the dimensions are zero.
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/mesa/main/pbo.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src')

diff --git a/src/mesa/main/pbo.c b/src/mesa/main/pbo.c
index 0c160253201..7762324a4ae 100644
--- a/src/mesa/main/pbo.c
+++ b/src/mesa/main/pbo.c
@@ -103,6 +103,12 @@ _mesa_validate_pbo_access(GLuint dimensions,
       /* no buffer! */
       return GL_FALSE;
 
+   /* If the size of the image is zero then no pixels are accessed so we
+    * don't need to check anything else.
+    */
+   if (width == 0 || height == 0 || depth == 0)
+      return GL_TRUE;
+
    /* get the offset to the first pixel we'll read/write */
    start = _mesa_image_offset(dimensions, pack, width, height,
                               format, type, 0, 0, 0);
-- 
cgit v1.2.3


From 7d475bad66b99e171542bc9ea62abac56abfa6f2 Mon Sep 17 00:00:00 2001
From: Grazvydas Ignotas <notasas@gmail.com>
Date: Thu, 3 Sep 2015 01:54:27 +0300
Subject: r600g: make all scissor states use single atom
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As suggested by Marek Olšák, we can use a single atom to track all scissor
states. This will allow us to simplify dirty atom handling later.

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/drivers/r600/evergreen_state.c   | 36 ++++++++++++++--------
 src/gallium/drivers/r600/r600_blit.c         |  2 +-
 src/gallium/drivers/r600/r600_hw_context.c   |  4 ++-
 src/gallium/drivers/r600/r600_pipe.h         |  8 ++---
 src/gallium/drivers/r600/r600_state.c        | 46 +++++++++++++++++-----------
 src/gallium/drivers/r600/r600_state_common.c |  6 ++--
 6 files changed, 62 insertions(+), 40 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 0d4b5987246..3a7f583ecfa 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -892,27 +892,38 @@ static void evergreen_set_scissor_states(struct pipe_context *ctx,
 					const struct pipe_scissor_state *state)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
+	struct r600_scissor_state *rstate = &rctx->scissor;
 	int i;
 
-	for (i = start_slot; i < start_slot + num_scissors; i++) {
-		rctx->scissor[i].scissor = state[i - start_slot];
-		r600_mark_atom_dirty(rctx, &rctx->scissor[i].atom);
-	}
+	for (i = start_slot; i < start_slot + num_scissors; i++)
+		rstate->scissor[i] = state[i - start_slot];
+	rstate->dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
+	rstate->atom.num_dw = util_bitcount(rstate->dirty_mask) * 4;
+	r600_mark_atom_dirty(rctx, &rstate->atom);
 }
 
 static void evergreen_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
-	struct r600_scissor_state *rstate = (struct r600_scissor_state *)atom;
-	struct pipe_scissor_state *state = &rstate->scissor;
-	unsigned offset = rstate->idx * 4 * 2;
+	struct r600_scissor_state *rstate = &rctx->scissor;
+	struct pipe_scissor_state *state;
+	uint32_t dirty_mask;
+	unsigned i, offset;
 	uint32_t tl, br;
 
-	evergreen_get_scissor_rect(rctx, state->minx, state->miny, state->maxx, state->maxy, &tl, &br);
+	dirty_mask = rstate->dirty_mask;
+	while (dirty_mask != 0) {
+		i = u_bit_scan(&dirty_mask);
+		state = &rstate->scissor[i];
+		evergreen_get_scissor_rect(rctx, state->minx, state->miny, state->maxx, state->maxy, &tl, &br);
 
-	radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2);
-	radeon_emit(cs, tl);
-	radeon_emit(cs, br);
+		offset = i * 4 * 2;
+		radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2);
+		radeon_emit(cs, tl);
+		radeon_emit(cs, br);
+	}
+	rstate->dirty_mask = 0;
+	rstate->atom.num_dw = 0;
 }
 
 /**
@@ -3491,11 +3502,10 @@ void evergreen_init_state_functions(struct r600_context *rctx)
 	r600_init_atom(rctx, &rctx->dsa_state.atom, id++, r600_emit_cso_state, 0);
 	r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, evergreen_emit_polygon_offset, 6);
 	r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0);
+	r600_init_atom(rctx, &rctx->scissor.atom, id++, evergreen_emit_scissor_state, 0);
 	for (i = 0; i < R600_MAX_VIEWPORTS; i++) {
 		r600_init_atom(rctx, &rctx->viewport[i].atom, id++, r600_emit_viewport_state, 8);
-		r600_init_atom(rctx, &rctx->scissor[i].atom, id++, evergreen_emit_scissor_state, 4);
 		rctx->viewport[i].idx = i;
-		rctx->scissor[i].idx = i;
 	}
 	r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
 	r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5);
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 08b2f644cad..8774cb5d655 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -66,7 +66,7 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
 
 	if (op & R600_SAVE_FRAGMENT_STATE) {
 		util_blitter_save_viewport(rctx->blitter, &rctx->viewport[0].state);
-		util_blitter_save_scissor(rctx->blitter, &rctx->scissor[0].scissor);
+		util_blitter_save_scissor(rctx->blitter, &rctx->scissor.scissor[0]);
 		util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader);
 		util_blitter_save_blend(rctx->blitter, rctx->blend_state.cso);
 		util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->dsa_state.cso);
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 2fe29e91c4f..c5403252d36 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -308,8 +308,10 @@ void r600_begin_new_cs(struct r600_context *ctx)
 	r600_mark_atom_dirty(ctx, &ctx->poly_offset_state.atom);
 	r600_mark_atom_dirty(ctx, &ctx->vgt_state.atom);
 	r600_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
+	ctx->scissor.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
+	ctx->scissor.atom.num_dw = R600_MAX_VIEWPORTS * 4;
+	r600_mark_atom_dirty(ctx, &ctx->scissor.atom);
 	for (i = 0; i < R600_MAX_VIEWPORTS; i++) {
-		r600_mark_atom_dirty(ctx, &ctx->scissor[i].atom);
 		r600_mark_atom_dirty(ctx, &ctx->viewport[i].atom);
 	}
 	if (ctx->b.chip_class < EVERGREEN) {
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 8d5fd99e65a..e09bee15d46 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -38,7 +38,7 @@
 
 #include "tgsi/tgsi_scan.h"
 
-#define R600_NUM_ATOMS 75
+#define R600_NUM_ATOMS 60
 
 #define R600_MAX_VIEWPORTS 16
 
@@ -393,9 +393,9 @@ struct r600_cso_state
 struct r600_scissor_state
 {
 	struct r600_atom		atom;
-	struct pipe_scissor_state	scissor;
+	struct pipe_scissor_state	scissor[R600_MAX_VIEWPORTS];
+	uint32_t			dirty_mask;
 	bool				enable; /* r6xx only */
-	int idx;
 };
 
 struct r600_fetch_shader {
@@ -458,7 +458,7 @@ struct r600_context {
 	struct r600_poly_offset_state	poly_offset_state;
 	struct r600_cso_state		rasterizer_state;
 	struct r600_sample_mask		sample_mask;
-	struct r600_scissor_state	scissor[R600_MAX_VIEWPORTS];
+	struct r600_scissor_state	scissor;
 	struct r600_seamless_cube_map	seamless_cube_map;
 	struct r600_config_state	config_state;
 	struct r600_stencil_ref_state	stencil_ref;
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index f2d24a3e37a..64a22e6d305 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -769,21 +769,32 @@ static void r600_set_polygon_stipple(struct pipe_context *ctx,
 static void r600_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
-	struct r600_scissor_state *rstate = (struct r600_scissor_state *)atom;
-	struct pipe_scissor_state *state = &rstate->scissor;
-	unsigned offset = rstate->idx * 4 * 2;
+	struct r600_scissor_state *rstate = &rctx->scissor;
+	struct pipe_scissor_state *state;
+	uint32_t dirty_mask;
+	unsigned i, offset;
 
-	if (rctx->b.chip_class != R600 || rctx->scissor[0].enable) {
-		radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2);
-		radeon_emit(cs, S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) |
-				     S_028240_WINDOW_OFFSET_DISABLE(1));
-		radeon_emit(cs, S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy));
-	} else {
+	if (rctx->b.chip_class == R600 && !rctx->scissor.enable) {
 		radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
 		radeon_emit(cs, S_028240_TL_X(0) | S_028240_TL_Y(0) |
 				     S_028240_WINDOW_OFFSET_DISABLE(1));
 		radeon_emit(cs, S_028244_BR_X(8192) | S_028244_BR_Y(8192));
+		return;
+	}
+
+	dirty_mask = rstate->dirty_mask;
+	while (dirty_mask != 0)
+	{
+		i = u_bit_scan(&dirty_mask);
+		offset = i * 4 * 2;
+		state = &rstate->scissor[i];
+		radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2);
+		radeon_emit(cs, S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) |
+				     S_028240_WINDOW_OFFSET_DISABLE(1));
+		radeon_emit(cs, S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy));
 	}
+	rstate->dirty_mask = 0;
+	rstate->atom.num_dw = 0;
 }
 
 static void r600_set_scissor_states(struct pipe_context *ctx,
@@ -792,18 +803,18 @@ static void r600_set_scissor_states(struct pipe_context *ctx,
                                     const struct pipe_scissor_state *state)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
+	struct r600_scissor_state *rstate = &rctx->scissor;
 	int i;
 
-	for (i = start_slot ; i < start_slot + num_scissors; i++) {
-		rctx->scissor[i].scissor = state[i - start_slot];
-	}
+	for (i = start_slot ; i < start_slot + num_scissors; i++)
+		rstate->scissor[i] = state[i - start_slot];
+	rstate->dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
+	rstate->atom.num_dw = util_bitcount(rstate->dirty_mask) * 4;
 
-	if (rctx->b.chip_class == R600 && !rctx->scissor[0].enable)
+	if (rctx->b.chip_class == R600 && !rstate->enable)
 		return;
 
-	for (i = start_slot ; i < start_slot + num_scissors; i++) {
-		r600_mark_atom_dirty(rctx, &rctx->scissor[i].atom);
-	}
+	r600_mark_atom_dirty(rctx, &rstate->atom);
 }
 
 static struct r600_resource *r600_buffer_create_helper(struct r600_screen *rscreen,
@@ -3065,10 +3076,9 @@ void r600_init_state_functions(struct r600_context *rctx)
 	r600_init_atom(rctx, &rctx->dsa_state.atom, id++, r600_emit_cso_state, 0);
 	r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, r600_emit_polygon_offset, 6);
 	r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0);
+	r600_init_atom(rctx, &rctx->scissor.atom, id++, r600_emit_scissor_state, 0);
 	for (i = 0;i < R600_MAX_VIEWPORTS; i++) {
-		r600_init_atom(rctx, &rctx->scissor[i].atom, id++, r600_emit_scissor_state, 4);
 		r600_init_atom(rctx, &rctx->viewport[i].atom, id++, r600_emit_viewport_state, 8);
-		rctx->scissor[i].idx = i;
 		rctx->viewport[i].idx = i;
 	}
 	r600_init_atom(rctx, &rctx->config_state.atom, id++, r600_emit_config_state, 3);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 24ed74b40d6..d2b6ebec6a4 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -372,9 +372,9 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state)
 
 	/* Workaround for a missing scissor enable on r600. */
 	if (rctx->b.chip_class == R600 &&
-	    rs->scissor_enable != rctx->scissor[0].enable) {
-		rctx->scissor[0].enable = rs->scissor_enable;
-		r600_mark_atom_dirty(rctx, &rctx->scissor[0].atom);
+	    rs->scissor_enable != rctx->scissor.enable) {
+		rctx->scissor.enable = rs->scissor_enable;
+		r600_mark_atom_dirty(rctx, &rctx->scissor.atom);
 	}
 
 	/* Re-emit PA_SC_LINE_STIPPLE. */
-- 
cgit v1.2.3


From fbb423b43380d0e43d14056e96846da412693148 Mon Sep 17 00:00:00 2001
From: Grazvydas Ignotas <notasas@gmail.com>
Date: Thu, 3 Sep 2015 01:54:28 +0300
Subject: r600g: apply disable workaround on all scissors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During review of the "r600g: make all scissor states use single atom" patch,
Marek Olšák noticed that the scissor disable workaround should be applied to
all scissor states and not just the first one, so let's do so.

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/drivers/r600/r600_state.c        | 22 +++++++++++++---------
 src/gallium/drivers/r600/r600_state_common.c |  1 +
 2 files changed, 14 insertions(+), 9 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 64a22e6d305..c3db1437200 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -771,15 +771,15 @@ static void r600_emit_scissor_state(struct r600_context *rctx, struct r600_atom
 	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
 	struct r600_scissor_state *rstate = &rctx->scissor;
 	struct pipe_scissor_state *state;
+	bool do_disable_workaround = false;
 	uint32_t dirty_mask;
 	unsigned i, offset;
+	uint32_t tl, br;
 
 	if (rctx->b.chip_class == R600 && !rctx->scissor.enable) {
-		radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
-		radeon_emit(cs, S_028240_TL_X(0) | S_028240_TL_Y(0) |
-				     S_028240_WINDOW_OFFSET_DISABLE(1));
-		radeon_emit(cs, S_028244_BR_X(8192) | S_028244_BR_Y(8192));
-		return;
+		tl = S_028240_TL_X(0) | S_028240_TL_Y(0) | S_028240_WINDOW_OFFSET_DISABLE(1);
+		br = S_028244_BR_X(8192) | S_028244_BR_Y(8192);
+		do_disable_workaround = true;
 	}
 
 	dirty_mask = rstate->dirty_mask;
@@ -787,11 +787,15 @@ static void r600_emit_scissor_state(struct r600_context *rctx, struct r600_atom
 	{
 		i = u_bit_scan(&dirty_mask);
 		offset = i * 4 * 2;
-		state = &rstate->scissor[i];
 		radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2);
-		radeon_emit(cs, S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) |
-				     S_028240_WINDOW_OFFSET_DISABLE(1));
-		radeon_emit(cs, S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy));
+		if (!do_disable_workaround) {
+			state = &rstate->scissor[i];
+			tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) |
+				S_028240_WINDOW_OFFSET_DISABLE(1);
+			br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy);
+		}
+		radeon_emit(cs, tl);
+		radeon_emit(cs, br);
 	}
 	rstate->dirty_mask = 0;
 	rstate->atom.num_dw = 0;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index d2b6ebec6a4..0b3fe857591 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -374,6 +374,7 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state)
 	if (rctx->b.chip_class == R600 &&
 	    rs->scissor_enable != rctx->scissor.enable) {
 		rctx->scissor.enable = rs->scissor_enable;
+		rctx->scissor.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
 		r600_mark_atom_dirty(rctx, &rctx->scissor.atom);
 	}
 
-- 
cgit v1.2.3


From 4d9af438bc41ed20da1963a438249fee43213121 Mon Sep 17 00:00:00 2001
From: Grazvydas Ignotas <notasas@gmail.com>
Date: Thu, 3 Sep 2015 01:54:29 +0300
Subject: r600g: make all viewport states use single atom
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Similarly to scissor states, we can use a single atom to track all viewport
states. This will allow simplifying dirty atom handling later.

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/drivers/r600/evergreen_state.c   |  7 ++---
 src/gallium/drivers/r600/r600_blit.c         |  2 +-
 src/gallium/drivers/r600/r600_hw_context.c   |  8 +++---
 src/gallium/drivers/r600/r600_pipe.h         |  8 +++---
 src/gallium/drivers/r600/r600_state.c        |  6 +---
 src/gallium/drivers/r600/r600_state_common.c | 41 ++++++++++++++++++----------
 6 files changed, 38 insertions(+), 34 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 3a7f583ecfa..5212db3c1a7 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3450,7 +3450,7 @@ fallback:
 void evergreen_init_state_functions(struct r600_context *rctx)
 {
 	unsigned id = 4;
-	int i;
+
 	/* !!!
 	 *  To avoid GPU lockup registers must be emited in a specific order
 	 * (no kidding ...). The order below is important and have been
@@ -3503,10 +3503,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
 	r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, evergreen_emit_polygon_offset, 6);
 	r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0);
 	r600_init_atom(rctx, &rctx->scissor.atom, id++, evergreen_emit_scissor_state, 0);
-	for (i = 0; i < R600_MAX_VIEWPORTS; i++) {
-		r600_init_atom(rctx, &rctx->viewport[i].atom, id++, r600_emit_viewport_state, 8);
-		rctx->viewport[i].idx = i;
-	}
+	r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 0);
 	r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
 	r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5);
 	r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++);
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 8774cb5d655..d1370cd8f26 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -65,7 +65,7 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
 	util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer_state.cso);
 
 	if (op & R600_SAVE_FRAGMENT_STATE) {
-		util_blitter_save_viewport(rctx->blitter, &rctx->viewport[0].state);
+		util_blitter_save_viewport(rctx->blitter, &rctx->viewport.state[0]);
 		util_blitter_save_scissor(rctx->blitter, &rctx->scissor.scissor[0]);
 		util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader);
 		util_blitter_save_blend(rctx->blitter, rctx->blend_state.cso);
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index c5403252d36..092f261700c 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -287,7 +287,7 @@ void r600_context_gfx_flush(void *context, unsigned flags,
 void r600_begin_new_cs(struct r600_context *ctx)
 {
 	unsigned shader;
-	int i;
+
 	ctx->b.flags = 0;
 	ctx->b.gtt = 0;
 	ctx->b.vram = 0;
@@ -311,9 +311,9 @@ void r600_begin_new_cs(struct r600_context *ctx)
 	ctx->scissor.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
 	ctx->scissor.atom.num_dw = R600_MAX_VIEWPORTS * 4;
 	r600_mark_atom_dirty(ctx, &ctx->scissor.atom);
-	for (i = 0; i < R600_MAX_VIEWPORTS; i++) {
-		r600_mark_atom_dirty(ctx, &ctx->viewport[i].atom);
-	}
+	ctx->viewport.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
+	ctx->viewport.atom.num_dw = R600_MAX_VIEWPORTS * 8;
+	r600_mark_atom_dirty(ctx, &ctx->viewport.atom);
 	if (ctx->b.chip_class < EVERGREEN) {
 		r600_mark_atom_dirty(ctx, &ctx->config_state.atom);
 	}
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index e09bee15d46..bf6bd766097 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -38,7 +38,7 @@
 
 #include "tgsi/tgsi_scan.h"
 
-#define R600_NUM_ATOMS 60
+#define R600_NUM_ATOMS 45
 
 #define R600_MAX_VIEWPORTS 16
 
@@ -208,8 +208,8 @@ struct r600_stencil_ref_state {
 
 struct r600_viewport_state {
 	struct r600_atom atom;
-	struct pipe_viewport_state state;
-	int idx;
+	struct pipe_viewport_state state[R600_MAX_VIEWPORTS];
+	uint32_t dirty_mask;
 };
 
 struct r600_shader_stages_state {
@@ -463,7 +463,7 @@ struct r600_context {
 	struct r600_config_state	config_state;
 	struct r600_stencil_ref_state	stencil_ref;
 	struct r600_vgt_state		vgt_state;
-	struct r600_viewport_state	viewport[R600_MAX_VIEWPORTS];
+	struct r600_viewport_state	viewport;
 	/* Shaders and shader resources. */
 	struct r600_cso_state		vertex_fetch_shader;
 	struct r600_shader_state	vertex_shader;
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index c3db1437200..6724cc080b5 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -3032,7 +3032,6 @@ fallback:
 void r600_init_state_functions(struct r600_context *rctx)
 {
 	unsigned id = 4;
-	int i;
 
 	/* !!!
 	 *  To avoid GPU lockup registers must be emited in a specific order
@@ -3081,10 +3080,7 @@ void r600_init_state_functions(struct r600_context *rctx)
 	r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, r600_emit_polygon_offset, 6);
 	r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0);
 	r600_init_atom(rctx, &rctx->scissor.atom, id++, r600_emit_scissor_state, 0);
-	for (i = 0;i < R600_MAX_VIEWPORTS; i++) {
-		r600_init_atom(rctx, &rctx->viewport[i].atom, id++, r600_emit_viewport_state, 8);
-		rctx->viewport[i].idx = i;
-	}
+	r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 0);
 	r600_init_atom(rctx, &rctx->config_state.atom, id++, r600_emit_config_state, 3);
 	r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
 	r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 0b3fe857591..1f96ed6e12c 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -704,28 +704,39 @@ static void r600_set_viewport_states(struct pipe_context *ctx,
                                      const struct pipe_viewport_state *state)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
+	struct r600_viewport_state *rstate = &rctx->viewport;
 	int i;
 
-	for (i = start_slot; i < start_slot + num_viewports; i++) {
-		rctx->viewport[i].state = state[i - start_slot];
-		r600_mark_atom_dirty(rctx, &rctx->viewport[i].atom);
-	}
+	for (i = start_slot; i < start_slot + num_viewports; i++)
+		rstate->state[i] = state[i - start_slot];
+	rstate->dirty_mask |= ((1 << num_viewports) - 1) << start_slot;
+	rstate->atom.num_dw = util_bitcount(rstate->dirty_mask) * 8;
+	r600_mark_atom_dirty(rctx, &rctx->viewport.atom);
 }
 
 void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
-	struct r600_viewport_state *rstate = (struct r600_viewport_state *)atom;
-	struct pipe_viewport_state *state = &rstate->state;
-	int offset = rstate->idx * 6 * 4;
-
-	radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE_0 + offset, 6);
-	radeon_emit(cs, fui(state->scale[0]));     /* R_02843C_PA_CL_VPORT_XSCALE_0  */
-	radeon_emit(cs, fui(state->translate[0])); /* R_028440_PA_CL_VPORT_XOFFSET_0 */
-	radeon_emit(cs, fui(state->scale[1]));     /* R_028444_PA_CL_VPORT_YSCALE_0  */
-	radeon_emit(cs, fui(state->translate[1])); /* R_028448_PA_CL_VPORT_YOFFSET_0 */
-	radeon_emit(cs, fui(state->scale[2]));     /* R_02844C_PA_CL_VPORT_ZSCALE_0  */
-	radeon_emit(cs, fui(state->translate[2])); /* R_028450_PA_CL_VPORT_ZOFFSET_0 */
+	struct r600_viewport_state *rstate = &rctx->viewport;
+	struct pipe_viewport_state *state;
+	uint32_t dirty_mask;
+	unsigned i, offset;
+
+	dirty_mask = rstate->dirty_mask;
+	while (dirty_mask != 0) {
+		i = u_bit_scan(&dirty_mask);
+		offset = i * 6 * 4;
+		radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE_0 + offset, 6);
+		state = &rstate->state[i];
+		radeon_emit(cs, fui(state->scale[0]));     /* R_02843C_PA_CL_VPORT_XSCALE_0  */
+		radeon_emit(cs, fui(state->translate[0])); /* R_028440_PA_CL_VPORT_XOFFSET_0 */
+		radeon_emit(cs, fui(state->scale[1]));     /* R_028444_PA_CL_VPORT_YSCALE_0  */
+		radeon_emit(cs, fui(state->translate[1])); /* R_028448_PA_CL_VPORT_YOFFSET_0 */
+		radeon_emit(cs, fui(state->scale[2]));     /* R_02844C_PA_CL_VPORT_ZSCALE_0  */
+		radeon_emit(cs, fui(state->translate[2])); /* R_028450_PA_CL_VPORT_ZOFFSET_0 */
+	}
+	rstate->dirty_mask = 0;
+	rstate->atom.num_dw = 0;
 }
 
 /* Compute the key for the hw shader variant */
-- 
cgit v1.2.3


From 6ef4572937015c1e076afc12f19e8453d08babec Mon Sep 17 00:00:00 2001
From: Grazvydas Ignotas <notasas@gmail.com>
Date: Thu, 3 Sep 2015 01:54:30 +0300
Subject: r600g: start numbering atoms from 1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There doesn't seem to be any reason to start from 4.
Start from 1 instead (0 is left reserved to catch uninitialized atoms).

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/drivers/r600/evergreen_state.c | 2 +-
 src/gallium/drivers/r600/r600_pipe.h       | 2 +-
 src/gallium/drivers/r600/r600_state.c      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 5212db3c1a7..6f4cb55f887 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3449,7 +3449,7 @@ fallback:
 
 void evergreen_init_state_functions(struct r600_context *rctx)
 {
-	unsigned id = 4;
+	unsigned id = 1;
 
 	/* !!!
 	 *  To avoid GPU lockup registers must be emited in a specific order
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index bf6bd766097..9860eff35a5 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -38,7 +38,7 @@
 
 #include "tgsi/tgsi_scan.h"
 
-#define R600_NUM_ATOMS 45
+#define R600_NUM_ATOMS 42
 
 #define R600_MAX_VIEWPORTS 16
 
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 6724cc080b5..93a74f7c2ca 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -3031,7 +3031,7 @@ fallback:
 
 void r600_init_state_functions(struct r600_context *rctx)
 {
-	unsigned id = 4;
+	unsigned id = 1;
 
 	/* !!!
 	 *  To avoid GPU lockup registers must be emited in a specific order
-- 
cgit v1.2.3


From ccbc7952a4c125741f412ebc0278e17b65cf6fd7 Mon Sep 17 00:00:00 2001
From: Grazvydas Ignotas <notasas@gmail.com>
Date: Thu, 3 Sep 2015 01:54:31 +0300
Subject: r600g: simplify dirty atom tracking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that R600_NUM_ATOMS is below 64, dirty atom tracking can be
simplified.

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/drivers/r600/r600_hw_context.c   |  9 +++---
 src/gallium/drivers/r600/r600_pipe.h         | 45 ++++------------------------
 src/gallium/drivers/r600/r600_state_common.c |  9 +++---
 3 files changed, 14 insertions(+), 49 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 092f261700c..cf715976ab2 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -48,16 +48,15 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 	num_dw += ctx->b.rings.gfx.cs->cdw;
 
 	if (count_draw_in) {
-		unsigned i;
+		uint64_t mask;
 
 		/* The number of dwords all the dirty states would take. */
-		i = r600_next_dirty_atom(ctx, 0);
-		while (i < R600_NUM_ATOMS) {
-			num_dw += ctx->atoms[i]->num_dw;
+		mask = ctx->dirty_atoms;
+		while (mask != 0) {
+			num_dw += ctx->atoms[u_bit_scan64(&mask)]->num_dw;
 			if (ctx->screen->b.trace_bo) {
 				num_dw += R600_TRACE_CS_DWORDS;
 			}
-			i = r600_next_dirty_atom(ctx, i + 1);
 		}
 
 		/* The upper-bound of how much space a draw command would take. */
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 9860eff35a5..76539d66ec6 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -87,9 +87,6 @@
 #define R600_BIG_ENDIAN 0
 #endif
 
-#define R600_DIRTY_ATOM_WORD_BITS (sizeof(unsigned long) * 8)
-#define R600_DIRTY_ATOM_ARRAY_LEN DIV_ROUND_UP(R600_NUM_ATOMS, R600_DIRTY_ATOM_WORD_BITS)
-
 struct r600_context;
 struct r600_bytecode;
 union  r600_shader_key;
@@ -438,7 +435,7 @@ struct r600_context {
 	/* State binding slots are here. */
 	struct r600_atom		*atoms[R600_NUM_ATOMS];
 	/* Dirty atom bitmask for fast tests */
-	unsigned long			dirty_atoms[R600_DIRTY_ATOM_ARRAY_LEN];
+	uint64_t			dirty_atoms;
 	/* States for CS initialization. */
 	struct r600_command_buffer	start_cs_cmd; /* invariant state mostly */
 	/** Compute specific registers initializations.  The start_cs_cmd atom
@@ -515,18 +512,17 @@ static inline void r600_set_atom_dirty(struct r600_context *rctx,
 				       struct r600_atom *atom,
 				       bool dirty)
 {
-	unsigned long mask;
-	unsigned int w;
+	uint64_t mask;
 
 	atom->dirty = dirty;
 
 	assert(atom->id != 0);
-	w = atom->id / R600_DIRTY_ATOM_WORD_BITS;
-	mask = 1ul << (atom->id % R600_DIRTY_ATOM_WORD_BITS);
+	assert(atom->id < sizeof(mask) * 8);
+	mask = 1ull << atom->id;
 	if (dirty)
-		rctx->dirty_atoms[w] |= mask;
+		rctx->dirty_atoms |= mask;
 	else
-		rctx->dirty_atoms[w] &= ~mask;
+		rctx->dirty_atoms &= ~mask;
 }
 
 static inline void r600_mark_atom_dirty(struct r600_context *rctx,
@@ -535,35 +531,6 @@ static inline void r600_mark_atom_dirty(struct r600_context *rctx,
 	r600_set_atom_dirty(rctx, atom, true);
 }
 
-static inline unsigned int r600_next_dirty_atom(struct r600_context *rctx,
-						unsigned int id)
-{
-#if !defined(DEBUG) && defined(HAVE___BUILTIN_CTZ)
-	unsigned int w = id / R600_DIRTY_ATOM_WORD_BITS;
-	unsigned int bit = id % R600_DIRTY_ATOM_WORD_BITS;
-	unsigned long bits, mask = (1ul << bit) - 1;
-
-	for (; w < R600_DIRTY_ATOM_ARRAY_LEN; w++, mask = 0ul) {
-		bits = rctx->dirty_atoms[w] & ~mask;
-		if (bits == 0)
-			continue;
-		return w * R600_DIRTY_ATOM_WORD_BITS + __builtin_ctzl(bits);
-	}
-
-	return R600_NUM_ATOMS;
-#else
-	for (; id < R600_NUM_ATOMS; id++) {
-		bool dirty = !!(rctx->dirty_atoms[id / R600_DIRTY_ATOM_WORD_BITS] &
-			(1ul << (id % R600_DIRTY_ATOM_WORD_BITS)));
-		assert(dirty == (rctx->atoms[id] && rctx->atoms[id]->dirty));
-		if (dirty)
-			break;
-	}
-
-	return id;
-#endif
-}
-
 void r600_trace_emit(struct r600_context *rctx);
 
 static inline void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 1f96ed6e12c..39c65ae45c3 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1427,8 +1427,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 	struct r600_context *rctx = (struct r600_context *)ctx;
 	struct pipe_draw_info info = *dinfo;
 	struct pipe_index_buffer ib = {};
-	unsigned i;
 	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	uint64_t mask;
 
 	if (!info.indirect && !info.count && (info.indexed || !info.count_from_stream_output)) {
 		return;
@@ -1538,10 +1538,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 	r600_need_cs_space(rctx, ib.user_buffer ? 5 : 0, TRUE);
 	r600_flush_emit(rctx);
 
-	i = r600_next_dirty_atom(rctx, 0);
-	while (i < R600_NUM_ATOMS) {
-		r600_emit_atom(rctx, rctx->atoms[i]);
-		i = r600_next_dirty_atom(rctx, i + 1);
+	mask = rctx->dirty_atoms;
+	while (mask != 0) {
+		r600_emit_atom(rctx, rctx->atoms[u_bit_scan64(&mask)]);
 	}
 
 	if (rctx->b.chip_class == CAYMAN) {
-- 
cgit v1.2.3


From 722ce747436f5b9c79d1fa4a8c59eed2f9cb611b Mon Sep 17 00:00:00 2001
From: Grazvydas Ignotas <notasas@gmail.com>
Date: Thu, 3 Sep 2015 01:54:32 +0300
Subject: gallium/radeon: remove 'dirty' member from r600_atom
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It's no longer used by either r600 or radeonsi.

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/drivers/r600/r600_pipe.h          | 2 --
 src/gallium/drivers/r600/r600_state_common.c  | 1 -
 src/gallium/drivers/radeon/r600_pipe_common.h | 3 +--
 src/gallium/drivers/radeonsi/si_state.c       | 1 -
 4 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 76539d66ec6..25df831339c 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -514,8 +514,6 @@ static inline void r600_set_atom_dirty(struct r600_context *rctx,
 {
 	uint64_t mask;
 
-	atom->dirty = dirty;
-
 	assert(atom->id != 0);
 	assert(atom->id < sizeof(mask) * 8);
 	mask = 1ull << atom->id;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 39c65ae45c3..ae1341187cb 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -56,7 +56,6 @@ void r600_add_atom(struct r600_context *rctx,
 	assert(rctx->atoms[id] == NULL);
 	rctx->atoms[id] = atom;
 	atom->id = id;
-	atom->dirty = false;
 }
 
 void r600_init_atom(struct r600_context *rctx,
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index d22c230ea3c..534b987a2cc 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -316,8 +316,7 @@ struct r600_common_screen {
 struct r600_atom {
 	void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
 	unsigned		num_dw;
-	unsigned short		id;	/* used by r600 only */
-	bool			dirty;
+	unsigned short		id;
 };
 
 struct r600_so_target {
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 1e2f32a8799..f698c59d87a 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -50,7 +50,6 @@ void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
 		  void (*emit_func)(struct si_context *ctx, struct r600_atom *state))
 {
 	atom->emit = (void*)emit_func;
-	atom->dirty = false;
 	atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */
 	*list_elem = atom;
 }
-- 
cgit v1.2.3


From 6c1e368cf38e02174a8c88218ae711ab0b27954f Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Wed, 2 Sep 2015 14:57:55 +0200
Subject: u_upload_mgr: optimize u_upload_alloc

This is probably the most called util function. It does almost nothing,
yet it can consume 10% of the CPU on the profile. This drops it down to 5%.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/util/u_upload_mgr.c | 32 ++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c
index 744ea2e5e0c..4c560848e87 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.c
+++ b/src/gallium/auxiliary/util/u_upload_mgr.c
@@ -186,37 +186,39 @@ enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
                                 struct pipe_resource **outbuf,
                                 void **ptr )
 {
-   unsigned alloc_size = align( size, upload->alignment );
+   unsigned alloc_size = align(size, upload->alignment);
    unsigned alloc_offset = align(min_out_offset, upload->alignment);
+   unsigned buffer_size = upload->buffer ? upload->buffer->width0 : 0;
    unsigned offset;
 
-   /* Init these return values here in case we fail below to make
-    * sure the caller doesn't get garbage values.
-    */
-   *out_offset = ~0;
-   pipe_resource_reference(outbuf, NULL);
-   *ptr = NULL;
-
    /* Make sure we have enough space in the upload buffer
     * for the sub-allocation. */
-   if (!upload->buffer ||
-       MAX2(upload->offset, alloc_offset) + alloc_size > upload->buffer->width0) {
+   if (unlikely(MAX2(upload->offset, alloc_offset) + alloc_size > buffer_size)) {
       enum pipe_error ret = u_upload_alloc_buffer(upload,
                                                   alloc_offset + alloc_size);
-      if (ret != PIPE_OK)
+      if (unlikely(ret != PIPE_OK)) {
+         *out_offset = ~0;
+         pipe_resource_reference(outbuf, NULL);
+         *ptr = NULL;
          return ret;
+      }
+
+      buffer_size = upload->buffer->width0;
    }
 
    offset = MAX2(upload->offset, alloc_offset);
 
-   if (!upload->map) {
+   if (unlikely(!upload->map)) {
       upload->map = pipe_buffer_map_range(upload->pipe, upload->buffer,
                                           offset,
-                                          upload->buffer->width0 - offset,
+                                          buffer_size - offset,
                                           upload->map_flags,
 					  &upload->transfer);
-      if (!upload->map) {
+      if (unlikely(!upload->map)) {
          upload->transfer = NULL;
+         *out_offset = ~0;
+         pipe_resource_reference(outbuf, NULL);
+         *ptr = NULL;
          return PIPE_ERROR_OUT_OF_MEMORY;
       }
 
@@ -229,7 +231,7 @@ enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
 
    /* Emit the return values: */
    *ptr = upload->map + offset;
-   pipe_resource_reference( outbuf, upload->buffer );
+   pipe_resource_reference(outbuf, upload->buffer);
    *out_offset = offset;
 
    upload->offset = offset + alloc_size;
-- 
cgit v1.2.3


From 8c6ff05517137ab9d96015783fcf1bf9d8d1fa12 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Wed, 2 Sep 2015 15:08:23 +0200
Subject: u_upload_mgr: remove the return value from u_upload_alloc

The returned buffer or the returned pointer can be used instead.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/util/u_upload_mgr.c | 29 +++++++++++++++--------------
 src/gallium/auxiliary/util/u_upload_mgr.h | 12 ++++++------
 src/gallium/auxiliary/util/u_vbuf.c       | 27 +++++++++++++--------------
 src/mesa/state_tracker/st_cb_bitmap.c     |  5 +++--
 src/mesa/state_tracker/st_cb_clear.c      |  7 ++++---
 src/mesa/state_tracker/st_cb_drawpixels.c |  5 +++--
 src/mesa/state_tracker/st_cb_drawtex.c    |  7 ++++---
 7 files changed, 48 insertions(+), 44 deletions(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c
index 4c560848e87..7826b61bf87 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.c
+++ b/src/gallium/auxiliary/util/u_upload_mgr.c
@@ -179,12 +179,13 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload,
    return PIPE_OK;
 }
 
-enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
-                                unsigned min_out_offset,
-                                unsigned size,
-                                unsigned *out_offset,
-                                struct pipe_resource **outbuf,
-                                void **ptr )
+void
+u_upload_alloc(struct u_upload_mgr *upload,
+               unsigned min_out_offset,
+               unsigned size,
+               unsigned *out_offset,
+               struct pipe_resource **outbuf,
+               void **ptr)
 {
    unsigned alloc_size = align(size, upload->alignment);
    unsigned alloc_offset = align(min_out_offset, upload->alignment);
@@ -200,7 +201,7 @@ enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
          *out_offset = ~0;
          pipe_resource_reference(outbuf, NULL);
          *ptr = NULL;
-         return ret;
+         return;
       }
 
       buffer_size = upload->buffer->width0;
@@ -219,7 +220,7 @@ enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
          *out_offset = ~0;
          pipe_resource_reference(outbuf, NULL);
          *ptr = NULL;
-         return PIPE_ERROR_OUT_OF_MEMORY;
+         return;
       }
 
       upload->map -= offset;
@@ -235,7 +236,6 @@ enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
    *out_offset = offset;
 
    upload->offset = offset + alloc_size;
-   return PIPE_OK;
 }
 
 enum pipe_error u_upload_data( struct u_upload_mgr *upload,
@@ -246,11 +246,12 @@ enum pipe_error u_upload_data( struct u_upload_mgr *upload,
                                struct pipe_resource **outbuf)
 {
    uint8_t *ptr;
-   enum pipe_error ret = u_upload_alloc(upload, min_out_offset, size,
-                                        out_offset, outbuf,
-                                        (void**)&ptr);
-   if (ret != PIPE_OK)
-      return ret;
+
+   u_upload_alloc(upload, min_out_offset, size,
+                  out_offset, outbuf,
+                  (void**)&ptr);
+   if (!outbuf)
+      return PIPE_ERROR_OUT_OF_MEMORY;
 
    memcpy(ptr, data, size);
    return PIPE_OK;
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h
index 63bf30e38eb..2c319779eca 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.h
+++ b/src/gallium/auxiliary/util/u_upload_mgr.h
@@ -78,12 +78,12 @@ void u_upload_unmap( struct u_upload_mgr *upload );
  * \param outbuf           Pointer to where the upload buffer will be returned.
  * \param ptr              Pointer to the allocated memory that is returned.
  */
-enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
-                                unsigned min_out_offset,
-                                unsigned size,
-                                unsigned *out_offset,
-                                struct pipe_resource **outbuf,
-                                void **ptr );
+void u_upload_alloc(struct u_upload_mgr *upload,
+                    unsigned min_out_offset,
+                    unsigned size,
+                    unsigned *out_offset,
+                    struct pipe_resource **outbuf,
+                    void **ptr);
 
 
 /**
diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c
index 02ae0b840f0..791d82bb65f 100644
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -406,7 +406,6 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
    struct pipe_resource *out_buffer = NULL;
    uint8_t *out_map;
    unsigned out_offset, mask;
-   enum pipe_error err;
 
    /* Get a translate object. */
    tr = translate_cache_find(mgr->translate_cache, key);
@@ -454,12 +453,12 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
       assert((ib->buffer || ib->user_buffer) && ib->index_size);
 
       /* Create and map the output buffer. */
-      err = u_upload_alloc(mgr->uploader, 0,
-                           key->output_stride * num_indices,
-                           &out_offset, &out_buffer,
-                           (void**)&out_map);
-      if (err != PIPE_OK)
-         return err;
+      u_upload_alloc(mgr->uploader, 0,
+                     key->output_stride * num_indices,
+                     &out_offset, &out_buffer,
+                     (void**)&out_map);
+      if (!out_buffer)
+         return PIPE_ERROR_OUT_OF_MEMORY;
 
       if (ib->user_buffer) {
          map = (uint8_t*)ib->user_buffer + offset;
@@ -486,13 +485,13 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
       }
    } else {
       /* Create and map the output buffer. */
-      err = u_upload_alloc(mgr->uploader,
-                           key->output_stride * start_vertex,
-                           key->output_stride * num_vertices,
-                           &out_offset, &out_buffer,
-                           (void**)&out_map);
-      if (err != PIPE_OK)
-         return err;
+      u_upload_alloc(mgr->uploader,
+                     key->output_stride * start_vertex,
+                     key->output_stride * num_vertices,
+                     &out_offset, &out_buffer,
+                     (void**)&out_map);
+      if (!out_buffer)
+         return PIPE_ERROR_OUT_OF_MEMORY;
 
       out_offset -= key->output_stride * start_vertex;
 
diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index 01a96c18264..ce593a94593 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -349,8 +349,9 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized,
       tBot = (GLfloat) height;
    }
 
-   if (u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]),
-                      vbuf_offset, vbuf, (void **) &vertices) != PIPE_OK) {
+   u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]),
+                  vbuf_offset, vbuf, (void **) &vertices);
+   if (!vbuf) {
       return;
    }
 
diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c
index 137fac8a9a9..18efd14a57c 100644
--- a/src/mesa/state_tracker/st_cb_clear.c
+++ b/src/mesa/state_tracker/st_cb_clear.c
@@ -184,9 +184,10 @@ draw_quad(struct st_context *st,
 
    vb.stride = 8 * sizeof(float);
 
-   if (u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]),
-                      &vb.buffer_offset, &vb.buffer,
-                      (void **) &vertices) != PIPE_OK) {
+   u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]),
+                  &vb.buffer_offset, &vb.buffer,
+                  (void **) &vertices);
+   if (!vb.buffer) {
       return;
    }
 
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index b372697026b..152160e1dd2 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -580,8 +580,9 @@ draw_quad(struct gl_context *ctx, GLfloat x0, GLfloat y0, GLfloat z,
    struct pipe_resource *buf = NULL;
    unsigned offset;
 
-   if (u_upload_alloc(st->uploader, 0, 4 * sizeof(verts[0]), &offset,
-                      &buf, (void **) &verts) != PIPE_OK) {
+   u_upload_alloc(st->uploader, 0, 4 * sizeof(verts[0]), &offset,
+                  &buf, (void **) &verts);
+   if (!buf) {
       return;
    }
 
diff --git a/src/mesa/state_tracker/st_cb_drawtex.c b/src/mesa/state_tracker/st_cb_drawtex.c
index 2af4f6d4cf6..2634b09d777 100644
--- a/src/mesa/state_tracker/st_cb_drawtex.c
+++ b/src/mesa/state_tracker/st_cb_drawtex.c
@@ -149,9 +149,10 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
       GLfloat *vbuf = NULL;
       GLuint attr;
 
-      if (u_upload_alloc(st->uploader, 0,
-                         numAttribs * 4 * 4 * sizeof(GLfloat),
-                         &offset, &vbuffer, (void **) &vbuf) != PIPE_OK) {
+      u_upload_alloc(st->uploader, 0,
+                     numAttribs * 4 * 4 * sizeof(GLfloat),
+                     &offset, &vbuffer, (void **) &vbuf);
+      if (!vbuffer) {
          return;
       }
       
-- 
cgit v1.2.3


From b4f7639955b6c74436db6dea9174a8c7ce37ec62 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Wed, 2 Sep 2015 15:11:40 +0200
Subject: u_upload_mgr: remove the return value from u_upload_alloc_buffer

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/util/u_upload_mgr.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c
index 7826b61bf87..ff5d834b01d 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.c
+++ b/src/gallium/auxiliary/util/u_upload_mgr.c
@@ -129,9 +129,9 @@ void u_upload_destroy( struct u_upload_mgr *upload )
 }
 
 
-static enum pipe_error 
-u_upload_alloc_buffer( struct u_upload_mgr *upload,
-                       unsigned min_size )
+static void
+u_upload_alloc_buffer(struct u_upload_mgr *upload,
+                      unsigned min_size)
 {
    struct pipe_screen *screen = upload->pipe->screen;
    struct pipe_resource buffer;
@@ -161,9 +161,8 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload,
    }
 
    upload->buffer = screen->resource_create(screen, &buffer);
-   if (upload->buffer == NULL) {
-      return PIPE_ERROR_OUT_OF_MEMORY;
-   }
+   if (upload->buffer == NULL)
+      return;
 
    /* Map the new buffer. */
    upload->map = pipe_buffer_map_range(upload->pipe, upload->buffer,
@@ -172,11 +171,10 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload,
    if (upload->map == NULL) {
       upload->transfer = NULL;
       pipe_resource_reference(&upload->buffer, NULL);
-      return PIPE_ERROR_OUT_OF_MEMORY;
+      return;
    }
 
    upload->offset = 0;
-   return PIPE_OK;
 }
 
 void
@@ -195,9 +193,9 @@ u_upload_alloc(struct u_upload_mgr *upload,
    /* Make sure we have enough space in the upload buffer
     * for the sub-allocation. */
    if (unlikely(MAX2(upload->offset, alloc_offset) + alloc_size > buffer_size)) {
-      enum pipe_error ret = u_upload_alloc_buffer(upload,
-                                                  alloc_offset + alloc_size);
-      if (unlikely(ret != PIPE_OK)) {
+      u_upload_alloc_buffer(upload, alloc_offset + alloc_size);
+
+      if (unlikely(!upload->buffer)) {
          *out_offset = ~0;
          pipe_resource_reference(outbuf, NULL);
          *ptr = NULL;
-- 
cgit v1.2.3


From 0c5df863ba27d31993f3fdc85b26407f398514fa Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Wed, 2 Sep 2015 15:11:40 +0200
Subject: u_upload_mgr: remove the return value from u_upload_buffer

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/util/u_upload_mgr.c | 35 ++++++++++---------------------
 src/gallium/auxiliary/util/u_upload_mgr.h | 14 ++++++-------
 2 files changed, 18 insertions(+), 31 deletions(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c
index ff5d834b01d..78b0f5f99a0 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.c
+++ b/src/gallium/auxiliary/util/u_upload_mgr.c
@@ -255,21 +255,15 @@ enum pipe_error u_upload_data( struct u_upload_mgr *upload,
    return PIPE_OK;
 }
 
-
-/* As above, but upload the full contents of a buffer.  Useful for
- * uploading user buffers, avoids generating an explosion of GPU
- * buffers if you have an app that does lots of small vertex buffer
- * renders or DrawElements calls.
- */
-enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
-                                 unsigned min_out_offset,
-                                 unsigned offset,
-                                 unsigned size,
-                                 struct pipe_resource *inbuf,
-                                 unsigned *out_offset,
-                                 struct pipe_resource **outbuf)
+/* XXX: Remove. It's basically a CPU fallback of resource_copy_region. */
+void u_upload_buffer(struct u_upload_mgr *upload,
+                     unsigned min_out_offset,
+                     unsigned offset,
+                     unsigned size,
+                     struct pipe_resource *inbuf,
+                     unsigned *out_offset,
+                     struct pipe_resource **outbuf)
 {
-   enum pipe_error ret = PIPE_OK;
    struct pipe_transfer *transfer = NULL;
    const char *map = NULL;
 
@@ -280,20 +274,13 @@ enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
                                              &transfer);
 
    if (map == NULL) {
-      return PIPE_ERROR_OUT_OF_MEMORY;
+      pipe_resource_reference(outbuf, NULL);
+      return;
    }
 
    if (0)
       debug_printf("upload ptr %p ofs %d sz %d\n", map, offset, size);
 
-   ret = u_upload_data( upload,
-                        min_out_offset,
-                        size,
-                        map,
-                        out_offset,
-                        outbuf);
-
+   u_upload_data(upload, min_out_offset, size, map, out_offset, outbuf);
    pipe_buffer_unmap( upload->pipe, transfer );
-
-   return ret;
 }
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h
index 2c319779eca..9744dc17c49 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.h
+++ b/src/gallium/auxiliary/util/u_upload_mgr.h
@@ -106,13 +106,13 @@ enum pipe_error u_upload_data( struct u_upload_mgr *upload,
  * Same as u_upload_data, except that the input data comes from a buffer
  * instead of a user pointer.
  */
-enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
-                                 unsigned min_out_offset,
-                                 unsigned offset,
-                                 unsigned size,
-                                 struct pipe_resource *inbuf,
-                                 unsigned *out_offset,
-                                 struct pipe_resource **outbuf);
+void u_upload_buffer(struct u_upload_mgr *upload,
+                     unsigned min_out_offset,
+                     unsigned offset,
+                     unsigned size,
+                     struct pipe_resource *inbuf,
+                     unsigned *out_offset,
+                     struct pipe_resource **outbuf);
 
 
-- 
cgit v1.2.3


From 44dbaa1746833f2874786fc2067f8837f149261f Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Wed, 2 Sep 2015 15:11:40 +0200
Subject: u_upload_mgr: remove the return value from u_upload_data

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/util/u_upload_mgr.c | 19 ++++++++-----------
 src/gallium/auxiliary/util/u_upload_mgr.h | 12 ++++++------
 src/gallium/auxiliary/util/u_vbuf.c       |  9 ++++-----
 src/mesa/state_tracker/st_draw.c          |  7 ++++---
 4 files changed, 22 insertions(+), 25 deletions(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c
index 78b0f5f99a0..59207a1969b 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.c
+++ b/src/gallium/auxiliary/util/u_upload_mgr.c
@@ -236,23 +236,20 @@ u_upload_alloc(struct u_upload_mgr *upload,
    upload->offset = offset + alloc_size;
 }
 
-enum pipe_error u_upload_data( struct u_upload_mgr *upload,
-                               unsigned min_out_offset,
-                               unsigned size,
-                               const void *data,
-                               unsigned *out_offset,
-                               struct pipe_resource **outbuf)
+void u_upload_data(struct u_upload_mgr *upload,
+                   unsigned min_out_offset,
+                   unsigned size,
+                   const void *data,
+                   unsigned *out_offset,
+                   struct pipe_resource **outbuf)
 {
    uint8_t *ptr;
 
    u_upload_alloc(upload, min_out_offset, size,
                   out_offset, outbuf,
                   (void**)&ptr);
-   if (!outbuf)
-      return PIPE_ERROR_OUT_OF_MEMORY;
-
-   memcpy(ptr, data, size);
-   return PIPE_OK;
+   if (ptr)
+      memcpy(ptr, data, size);
 }
 
 /* XXX: Remove. It's basically a CPU fallback of resource_copy_region. */
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h
index 9744dc17c49..67c6daa4e7f 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.h
+++ b/src/gallium/auxiliary/util/u_upload_mgr.h
@@ -92,12 +92,12 @@ void u_upload_alloc(struct u_upload_mgr *upload,
  * Same as u_upload_alloc, but in addition to that, it copies "data"
  * to the pointer returned from u_upload_alloc.
  */
-enum pipe_error u_upload_data( struct u_upload_mgr *upload,
-                               unsigned min_out_offset,
-                               unsigned size,
-                               const void *data,
-                               unsigned *out_offset,
-                               struct pipe_resource **outbuf);
+void u_upload_data(struct u_upload_mgr *upload,
+                   unsigned min_out_offset,
+                   unsigned size,
+                   const void *data,
+                   unsigned *out_offset,
+                   struct pipe_resource **outbuf);
 
 
 /**
diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c
index 791d82bb65f..3d2193c3bf5 100644
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -976,7 +976,6 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr,
       unsigned start, end;
       struct pipe_vertex_buffer *real_vb;
       const uint8_t *ptr;
-      enum pipe_error err;
 
       i = u_bit_scan(&buffer_mask);
 
@@ -987,10 +986,10 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr,
       real_vb = &mgr->real_vertex_buffer[i];
       ptr = mgr->vertex_buffer[i].user_buffer;
 
-      err = u_upload_data(mgr->uploader, start, end - start, ptr + start,
-                          &real_vb->buffer_offset, &real_vb->buffer);
-      if (err != PIPE_OK)
-         return err;
+      u_upload_data(mgr->uploader, start, end - start, ptr + start,
+                    &real_vb->buffer_offset, &real_vb->buffer);
+      if (!real_vb->buffer)
+         return PIPE_ERROR_OUT_OF_MEMORY;
 
       real_vb->buffer_offset -= start;
    }
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 957fcfd410e..2ad679b1cb8 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -106,9 +106,10 @@ setup_index_buffer(struct st_context *st,
    }
    else if (st->indexbuf_uploader) {
       /* upload indexes from user memory into a real buffer */
-      if (u_upload_data(st->indexbuf_uploader, 0,
-                        ib->count * ibuffer->index_size, ib->ptr,
-                        &ibuffer->offset, &ibuffer->buffer) != PIPE_OK) {
+      u_upload_data(st->indexbuf_uploader, 0,
+                    ib->count * ibuffer->index_size, ib->ptr,
+                    &ibuffer->offset, &ibuffer->buffer);
+      if (!ibuffer->buffer) {
          /* out of memory */
          return FALSE;
       }
-- 
cgit v1.2.3


From 35d0f12797237cdd38e7fd2c39d3c19e875875ca Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Tue, 1 Sep 2015 04:07:54 +0200
Subject: gallium/pb_bufmgr_cache: add a way to remove buffers from the cache
 explicitly

This must be done before exporting a buffer as dmabuf fds, because
we lose track of who is using it and can't trust the reference counter.

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/auxiliary/pipebuffer/pb_bufmgr.h       |  5 +++
 src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 42 ++++++++++++++++++----
 2 files changed, 41 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
index 147ce39041c..1638d96a63b 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
@@ -166,6 +166,11 @@ pb_cache_manager_create(struct pb_manager *provider,
                         unsigned bypass_usage,
                         uint64_t maximum_cache_size);
 
+/**
+ * Remove a buffer from the cache, but keep it alive.
+ */
+void
+pb_cache_manager_remove_buffer(struct pb_buffer *buf);
 
 struct pb_fence_ops;
 
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
index 3b35049f679..cc8ae84bb1b 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -104,18 +104,42 @@ pb_cache_manager(struct pb_manager *mgr)
 }
 
 
+static void
+_pb_cache_manager_remove_buffer_locked(struct pb_cache_buffer *buf)
+{
+   struct pb_cache_manager *mgr = buf->mgr;
+
+   if (buf->head.next) {
+      LIST_DEL(&buf->head);
+      assert(mgr->numDelayed);
+      --mgr->numDelayed;
+      mgr->cache_size -= buf->base.size;
+   }
+   buf->mgr = NULL;
+}
+
+void
+pb_cache_manager_remove_buffer(struct pb_buffer *pb_buf)
+{
+   struct pb_cache_buffer *buf = (struct pb_cache_buffer*)pb_buf;
+   struct pb_cache_manager *mgr = buf->mgr;
+
+   if (!mgr)
+      return;
+
+   pipe_mutex_lock(mgr->mutex);
+   _pb_cache_manager_remove_buffer_locked(buf);
+   pipe_mutex_unlock(mgr->mutex);
+}
+
 /**
  * Actually destroy the buffer.
  */
 static inline void
 _pb_cache_buffer_destroy(struct pb_cache_buffer *buf)
 {
-   struct pb_cache_manager *mgr = buf->mgr;
-
-   LIST_DEL(&buf->head);
-   assert(mgr->numDelayed);
-   --mgr->numDelayed;
-   mgr->cache_size -= buf->base.size;
+   if (buf->mgr)
+      _pb_cache_manager_remove_buffer_locked(buf);
    assert(!pipe_is_referenced(&buf->base.reference));
    pb_reference(&buf->buffer, NULL);
    FREE(buf);
@@ -156,6 +180,12 @@ pb_cache_buffer_destroy(struct pb_buffer *_buf)
    struct pb_cache_buffer *buf = pb_cache_buffer(_buf);   
    struct pb_cache_manager *mgr = buf->mgr;
 
+   if (!mgr) {
+      pb_reference(&buf->buffer, NULL);
+      FREE(buf);
+      return;
+   }
+
    pipe_mutex_lock(mgr->mutex);
    assert(!pipe_is_referenced(&buf->base.reference));
    
-- 
cgit v1.2.3


From 54964c77510b060806615c842692c0f393e807e6 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Tue, 1 Sep 2015 04:14:33 +0200
Subject: winsys/amdgpu: remove exported buffers from the cache

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src')

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 50c42e3599a..fe55dc3108a 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -684,6 +684,9 @@ static boolean amdgpu_bo_get_handle(struct pb_buffer *buffer,
    enum amdgpu_bo_handle_type type;
    int r;
 
+   if ((void*)bo != (void*)buffer)
+      pb_cache_manager_remove_buffer(buffer);
+
    switch (whandle->type) {
    case DRM_API_HANDLE_TYPE_SHARED:
       type = amdgpu_bo_handle_type_gem_flink_name;
-- 
cgit v1.2.3


From efea7c3a3f91219db6e2fa3588388b6be4ecfa40 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Tue, 1 Sep 2015 04:14:43 +0200
Subject: winsys/radeon: remove exported buffers from the cache

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src')

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 600ced924ba..2878c8f5744 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -1150,6 +1150,9 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
 
     memset(&flink, 0, sizeof(flink));
 
+    if ((void*)bo != (void*)buffer)
+       pb_cache_manager_remove_buffer(buffer);
+
     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
         if (!bo->flink_name) {
             flink.handle = bo->handle;
-- 
cgit v1.2.3


From 70dbdca15fa173302481111cdfb86881dd13dc38 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 3 Sep 2015 11:23:36 -0600
Subject: svga: update call to u_upload_alloc()

u_upload_alloc() no longer returns a return value.

Trivial.
---
 src/gallium/drivers/svga/svga_state_constants.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c
index e0bbe013c02..b6d6de0dca3 100644
--- a/src/gallium/drivers/svga/svga_state_constants.c
+++ b/src/gallium/drivers/svga/svga_state_constants.c
@@ -613,9 +613,9 @@ emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader)
     */
    new_buf_size = align(new_buf_size, 16);
 
-   ret = u_upload_alloc(svga->const0_upload, 0, new_buf_size, &offset,
-                        &dst_buffer, &dst_map);
-   if (ret != PIPE_OK || !dst_map) {
+   u_upload_alloc(svga->const0_upload, 0, new_buf_size, &offset,
+                  &dst_buffer, &dst_map);
+   if (!dst_map) {
       if (src_map)
          pipe_buffer_unmap(&svga->pipe, src_transfer);
       return PIPE_ERROR_OUT_OF_MEMORY;
-- 
cgit v1.2.3


From b05619c627122a0e35a18f92e457d3aefa55f2f7 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Wed, 26 Aug 2015 16:35:40 -0700
Subject: i965/gen9: Annotate input coverage mask change

As far as I can tell, the behavior is preserved from the previous generations.
Before we set a single bit to tell the FS whether or not we'll be using an input
coverage mask. Now we have some options which are implementing various
extensions. These bits are used for the various conservative rasterization
mechanisms (for collision detection, binning, and whatever else).

I believe that the behavior is preserved because the problem which conservative
rasterization is attempting to fix would go away with the "NORMAL" mode (at the
cost of performance, I believe).

This patch serves as documentation of the change by creating the enums, as well
as giving some of the history with the links here so that the next person who
comes along and looks at it doesn't spend as long as I had to in order to
determine if there is an issue or not.

Previously, this algorithm had been done in software, and this can still be used
as long as we don't export an extension stating otherwise.

References: https://www.opengl.org/registry/specs/NV/conservative_raster.txt
References: https://http.developer.nvidia.com/GPUGems2/gpugems2_chapter42.html
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_defines.h   | 16 ++++++++++++++++
 src/mesa/drivers/dri/i965/gen8_ps_state.c |  8 ++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index e6f2a2e0f2c..3c128aee809 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -2266,6 +2266,21 @@ enum brw_pixel_shader_computed_depth_mode {
    BRW_PSCDEPTH_ON_LE = 3, /* PS guarantees output depth <= source depth */
 };
 
+enum brw_pixel_shader_coverage_mask_mode {
+   BRW_PSICMS_OFF     = 0, /* PS does not use input coverage masks. */
+   BRW_PSICMS_NORMAL  = 1, /* Input Coverage masks based on outer conservatism
+                            * and factors in SAMPLE_MASK.  If Pixel is
+                            * conservatively covered, all samples are enabled.
+                            */
+
+   BRW_PSICMS_INNER   = 2, /* Input Coverage masks based on inner conservatism
+                            * and factors in SAMPLE_MASK.  If Pixel is
+                            * conservatively *FULLY* covered, all samples are
+                            * enabled.
+                            */
+   BRW_PCICMS_DEPTH   = 3,
+};
+
 #define _3DSTATE_PS_EXTRA                       0x784F /* GEN8+ */
 /* DW1 */
 # define GEN8_PSX_PIXEL_SHADER_VALID                    (1 << 31)
@@ -2283,6 +2298,7 @@ enum brw_pixel_shader_computed_depth_mode {
 # define GEN9_PSX_SHADER_PULLS_BARY                     (1 << 3)
 # define GEN8_PSX_SHADER_HAS_UAV                        (1 << 2)
 # define GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK       (1 << 1)
+# define GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT     0
 
 enum brw_wm_barycentric_interp_mode {
    BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC		= 0,
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index ae18f0f162c..a686fed704f 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -52,8 +52,12 @@ gen8_upload_ps_extra(struct brw_context *brw,
        _mesa_get_min_invocations_per_fragment(ctx, fp, false) > 1)
       dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE;
 
-   if (fp->Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN)
-      dw1 |= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK;
+   if (fp->Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN) {
+      if (brw->gen >= 9)
+         dw1 |= BRW_PSICMS_INNER << GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
+      else
+         dw1 |= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK;
+   }
 
    if (prog_data->uses_omask)
       dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET;
-- 
cgit v1.2.3


From 5fa5a012b163bad80d01a65e2947510f15e1be6b Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 4 Sep 2015 08:02:14 +1000
Subject: r600: fix loop overrun in cayman_mul_double_instr

Coverity warned about this. Ilia pointed it out.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r600/r600_shader.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 9043668a532..78904da13c3 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -3194,7 +3194,7 @@ static int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
 	int t1 = ctx->temp_reg;
 
-	for (k = 0; k <= 2; k++) {
+	for (k = 0; k < 2; k++) {
 		if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2))))
 			continue;
 
-- 
cgit v1.2.3


From 2ace64fd598816fd1be9877962734242fc27b87b Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Wed, 2 Sep 2015 16:39:27 -0700
Subject: i965: Fix copy propagation type changes.

commit 472ef9a02f2e5c5d0caa2809cb736a0f4f0d4693 introduced code to
change the types of SEL and MOV instructions for moves that simply
"copy bits around".  It didn't account for type conversion moves,
however.  So it would happily turn this:

   mov(8) vgrf6:D, -vgrf5:D
   mov(8) vgrf7:F, vgrf6:UD

into this:

   mov(8) vgrf6:D, -vgrf5:D
   mov(8) vgrf7:D, -vgrf5:D

which erroneously drops the conversion to float.

Cc: "11.0 10.6" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 5445ad55670..230b0caec47 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -279,6 +279,7 @@ static bool
 can_change_source_types(fs_inst *inst)
 {
    return !inst->src[0].abs && !inst->src[0].negate &&
+          inst->dst.type == inst->src[0].type &&
           (inst->opcode == BRW_OPCODE_MOV ||
            (inst->opcode == BRW_OPCODE_SEL &&
             inst->predicate != BRW_PREDICATE_NONE &&
-- 
cgit v1.2.3


From 04e201d0c02cd30ace5c6fe80e9f021ebb733682 Mon Sep 17 00:00:00 2001
From: Tapani Pälli <tapani.palli@intel.com>
Date: Mon, 31 Aug 2015 09:54:23 +0300
Subject: mesa: change 'SHADER_SUBST' facility to work with env variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch changes the existing shader source dump and replace
functionality to work with environment variables rather than being
enabled at compile time. Also, _mesa_sha1_compute is used instead of
_mesa_str_checksum to avoid collisions.

Functionality is controlled via two environment variables:

MESA_SHADER_DUMP_PATH - path where shader sources are dumped
MESA_SHADER_READ_PATH - path where replacement shaders are read

v2: cleanups, add strerror if fopen fails, put all functionality
    inside HAVE_SHA1 since sha1 is required

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Suggested-by: Eero Tamminen <eero.t.tamminen@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 docs/shading.html         |  14 +++++
 src/mesa/main/shaderapi.c | 139 +++++++++++++++++++++++++++++++++-------------
 2 files changed, 115 insertions(+), 38 deletions(-)

(limited to 'src')

diff --git a/docs/shading.html b/docs/shading.html
index 77a0ee413d9..e9fe3dde166 100644
--- a/docs/shading.html
+++ b/docs/shading.html
@@ -63,6 +63,20 @@ execution.  These are generally used for debugging.
 Example:  export MESA_GLSL=dump,nopt
 </p>
 
+<p>
+Shaders can be dumped and replaced at runtime for debugging purposes. Mesa
+needs to be configured with '--with-sha1' to enable this functionality. This
+feature is not currently supported by the SCons build.
+
+This is controlled via the following environment variables:
+<ul>
+<li><b>MESA_SHADER_DUMP_PATH</b> - path where shader sources are dumped
+<li><b>MESA_SHADER_READ_PATH</b> - path where replacement shaders are read
+</ul>
+Note: the given paths must exist before running for dumping or replacing to
+work. When both are set, these paths should be different so the dumped
+shaders do not clobber the replacement shaders.
+</p>
 
 <h2 id="support">GLSL Version</h2>
 
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 0e0e0d6ba30..7680b5875b2 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -53,15 +53,13 @@
 #include "program/prog_parameter.h"
 #include "util/ralloc.h"
 #include "util/hash_table.h"
+#include "util/mesa-sha1.h"
 #include <stdbool.h>
 #include "../glsl/glsl_parser_extras.h"
 #include "../glsl/ir.h"
 #include "../glsl/ir_uniform.h"
 #include "../glsl/program.h"
 
-/** Define this to enable shader substitution (see below) */
-#define SHADER_SUBST 0
-
 
 /**
  * Return mask of GLSL_x flags by examining the MESA_GLSL env var.
@@ -1512,24 +1510,100 @@ _mesa_LinkProgram(GLhandleARB programObj)
    link_program(ctx, programObj);
 }
 
+#if defined(HAVE_SHA1)
+/**
+ * Generate a SHA-1 hash value string for given source string.
+ */
+static void
+generate_sha1(const char *source, char sha_str[64])
+{
+   unsigned char sha[20];
+   _mesa_sha1_compute(source, strlen(source), sha);
+   _mesa_sha1_format(sha_str, sha);
+}
+
+/**
+ * Construct a full path for shader replacement functionality using
+ * following format:
+ *
+ * <path>/<stage prefix>_<CHECKSUM>.glsl
+ */
+static void
+construct_name(const gl_shader_stage stage, const char *source,
+               const char *path, char *name, unsigned length)
+{
+   char sha[64];
+   static const char *types[] = {
+      "VS", "TC", "TE", "GS", "FS", "CS",
+   };
+
+   generate_sha1(source, sha);
+   _mesa_snprintf(name, length, "%s/%s_%s.glsl", path, types[stage],
+                  sha);
+}
+
+/**
+ * Write given shader source to a file in MESA_SHADER_DUMP_PATH.
+ */
+static void
+dump_shader(const gl_shader_stage stage, const char *source)
+{
+   char name[PATH_MAX];
+   static bool path_exists = true;
+   char *dump_path;
+   FILE *f;
+
+   if (!path_exists)
+      return;
+
+   dump_path = getenv("MESA_SHADER_DUMP_PATH");
+   if (!dump_path) {
+      path_exists = false;
+      return;
+   }
 
+   construct_name(stage, source, dump_path, name, PATH_MAX);
+
+   f = fopen(name, "w");
+   if (f) {
+      fputs(source, f);
+      fclose(f);
+   } else {
+      GET_CURRENT_CONTEXT(ctx);
+      _mesa_warning(ctx, "could not open %s for dumping shader (%s)", name,
+                    strerror(errno));
+   }
+}
 
 /**
  * Read shader source code from a file.
  * Useful for debugging to override an app's shader.
  */
 static GLcharARB *
-read_shader(const char *fname)
+read_shader(const gl_shader_stage stage, const char *source)
 {
-   int shader_size = 0;
-   FILE *f = fopen(fname, "r");
-   GLcharARB *buffer, *shader;
-   int len;
+   char name[PATH_MAX];
+   char *read_path;
+   static bool path_exists = true;
+   int len, shader_size = 0;
+   GLcharARB *buffer;
+   FILE *f;
+
+   if (!path_exists)
+      return NULL;
 
-   if (!f) {
+   read_path = getenv("MESA_SHADER_READ_PATH");
+   if (!read_path) {
+      path_exists = false;
       return NULL;
    }
 
+   construct_name(stage, source, read_path, name, PATH_MAX);
+
+   f = fopen(name, "r");
+   if (!f)
+      return NULL;
+
    /* allocate enough room for the entire shader */
    fseek(f, 0, SEEK_END);
    shader_size = ftell(f);
@@ -1547,12 +1621,9 @@ read_shader(const char *fname)
 
    fclose(f);
 
-   shader = strdup(buffer);
-   free(buffer);
-
-   return shader;
+   return buffer;
 }
-
+#endif /* HAVE_SHA1 */
 
 /**
  * Called via glShaderSource() and glShaderSourceARB() API functions.
@@ -1567,7 +1638,11 @@ _mesa_ShaderSource(GLhandleARB shaderObj, GLsizei count,
    GLint *offsets;
    GLsizei i, totalLength;
    GLcharARB *source;
-   GLuint checksum;
+
+#if defined(HAVE_SHA1)
+   GLcharARB *replacement;
+   struct gl_shader *sh;
+#endif /* HAVE_SHA1 */
 
    if (!shaderObj || string == NULL) {
       _mesa_error(ctx, GL_INVALID_VALUE, "glShaderSourceARB");
@@ -1620,35 +1695,23 @@ _mesa_ShaderSource(GLhandleARB shaderObj, GLsizei count,
    source[totalLength - 1] = '\0';
    source[totalLength - 2] = '\0';
 
-   if (SHADER_SUBST) {
-      /* Compute the shader's source code checksum then try to open a file
-       * named newshader_<CHECKSUM>.  If it exists, use it in place of the
-       * original shader source code.  For debugging.
-       */
-      char filename[100];
-      GLcharARB *newSource;
+#if defined(HAVE_SHA1)
+   sh = _mesa_lookup_shader(ctx, shaderObj);
 
-      checksum = _mesa_str_checksum(source);
-
-      _mesa_snprintf(filename, sizeof(filename), "newshader_%d", checksum);
+   /* Dump original shader source to MESA_SHADER_DUMP_PATH and replace
+    * if corresponding entry found from MESA_SHADER_READ_PATH.
+    */
+   dump_shader(sh->Stage, source);
 
-      newSource = read_shader(filename);
-      if (newSource) {
-         fprintf(stderr, "Mesa: Replacing shader %u chksum=%d with %s\n",
-                       shaderObj, checksum, filename);
-         free(source);
-         source = newSource;
-      }
+   replacement = read_shader(sh->Stage, source);
+   if (replacement) {
+      free(source);
+      source = replacement;
    }
+#endif /* HAVE_SHA1 */
 
    shader_source(ctx, shaderObj, source);
 
-   if (SHADER_SUBST) {
-      struct gl_shader *sh = _mesa_lookup_shader(ctx, shaderObj);
-      if (sh)
-         sh->SourceChecksum = checksum; /* save original checksum */
-   }
-
    free(offsets);
 }
 
-- 
cgit v1.2.3


From a2151560b8d65be31129c00872ea8d70c564b110 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 27 Aug 2015 18:27:20 -0700
Subject: i965: Move brw_setup_tex_for_precompile to brw_program.[ch].

This living in brw_fs.{h,cpp} is a historical artifact of us supporting
texturing for fragment shaders before any other stages.  It's kind of
awkward given that we use it for all stages.

This avoids having to include brw_fs.h in geometry shader code in order
to access this function.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp    | 19 -------------------
 src/mesa/drivers/dri/i965/brw_fs.h      |  3 ---
 src/mesa/drivers/dri/i965/brw_program.c | 19 +++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_program.h |  4 ++++
 4 files changed, 23 insertions(+), 22 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e28eb5db026..d240371767b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -5285,22 +5285,3 @@ brw_fs_precompile(struct gl_context *ctx,
 
    return success;
 }
-
-void
-brw_setup_tex_for_precompile(struct brw_context *brw,
-                             struct brw_sampler_prog_key_data *tex,
-                             struct gl_program *prog)
-{
-   const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
-   unsigned sampler_count = _mesa_fls(prog->SamplersUsed);
-   for (unsigned i = 0; i < sampler_count; i++) {
-      if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
-         /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
-         tex->swizzles[i] =
-            MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
-      } else {
-         /* Color sampler: assume no swizzling. */
-         tex->swizzles[i] = SWIZZLE_XYZW;
-      }
-   }
-}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 0a89d2e7640..dd0526a1550 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -514,6 +514,3 @@ private:
 
 bool brw_do_channel_expressions(struct exec_list *instructions);
 bool brw_do_vector_splitting(struct exec_list *instructions);
-void brw_setup_tex_for_precompile(struct brw_context *brw,
-                                  struct brw_sampler_prog_key_data *tex,
-                                  struct gl_program *prog);
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 4f380184464..1ac0ed273ef 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -588,3 +588,22 @@ brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
       _mesa_print_program(prog);
    }
 }
+
+void
+brw_setup_tex_for_precompile(struct brw_context *brw,
+                             struct brw_sampler_prog_key_data *tex,
+                             struct gl_program *prog)
+{
+   const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
+   unsigned sampler_count = _mesa_fls(prog->SamplersUsed);
+   for (unsigned i = 0; i < sampler_count; i++) {
+      if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
+         /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
+         tex->swizzles[i] =
+            MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
+      } else {
+         /* Color sampler: assume no swizzling. */
+         tex->swizzles[i] = SWIZZLE_XYZW;
+      }
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h
index eaa7e4e9a79..bfcd1c93178 100644
--- a/src/mesa/drivers/dri/i965/brw_program.h
+++ b/src/mesa/drivers/dri/i965/brw_program.h
@@ -159,6 +159,10 @@ struct brw_wm_prog_key {
 extern "C" {
 #endif
 
+void brw_setup_tex_for_precompile(struct brw_context *brw,
+                                  struct brw_sampler_prog_key_data *tex,
+                                  struct gl_program *prog);
+
 void brw_populate_sampler_prog_key_data(struct gl_context *ctx,
 				        const struct gl_program *prog,
                                         unsigned sampler_count,
-- 
cgit v1.2.3


From 294282aaa6a517b455d3e31d12e2d85516ac04e6 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 27 Aug 2015 14:04:40 -0700
Subject: i965: Remove legacy clip plane handling from geometry shaders.

We only support geometry shaders in core profiles, where gl_ClipVertex
doesn't exist.  Presumably the even older behavior of clipping to
gl_Position isn't supported either.  In fact, GLSL 1.50 page 76 claims:

"The shader must also set all values in gl_ClipDistance that have been
 enabled via the OpenGL API, or results are undefined."

So we don't need to handle legacy clipping in geometry shaders.  I think
Paul added this back when we were considering supporting the old
GL_ARB_geometry_shader4 extension.

This removes a non-orthogonal state dependency on GS compilation.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_context.h |  5 -----
 src/mesa/drivers/dri/i965/brw_gs.c      | 11 -----------
 src/mesa/drivers/dri/i965/brw_vs.c      | 25 ++++++++-----------------
 3 files changed, 8 insertions(+), 33 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 02e7bb4f8e7..41ba7696d40 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -2057,11 +2057,6 @@ void gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
 
 uint32_t get_hw_prim_for_gl_prim(int mode);
 
-void
-brw_setup_vue_key_clip_info(struct brw_context *brw,
-                            struct brw_vue_prog_key *key,
-                            bool program_uses_clip_distance);
-
 void
 gen6_upload_push_constants(struct brw_context *brw,
                            const struct gl_program *prog,
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 5c0d9230162..f1da63543c4 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -121,15 +121,6 @@ brw_codegen_gs_prog(struct brw_context *brw,
 
    GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
 
-   /* In order for legacy clipping to work, we need to populate the clip
-    * distance varying slots whenever clipping is enabled, even if the vertex
-    * shader doesn't write to gl_ClipDistance.
-    */
-   if (c.key.base.userclip_active) {
-      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
-      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
-   }
-
    brw_compute_vue_map(brw->intelScreen->devinfo,
                        &c.prog_data.base.vue_map, outputs_written);
 
@@ -310,8 +301,6 @@ brw_gs_populate_key(struct brw_context *brw,
    memset(key, 0, sizeof(*key));
 
    key->base.program_string_id = gp->id;
-   brw_setup_vue_key_clip_info(brw, &key->base,
-                               gp->program.Base.UsesClipDistanceOut);
 
    /* _NEW_TEXTURE */
    brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index c53cb49b612..211929a5235 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -279,21 +279,6 @@ brw_vs_debug_recompile(struct brw_context *brw,
    }
 }
 
-
-void
-brw_setup_vue_key_clip_info(struct brw_context *brw,
-                            struct brw_vue_prog_key *key,
-                            bool program_uses_clip_distance)
-{
-   struct gl_context *ctx = &brw->ctx;
-
-   key->userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
-   if (key->userclip_active && !program_uses_clip_distance) {
-      key->nr_userclip_plane_consts
-         = _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
-   }
-}
-
 static bool
 brw_vs_state_dirty(struct brw_context *brw)
 {
@@ -325,8 +310,14 @@ brw_vs_populate_key(struct brw_context *brw,
     * the inputs it asks for, whether they are varying or not.
     */
    key->base.program_string_id = vp->id;
-   brw_setup_vue_key_clip_info(brw, &key->base,
-                               vp->program.Base.UsesClipDistanceOut);
+
+   if (ctx->Transform.ClipPlanesEnabled != 0) {
+      key->base.userclip_active = true;
+      if (!vp->program.Base.UsesClipDistanceOut) {
+         key->base.nr_userclip_plane_consts =
+            _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
+      }
+   }
 
    /* _NEW_POLYGON */
    if (brw->gen < 6) {
-- 
cgit v1.2.3


From 082b7f1876095f32578720f30fdc35771b2b3e0a Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 27 Aug 2015 17:02:27 -0700
Subject: i965: Delete the brw_vue_program_key::userclip_active flag.

There are two uses of this flag.

The primary use is checking whether we need to emit code to convert
legacy gl_ClipVertex/gl_Position clipping to clip distances.  In this
case, we also have to upload the clip planes as uniforms, which means
setting nr_userclip_plane_consts to a positive value.  Checking if it's
> 0 works for detecting this case.

Gen4-5 also wants to know whether we're doing clipping at all, so it can
emit user clip flags.  Checking if output_reg[VARYING_SLOT_CLIP_DIST0]
is set to a real register suffices for this.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   |  2 +-
 src/mesa/drivers/dri/i965/brw_program.h        |  9 +++------
 src/mesa/drivers/dri/i965/brw_vec4.cpp         |  2 +-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |  7 ++++---
 src/mesa/drivers/dri/i965/brw_vs.c             | 17 ++++++-----------
 5 files changed, 15 insertions(+), 22 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 504673f8bd9..7100646750e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -810,7 +810,7 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
       (const struct brw_vue_prog_key *) this->key;
 
    /* Bail unless some sort of legacy clipping is enabled */
-   if (!key->userclip_active || prog->UsesClipDistanceOut)
+   if (key->nr_userclip_plane_consts == 0)
       return;
 
    /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h
index bfcd1c93178..5ebf9226cdd 100644
--- a/src/mesa/drivers/dri/i965/brw_program.h
+++ b/src/mesa/drivers/dri/i965/brw_program.h
@@ -81,15 +81,12 @@ struct brw_sampler_prog_key_data {
 struct brw_vue_prog_key {
    unsigned program_string_id;
 
-   /**
-    * True if at least one clip flag is enabled, regardless of whether the
-    * shader uses clip planes or gl_ClipDistance.
-    */
-   bool userclip_active:1;
-
    /**
     * How many user clipping planes are being uploaded to the vertex shader as
     * push constants.
+    *
+    * These are used for lowering legacy gl_ClipVertex/gl_Position clipping to
+    * clip distances.
     */
    unsigned nr_userclip_plane_consts:4;
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 501461c6d94..0c2326c9cec 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1750,7 +1750,7 @@ vec4_visitor::run(gl_clip_plane *clip_planes)
    }
    base_ir = NULL;
 
-   if (key->userclip_active && !prog->UsesClipDistanceOut)
+   if (key->nr_userclip_plane_consts > 0)
       setup_uniform_clipplane_values(clip_planes);
 
    emit_thread_end();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index ca86e8b6d56..4d3d28164b5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -3121,7 +3121,8 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
 {
    if (devinfo->gen < 6 &&
        ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
-        key->userclip_active || devinfo->has_negative_rhw_bug)) {
+        output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE ||
+        devinfo->has_negative_rhw_bug)) {
       dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
       dst_reg header1_w = header1;
       header1_w.writemask = WRITEMASK_W;
@@ -3136,7 +3137,7 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
 	 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
       }
 
-      if (key->userclip_active) {
+      if (output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE) {
          current_annotation = "Clipping flags";
          dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
          dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
@@ -3354,7 +3355,7 @@ vec4_visitor::emit_vertex()
    }
 
    /* Lower legacy ff and ClipVertex clipping to clip distances */
-   if (key->userclip_active && !prog->UsesClipDistanceOut) {
+   if (key->nr_userclip_plane_consts > 0) {
       current_annotation = "user clip distances";
 
       output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 211929a5235..365396825dc 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -172,7 +172,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
     * distance varying slots whenever clipping is enabled, even if the vertex
     * shader doesn't write to gl_ClipDistance.
     */
-   if (key->base.userclip_active) {
+   if (key->base.nr_userclip_plane_consts > 0) {
       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
    }
@@ -257,10 +257,7 @@ brw_vs_debug_recompile(struct brw_context *brw,
                          key->gl_attrib_wa_flags[i]);
    }
 
-   found |= key_debug(brw, "user clip flags",
-                      old_key->base.userclip_active, key->base.userclip_active);
-
-   found |= key_debug(brw, "user clipping planes as push constants",
+   found |= key_debug(brw, "legacy user clipping",
                       old_key->base.nr_userclip_plane_consts,
                       key->base.nr_userclip_plane_consts);
 
@@ -311,12 +308,10 @@ brw_vs_populate_key(struct brw_context *brw,
     */
    key->base.program_string_id = vp->id;
 
-   if (ctx->Transform.ClipPlanesEnabled != 0) {
-      key->base.userclip_active = true;
-      if (!vp->program.Base.UsesClipDistanceOut) {
-         key->base.nr_userclip_plane_consts =
-            _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
-      }
+   if (ctx->Transform.ClipPlanesEnabled != 0 &&
+       !vp->program.Base.UsesClipDistanceOut) {
+      key->base.nr_userclip_plane_consts =
+         _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
    }
 
    /* _NEW_POLYGON */
-- 
cgit v1.2.3


From 014b90221ad5cf833bfdd55b0336771d209f0f1d Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 27 Aug 2015 23:49:03 -0700
Subject: i965: Move legacy clip plane handling to vec4_vs_visitor.

This is now only used for the vertex shader, so it makes sense to get it
out of any paths run by the geometry shader.

Instead of passing the gl_clip_plane array into the run() method (which
is shared among all subclasses), we add it as a vec4_vs_visitor
constructor parameter.  This eliminates the bogus NULL parameter in the
GS case.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp            | 10 ++--
 src/mesa/drivers/dri/i965/brw_vec4.h              |  4 +-
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp |  4 +-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp    | 56 --------------------
 src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp | 62 +++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vs.h                |  5 ++
 6 files changed, 74 insertions(+), 67 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 0c2326c9cec..073e90beb60 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1721,7 +1721,7 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
 }
 
 bool
-vec4_visitor::run(gl_clip_plane *clip_planes)
+vec4_visitor::run()
 {
    bool use_vec4_nir =
       compiler->glsl_compiler_options[stage].NirOptions != NULL;
@@ -1750,9 +1750,6 @@ vec4_visitor::run(gl_clip_plane *clip_planes)
    }
    base_ir = NULL;
 
-   if (key->nr_userclip_plane_consts > 0)
-      setup_uniform_clipplane_values(clip_planes);
-
    emit_thread_end();
 
    calculate_cfg();
@@ -1979,9 +1976,10 @@ brw_vs_emit(struct brw_context *brw,
       prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
 
       vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data,
-                        vp, prog, mem_ctx, st_index,
+                        vp, prog, brw_select_clip_planes(&brw->ctx),
+                        mem_ctx, st_index,
                         !_mesa_is_gles3(&brw->ctx));
-      if (!v.run(brw_select_clip_planes(&brw->ctx))) {
+      if (!v.run()) {
          if (prog) {
             prog->LinkStatus = false;
             ralloc_strcat(&prog->InfoLog, v.fail_msg);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 673a29e4b7f..7c319326f04 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -173,10 +173,9 @@ public:
 
    struct hash_table *variable_ht;
 
-   bool run(gl_clip_plane *clip_planes);
+   bool run();
    void fail(const char *msg, ...);
 
-   void setup_uniform_clipplane_values(gl_clip_plane *clip_planes);
    virtual void setup_vec4_uniform_value(unsigned param_offset,
                                          const gl_constant_value *values,
                                          unsigned n);
@@ -359,7 +358,6 @@ public:
 
    void emit_ndc_computation();
    void emit_psiz_and_flags(dst_reg reg);
-   void emit_clip_distances(dst_reg reg, int offset);
    vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
    void emit_urb_slot(dst_reg reg, int varying);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 019efecac66..4fd626f52c9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -679,7 +679,7 @@ brw_gs_emit(struct brw_context *brw,
 
          vec4_gs_visitor v(brw->intelScreen->compiler, brw,
                            c, prog, mem_ctx, true /* no_spills */, st_index);
-         if (v.run(NULL /* clip planes */)) {
+         if (v.run()) {
             return generate_assembly(brw, prog, &c->gp->program.Base,
                                      &c->prog_data.base, mem_ctx, v.cfg,
                                      final_assembly_size);
@@ -727,7 +727,7 @@ brw_gs_emit(struct brw_context *brw,
                                c, prog, mem_ctx, false /* no_spills */,
                                st_index);
 
-   if (!gs->run(NULL /* clip planes */)) {
+   if (!gs->run()) {
       prog->LinkStatus = false;
       ralloc_strcat(&prog->InfoLog, gs->fail_msg);
    } else {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 4d3d28164b5..978d28c05ff 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -755,22 +755,6 @@ vec4_visitor::setup_uniform_values(ir_variable *ir)
    }
 }
 
-void
-vec4_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
-{
-   for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
-      assert(this->uniforms < uniform_array_size);
-      this->uniform_vector_size[this->uniforms] = 4;
-      this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
-      this->userplane[i].type = BRW_REGISTER_TYPE_F;
-      for (int j = 0; j < 4; ++j) {
-         stage_prog_data->param[this->uniforms * 4 + j] =
-            (gl_constant_value *) &clip_planes[i][j];
-      }
-      ++this->uniforms;
-   }
-}
-
 /* Our support for builtin uniforms is even scarier than non-builtin.
  * It sits on top of the PROG_STATE_VAR parameters that are
  * automatically updated from GL context state.
@@ -3203,35 +3187,6 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
    }
 }
 
-void
-vec4_visitor::emit_clip_distances(dst_reg reg, int offset)
-{
-   /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
-    *
-    *     "If a linked set of shaders forming the vertex stage contains no
-    *     static write to gl_ClipVertex or gl_ClipDistance, but the
-    *     application has requested clipping against user clip planes through
-    *     the API, then the coordinate written to gl_Position is used for
-    *     comparison against the user clip planes."
-    *
-    * This function is only called if the shader didn't write to
-    * gl_ClipDistance.  Accordingly, we use gl_ClipVertex to perform clipping
-    * if the user wrote to it; otherwise we use gl_Position.
-    */
-   gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
-   if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
-      clip_vertex = VARYING_SLOT_POS;
-   }
-
-   for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
-        ++i) {
-      reg.writemask = 1 << i;
-      emit(DP4(reg,
-               src_reg(output_reg[clip_vertex]),
-               src_reg(this->userplane[i + offset])));
-   }
-}
-
 vec4_instruction *
 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
 {
@@ -3354,17 +3309,6 @@ vec4_visitor::emit_vertex()
       emit_ndc_computation();
    }
 
-   /* Lower legacy ff and ClipVertex clipping to clip distances */
-   if (key->nr_userclip_plane_consts > 0) {
-      current_annotation = "user clip distances";
-
-      output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
-      output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type);
-
-      emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0], 0);
-      emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
-   }
-
    /* We may need to split this up into several URB writes, so do them in a
     * loop.
     */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
index 620f652d6dc..f81ee4e5bc0 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
@@ -201,9 +201,69 @@ vec4_vs_visitor::emit_urb_write_opcode(bool complete)
 }
 
 
+void
+vec4_vs_visitor::emit_clip_distances(dst_reg reg, int offset)
+{
+   /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
+    *
+    *     "If a linked set of shaders forming the vertex stage contains no
+    *     static write to gl_ClipVertex or gl_ClipDistance, but the
+    *     application has requested clipping against user clip planes through
+    *     the API, then the coordinate written to gl_Position is used for
+    *     comparison against the user clip planes."
+    *
+    * This function is only called if the shader didn't write to
+    * gl_ClipDistance.  Accordingly, we use gl_ClipVertex to perform clipping
+    * if the user wrote to it; otherwise we use gl_Position.
+    */
+   gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
+   if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
+      clip_vertex = VARYING_SLOT_POS;
+   }
+
+   for (int i = 0; i + offset < key->base.nr_userclip_plane_consts && i < 4;
+        ++i) {
+      reg.writemask = 1 << i;
+      emit(DP4(reg,
+               src_reg(output_reg[clip_vertex]),
+               src_reg(this->userplane[i + offset])));
+   }
+}
+
+
+void
+vec4_vs_visitor::setup_uniform_clipplane_values()
+{
+   for (int i = 0; i < key->base.nr_userclip_plane_consts; ++i) {
+      assert(this->uniforms < uniform_array_size);
+      this->uniform_vector_size[this->uniforms] = 4;
+      this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
+      this->userplane[i].type = BRW_REGISTER_TYPE_F;
+      for (int j = 0; j < 4; ++j) {
+         stage_prog_data->param[this->uniforms * 4 + j] =
+            (gl_constant_value *) &clip_planes[i][j];
+      }
+      ++this->uniforms;
+   }
+}
+
+
 void
 vec4_vs_visitor::emit_thread_end()
 {
+   setup_uniform_clipplane_values();
+
+   /* Lower legacy ff and ClipVertex clipping to clip distances */
+   if (key->base.nr_userclip_plane_consts > 0) {
+      current_annotation = "user clip distances";
+
+      output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
+      output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type);
+
+      emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0], 0);
+      emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
+   }
+
    /* For VS, we always end the thread by emitting a single vertex.
     * emit_urb_write_opcode() will take care of setting the eot flag on the
     * SEND instruction.
@@ -218,6 +278,7 @@ vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler,
                                  struct brw_vs_prog_data *vs_prog_data,
                                  struct gl_vertex_program *vp,
                                  struct gl_shader_program *prog,
+                                 gl_clip_plane *clip_planes,
                                  void *mem_ctx,
                                  int shader_time_index,
                                  bool use_legacy_snorm_formula)
@@ -229,6 +290,7 @@ vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler,
      key(key),
      vs_prog_data(vs_prog_data),
      vp(vp),
+     clip_planes(clip_planes),
      use_legacy_snorm_formula(use_legacy_snorm_formula)
 {
 }
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 1d9bee11c56..e98679affc1 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -90,6 +90,7 @@ public:
                    struct brw_vs_prog_data *vs_prog_data,
                    struct gl_vertex_program *vp,
                    struct gl_shader_program *prog,
+                   gl_clip_plane *clip_planes,
                    void *mem_ctx,
                    int shader_time_index,
                    bool use_legacy_snorm_formula);
@@ -107,6 +108,8 @@ protected:
 private:
    int setup_attributes(int payload_reg);
    void setup_vp_regs();
+   void setup_uniform_clipplane_values();
+   void emit_clip_distances(dst_reg reg, int offset);
    dst_reg get_vp_dst_reg(const prog_dst_register &dst);
    src_reg get_vp_src_reg(const prog_src_register &src);
 
@@ -116,6 +119,8 @@ private:
    src_reg *vp_temp_regs;
    src_reg vp_addr_reg;
 
+   gl_clip_plane *clip_planes;
+
    bool use_legacy_snorm_formula;
 };
 
-- 
cgit v1.2.3


From 27e83b62bb52de7a681ed82679a707555023f43d Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 27 Aug 2015 23:55:28 -0700
Subject: i965: Store a key_tex pointer in vec4_visitor.

I'm about to remove the base class for VS/GS/HS/DS program keys, at
which point we won't be able to use key->tex anymore.  Instead, we'll
need to store a direct pointer (like we do in the FS backend).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_vec4.h           |  1 +
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp     |  4 ++--
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 13 +++++++------
 3 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 7c319326f04..10439f21af5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -101,6 +101,7 @@ public:
    }
 
    const struct brw_vue_prog_key * const key;
+   const struct brw_sampler_prog_key_data * const key_tex;
    struct brw_vue_prog_data * const prog_data;
    unsigned int sanity_param_count;
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 59e440a899d..f3dc1123942 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1409,7 +1409,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
     * emitting anything other than setting up the constant result.
     */
    if (instr->op == nir_texop_tg4) {
-      int swiz = GET_SWZ(key->tex.swizzles[sampler], instr->component);
+      int swiz = GET_SWZ(key_tex->swizzles[sampler], instr->component);
       if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
          emit(MOV(dest, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
          return;
@@ -1471,7 +1471,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
          sample_index = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
          assert(coord_type != NULL);
          if (devinfo->gen >= 7 &&
-             key->tex.compressed_multisample_layout_mask & (1<<sampler)) {
+             key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
             mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg);
          } else {
             mcs = src_reg(0u);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 978d28c05ff..f85f2f95a02 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2693,7 +2693,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
    }
 
    if (devinfo->gen == 6 && op == ir_tg4) {
-      emit_gen6_gather_wa(key->tex.gen6_gather_wa[sampler], inst->dst);
+      emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], inst->dst);
    }
 
    swizzle_result(op, dest,
@@ -2745,7 +2745,7 @@ vec4_visitor::visit(ir_texture *ir)
     */
    if (ir->op == ir_tg4) {
       ir_constant *chan = ir->lod_info.component->as_constant();
-      int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]);
+      int swiz = GET_SWZ(key_tex->swizzles[sampler], chan->value.i[0]);
       if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
          dst_reg result(this, ir->type);
          this->result = src_reg(result);
@@ -2803,7 +2803,7 @@ vec4_visitor::visit(ir_texture *ir)
       ir->lod_info.sample_index->accept(this);
       sample_index = this->result;
 
-      if (devinfo->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
+      if (devinfo->gen >= 7 && key_tex->compressed_multisample_layout_mask & (1 << sampler))
          mcs = emit_mcs_fetch(ir->coordinate->type, coordinate, sampler_reg);
       else
          mcs = src_reg(0u);
@@ -2881,14 +2881,14 @@ vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
 uint32_t
 vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler)
 {
-   int swiz = GET_SWZ(key->tex.swizzles[sampler], gather_component);
+   int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component);
    switch (swiz) {
       case SWIZZLE_X: return 0;
       case SWIZZLE_Y:
          /* gather4 sampler is broken for green channel on RG32F --
           * we must ask for blue instead.
           */
-         if (key->tex.gather_channel_quirk_mask & (1<<sampler))
+         if (key_tex->gather_channel_quirk_mask & (1 << sampler))
             return 2;
          return 1;
       case SWIZZLE_Z: return 2;
@@ -2903,7 +2903,7 @@ vec4_visitor::swizzle_result(ir_texture_opcode op, dst_reg dest,
                              src_reg orig_val, uint32_t sampler,
                              const glsl_type *dest_type)
 {
-   int s = key->tex.swizzles[sampler];
+   int s = key_tex->swizzles[sampler];
 
    dst_reg swizzled_result = dest;
 
@@ -3717,6 +3717,7 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
    : backend_shader(compiler, log_data, mem_ctx,
                     shader_prog, prog, &prog_data->base, stage),
      key(key),
+     key_tex(&key->tex),
      prog_data(prog_data),
      sanity_param_count(0),
      fail_msg(NULL),
-- 
cgit v1.2.3


From 323962182547aeafcdb3bac28434ef81f70eb785 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 28 Aug 2015 00:29:05 -0700
Subject: i965: Virtualize vec4_visitor::emit_urb_slot().

This avoids a downcast of key, which won't exist in the base class soon.

I'm not a huge fan of this patch, but given that we're currently using
inheritance, this seems like the "right" way to do it.  The alternative
is to make key a void pointer in the parent class and continue
downcasting.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_vec4.h              |  2 +-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp    | 15 -------------
 src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp | 26 +++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vs.h                |  1 +
 4 files changed, 28 insertions(+), 16 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 10439f21af5..7df87b5d841 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -360,7 +360,7 @@ public:
    void emit_ndc_computation();
    void emit_psiz_and_flags(dst_reg reg);
    vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
-   void emit_urb_slot(dst_reg reg, int varying);
+   virtual void emit_urb_slot(dst_reg reg, int varying);
 
    void emit_shader_time_begin();
    void emit_shader_time_end();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index f85f2f95a02..034286fbf2e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -3233,21 +3233,6 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
    case BRW_VARYING_SLOT_PAD:
       /* No need to write to this slot */
       break;
-   case VARYING_SLOT_COL0:
-   case VARYING_SLOT_COL1:
-   case VARYING_SLOT_BFC0:
-   case VARYING_SLOT_BFC1: {
-      /* These built-in varyings are only supported in compatibility mode,
-       * and we only support GS in core profile.  So, this must be a vertex
-       * shader.
-       */
-      assert(stage == MESA_SHADER_VERTEX);
-      vec4_instruction *inst = emit_generic_urb_slot(reg, varying);
-      if (((struct brw_vs_prog_key *) key)->clamp_vertex_color)
-         inst->saturate = true;
-      break;
-   }
-
    default:
       emit_generic_urb_slot(reg, varying);
       break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
index f81ee4e5bc0..442cefd6c43 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
@@ -201,6 +201,32 @@ vec4_vs_visitor::emit_urb_write_opcode(bool complete)
 }
 
 
+void
+vec4_vs_visitor::emit_urb_slot(dst_reg reg, int varying)
+{
+   reg.type = BRW_REGISTER_TYPE_F;
+   output_reg[varying].type = reg.type;
+
+   switch (varying) {
+   case VARYING_SLOT_COL0:
+   case VARYING_SLOT_COL1:
+   case VARYING_SLOT_BFC0:
+   case VARYING_SLOT_BFC1: {
+      /* These built-in varyings are only supported in compatibility mode,
+       * and we only support GS in core profile.  So, this must be a vertex
+       * shader.
+       */
+      vec4_instruction *inst = emit_generic_urb_slot(reg, varying);
+      if (key->clamp_vertex_color)
+         inst->saturate = true;
+      break;
+   }
+   default:
+      return vec4_visitor::emit_urb_slot(reg, varying);
+   }
+}
+
+
 void
 vec4_vs_visitor::emit_clip_distances(dst_reg reg, int offset)
 {
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index e98679affc1..3a847fcd28a 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -103,6 +103,7 @@ protected:
    virtual void emit_program_code();
    virtual void emit_thread_end();
    virtual void emit_urb_write_header(int mrf);
+   virtual void emit_urb_slot(dst_reg reg, int varying);
    virtual vec4_instruction *emit_urb_write_opcode(bool complete);
 
 private:
-- 
cgit v1.2.3


From 4f4b7c4711d98606270133dfd456acabfa8267a6 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 27 Aug 2015 18:24:39 -0700
Subject: i965: Remove the brw_vue_prog_key base class.

The legacy userclip fields are only used for the vertex shader.  Once
those move into the VS key, all that remains is program_string_id and
the tex struct, which are common to all keys.  So there's no need for a
"VUE" key base class.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp      | 12 +++++----
 src/mesa/drivers/dri/i965/brw_gs.c                |  7 ++---
 src/mesa/drivers/dri/i965/brw_program.h           | 32 +++++++++++------------
 src/mesa/drivers/dri/i965/brw_vec4.cpp            | 12 ---------
 src/mesa/drivers/dri/i965/brw_vec4.h              |  8 +-----
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp |  2 +-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp    |  5 ++--
 src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp |  8 +++---
 src/mesa/drivers/dri/i965/brw_vs.c                | 22 ++++++++--------
 9 files changed, 45 insertions(+), 63 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 7100646750e..5cb794b5fd1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -783,8 +783,8 @@ fs_visitor::emit_fb_writes()
 void
 fs_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
 {
-   const struct brw_vue_prog_key *key =
-      (const struct brw_vue_prog_key *) this->key;
+   const struct brw_vs_prog_key *key =
+      (const struct brw_vs_prog_key *) this->key;
 
    for (int i = 0; i < key->nr_userclip_plane_consts; i++) {
       this->userplane[i] = fs_reg(UNIFORM, uniforms);
@@ -806,8 +806,8 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
 {
    struct brw_vue_prog_data *vue_prog_data =
       (struct brw_vue_prog_data *) prog_data;
-   const struct brw_vue_prog_key *key =
-      (const struct brw_vue_prog_key *) this->key;
+   const struct brw_vs_prog_key *key =
+      (const struct brw_vs_prog_key *) this->key;
 
    /* Bail unless some sort of legacy clipping is enabled */
    if (key->nr_userclip_plane_consts == 0)
@@ -1076,8 +1076,10 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
       key_tex = &((const brw_wm_prog_key *) key)->tex;
       break;
    case MESA_SHADER_VERTEX:
+      key_tex = &((const brw_vs_prog_key *) key)->tex;
+      break;
    case MESA_SHADER_GEOMETRY:
-      key_tex = &((const brw_vue_prog_key *) key)->tex;
+      key_tex = &((const brw_gs_prog_key *) key)->tex;
       break;
    case MESA_SHADER_COMPUTE:
       key_tex = &((const brw_cs_prog_key*) key)->tex;
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index f1da63543c4..1c1a0953bbe 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -300,11 +300,11 @@ brw_gs_populate_key(struct brw_context *brw,
 
    memset(key, 0, sizeof(*key));
 
-   key->base.program_string_id = gp->id;
+   key->program_string_id = gp->id;
 
    /* _NEW_TEXTURE */
    brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
-                                      &key->base.tex);
+                                      &key->tex);
 
    /* BRW_NEW_VUE_MAP_VS */
    key->input_varyings = brw->vue_map_vs.slots_valid;
@@ -381,7 +381,8 @@ brw_gs_precompile(struct gl_context *ctx,
 
    memset(&key, 0, sizeof(key));
 
-   brw_vue_setup_prog_key_for_precompile(ctx, &key.base, bgp->id, &gp->Base);
+   brw_setup_tex_for_precompile(brw, &key.tex, prog);
+   key.program_string_id = bgp->id;
 
    /* Assume that the set of varyings coming in from the vertex shader exactly
     * matches what the geometry shader requires.
diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h
index 5ebf9226cdd..00e8f3f370e 100644
--- a/src/mesa/drivers/dri/i965/brw_program.h
+++ b/src/mesa/drivers/dri/i965/brw_program.h
@@ -78,24 +78,9 @@ struct brw_sampler_prog_key_data {
 };
 
 
-struct brw_vue_prog_key {
-   unsigned program_string_id;
-
-   /**
-    * How many user clipping planes are being uploaded to the vertex shader as
-    * push constants.
-    *
-    * These are used for lowering legacy gl_ClipVertex/gl_Position clipping to
-    * clip distances.
-    */
-   unsigned nr_userclip_plane_consts:4;
-
-   struct brw_sampler_prog_key_data tex;
-};
-
 /** The program key for Vertex Shaders. */
 struct brw_vs_prog_key {
-   struct brw_vue_prog_key base;
+   unsigned program_string_id;
 
    /*
     * Per-attribute workaround flags
@@ -106,6 +91,15 @@ struct brw_vs_prog_key {
 
    bool clamp_vertex_color:1;
 
+   /**
+    * How many user clipping planes are being uploaded to the vertex shader as
+    * push constants.
+    *
+    * These are used for lowering legacy gl_ClipVertex/gl_Position clipping to
+    * clip distances.
+    */
+   unsigned nr_userclip_plane_consts:4;
+
    /**
     * For pre-Gen6 hardware, a bitfield indicating which texture coordinates
     * are going to be replaced with point coordinates (as a consequence of a
@@ -115,14 +109,18 @@ struct brw_vs_prog_key {
     * the VUE, even if they aren't written by the vertex shader.
     */
    uint8_t point_coord_replace;
+
+   struct brw_sampler_prog_key_data tex;
 };
 
 /** The program key for Geometry Shaders. */
 struct brw_gs_prog_key
 {
-   struct brw_vue_prog_key base;
+   unsigned program_string_id;
 
    uint64_t input_varyings;
+
+   struct brw_sampler_prog_key_data tex;
 };
 
 /** The program key for Fragment/Pixel Shaders. */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 073e90beb60..24b2be916a6 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -2011,16 +2011,4 @@ brw_vs_emit(struct brw_context *brw,
    return assembly;
 }
 
-
-void
-brw_vue_setup_prog_key_for_precompile(struct gl_context *ctx,
-                                      struct brw_vue_prog_key *key,
-                                      GLuint id, struct gl_program *prog)
-{
-   struct brw_context *brw = brw_context(ctx);
-   key->program_string_id = id;
-
-   brw_setup_tex_for_precompile(brw, &key->tex, prog);
-}
-
 } /* extern "C" */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 7df87b5d841..01c6e8492c7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -52,11 +52,6 @@ extern "C" {
 extern "C" {
 #endif
 
-void
-brw_vue_setup_prog_key_for_precompile(struct gl_context *ctx,
-                                      struct brw_vue_prog_key *key,
-                                      GLuint id, struct gl_program *prog);
-
 #ifdef __cplusplus
 } /* extern "C" */
 
@@ -76,7 +71,7 @@ public:
    vec4_visitor(const struct brw_compiler *compiler,
                 void *log_data,
                 struct gl_program *prog,
-                const struct brw_vue_prog_key *key,
+                const struct brw_sampler_prog_key_data *key,
                 struct brw_vue_prog_data *prog_data,
 		struct gl_shader_program *shader_prog,
                 gl_shader_stage stage,
@@ -100,7 +95,6 @@ public:
       return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
    }
 
-   const struct brw_vue_prog_key * const key;
    const struct brw_sampler_prog_key_data * const key_tex;
    struct brw_vue_prog_data * const prog_data;
    unsigned int sanity_param_count;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 4fd626f52c9..b9694f67787 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -42,7 +42,7 @@ vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
                                  bool no_spills,
                                  int shader_time_index)
    : vec4_visitor(compiler, log_data,
-                  &c->gp->program.Base, &c->key.base,
+                  &c->gp->program.Base, &c->key.tex,
                   &c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx,
                   no_spills, shader_time_index),
      c(c)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 034286fbf2e..0ff298ed0e9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -3692,7 +3692,7 @@ vec4_visitor::resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg)
 vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
                            void *log_data,
                            struct gl_program *prog,
-                           const struct brw_vue_prog_key *key,
+                           const struct brw_sampler_prog_key_data *key_tex,
                            struct brw_vue_prog_data *prog_data,
 			   struct gl_shader_program *shader_prog,
                            gl_shader_stage stage,
@@ -3701,8 +3701,7 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
                            int shader_time_index)
    : backend_shader(compiler, log_data, mem_ctx,
                     shader_prog, prog, &prog_data->base, stage),
-     key(key),
-     key_tex(&key->tex),
+     key_tex(key_tex),
      prog_data(prog_data),
      sanity_param_count(0),
      fail_msg(NULL),
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
index 442cefd6c43..f4b50ba9d9f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
@@ -247,7 +247,7 @@ vec4_vs_visitor::emit_clip_distances(dst_reg reg, int offset)
       clip_vertex = VARYING_SLOT_POS;
    }
 
-   for (int i = 0; i + offset < key->base.nr_userclip_plane_consts && i < 4;
+   for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
         ++i) {
       reg.writemask = 1 << i;
       emit(DP4(reg,
@@ -260,7 +260,7 @@ vec4_vs_visitor::emit_clip_distances(dst_reg reg, int offset)
 void
 vec4_vs_visitor::setup_uniform_clipplane_values()
 {
-   for (int i = 0; i < key->base.nr_userclip_plane_consts; ++i) {
+   for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
       assert(this->uniforms < uniform_array_size);
       this->uniform_vector_size[this->uniforms] = 4;
       this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
@@ -280,7 +280,7 @@ vec4_vs_visitor::emit_thread_end()
    setup_uniform_clipplane_values();
 
    /* Lower legacy ff and ClipVertex clipping to clip distances */
-   if (key->base.nr_userclip_plane_consts > 0) {
+   if (key->nr_userclip_plane_consts > 0) {
       current_annotation = "user clip distances";
 
       output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
@@ -309,7 +309,7 @@ vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler,
                                  int shader_time_index,
                                  bool use_legacy_snorm_formula)
    : vec4_visitor(compiler, log_data,
-                  &vp->Base, &key->base, &vs_prog_data->base, prog,
+                  &vp->Base, &key->tex, &vs_prog_data->base, prog,
                   MESA_SHADER_VERTEX,
                   mem_ctx, false /* no_spills */,
                   shader_time_index),
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 365396825dc..df7e5314d6e 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -130,7 +130,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
    /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
     * planes as uniforms.
     */
-   param_count += key->base.nr_userclip_plane_consts * 4;
+   param_count += key->nr_userclip_plane_consts * 4;
 
    stage_prog_data->param =
       rzalloc_array(NULL, const gl_constant_value *, param_count);
@@ -172,7 +172,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
     * distance varying slots whenever clipping is enabled, even if the vertex
     * shader doesn't write to gl_ClipDistance.
     */
-   if (key->base.nr_userclip_plane_consts > 0) {
+   if (key->nr_userclip_plane_consts > 0) {
       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
    }
@@ -237,7 +237,7 @@ brw_vs_debug_recompile(struct brw_context *brw,
          if (c->cache_id == BRW_CACHE_VS_PROG) {
             old_key = c->key;
 
-            if (old_key->base.program_string_id == key->base.program_string_id)
+            if (old_key->program_string_id == key->program_string_id)
                break;
          }
       }
@@ -258,8 +258,8 @@ brw_vs_debug_recompile(struct brw_context *brw,
    }
 
    found |= key_debug(brw, "legacy user clipping",
-                      old_key->base.nr_userclip_plane_consts,
-                      key->base.nr_userclip_plane_consts);
+                      old_key->nr_userclip_plane_consts,
+                      key->nr_userclip_plane_consts);
 
    found |= key_debug(brw, "copy edgeflag",
                       old_key->copy_edgeflag, key->copy_edgeflag);
@@ -268,8 +268,7 @@ brw_vs_debug_recompile(struct brw_context *brw,
    found |= key_debug(brw, "vertex color clamping",
                       old_key->clamp_vertex_color, key->clamp_vertex_color);
 
-   found |= brw_debug_recompile_sampler_key(brw, &old_key->base.tex,
-                                            &key->base.tex);
+   found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
 
    if (!found) {
       perf_debug("  Something else\n");
@@ -306,11 +305,11 @@ brw_vs_populate_key(struct brw_context *brw,
    /* Just upload the program verbatim for now.  Always send it all
     * the inputs it asks for, whether they are varying or not.
     */
-   key->base.program_string_id = vp->id;
+   key->program_string_id = vp->id;
 
    if (ctx->Transform.ClipPlanesEnabled != 0 &&
        !vp->program.Base.UsesClipDistanceOut) {
-      key->base.nr_userclip_plane_consts =
+      key->nr_userclip_plane_consts =
          _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
    }
 
@@ -336,7 +335,7 @@ brw_vs_populate_key(struct brw_context *brw,
 
    /* _NEW_TEXTURE */
    brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
-                                      &key->base.tex);
+                                      &key->tex);
 
    /* BRW_NEW_VS_ATTRIB_WORKAROUNDS */
    memcpy(key->gl_attrib_wa_flags, brw->vb.attrib_wa_flags,
@@ -398,7 +397,8 @@ brw_vs_precompile(struct gl_context *ctx,
 
    memset(&key, 0, sizeof(key));
 
-   brw_vue_setup_prog_key_for_precompile(ctx, &key.base, bvp->id, &vp->Base);
+   brw_setup_tex_for_precompile(brw, &key.tex, prog);
+   key.program_string_id = bvp->id;
    key.clamp_vertex_color =
       (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 |
                                VARYING_BIT_BFC0 | VARYING_BIT_BFC1));
-- 
cgit v1.2.3


From fba4823a916b63f279955dbe6d98b6efc666f53e Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 28 Aug 2015 01:43:23 -0700
Subject: i965: Don't do legacy userclipping in non-compatibility contexts.

According to the GLSL 1.50 specification, page 76:
"The shader must also set all values in gl_ClipDistance that have been
 enabled via the OpenGL API, or results are undefined."

With this patch, we only enable clip distance writes when the shader
actually writes them.  We no longer force a value to be written when
clip planes are enabled in the API.  If clip planes are enabled but the
shader does not write gl_ClipDistance, the first varying slot could end
up being used as clip distances.  The spec leaves that case undefined,
and I believe this is the safe kind of undefined behavior.

Empirically, it doesn't seem to cause a problem.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_vs.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index df7e5314d6e..05457d4a3fe 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -308,6 +308,7 @@ brw_vs_populate_key(struct brw_context *brw,
    key->program_string_id = vp->id;
 
    if (ctx->Transform.ClipPlanesEnabled != 0 &&
+       ctx->API == API_OPENGL_COMPAT &&
        !vp->program.Base.UsesClipDistanceOut) {
       key->nr_userclip_plane_consts =
          _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
-- 
cgit v1.2.3


From 6e03377daf90c6f6ac953a08e3a8f8f5caf9abf6 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 28 Aug 2015 23:47:25 -0700
Subject: i965/gs: Don't reserve space for clip plane uniforms.

These were only for legacy userclipping, which we no longer support
in geometry shaders.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_gs.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 1c1a0953bbe..04d9f3f25d9 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -62,8 +62,6 @@ brw_codegen_gs_prog(struct brw_context *brw,
    struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
    int param_count = gs->num_uniform_components * 4;
 
-   /* We also upload clip plane data as uniforms */
-   param_count += MAX_CLIP_PLANES * 4;
    param_count += gs->NumImages * BRW_IMAGE_PARAM_SIZE;
 
    c.prog_data.base.base.param =
-- 
cgit v1.2.3


From 0e23c246c0eb84bd46dfdfe6babe3762e49c58d2 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 15 May 2015 10:08:19 -0700
Subject: i965: Optimize VUE map comparisons.

The entire VUE map is computed based on the slots_valid bitfield;
calling brw_compute_vue_map on the same bitfield will return the
same result.  So we can simply compare those.

struct brw_vue_map is 136 bytes; doing a single 8-byte comparison is
much cheaper and should work just as well.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_gs.c | 4 ++--
 src/mesa/drivers/dri/i965/brw_vs.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 04d9f3f25d9..16ea6846285 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -356,8 +356,8 @@ brw_upload_gs_prog(struct brw_context *brw)
    }
    brw->gs.base.prog_data = &brw->gs.prog_data->base.base;
 
-   if (memcmp(&brw->gs.prog_data->base.vue_map, &brw->vue_map_geom_out,
-              sizeof(brw->vue_map_geom_out)) != 0) {
+   if (brw->gs.prog_data->base.vue_map.slots_valid !=
+       brw->vue_map_geom_out.slots_valid) {
       brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
       brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
    }
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 05457d4a3fe..4e0d34f6c6f 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -368,8 +368,8 @@ brw_upload_vs_prog(struct brw_context *brw)
    }
    brw->vs.base.prog_data = &brw->vs.prog_data->base.base;
 
-   if (memcmp(&brw->vs.prog_data->base.vue_map, &brw->vue_map_geom_out,
-              sizeof(brw->vue_map_geom_out)) != 0) {
+   if (brw->vs.prog_data->base.vue_map.slots_valid !=
+       brw->vue_map_geom_out.slots_valid) {
       brw->vue_map_vs = brw->vs.prog_data->base.vue_map;
       brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_VS;
       if (brw->gen < 6) {
-- 
cgit v1.2.3


From 4323e78d3f6d935cb75fc20375e6730613d41119 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 13 Aug 2015 14:52:55 -0700
Subject: i965: Improve disassembly of data port read messages.

We now print out the name of the message instead of its numerical
value, and label the message control and surface numbers.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_disasm.c | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index 1075c5acba5..61be2b04fee 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -412,6 +412,22 @@ static const char *const gen7_gateway_subfuncid[8] = {
    [BRW_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE] = "mmio read/write",
 };
 
+static const char *const gen4_dp_read_port_msg_type[4] = {
+   [0b00] = "OWord Block Read",
+   [0b01] = "OWord Dual Block Read",
+   [0b10] = "Media Block Read",
+   [0b11] = "DWord Scattered Read",
+};
+
+static const char *const g45_dp_read_port_msg_type[8] = {
+   [0b000] = "OWord Block Read",
+   [0b010] = "OWord Dual Block Read",
+   [0b100] = "Media Block Read",
+   [0b110] = "DWord Scattered Read",
+   [0b001] = "Render Target UNORM Read",
+   [0b011] = "AVC Loop Filter Read",
+};
+
 static const char *const dp_write_port_msg_type[8] = {
    [0b000] = "OWord block write",
    [0b001] = "OWord dual block write",
@@ -1444,10 +1460,17 @@ brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo,
                       brw_inst_dp_msg_type(devinfo, inst),
                       devinfo->gen >= 7 ? 0 : brw_inst_dp_write_commit(devinfo, inst));
             } else {
-               format(file, " (%ld, %ld, %ld)",
-                      brw_inst_binding_table_index(devinfo, inst),
-                      brw_inst_dp_read_msg_control(devinfo, inst),
-                      brw_inst_dp_read_msg_type(devinfo, inst));
+               bool is_965 = devinfo->gen == 4 && !devinfo->is_g4x;
+               err |= control(file, "DP read message type",
+                              is_965 ? gen4_dp_read_port_msg_type :
+                                       g45_dp_read_port_msg_type,
+                              brw_inst_dp_read_msg_type(devinfo, inst),
+                              &space);
+
+               format(file, " MsgCtrl = 0x%lx",
+                      brw_inst_dp_read_msg_control(devinfo, inst));
+
+               format(file, " Surface = %ld", brw_inst_binding_table_index(devinfo, inst));
             }
             break;
 
-- 
cgit v1.2.3


From 69678953d1740a5d27da4f9dd522f68d5a95d223 Mon Sep 17 00:00:00 2001
From: Tapani Pälli <tapani.palli@intel.com>
Date: Thu, 3 Sep 2015 14:20:46 +0300
Subject: glsl: error on linking if no shaders are attached to program
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This applies to OpenGL Core >= 4.5 and OpenGL ES >= 3.1.

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/glsl/linker.cpp | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'src')

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 59e3e9c38a6..fb1d955aef5 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -3466,6 +3466,25 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
    prog->Version = max_version;
    prog->IsES = is_es_prog;
 
+   /* From OpenGL 4.5 Core specification (7.3 Program Objects):
+    *     "Linking can fail for a variety of reasons as specified in the OpenGL
+    *     Shading Language Specification, as well as any of the following
+    *     reasons:
+    *
+    *     * No shader objects are attached to program.
+    *
+    *     ..."
+    *
+    *     Same rule applies for OpenGL ES >= 3.1.
+    */
+
+   if (prog->NumShaders == 0 &&
+       ((ctx->API == API_OPENGL_CORE && ctx->Version >= 45) ||
+        (ctx->API == API_OPENGLES2 && ctx->Version >= 31))) {
+      linker_error(prog, "No shader objects are attached to program.\n");
+      goto done;
+   }
+
    /* Some shaders have to be linked with some other shaders present.
     */
    if (num_shaders[MESA_SHADER_GEOMETRY] > 0 &&
-- 
cgit v1.2.3


From 08e9049e3d204265d803aac5ff6bf0b4e5d82d00 Mon Sep 17 00:00:00 2001
From: Tapani Pälli <tapani.palli@intel.com>
Date: Thu, 3 Sep 2015 14:26:48 +0300
Subject: glsl: error out on ES 3.1 if VS or FS present but not both
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/glsl/linker.cpp | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index fb1d955aef5..4838ab44339 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -3889,10 +3889,31 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
     * behavior specified in GLSL specification.
     */
    if (!prog->SeparateShader && ctx->API == API_OPENGLES2) {
-      if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) {
-	 linker_error(prog, "program lacks a vertex shader\n");
-      } else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) {
-	 linker_error(prog, "program lacks a fragment shader\n");
+      /* With ES < 3.1 one needs to have always vertex + fragment shader. */
+      if (ctx->Version < 31) {
+         if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) {
+	    linker_error(prog, "program lacks a vertex shader\n");
+         } else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) {
+	    linker_error(prog, "program lacks a fragment shader\n");
+         }
+      } else {
+         /* From OpenGL ES 3.1 specification (7.3 Program Objects):
+          *     "Linking can fail for a variety of reasons as specified in the
+          *     OpenGL ES Shading Language Specification, as well as any of the
+          *     following reasons:
+          *
+          *     ...
+          *
+          *     * program contains objects to form either a vertex shader or
+          *       fragment shader, and program is not separable, and does not
+          *       contain objects to form both a vertex shader and fragment
+          *       shader."
+          */
+         if (!!prog->_LinkedShaders[MESA_SHADER_VERTEX] ^
+             !!prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) {
+            linker_error(prog, "Program needs to contain both vertex and "
+                         "fragment shaders.\n");
+         }
       }
    }
 
-- 
cgit v1.2.3


From 3d4f75506c9233ca4039021024c4b918cc974f86 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Thu, 3 Sep 2015 14:50:12 +0300
Subject: dri/common: Fix codestyle of driParseDebugString().

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/mesa/drivers/dri/common/utils.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c
index 43d90d90599..1e3b15b6190 100644
--- a/src/mesa/drivers/dri/common/utils.c
+++ b/src/mesa/drivers/dri/common/utils.c
@@ -43,19 +43,17 @@
 
 
 uint64_t
-driParseDebugString( const char * debug, 
-		     const struct dri_debug_control * control  )
+driParseDebugString(const char *debug,
+                    const struct dri_debug_control *control)
 {
    uint64_t flag = 0;
 
-   if ( debug != NULL ) {
-      while( control->string != NULL ) {
-	 if ( !strcmp( debug, "all" ) ||
-	      strstr( debug, control->string ) != NULL ) {
+   if (debug != NULL) {
+      for (; control->string != NULL; control++) {
+	 if (!strcmp(debug, "all") ||
+	     strstr(debug, control->string) != NULL) {
 	    flag |= control->flag;
 	 }
-
-	 control++;
       }
    }
 
-- 
cgit v1.2.3


From 6cf4142db88796ff66a73a36530467101533fdb6 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Thu, 3 Sep 2015 15:20:04 +0300
Subject: dri/common: Tokenize driParseDebugString() argument before matching
 debug flags.

Fixes debug string parsing when one of the supported flags is a
substring of another.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/mesa/drivers/dri/common/utils.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c
index 1e3b15b6190..1246bec6e02 100644
--- a/src/mesa/drivers/dri/common/utils.c
+++ b/src/mesa/drivers/dri/common/utils.c
@@ -50,10 +50,19 @@ driParseDebugString(const char *debug,
 
    if (debug != NULL) {
       for (; control->string != NULL; control++) {
-	 if (!strcmp(debug, "all") ||
-	     strstr(debug, control->string) != NULL) {
-	    flag |= control->flag;
-	 }
+         if (!strcmp(debug, "all")) {
+            flag |= control->flag;
+
+         } else {
+            const char *s = debug;
+            unsigned n;
+
+            for (; n = strcspn(s, ", "), *s; s += MAX2(1, n)) {
+               if (strlen(control->string) == n &&
+                   !strncmp(control->string, s, n))
+                  flag |= control->flag;
+            }
+         }
       }
    }
 
-- 
cgit v1.2.3


From bd6e516fc24128d604f677a16f692d88d65a49f1 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Thu, 23 Jul 2015 11:11:53 +0200
Subject: i965: Add a debug option for spilling everything in vec4 code

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
---
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 2 +-
 src/mesa/drivers/dri/i965/brw_vec4.cpp            | 2 +-
 src/mesa/drivers/dri/i965/intel_debug.c           | 3 ++-
 src/mesa/drivers/dri/i965/intel_debug.h           | 5 +++--
 4 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 6eb988938d4..570b4fedffe 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -649,7 +649,7 @@ fs_visitor::assign_regs(bool allow_spilling)
    }
 
    /* Debug of register spilling: Go spill everything. */
-   if (unlikely(INTEL_DEBUG & DEBUG_SPILL)) {
+   if (unlikely(INTEL_DEBUG & DEBUG_SPILL_FS)) {
       int reg = choose_spill_reg(g);
 
       if (reg != -1) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 24b2be916a6..85dc37238d1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1833,7 +1833,7 @@ vec4_visitor::run()
 
    setup_payload();
 
-   if (false) {
+   if (unlikely(INTEL_DEBUG & DEBUG_SPILL_VEC4)) {
       /* Debug of register spilling: Go spill everything. */
       const int grf_count = alloc.count;
       float spill_costs[alloc.count];
diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c
index b3b3c21f491..5a9c9533fde 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.c
+++ b/src/mesa/drivers/dri/i965/intel_debug.c
@@ -69,7 +69,8 @@ static const struct dri_debug_control debug_control[] = {
    { "ann",         DEBUG_ANNOTATION },
    { "no8",         DEBUG_NO8 },
    { "vec4",        DEBUG_VEC4VS },
-   { "spill",       DEBUG_SPILL },
+   { "spill_fs",    DEBUG_SPILL_FS },
+   { "spill_vec4",  DEBUG_SPILL_VEC4 },
    { "cs",          DEBUG_CS },
    { NULL,    0 }
 };
diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h
index 4689492e1fd..b7d0c823fa8 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.h
+++ b/src/mesa/drivers/dri/i965/intel_debug.h
@@ -64,8 +64,9 @@ extern uint64_t INTEL_DEBUG;
 #define DEBUG_ANNOTATION          (1ull << 28)
 #define DEBUG_NO8                 (1ull << 29)
 #define DEBUG_VEC4VS              (1ull << 30)
-#define DEBUG_SPILL               (1ull << 31)
-#define DEBUG_CS                  (1ull << 32)
+#define DEBUG_SPILL_FS            (1ull << 31)
+#define DEBUG_SPILL_VEC4          (1ull << 32)
+#define DEBUG_CS                  (1ull << 33)
 
 #ifdef HAVE_ANDROID_PLATFORM
 #define LOG_TAG "INTEL-MESA"
-- 
cgit v1.2.3


From 96ea1663082c4a17eaf59873190f1e9677623d91 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Fri, 4 Sep 2015 13:23:20 +0200
Subject: i965/vec4: Don't unspill the same register in consecutive
 instructions

If we have spilled/unspilled a register in the current instruction, avoid
emitting unspills for the same register in the same instruction or consecutive
instructions following the current one as long as they keep reading the spilled
register. This should allow us to avoid emitting costly unspills that come with
little benefit to register allocation.

v2:
  - Apply the same logic when evaluating spilling costs (Curro).

v3:
  - Abstract the logic that decides if a register can be reused in a function
    that can be used from both spill_reg and evaluate_spill_costs (Curro).

v4:
  - Do not disallow reusing scratch_reg in predicated reads (Curro).
  - Track if previous sources in the same instruction read scratch_reg (Curro).
  - Return prev_inst_read_scratch_reg at the end (Curro).
  - No need to explicitly skip scratch read/write opcodes in spill_reg (Curro).
  - Fix the comments explaining what happens when we hit an instruction that
    does not read or write scratch_reg (Curro)
  - Return true early when the current or previous instructions read
    scratch_reg with a compatible mask.

v5:
  - Do not return true early, the loop should not be expensive anyway
    and this adds more complexity (Curro).

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
---
 .../drivers/dri/i965/brw_vec4_reg_allocate.cpp     | 126 +++++++++++++++++++--
 1 file changed, 118 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index 62ed7084883..a49eca56118 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -267,6 +267,97 @@ vec4_visitor::reg_allocate()
    return true;
 }
 
+/**
+ * When we decide to spill a register, instead of blindly spilling every use,
+ * save unspills when the spill register is used (read) in consecutive
+ * instructions. This can potentially save a bunch of unspills that would
+ * have very little impact in register allocation anyway.
+ *
+ * Notice that we need to account for this behavior when spilling a register
+ * and when evaluating spilling costs. This function is designed so it can
+ * be called from both places and avoid repeating the logic.
+ *
+ *  - When we call this function from spill_reg(), we pass in scratch_reg the
+ *    actual unspill/spill register that we want to reuse in the current
+ *    instruction.
+ *
+ *  - When we call this from evaluate_spill_costs(), we pass the register for
+ *    which we are evaluating spilling costs.
+ *
+ * In either case, we check if the previous instructions read scratch_reg until
+ * we find one that writes to it with a compatible mask or does not read/write
+ * scratch_reg at all.
+ */
+static bool
+can_use_scratch_for_source(const vec4_instruction *inst, unsigned i,
+                           unsigned scratch_reg)
+{
+   assert(inst->src[i].file == GRF);
+   bool prev_inst_read_scratch_reg = false;
+
+   /* See if any previous source in the same instructions reads scratch_reg */
+   for (unsigned n = 0; n < i; n++) {
+      if (inst->src[n].file == GRF && inst->src[n].reg == scratch_reg)
+         prev_inst_read_scratch_reg = true;
+   }
+
+   /* Now check if previous instructions read/write scratch_reg */
+   for (vec4_instruction *prev_inst = (vec4_instruction *) inst->prev;
+        !prev_inst->is_head_sentinel();
+        prev_inst = (vec4_instruction *) prev_inst->prev) {
+
+      /* If the previous instruction writes to scratch_reg then we can reuse
+       * it if the write is not conditional and the channels we write are
+       * compatible with our read mask
+       */
+      if (prev_inst->dst.file == GRF && prev_inst->dst.reg == scratch_reg) {
+         return (!prev_inst->predicate || prev_inst->opcode == BRW_OPCODE_SEL) &&
+                (brw_mask_for_swizzle(inst->src[i].swizzle) &
+                 ~prev_inst->dst.writemask) == 0;
+      }
+
+      /* Skip scratch read/writes so that instructions generated by spilling
+       * other registers (that won't read/write scratch_reg) do not stop us from
+       * reusing scratch_reg for this instruction.
+       */
+      if (prev_inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE ||
+          prev_inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_READ)
+         continue;
+
+      /* If the previous instruction does not write to scratch_reg, then check
+       * if it reads it
+       */
+      int n;
+      for (n = 0; n < 3; n++) {
+         if (prev_inst->src[n].file == GRF &&
+             prev_inst->src[n].reg == scratch_reg) {
+            prev_inst_read_scratch_reg = true;
+            break;
+         }
+      }
+      if (n == 3) {
+         /* The previous instruction does not read scratch_reg. At this point,
+          * if no previous instruction has read scratch_reg it means that we
+          * will need to unspill it here and we can't reuse it (so we return
+          * false). Otherwise, if we found at least one consecutive instruction
+          * that read scratch_reg, then we know that we got here from
+          * evaluate_spill_costs (since for the spill_reg path any block of
+          * consecutive instructions using scratch_reg must start with a write
+          * to that register, so we would've exited the loop in the check for
+          * the write that we have at the start of this loop), and in that case
+          * it means that we found the point at which the scratch_reg would be
+          * unspilled. Since we always unspill a full vec4, it means that we
+          * have all the channels available and we can just return true to
+          * signal that we can reuse the register in the current instruction
+          * too.
+          */
+         return prev_inst_read_scratch_reg;
+      }
+   }
+
+   return prev_inst_read_scratch_reg;
+}
+
 void
 vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
 {
@@ -284,9 +375,15 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       for (unsigned int i = 0; i < 3; i++) {
          if (inst->src[i].file == GRF) {
-            spill_costs[inst->src[i].reg] += loop_scale;
-            if (inst->src[i].reladdr)
-               no_spill[inst->src[i].reg] = true;
+            /* We will only unspill src[i] it it wasn't unspilled for the
+             * previous instruction, in which case we'll just reuse the scratch
+             * reg for this instruction.
+             */
+            if (!can_use_scratch_for_source(inst, i, inst->src[i].reg)) {
+               spill_costs[inst->src[i].reg] += loop_scale;
+               if (inst->src[i].reladdr)
+                  no_spill[inst->src[i].reg] = true;
+            }
          }
       }
 
@@ -345,19 +442,32 @@ vec4_visitor::spill_reg(int spill_reg_nr)
    unsigned int spill_offset = last_scratch++;
 
    /* Generate spill/unspill instructions for the objects being spilled. */
+   int scratch_reg = -1;
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       for (unsigned int i = 0; i < 3; i++) {
          if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) {
-            src_reg spill_reg = inst->src[i];
-            inst->src[i].reg = alloc.allocate(1);
-            dst_reg temp = dst_reg(inst->src[i]);
-
-            emit_scratch_read(block, inst, temp, spill_reg, spill_offset);
+            if (scratch_reg == -1 ||
+                !can_use_scratch_for_source(inst, i, scratch_reg)) {
+               /* We need to unspill anyway so make sure we read the full vec4
+                * in any case. This way, the cached register can be reused
+                * for consecutive instructions that read different channels of
+                * the same vec4.
+                */
+               scratch_reg = alloc.allocate(1);
+               src_reg temp = inst->src[i];
+               temp.reg = scratch_reg;
+               temp.swizzle = BRW_SWIZZLE_XYZW;
+               emit_scratch_read(block, inst,
+                                 dst_reg(temp), inst->src[i], spill_offset);
+            }
+            assert(scratch_reg != -1);
+            inst->src[i].reg = scratch_reg;
          }
       }
 
       if (inst->dst.file == GRF && inst->dst.reg == spill_reg_nr) {
          emit_scratch_write(block, inst, spill_offset);
+         scratch_reg = inst->dst.reg;
       }
    }
 
-- 
cgit v1.2.3


From 099f5b3a62be1919add02a4cb887841c9f0f2fe4 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 4 Sep 2015 19:02:28 +0100
Subject: i965: Disallow PixelTransfer operations for tiled-memcpy
 TexImage/ReadPixels

The tiled memcpy fast paths perform a simple blit (with only a couple of
trivial pixel conversion routines) and do not accommodate PixelTransfer
operations. Therefore if any are set, fallback to the regular routines.
Note that PixelTransfer only applies to TexImage and ReadPixels, not to
GetTexImage.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jason Ekstrand <jason.ekstrand@intel.com>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Cc: mesa-stable@lists.freedesktop.org
---
 src/mesa/drivers/dri/i965/intel_pixel_read.c   | 4 ++++
 src/mesa/drivers/dri/i965/intel_tex_subimage.c | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index 3fe506e3cf1..eb366cd3e34 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -109,6 +109,10 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
        pack->Invert)
       return false;
 
+   /* Only a simple blit, no scale, bias or other mapping. */
+   if (ctx->_ImageTransferState)
+      return false;
+
    /* This renderbuffer can come from a texture.  In this case, we impose
     * some of the same restrictions we have for textures and adjust for
     * miplevels.
diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
index 31e511f0b7b..44921e5242c 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -118,6 +118,10 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
        packing->Invert)
       return false;
 
+   /* Only a simple blit, no scale, bias or other mapping. */
+   if (ctx->_ImageTransferState)
+      return false;
+
    if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp,
                          INTEL_UPLOAD))
       return false;
-- 
cgit v1.2.3


From 48de40ce9c45de154965490843f9e50407970c26 Mon Sep 17 00:00:00 2001
From: Boyan Ding <boyan.j.ding@gmail.com>
Date: Wed, 26 Aug 2015 19:52:50 +0800
Subject: vc4: Initialize pack field of qreg to 0 in qir_get_temp

This avoids generation of undefined packing in qir and qpu instructions,
fixing a lot of rendering errors.

Fixes 8b36d107fdd (vc4: Pack the unorm-packing bits into a src MUL
instruction when possible.)

Cc: mesa-stable@lists.freedesktop.org
Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/gallium/drivers/vc4/vc4_qir.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 9d930715f9b..073ba5ffc23 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -314,6 +314,7 @@ qir_get_temp(struct vc4_compile *c)
 
         reg.file = QFILE_TEMP;
         reg.index = c->num_temps++;
+        reg.pack = 0;
 
         if (c->num_temps > c->defs_array_size) {
                 uint32_t old_size = c->defs_array_size;
-- 
cgit v1.2.3


From 3329703eb116a7ad73bc694356b43e014532240b Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 12 Aug 2015 13:39:42 +0200
Subject: nv30: Fix creation of scanout buffers

Scanout buffers on nv30 must always be non-swizzled and have special
width alignment constraints.

These constraints have been taken from the xf86-video-nouveau
src/nv_accel_common.c: nouveau_allocate_surface() function.

nouveau_allocate_surface() applies these width constraints only when a
tiled attribute is set, which it sets for all surfaces allocated via
dri, and this "tiling" is not the same as swizzling, scanout surfaces
must be linear / have a uniform_pitch or only complete garbage is shown.

This commit fixes dri3 on nv30 showing a garbled display, with dri3 the
scanout buffers are allocated by mesa, rather than by the ddx, and the
wrong stride of these buffers was causing the garbled display.

Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/nv30/nv30_miptree.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
index c75b4b95fd8..22763471e2f 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
@@ -28,6 +28,7 @@
 #include "util/u_surface.h"
 
 #include "nv_m2mf.xml.h"
+#include "nv_object.xml.h"
 #include "nv30/nv30_screen.h"
 #include "nv30/nv30_context.h"
 #include "nv30/nv30_resource.h"
@@ -362,6 +363,7 @@ nv30_miptree_create(struct pipe_screen *pscreen,
    blocksz = util_format_get_blocksize(pt->format);
 
    if ((pt->target == PIPE_TEXTURE_RECT) ||
+       (pt->bind & PIPE_BIND_SCANOUT) ||
        !util_is_power_of_two(pt->width0) ||
        !util_is_power_of_two(pt->height0) ||
        !util_is_power_of_two(pt->depth0) ||
@@ -369,6 +371,14 @@ nv30_miptree_create(struct pipe_screen *pscreen,
        util_format_is_float(pt->format) || mt->ms_mode) {
       mt->uniform_pitch = util_format_get_nblocksx(pt->format, w) * blocksz;
       mt->uniform_pitch = align(mt->uniform_pitch, 64);
+      if (pt->bind & PIPE_BIND_SCANOUT) {
+         struct nv30_screen *screen = nv30_screen(pscreen);
+         int pitch_align = MAX2(
+               screen->eng3d->oclass >= NV40_3D_CLASS ? 1024 : 256,
+               /* round_down_pow2(mt->uniform_pitch / 4) */
+               1 << (util_last_bit(mt->uniform_pitch / 4) - 1));
+         mt->uniform_pitch = align(mt->uniform_pitch, pitch_align);
+      }
    }
 
    if (!mt->uniform_pitch)
-- 
cgit v1.2.3


From 3c6c4d4f298ec81fe57992790a68aaab2e573519 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 3 Sep 2015 12:38:01 +0200
Subject: nv30: Implement color resolve for msaa

Note this is not ideal. Since the sifm can only do source sizes up to
1024x1024 we end up using the blitter on nv4x, which is not that fast.

And on nv3x we end up using the cpu which is really slow.

Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/nv30/nv30_miptree.c  | 19 ++++++++-----------
 src/gallium/drivers/nouveau/nv30/nv30_resource.h |  3 ---
 2 files changed, 8 insertions(+), 14 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
index 22763471e2f..76bb8b817fc 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
@@ -145,21 +145,18 @@ nv30_resource_copy_region(struct pipe_context *pipe,
    nv30_transfer_rect(nv30, NEAREST, &src, &dst);
 }
 
-void
-nv30_resource_resolve(struct pipe_context *pipe,
-                      const struct pipe_resolve_info *info)
+static void
+nv30_resource_resolve(struct nv30_context *nv30,
+                      const struct pipe_blit_info *info)
 {
-#if 0
-   struct nv30_context *nv30 = nv30_context(pipe);
    struct nv30_rect src, dst;
 
-   define_rect(info->src.res, 0, 0, info->src.x0, info->src.y0,
-               info->src.x1 - info->src.x0, info->src.y1 - info->src.y0, &src);
-   define_rect(info->dst.res, info->dst.level, 0, info->dst.x0, info->dst.y0,
-               info->dst.x1 - info->dst.x0, info->dst.y1 - info->dst.y0, &dst);
+   define_rect(info->src.resource, 0, info->src.box.z, info->src.box.x,
+      info->src.box.y, info->src.box.width, info->src.box.height, &src);
+   define_rect(info->dst.resource, 0, info->dst.box.z, info->dst.box.x,
+      info->dst.box.y, info->dst.box.width, info->dst.box.height, &dst);
 
    nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
-#endif
 }
 
 void
@@ -173,7 +170,7 @@ nv30_blit(struct pipe_context *pipe,
        info.dst.resource->nr_samples <= 1 &&
        !util_format_is_depth_or_stencil(info.src.resource->format) &&
        !util_format_is_pure_integer(info.src.resource->format)) {
-      debug_printf("nv30: color resolve unimplemented\n");
+      nv30_resource_resolve(nv30, blit_info);
       return;
    }
 
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_resource.h b/src/gallium/drivers/nouveau/nv30/nv30_resource.h
index 8dac7795c9d..20d86b634f2 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_resource.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_resource.h
@@ -65,9 +65,6 @@ nv30_resource_copy_region(struct pipe_context *pipe,
                           struct pipe_resource *src, unsigned src_level,
                           const struct pipe_box *src_box);
 
-void
-nv30_resource_resolve(struct pipe_context *, const struct pipe_resolve_info *);
-
 void
 nv30_blit(struct pipe_context *pipe,
           const struct pipe_blit_info *blit_info);
-- 
cgit v1.2.3


From 4f2290d1612569686284609059d29a85c9de67cf Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Thu, 3 Sep 2015 19:00:26 +0300
Subject: llvmpipe: convert double to long long instead of unsigned long long

round(val*dscale) produces a double result, as val and dscale are double.
However, LLVMConstInt receives unsigned long long, so there is an
implicit conversion from double to unsigned long long.
This is undefined behavior. Therefore, we need to first explicitly
convert the round result to long long, and then let the compiler handle
conversion from that to unsigned long long.

This bug manifests itself in POWER, where all IMM values of -1 are being
converted to 0 implicitly, causing a wrong LLVM IR output.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
CC: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/gallium/auxiliary/gallivm/lp_bld_const.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c
index 0f5a8f8e851..9cd7c5553cf 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c
@@ -311,7 +311,7 @@ lp_build_const_elem(struct gallivm_state *gallivm,
    else {
       double dscale = lp_const_scale(type);
 
-      elem = LLVMConstInt(elem_type, round(val*dscale), 0);
+      elem = LLVMConstInt(elem_type, (long long) round(val*dscale), 0);
    }
 
    return elem;
-- 
cgit v1.2.3


From 5165e464f225cd1b0f61e0da0758052b9b9ff518 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Fri, 4 Sep 2015 10:42:33 -0700
Subject: i965: Remove base miplevel from sampler state.

Gen9 changes the meaning of this to coarse LOD quality mode. Although that's a
desirable thing to be setting, it doesn't match the gen8 behavior and this was
unintentional. More importantly, we don't ever use this field. So instead of
getting it "wrong" drop it entirely.

This is a respin of a patch which only [incorrectly] tried to address gen9.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_sampler_state.c | 5 +----
 src/mesa/drivers/dri/i965/brw_state.h         | 1 -
 src/mesa/drivers/dri/i965/gen6_blorp.cpp      | 1 -
 3 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_sampler_state.c b/src/mesa/drivers/dri/i965/brw_sampler_state.c
index 2021bb3b460..c2db5f69560 100644
--- a/src/mesa/drivers/dri/i965/brw_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sampler_state.c
@@ -88,13 +88,11 @@ brw_emit_sampler_state(struct brw_context *brw,
                        unsigned min_lod,
                        unsigned max_lod,
                        int lod_bias,
-                       unsigned base_level,
                        unsigned shadow_function,
                        bool non_normalized_coordinates,
                        uint32_t border_color_offset)
 {
    ss[0] = BRW_SAMPLER_LOD_PRECLAMP_ENABLE |
-           SET_FIELD(base_level, BRW_SAMPLER_BASE_MIPLEVEL) |
            SET_FIELD(mip_filter, BRW_SAMPLER_MIP_FILTER) |
            SET_FIELD(mag_filter, BRW_SAMPLER_MAG_FILTER) |
            SET_FIELD(min_filter, BRW_SAMPLER_MIN_FILTER);
@@ -491,7 +489,6 @@ brw_update_sampler_state(struct brw_context *brw,
    const unsigned max_lod = U_FIXED(CLAMP(sampler->MaxLod, 0, 13), lod_bits);
    const int lod_bias =
       S_FIXED(CLAMP(tex_unit_lod_bias + sampler->LodBias, -16, 15), lod_bits);
-   const unsigned base_level = U_FIXED(0, 1);
 
    /* Upload the border color if necessary.  If not, just point it at
     * offset 0 (the start of the batch) - the color should be ignored,
@@ -515,7 +512,7 @@ brw_update_sampler_state(struct brw_context *brw,
                           max_anisotropy,
                           address_rounding,
                           wrap_s, wrap_t, wrap_r,
-                          min_lod, max_lod, lod_bias, base_level,
+                          min_lod, max_lod, lod_bias,
                           shadow_function,
                           non_normalized_coords,
                           border_color_offset);
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 97b81f08b2e..e75b795a7bc 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -312,7 +312,6 @@ void brw_emit_sampler_state(struct brw_context *brw,
                             unsigned min_lod,
                             unsigned max_lod,
                             int lod_bias,
-                            unsigned base_level,
                             unsigned shadow_function,
                             bool non_normalized_coordinates,
                             uint32_t border_color_offset);
diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index 54c4a6dfdd8..cba5c2f456a 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -486,7 +486,6 @@ gen6_blorp_emit_sampler_state(struct brw_context *brw,
                           0, /* min LOD */
                           max_lod,
                           0, /* LOD bias */
-                          0, /* base miplevel */
                           0, /* shadow function */
                           non_normalized_coords,
                           0); /* border color offset - unused */
-- 
cgit v1.2.3


From 75e34d1df8b0ab56e5e658b8ef90ff6057ec954e Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 1 Jan 2015 06:09:59 -0500
Subject: nv50: fix drawing from tfb, direct-to-pushbuf submits

The stride was being set to 0, which is illegal (and also nonsensical).
Also we must wait for the buffer to become available for reading as
otherwise a wrong value may be prefetched. Since we must wait for the
buffer anyway, and it's mapped and in GART, we may as well avoid the
annoyance of the indirect pushbuf submit.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/nouveau/nv50/nv50_context.h      |  2 +-
 src/gallium/drivers/nouveau/nv50/nv50_query.c        | 15 ++++++++-------
 src/gallium/drivers/nouveau/nv50/nv50_shader_state.c |  7 ++++---
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c          |  5 ++---
 4 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index e7adf472ed0..69c121274a9 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -197,7 +197,7 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
 
 /* nv50_query.c */
 void nv50_init_query_functions(struct nv50_context *);
-void nv50_query_pushbuf_submit(struct nouveau_pushbuf *,
+void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t method,
                                struct pipe_query *, unsigned result_offset);
 void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
 void nva0_so_target_save_offset(struct pipe_context *,
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index f4adbf8c653..5368ee73750 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -266,6 +266,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
       nv50_query_get(push, q, 0, 0x1000f010);
       break;
    case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+      q->sequence++;
       nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
       break;
    case PIPE_QUERY_TIMESTAMP_DISJOINT:
@@ -451,18 +452,18 @@ nv50_render_condition(struct pipe_context *pipe,
 }
 
 void
-nv50_query_pushbuf_submit(struct nouveau_pushbuf *push,
+nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
                           struct pipe_query *pq, unsigned result_offset)
 {
    struct nv50_query *q = nv50_query(pq);
 
-   /* XXX: does this exist ? */
-#define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8))
+   nv50_query_update(q);
+   if (q->state != NV50_QUERY_STATE_READY)
+      nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client);
+   q->state = NV50_QUERY_STATE_READY;
 
-   PUSH_REFN(push, q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART);
-   nouveau_pushbuf_space(push, 0, 0, 1);
-   nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
-                        NV50_IB_ENTRY_1_NO_PREFETCH);
+   BEGIN_NV04(push, SUBC_3D(method), 1);
+   PUSH_DATA (push, q->data[result_offset / 4]);
 }
 
 void
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
index b033ce5c6dc..fdde11f4cd5 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -641,12 +641,12 @@ nv50_stream_output_validate(struct nv50_context *nv50)
       PUSH_DATA (push, so->num_attribs[i]);
       if (n == 4) {
          PUSH_DATA(push, targ->pipe.buffer_size);
-
-         BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
          if (!targ->clean) {
             assert(targ->pq);
-            nv50_query_pushbuf_submit(push, targ->pq, 0x4);
+            nv50_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i),
+                                      targ->pq, 0x4);
          } else {
+            BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
             PUSH_DATA(push, 0);
             targ->clean = false;
          }
@@ -655,6 +655,7 @@ nv50_stream_output_validate(struct nv50_context *nv50)
             (so->stride[i] * nv50->state.prim_size);
          prims = MIN2(prims, limit);
       }
+      targ->stride = so->stride[i];
       BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
    }
    if (prims != ~0) {
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 6324726acec..ca51ea1dc7e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -736,9 +736,8 @@ nva0_draw_stream_output(struct nv50_context *nv50,
       BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1);
       PUSH_DATA (push, 0);
       BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);
-      PUSH_DATA (push, 0);
-      BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BYTES), 1);
-      nv50_query_pushbuf_submit(push, so->pq, 0x4);
+      PUSH_DATA (push, so->stride);
+      nv50_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES, so->pq, 0x4);
       BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
       PUSH_DATA (push, 0);
 
-- 
cgit v1.2.3


From 114cc18b98b6e016ab1986577aa3df12acc22cca Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 3 Jul 2015 19:21:21 -0400
Subject: nv50: clear buffer status on all vertex bufs, not just the first one

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index ca51ea1dc7e..f35326d4198 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -317,7 +317,6 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
          if (buf && buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
             buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
             nv50->base.vbo_dirty = true;
-            break;
          }
       }
    }
-- 
cgit v1.2.3


From 1f62d36ae21043c472fc182fd4b738ec1d54a2d2 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 3 Jul 2015 20:16:48 -0400
Subject: nv50: rebind bo to bufctx when invalidating idxbuf storage

There is nothing to be done on a dirty idxbuf, but the bo may have
changed, so we have to rebind it to the bufctx.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/nouveau/nv50/nv50_context.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index 4949459a803..4108f48005e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -199,9 +199,13 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
          }
       }
 
-      if (nv50->idxbuf.buffer == res)
+      if (nv50->idxbuf.buffer == res) {
+         /* Just rebind to the bufctx as there is no separate dirty bit */
+         nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX);
+         BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(res), RD);
          if (!--ref)
             return ref;
+      }
 
       for (s = 0; s < 3; ++s) {
       assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
-- 
cgit v1.2.3


From 4a025c6bc835387a31007fdf30a130e612e54e19 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 3 Jul 2015 20:32:53 -0400
Subject: nv50: don't flush vertex arrays when index buffer changes

The index buffer is fed in inline over a pushbuf. It's not related to
vertices or any caching that might be done on them.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index f35326d4198..600b973c5f6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -836,10 +836,6 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
          nv50->base.vbo_dirty = true;
    }
 
-   if (!nv50->base.vbo_dirty && nv50->idxbuf.buffer &&
-       nv50->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
-      nv50->base.vbo_dirty = true;
-
    if (nv50->base.vbo_dirty) {
       BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
       PUSH_DATA (push, 0);
-- 
cgit v1.2.3


From c830d193db5c90cf0af57ff73606e2aa12aed9a8 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Mon, 24 Aug 2015 11:49:05 -0400
Subject: nv50: avoid using inline vertex data submit when gl_VertexID is used

The hardware only generates vertexid when vertices come from a VBO. This
fixes:

  vertexid-drawelements
  vertexid-drawarrays

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/nouveau/nv50/nv50_program.c        |  1 +
 src/gallium/drivers/nouveau/nv50/nv50_program.h        |  1 +
 src/gallium/drivers/nouveau/nv50/nv50_state_validate.c |  3 ++-
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c            | 11 ++++++++++-
 4 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index 02dc3677259..eff4477472c 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -66,6 +66,7 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
       case TGSI_SEMANTIC_VERTEXID:
          prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
          prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
+         prog->vp.vertexid = 1;
          continue;
       default:
          break;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h
index 5d3ff5644d2..f4e8e9402ca 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -76,6 +76,7 @@ struct nv50_program {
       ubyte psiz;        /* output slot of point size */
       ubyte bfc[2];      /* indices into varying for FFC (FP) or BFC (VP) */
       ubyte edgeflag;
+      ubyte vertexid;
       ubyte clpd[2];     /* output slot of clip distance[i]'s 1st component */
       ubyte clpd_nr;
    } vp;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index b304a177b50..66dcf43533b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -503,7 +503,8 @@ static struct state_validate {
     { nv50_validate_samplers,      NV50_NEW_SAMPLERS },
     { nv50_stream_output_validate, NV50_NEW_STRMOUT |
                                    NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
-    { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS },
+    { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS |
+                                   NV50_NEW_VERTPROG },
     { nv50_validate_min_samples,   NV50_NEW_MIN_SAMPLES },
 };
 #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 600b973c5f6..e7984734af9 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -293,7 +293,8 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
    uint64_t addrs[PIPE_MAX_ATTRIBS];
    uint32_t limits[PIPE_MAX_ATTRIBS];
    struct nouveau_pushbuf *push = nv50->base.pushbuf;
-   struct nv50_vertex_stateobj *vertex = nv50->vertex;
+   struct nv50_vertex_stateobj dummy = {};
+   struct nv50_vertex_stateobj *vertex = nv50->vertex ? nv50->vertex : &dummy;
    struct pipe_vertex_buffer *vb;
    struct nv50_vertex_element *ve;
    uint32_t mask;
@@ -301,6 +302,14 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
    unsigned i;
    const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts);
 
+   /* A vertexid is not generated for inline data uploads. Have to use a
+    * VBO. This check must come after the vertprog has been validated,
+    * otherwise vertexid may be unset.
+    */
+   assert(nv50->vertprog->translated);
+   if (nv50->vertprog->vp.vertexid)
+      nv50->vbo_push_hint = 0;
+
    if (unlikely(vertex->need_conversion))
       nv50->vbo_fifo = ~0;
    else
-- 
cgit v1.2.3


From a778831735ea45f789c247c40677cd26adc78e3e Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 2 Jul 2015 18:44:18 -0400
Subject: nouveau: don't mark full range as used on unmap with explicit flush

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/nouveau/nouveau_buffer.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 67e181e803a..912b7789006 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -532,8 +532,13 @@ nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
    struct nv04_resource *buf = nv04_resource(transfer->resource);
 
    if (tx->base.usage & PIPE_TRANSFER_WRITE) {
-      if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) && tx->map)
-         nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
+      if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
+         if (tx->map)
+            nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
+
+         util_range_add(&buf->valid_buffer_range,
+                        tx->base.box.x, tx->base.box.x + tx->base.box.width);
+      }
 
       if (likely(buf->domain)) {
          const uint8_t bind = buf->base.bind;
@@ -541,9 +546,6 @@ nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
          if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
             nv->vbo_dirty = true;
       }
-
-      util_range_add(&buf->valid_buffer_range,
-                     tx->base.box.x, tx->base.box.x + tx->base.box.width);
    }
 
    if (!tx->bo && (tx->base.usage & PIPE_TRANSFER_WRITE))
-- 
cgit v1.2.3


From bfd3d5244b26aca7696715066f5e5331e0b5fe2a Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 5 Sep 2015 13:11:27 -0400
Subject: st/mesa: properly handle u_upload_alloc failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

vbuf is never null. We want to make sure that a resource was allocated
for the vbuf, which is *vbuf.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/mesa/state_tracker/st_cb_bitmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index ce593a94593..230eba8c4a5 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -351,7 +351,7 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized,
 
    u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]),
                   vbuf_offset, vbuf, (void **) &vertices);
-   if (!vbuf) {
+   if (!*vbuf) {
       return;
    }
 
-- 
cgit v1.2.3


From e40f32d5626c87d9e77bbc261df3648cd54bd066 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sun, 6 Sep 2015 11:29:00 -0400
Subject: st/mesa: don't fall back to 16F when 32F is requested
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Nothing in the spec allows for the reduced precision, and this also
fixes st_QuerySamplesForFormat for nv50, which does not allow MS8 on
RGBA32F. Now this will be respected instead of reporting MS8 as
supported with an assumption that the format used will be RGBA16F.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/mesa/state_tracker/st_format.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

(limited to 'src')

diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index db74184cff4..e3fb761aa98 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -1270,46 +1270,40 @@ static const struct format_mapping format_map[] = {
    /* 32-bit float formats */
    {
       { GL_RGBA32F_ARB, 0 },
-      { PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+      { PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_RGB32F_ARB, 0 },
       { PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32X32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_LUMINANCE_ALPHA32F_ARB, 0 },
-      { PIPE_FORMAT_L32A32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+      { PIPE_FORMAT_L32A32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_ALPHA32F_ARB, 0 },
       { PIPE_FORMAT_A32_FLOAT, PIPE_FORMAT_L32A32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_A16_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_INTENSITY32F_ARB, 0 },
       { PIPE_FORMAT_I32_FLOAT, PIPE_FORMAT_L32A32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_I16_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_LUMINANCE32F_ARB, 0 },
       { PIPE_FORMAT_L32_FLOAT, PIPE_FORMAT_L32A32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_L16_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_R32F, 0 },
       { PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_R16_FLOAT,
-        PIPE_FORMAT_R16G16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_RG32F, 0 },
-      { PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT,
-        PIPE_FORMAT_R16G16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+      { PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
 
    /* R, RG formats */
-- 
cgit v1.2.3


From 13bfa5201162185005fa9a45f7bc76e5909ab193 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 4 Sep 2015 16:43:22 -0400
Subject: util: always include sha1 into the build

SHA1 is now used in all builds when HAVE_SHA1 is defined. Adjust src to
do the same thing, rather than predicating on shader cache.

Fixes: 04e201d0c02 ("mesa: change 'SHADER_SUBST' facility to work with env variables")
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Dave Airlie <airlied@gmail.com>
---
 src/util/Makefile.am      | 4 ----
 src/util/Makefile.sources | 6 ++----
 src/util/mesa-sha1.c      | 4 ++++
 3 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/util/Makefile.am b/src/util/Makefile.am
index 1e087b40d38..761023d186f 100644
--- a/src/util/Makefile.am
+++ b/src/util/Makefile.am
@@ -44,11 +44,7 @@ libmesautil_la_SOURCES = \
 	$(MESA_UTIL_FILES) \
 	$(MESA_UTIL_GENERATED_FILES)
 
-if ENABLE_SHADER_CACHE
-libmesautil_la_SOURCES += $(MESA_UTIL_SHADER_CACHE_FILES)
-
 libmesautil_la_LIBADD = $(SHA1_LIBS)
-endif
 
 roundeven_test_LDADD = -lm
 
diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources
index 82df3bcb00a..afdd0cb3b5c 100644
--- a/src/util/Makefile.sources
+++ b/src/util/Makefile.sources
@@ -1,7 +1,3 @@
-MESA_UTIL_SHADER_CACHE_FILES := \
-	mesa-sha1.c \
-	mesa-sha1.h
-
 MESA_UTIL_FILES :=	\
 	bitset.h \
 	format_srgb.h \
@@ -9,6 +5,8 @@ MESA_UTIL_FILES :=	\
 	hash_table.h \
 	list.h \
 	macros.h \
+	mesa-sha1.c \
+	mesa-sha1.h \
 	ralloc.c \
 	ralloc.h \
 	register_allocate.c \
diff --git a/src/util/mesa-sha1.c b/src/util/mesa-sha1.c
index fa281937774..7614b21c8ba 100644
--- a/src/util/mesa-sha1.c
+++ b/src/util/mesa-sha1.c
@@ -290,6 +290,8 @@ _mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
 
 #endif
 
+#ifdef HAVE_SHA1
+
 void
 _mesa_sha1_compute(const void *data, size_t size, unsigned char result[20])
 {
@@ -314,3 +316,5 @@ _mesa_sha1_format(char *buf, const unsigned char *sha1)
 
    return buf;
 }
+
+#endif
-- 
cgit v1.2.3


From ae535cb0bf86e3ae376d42f4fd064bb73c22427e Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Mon, 7 Sep 2015 00:06:51 -0400
Subject: util: make mesa-sha1.c completely empty when there are no SHA1 impls

My earlier attempt to fix this missed the fact that there was a #else
clause that assumes that you have OpenSSL. This moves the whole thing
under #ifdef HAVE_SHA1 which should avoid this issue.

Fixes: 13bfa5201 (util: always include sha1 into the build)
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91898
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Dave Airlie <airlied@gmail.com>
---
 src/util/mesa-sha1.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/util/mesa-sha1.c b/src/util/mesa-sha1.c
index 7614b21c8ba..faa1c871b5d 100644
--- a/src/util/mesa-sha1.c
+++ b/src/util/mesa-sha1.c
@@ -26,6 +26,8 @@
 
 #include "mesa-sha1.h"
 
+#ifdef HAVE_SHA1
+
 #if defined(HAVE_SHA1_IN_LIBMD)  /* Use libmd for SHA1 */ \
 	|| defined(HAVE_SHA1_IN_LIBC)   /* Use libc for SHA1 */
 
@@ -290,8 +292,6 @@ _mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
 
 #endif
 
-#ifdef HAVE_SHA1
-
 void
 _mesa_sha1_compute(const void *data, size_t size, unsigned char result[20])
 {
-- 
cgit v1.2.3


From 00c568f679413ee627421d5724beb85be3da55c1 Mon Sep 17 00:00:00 2001
From: Alejandro Piñeiro <apinheiro@igalia.com>
Date: Sat, 5 Sep 2015 15:26:59 +0200
Subject: i965/nir/vec4: removed unneeded tex src swizzle set

At that point the swizzle should be correct.

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 0ff298ed0e9..f6e59ce4196 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2624,7 +2624,6 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
 	    mrf = param_base;
 	    writemask = WRITEMASK_W;
 	 }
-         lod.swizzle = BRW_SWIZZLE_XXXX;
 	 emit(MOV(dst_reg(MRF, mrf, lod.type, writemask), lod));
       } else if (op == ir_txf) {
          emit(MOV(dst_reg(MRF, param_base, lod.type, WRITEMASK_W), lod));
-- 
cgit v1.2.3


From 1432a182414352e853bfdad997591598e621fd73 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Wed, 2 Sep 2015 23:13:33 -0700
Subject: xa: add xa_surface_from_handle2 v2

Like xa_surface_from_handle(), but takes a handle type, rather than
hard-coding 'shared' handle.  This is needed to fix bugs seen with
xf86-video-freedreno with xrandr rotation, for example.  The root issue
is that doing a GEM_OPEN ioctl on a bo that already has a GEM handle
associated with the drm_file will result in two unique handles for the
same bo.  Which causes all sorts of follow-on fail.

v2:
- Add support for fd handles.
- Avoid duplicating code.
- Bump xa version minor.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 src/gallium/state_trackers/xa/xa_tracker.c | 41 ++++++++++++++++++++++--------
 src/gallium/state_trackers/xa/xa_tracker.h | 14 +++++++++-
 src/gallium/targets/xa/xa.sym              |  1 +
 3 files changed, 45 insertions(+), 11 deletions(-)

(limited to 'src')

diff --git a/src/gallium/state_trackers/xa/xa_tracker.c b/src/gallium/state_trackers/xa/xa_tracker.c
index 21ca57ca633..2944b16858c 100644
--- a/src/gallium/state_trackers/xa/xa_tracker.c
+++ b/src/gallium/state_trackers/xa/xa_tracker.c
@@ -298,6 +298,20 @@ xa_format_check_supported(struct xa_tracker *xa,
     return XA_ERR_NONE;
 }
 
+static unsigned
+handle_type(enum xa_handle_type type)
+{
+    switch (type) {
+    case xa_handle_type_kms:
+	return DRM_API_HANDLE_TYPE_KMS;
+    case xa_handle_type_fd:
+        return DRM_API_HANDLE_TYPE_FD;
+    case xa_handle_type_shared:
+    default:
+	return DRM_API_HANDLE_TYPE_SHARED;
+    }
+}
+
 static struct xa_surface *
 surface_create(struct xa_tracker *xa,
 		  int width,
@@ -379,10 +393,25 @@ xa_surface_from_handle(struct xa_tracker *xa,
 		  enum xa_surface_type stype,
 		  enum xa_formats xa_format, unsigned int flags,
 		  uint32_t handle, uint32_t stride)
+{
+    /* Legacy API: always 'shared' handles; note flags precede the type. */
+    return xa_surface_from_handle2(xa, width, height, depth, stype, xa_format,
+                                   flags, xa_handle_type_shared, handle, stride);
+}
+
+XA_EXPORT struct xa_surface *
+xa_surface_from_handle2(struct xa_tracker *xa,
+                        int width,
+                        int height,
+                        int depth,
+                        enum xa_surface_type stype,
+                        enum xa_formats xa_format, unsigned int flags,
+                        enum xa_handle_type type,
+                        uint32_t handle, uint32_t stride)
 {
     struct winsys_handle whandle;
     memset(&whandle, 0, sizeof(whandle));
-    whandle.type = DRM_API_HANDLE_TYPE_SHARED;
+    whandle.type = handle_type(type);
     whandle.handle = handle;
     whandle.stride = stride;
     return surface_create(xa, width, height, depth, stype, xa_format, flags, &whandle);
@@ -511,15 +540,7 @@ xa_surface_handle(struct xa_surface *srf,
     boolean res;
 
     memset(&whandle, 0, sizeof(whandle));
-    switch (type) {
-    case xa_handle_type_kms:
-	whandle.type = DRM_API_HANDLE_TYPE_KMS;
-	break;
-    case xa_handle_type_shared:
-    default:
-	whandle.type = DRM_API_HANDLE_TYPE_SHARED;
-	break;
-    }
+    whandle.type = handle_type(type);
     res = screen->resource_get_handle(screen, srf->tex, &whandle);
     if (!res)
 	return -XA_ERR_INVAL;
diff --git a/src/gallium/state_trackers/xa/xa_tracker.h b/src/gallium/state_trackers/xa/xa_tracker.h
index 5c6435e19c1..44b3eb5cbe4 100644
--- a/src/gallium/state_trackers/xa/xa_tracker.h
+++ b/src/gallium/state_trackers/xa/xa_tracker.h
@@ -37,7 +37,7 @@
 #include <stdint.h>
 
 #define XA_TRACKER_VERSION_MAJOR 2
-#define XA_TRACKER_VERSION_MINOR 2
+#define XA_TRACKER_VERSION_MINOR 3
 #define XA_TRACKER_VERSION_PATCH 0
 
 #define XA_FLAG_SHARED         (1 << 0)
@@ -149,6 +149,7 @@ struct xa_box {
 enum xa_handle_type {
     xa_handle_type_shared,
     xa_handle_type_kms,
+    xa_handle_type_fd,
 };
 
 extern void xa_tracker_version(int *major, int *minor, int *patch);
@@ -177,6 +178,17 @@ extern struct xa_surface * xa_surface_from_handle(struct xa_tracker *xa,
 					    enum xa_formats pform,
 					    unsigned int flags,
 					    uint32_t handle, uint32_t stride);
+extern struct xa_surface *
+xa_surface_from_handle2(struct xa_tracker *xa,
+                        int width,
+                        int height,
+                        int depth,
+                        enum xa_surface_type stype,
+                        enum xa_formats xa_format,
+                        unsigned int flags,
+                        enum xa_handle_type type,
+                        uint32_t handle,
+                        uint32_t stride);
 
 enum xa_formats xa_surface_format(const struct xa_surface *srf);
 
diff --git a/src/gallium/targets/xa/xa.sym b/src/gallium/targets/xa/xa.sym
index 9c7f422f399..50ccc99cb48 100644
--- a/src/gallium/targets/xa/xa.sym
+++ b/src/gallium/targets/xa/xa.sym
@@ -23,6 +23,7 @@
 		xa_surface_dma;
 		xa_surface_format;
 		xa_surface_from_handle;
+		xa_surface_from_handle2;
 		xa_surface_handle;
 		xa_surface_map;
 		xa_surface_redefine;
-- 
cgit v1.2.3


From f1ef89eaab765789d777a5bd1bdfcb7811e3a22d Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Fri, 4 Sep 2015 03:26:05 -0700
Subject: svga: Fix surface view error handling

Make sure errors are correctly propagated.
Also don't flush during state emission if emission fails.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Charmaine Lee <charmainel@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/svga/svga_surface.c | 48 ++++++++++++++++++---------------
 1 file changed, 26 insertions(+), 22 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_surface.c b/src/gallium/drivers/svga/svga_surface.c
index 477a938ccdd..aca5abcdfce 100644
--- a/src/gallium/drivers/svga/svga_surface.c
+++ b/src/gallium/drivers/svga/svga_surface.c
@@ -296,6 +296,11 @@ svga_create_surface_view(struct pipe_context *pipe,
       s->handle = svga_texture_view_surface(svga, tex, bind, flags, tex->key.format,
                                             surf_tmpl->u.tex.level, 1,
                                             layer, nlayers, zslice, &s->key);
+      if (!s->handle) {
+         FREE(s);
+         return NULL;
+      }
+
       s->key.format = format;
       s->real_layer = 0;
       s->real_level = 0;
@@ -358,6 +363,8 @@ create_backed_surface_view(struct svga_context *svga, struct svga_surface *s)
                                              &tex->b.b,
                                              &s->base,
                                              TRUE);
+      if (!backed_view)
+         return NULL;
 
       bs = svga_surface(backed_view);
       s->backed = bs;
@@ -376,7 +383,6 @@ struct pipe_surface *
 svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s)
 {
    enum pipe_error ret = PIPE_OK;
-   int try;
    unsigned shader;
 
    assert(svga_have_vgpu10(svga));
@@ -395,6 +401,9 @@ svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s)
                   "same resource used in shaderResource and renderTarget 0x%x\n",
                   s->handle);
          s = create_backed_surface_view(svga, s);
+         if (!s)
+            return NULL;
+
          break;
       }
    }
@@ -430,32 +439,27 @@ svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s)
          resType = SVGA3D_RESOURCE_TEXTURE2D;
       }
 
-      for (try = 0; try < 2; try++) {
-         if (util_format_is_depth_or_stencil(s->base.format)) {
-            ret = SVGA3D_vgpu10_DefineDepthStencilView(svga->swc,
-                                                       s->view_id,
-                                                       s->handle,
-                                                       s->key.format,
-                                                       resType,
-                                                       &desc);
-         }
-         else {
-            ret = SVGA3D_vgpu10_DefineRenderTargetView(svga->swc,
-                                                       s->view_id,
-                                                       s->handle,
-                                                       s->key.format,
-                                                       resType,
-                                                       &desc);
-         }
-         if (ret == PIPE_OK)
-            break;
-         svga_context_flush(svga, NULL);
+      if (util_format_is_depth_or_stencil(s->base.format)) {
+         ret = SVGA3D_vgpu10_DefineDepthStencilView(svga->swc,
+                                                    s->view_id,
+                                                    s->handle,
+                                                    s->key.format,
+                                                    resType,
+                                                    &desc);
+      }
+      else {
+         ret = SVGA3D_vgpu10_DefineRenderTargetView(svga->swc,
+                                                    s->view_id,
+                                                    s->handle,
+                                                    s->key.format,
+                                                    resType,
+                                                    &desc);
       }
 
-      assert(ret == PIPE_OK);
       if (ret != PIPE_OK) {
          util_bitmask_clear(svga->surface_view_id_bm, s->view_id);
          s->view_id = SVGA3D_INVALID_ID;
+         return NULL;
       }
    }
    return &s->base;
-- 
cgit v1.2.3


From 5f9c72ad23e3f646c2007b70f482561c98b94ea7 Mon Sep 17 00:00:00 2001
From: Jon TURNEY <jon.turney@dronecode.org.uk>
Date: Fri, 4 Sep 2015 12:43:05 +0100
Subject: glx/tests: Use X11_INCLUDES instead of X11_CFLAGS

X11_CFLAGS is undefined, so these tests will fail to build if x11proto is
installed in a non-standard location.

(See also commits 35189d76, bc93c3798, 54b028ba, d901d7e08, etc.)

Signed-off-by: Jon TURNEY <jon.turney@dronecode.org.uk>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/glx/tests/Makefile.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glx/tests/Makefile.am b/src/glx/tests/Makefile.am
index b02a9e3770e..bdc78c0d5af 100644
--- a/src/glx/tests/Makefile.am
+++ b/src/glx/tests/Makefile.am
@@ -10,7 +10,7 @@ AM_CPPFLAGS = \
 	-I$(top_srcdir)/include/GL/internal \
 	$(DEFINES) \
 	$(LIBDRM_CFLAGS) \
-	$(X11_CFLAGS)
+	$(X11_INCLUDES)
 
 TESTS = glx-test
 check_PROGRAMS = glx-test
-- 
cgit v1.2.3


From a1575b55c2384faa461e7802371c5ddd37e5befe Mon Sep 17 00:00:00 2001
From: Jon TURNEY <jon.turney@dronecode.org.uk>
Date: Fri, 4 Sep 2015 12:43:06 +0100
Subject: mesa/tests: Remove unneeded X11_CFLAGS

X11_CFLAGS is never defined.  Path to X11 headers is not needed here, so
just remove.

Future work: Using AM_CFLAGS here looks wrong, as this Makefile only builds
C++ files.

Signed-off-by: Jon TURNEY <jon.turney@dronecode.org.uk>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/mesa/main/tests/Makefile.am | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/main/tests/Makefile.am b/src/mesa/main/tests/Makefile.am
index 9467f3ba8c6..bd7ab7365c0 100644
--- a/src/mesa/main/tests/Makefile.am
+++ b/src/mesa/main/tests/Makefile.am
@@ -1,5 +1,4 @@
 AM_CFLAGS = \
-	$(X11_CFLAGS) \
 	$(PTHREAD_CFLAGS)
 AM_CPPFLAGS = \
 	-I$(top_srcdir)/src/gtest/include \
-- 
cgit v1.2.3


From be519c2d50f4aaa48fdb8b27707114cc5bfd348f Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat, 5 Sep 2015 19:19:33 +0100
Subject: i965: Disallow fast blit paths for CopyTexImage with PixelTransfer
 ops

glCopyTexImage behaves similarly to glReadPixels with respect to the
pixel transfer operations. Therefore if any are set we cannot use the
simple blit-only fast paths.

(Though it would be possible to relax the blorp path to handle
pixel zoom, or we can just enhance meta.)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jason Ekstrand <jason.ekstrand@intel.com>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Iago Toral <itoral@igalia.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Cc: mesa-stable@lists.freedesktop.org
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 4 ++++
 src/mesa/drivers/dri/i965/intel_tex_copy.c   | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 205c905b447..ba11d3dd07f 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -215,6 +215,10 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
    struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb);
    struct intel_texture_image *intel_image = intel_texture_image(dst_image);
 
+   /* No pixel transfer operations (zoom, bias, mapping), just a blit */
+   if (brw->ctx._ImageTransferState)
+      return false;
+
    /* Sync up the state of window system buffers.  We need to do this before
     * we go looking at the src renderbuffer's miptree.
     */
diff --git a/src/mesa/drivers/dri/i965/intel_tex_copy.c b/src/mesa/drivers/dri/i965/intel_tex_copy.c
index 4d8c82e0569..ecdd052fdf6 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_copy.c
@@ -55,6 +55,10 @@ intel_copy_texsubimage(struct brw_context *brw,
    const GLenum internalFormat = intelImage->base.Base.InternalFormat;
    bool ret;
 
+   /* No pixel transfer operations (zoom, bias, mapping), just a blit */
+   if (brw->ctx._ImageTransferState)
+      return false;
+
    intel_prepare_render(brw);
 
    /* glCopyTexSubImage() can be called on a multisampled renderbuffer (if
-- 
cgit v1.2.3


From 87073c69f3e253044bc235f34917aaa89041a63c Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 7 Sep 2015 21:50:48 +0200
Subject: nv30: Fix max width / height checks in nv30 sifm code

The sifm object has a limit of 1024x1024 for its input size and 2048x2048
for its output. The code checking this was trying to be clever, resulting
in it treating a surface of e.g. 1024x256 as being outside of the input size
limit.

This commit fixes this.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/nouveau/nv30/nv30_transfer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nv30/nv30_transfer.c b/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
index 214da6568c3..2452071762b 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
@@ -371,7 +371,7 @@ nv30_transfer_rect_blit(XFER_ARGS)
 static bool
 nv30_transfer_sifm(XFER_ARGS)
 {
-   if (!src->pitch || (src->w | src->h) > 1024 || src->w < 2 || src->h < 2)
+   if (!src->pitch || src->w > 1024 || src->h > 1024 || src->w < 2 || src->h < 2)
       return false;
 
    if (src->d > 1 || dst->d > 1)
@@ -381,7 +381,7 @@ nv30_transfer_sifm(XFER_ARGS)
       return false;
 
    if (!dst->pitch) {
-      if ((dst->w | dst->h) > 2048 || dst->w < 2 || dst->h < 2)
+      if (dst->w > 2048 || dst->h > 2048 || dst->w < 2 || dst->h < 2)
          return false;
    } else {
       if (dst->domain != NOUVEAU_BO_VRAM)
-- 
cgit v1.2.3


From 205ff843ffa12f461c5307903c200baa1d3c87c0 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Tue, 1 Sep 2015 11:32:29 +0200
Subject: nir: UBO loads no longer use const_index[1]

Commit 2126c68e5cba killed the array elements parameter on load/store
intrinsics that was stored in const_index[1]. It looks like that
patch neglected to remove this assignment in the UBO path.

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/glsl_to_nir.cpp | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 5fb4ee25c40..0712908eee8 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -1001,7 +1001,6 @@ nir_visitor::visit(ir_expression *ir)
       nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op);
       load->num_components = ir->type->vector_elements;
       load->const_index[0] = const_index ? const_index->value.u[0] : 0; /* base offset */
-      load->const_index[1] = 1; /* number of vec4's */
       load->src[0] = evaluate_rvalue(ir->operands[0]);
       if (!const_index)
          load->src[1] = evaluate_rvalue(ir->operands[1]);
-- 
cgit v1.2.3


From 39df725f731f75f488c75a4910169beb352213fb Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sun, 6 Sep 2015 04:51:29 -0400
Subject: nvc0: always emit a full shader colormask

Indications are that if the colormask indicates a single bit set on
fermi, that value will always be read from $r0 instead of a potentially
higher register (if e.g. green is set). Not to upset the counting logic,
always set the header up with a full color mask for each RT. Such a
situation can basically only ever happen with generated blit shaders.

Fixes the following piglit on Fermi (Kepler is unaffected):
  fbo-stencil blit GL_DEPTH32F_STENCIL8

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 12f1bb728d7..a168dd684ab 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -449,7 +449,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
 
    for (i = 0; i < info->numOutputs; ++i) {
       if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
-         fp->hdr[18] |= info->out[i].mask << info->out[i].slot[0];
+         fp->hdr[18] |= 0xf << info->out[i].slot[0];
    }
 
    fp->fp.early_z = info->prop.fp.earlyFragTests;
-- 
cgit v1.2.3


From 458e55d7c5793b02af8b08ebec90906a829d3f65 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Tue, 8 Sep 2015 16:58:43 -0400
Subject: st/mesa: increase viewport bounds limits for GL4 hw
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

According to the ARB_viewport_array spec, GL4 limit is higher than the
GL3 limit. Also take this opportunity to fix the GL3 limit.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/mesa/state_tracker/st_extensions.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 17f572f80fb..884761c6c30 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -873,8 +873,13 @@ void st_init_extensions(struct pipe_screen *screen,
 
    consts->MaxViewports = screen->get_param(screen, PIPE_CAP_MAX_VIEWPORTS);
    if (consts->MaxViewports >= 16) {
-      consts->ViewportBounds.Min = -16384.0;
-      consts->ViewportBounds.Max = 16384.0;
+      if (glsl_feature_level >= 400) {
+         consts->ViewportBounds.Min = -32768.0;
+         consts->ViewportBounds.Max = 32767.0;
+      } else {
+         consts->ViewportBounds.Min = -16384.0;
+         consts->ViewportBounds.Max = 16383.0;
+      }
       extensions->ARB_viewport_array = GL_TRUE;
       extensions->ARB_fragment_layer_viewport = GL_TRUE;
       if (extensions->AMD_vertex_shader_layer)
-- 
cgit v1.2.3


From f5509874aa747167255c2fb739ed44be2445a4c6 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Tue, 8 Sep 2015 10:19:22 -0700
Subject: i965/skl: Use more compact hiz dimensions

I meant to do this here, but it was in the wrong place:

commit c1151b18f2dce7c6f238f057e9c4fa8d912ce6b5
Author: Ben Widawsky <benjamin.widawsky@intel.com>
Date:   Wed Jun 24 20:07:54 2015 -0700

   i965/skl: Use more compact hiz dimensions

NOTE: Jordan did go back and look at the original mailing list post. I mailed
the right thing, and pushed the wrong one.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Neil Roberts <neil@linux.intel.com>
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 64 +++++++++++++--------------
 1 file changed, 32 insertions(+), 32 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 0bcbbbcde8f..19f66b70a59 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1509,23 +1509,21 @@ intel_gen7_hiz_buf_create(struct brw_context *brw,
    /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
     * adjustments required for Z_Height and Z_Width based on multisampling.
     */
-   if (brw->gen < 9) {
-      switch (mt->num_samples) {
-      case 0:
-      case 1:
-         break;
-      case 2:
-      case 4:
-         z_width *= 2;
-         z_height *= 2;
-         break;
-      case 8:
-         z_width *= 4;
-         z_height *= 2;
-         break;
-      default:
-         unreachable("unsupported sample count");
-      }
+   switch (mt->num_samples) {
+   case 0:
+   case 1:
+      break;
+   case 2:
+   case 4:
+      z_width *= 2;
+      z_height *= 2;
+      break;
+   case 8:
+      z_width *= 4;
+      z_height *= 2;
+      break;
+   default:
+      unreachable("unsupported sample count");
    }
 
    const unsigned vertical_align = 8; /* 'j' in the docs */
@@ -1605,21 +1603,23 @@ intel_gen8_hiz_buf_create(struct brw_context *brw,
    /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
     * adjustments required for Z_Height and Z_Width based on multisampling.
     */
-   switch (mt->num_samples) {
-   case 0:
-   case 1:
-      break;
-   case 2:
-   case 4:
-      z_width *= 2;
-      z_height *= 2;
-      break;
-   case 8:
-      z_width *= 4;
-      z_height *= 2;
-      break;
-   default:
-      unreachable("unsupported sample count");
+   if (brw->gen < 9) {
+      switch (mt->num_samples) {
+      case 0:
+      case 1:
+         break;
+      case 2:
+      case 4:
+         z_width *= 2;
+         z_height *= 2;
+         break;
+      case 8:
+         z_width *= 4;
+         z_height *= 2;
+         break;
+      default:
+         unreachable("unsupported sample count");
+      }
    }
 
    const unsigned vertical_align = 8; /* 'j' in the docs */
-- 
cgit v1.2.3


From 6d2ceb10cd63b89892131a27d238620f00922dfb Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 8 Sep 2015 08:36:32 +1000
Subject: r600: don't use shader key without verifying shader type (v2)

Since 7a32652231f96eac14c4bfce02afe77b4132fb77
r600: Turn 'r600_shader_key' struct into union

we were accessing key fields that might be aliased in the union
with other fields, so we should check what shader type we are
compiling for before using key values from it.

v1.1: make it compile
v2: have caffeine, make it work - we don't set type
until later, so don't reference it until we've set it.

Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
Cc: "11.0" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r600/r600_shader.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 78904da13c3..f2c9e169f74 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -143,7 +143,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
 	bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens);
 	unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
 	unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
-	unsigned export_shader = key.vs.as_es;
+	unsigned export_shader;
 
 	shader->shader.bc.isa = rctx->isa;
 
@@ -224,6 +224,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
 		}
 		break;
 	case TGSI_PROCESSOR_VERTEX:
+		export_shader = key.vs.as_es;
 		if (rctx->b.chip_class >= EVERGREEN) {
 			if (export_shader)
 				evergreen_update_es_state(ctx, shader);
@@ -1901,8 +1902,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	ctx.shader = shader;
 	ctx.native_integers = true;
 
-	shader->vs_as_gs_a = key.vs.as_gs_a;
-	shader->vs_as_es = key.vs.as_es;
 
 	r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
 			   rscreen->has_compressed_msaa_texturing);
@@ -1918,9 +1917,14 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	shader->processor_type = ctx.type;
 	ctx.bc->type = shader->processor_type;
 
-	ring_outputs = key.vs.as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY);
+	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
+		shader->vs_as_gs_a = key.vs.as_gs_a;
+		shader->vs_as_es = key.vs.as_es;
+	}
+
+	ring_outputs = shader->vs_as_es || ctx.type == TGSI_PROCESSOR_GEOMETRY;
 
-	if (key.vs.as_es) {
+	if (shader->vs_as_es) {
 		ctx.gs_for_vs = &rctx->gs_shader->current->shader;
 	} else {
 		ctx.gs_for_vs = NULL;
@@ -1941,7 +1945,8 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	shader->nr_ps_color_exports = 0;
 	shader->nr_ps_max_color_exports = 0;
 
-	shader->two_side = key.ps.color_two_side;
+	if (ctx.type == TGSI_PROCESSOR_FRAGMENT)
+		shader->two_side = key.ps.color_two_side;
 
 	/* register allocations */
 	/* Values [0,127] correspond to GPR[0..127].
@@ -2327,7 +2332,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	convert_edgeflag_to_int(&ctx);
 
 	if (ring_outputs) {
-		if (key.vs.as_es) {
+		if (shader->vs_as_es) {
 			ctx.gs_export_gpr_tregs[0] = r600_get_temp(&ctx);
 			ctx.gs_export_gpr_tregs[1] = -1;
 			ctx.gs_export_gpr_tregs[2] = -1;
-- 
cgit v1.2.3


From e8a219ab463a0071f4d9c5d88feeeaa87b2a01f2 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Wed, 26 Aug 2015 10:52:58 -0700
Subject: i965/gen8+: Skip depth stalls on state change

Docs suggest this is no longer required starting with Gen8.

Perf (no regressions in n=20)
OglMultithread       0.67%
OglTerrainPanInst    0.12%
trex                 0.45%
warsow               0.64%

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 src/mesa/drivers/dri/i965/brw_pipe_control.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index 7ee3cb680f7..a2aef8ad2b6 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -193,6 +193,14 @@ brw_emit_depth_stall_flushes(struct brw_context *brw)
 {
    assert(brw->gen >= 6 && brw->gen <= 9);
 
+   /* Starting on BDW, these pipe controls are unnecessary.
+    *
+    *   WM HW will internally manage the draining pipe and flushing of the caches
+    *   when this command is issued. The PIPE_CONTROL restrictions are removed.
+    */
+   if (brw->gen >= 8)
+      return;
+
    brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
    brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_CACHE_FLUSH);
    brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
-- 
cgit v1.2.3


From a9df772e0e76254ab232876016ec09b1fadbd700 Mon Sep 17 00:00:00 2001
From: Chris Forbes <chrisf@ijw.co.nz>
Date: Fri, 7 Nov 2014 00:04:01 +1300
Subject: i965: Add defines for all new Gen7/8 URB opcodes

Tessellation needs to emit URB reads and atomics.

Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_defines.h |  8 +++++++-
 src/mesa/drivers/dri/i965/brw_disasm.c  | 18 +++++++++---------
 2 files changed, 16 insertions(+), 10 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 3c128aee809..0f7feb38469 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1694,7 +1694,13 @@ enum brw_message_target {
 
 #define BRW_URB_OPCODE_WRITE_HWORD  0
 #define BRW_URB_OPCODE_WRITE_OWORD  1
-#define GEN8_URB_OPCODE_SIMD8_WRITE  7
+#define BRW_URB_OPCODE_READ_HWORD   2
+#define BRW_URB_OPCODE_READ_OWORD   3
+#define GEN7_URB_OPCODE_ATOMIC_MOV  4
+#define GEN7_URB_OPCODE_ATOMIC_INC  5
+#define GEN8_URB_OPCODE_ATOMIC_ADD  6
+#define GEN8_URB_OPCODE_SIMD8_WRITE 7
+#define GEN8_URB_OPCODE_SIMD8_READ  8
 
 #define BRW_URB_SWIZZLE_NONE          0
 #define BRW_URB_SWIZZLE_INTERLEAVE    1
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index 61be2b04fee..7401e325638 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -572,15 +572,15 @@ static const char *const gen5_urb_opcode[] = {
 };
 
 static const char *const gen7_urb_opcode[] = {
-   [0] = "write HWord",
-   [1] = "write OWord",
-   [2] = "read HWord",
-   [3] = "read OWord",
-   [4] = "atomic mov",  /* Gen7+ */
-   [5] = "atomic inc",  /* Gen7+ */
-   [6] = "atomic add",  /* Gen8+ */
-   [7] = "SIMD8 write", /* Gen8+ */
-   [8] = "SIMD8 read",  /* Gen8+ */
+   [BRW_URB_OPCODE_WRITE_HWORD] = "write HWord",
+   [BRW_URB_OPCODE_WRITE_OWORD] = "write OWord",
+   [BRW_URB_OPCODE_READ_HWORD] = "read HWord",
+   [BRW_URB_OPCODE_READ_OWORD] = "read OWord",
+   [GEN7_URB_OPCODE_ATOMIC_MOV] = "atomic mov",  /* Gen7+ */
+   [GEN7_URB_OPCODE_ATOMIC_INC] = "atomic inc",  /* Gen7+ */
+   [GEN8_URB_OPCODE_ATOMIC_ADD] = "atomic add",  /* Gen8+ */
+   [GEN8_URB_OPCODE_SIMD8_WRITE] = "SIMD8 write", /* Gen8+ */
+   [GEN8_URB_OPCODE_SIMD8_READ] = "SIMD8 read",  /* Gen8+ */
    /* [9-15] - reserved */
 };
 
-- 
cgit v1.2.3


From 70650094ef89ec0261071862ca1589f0550fb792 Mon Sep 17 00:00:00 2001
From: Chris Forbes <chrisf@ijw.co.nz>
Date: Sun, 7 Dec 2014 15:32:11 +1300
Subject: i965: Add 64-bit dirty flag handling to brw_upload_pull_constants

Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_state.h            | 2 +-
 src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index e75b795a7bc..afce8ad3b27 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -351,7 +351,7 @@ void gen6_init_vtable_surface_functions(struct brw_context *brw);
 /* brw_vs_surface_state.c */
 void
 brw_upload_pull_constants(struct brw_context *brw,
-                          GLbitfield brw_new_constbuf,
+                          GLbitfield64 brw_new_constbuf,
                           const struct gl_program *prog,
                           struct brw_stage_state *stage_state,
                           const struct brw_stage_prog_data *prog_data,
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index fd7e56e50d5..9bb48eb2e27 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -50,7 +50,7 @@
  */
 void
 brw_upload_pull_constants(struct brw_context *brw,
-                          GLbitfield brw_new_constbuf,
+                          GLbitfield64 brw_new_constbuf,
                           const struct gl_program *prog,
                           struct brw_stage_state *stage_state,
                           const struct brw_stage_prog_data *prog_data,
-- 
cgit v1.2.3


From aa18fa30c5e692a0b77aea40a966d4bf99ebe02e Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 24 Jul 2015 21:01:43 -0700
Subject: i965: Fix value of _3DPRIM_TRIFAN_NOSTIPPLE.

TRIFAN_NOSTIPPLE has always been 0x16 - 0x15 is marked "Reserved" on all
platforms.  See the 965 PRM, Volume 2, Table 3-1, "3D Primitive Topology
Type Encoding" for a list.

We don't currently use this, and I don't expect we will, but we may as
well not leave the bogus value around.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_defines.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 0f7feb38469..411a97d733b 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -77,7 +77,7 @@
 #define _3DPRIM_LINESTRIP_CONT    0x12
 #define _3DPRIM_LINESTRIP_BF      0x13
 #define _3DPRIM_LINESTRIP_CONT_BF 0x14
-#define _3DPRIM_TRIFAN_NOSTIPPLE  0x15
+#define _3DPRIM_TRIFAN_NOSTIPPLE  0x16
 
 /* We use this offset to be able to pass native primitive types in struct
  * _mesa_prim::mode.  Native primitive types are BRW_PRIM_OFFSET +
-- 
cgit v1.2.3


From 8fbc4ae330b9ef51bce9023c2f58d1888fb6b796 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 24 Jul 2015 21:03:31 -0700
Subject: i965: Mark topologies with adjacency information as G45+.

These didn't exist on the original 965.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_defines.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 411a97d733b..a8594afa486 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -65,10 +65,10 @@
 #define _3DPRIM_TRIFAN            0x06
 #define _3DPRIM_QUADLIST          0x07
 #define _3DPRIM_QUADSTRIP         0x08
-#define _3DPRIM_LINELIST_ADJ      0x09
-#define _3DPRIM_LINESTRIP_ADJ     0x0A
-#define _3DPRIM_TRILIST_ADJ       0x0B
-#define _3DPRIM_TRISTRIP_ADJ      0x0C
+#define _3DPRIM_LINELIST_ADJ      0x09 /* G45+ */
+#define _3DPRIM_LINESTRIP_ADJ     0x0A /* G45+ */
+#define _3DPRIM_TRILIST_ADJ       0x0B /* G45+ */
+#define _3DPRIM_TRISTRIP_ADJ      0x0C /* G45+ */
 #define _3DPRIM_TRISTRIP_REVERSE  0x0D
 #define _3DPRIM_POLYGON           0x0E
 #define _3DPRIM_RECTLIST          0x0F
-- 
cgit v1.2.3


From d5d74d0b86207cff2820561cc3c8b40546a11bc9 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 3 Aug 2015 16:02:16 -0700
Subject: nir: Add a nir_system_value_from_intrinsic() function.

This converts NIR intrinsics that load system values into Mesa's
SYSTEM_VALUE_* enumerations.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/glsl/nir/nir.c | 34 ++++++++++++++++++++++++++++++++++
 src/glsl/nir/nir.h |  2 ++
 2 files changed, 36 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 3f4bec4625d..ab06ea2bc19 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1404,3 +1404,37 @@ nir_index_ssa_defs(nir_function_impl *impl)
    nir_foreach_block(impl, index_ssa_block, &index);
    impl->ssa_alloc = index;
 }
+
+gl_system_value
+nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
+{
+   switch (intrin) {
+   case nir_intrinsic_load_vertex_id:
+      return SYSTEM_VALUE_VERTEX_ID;
+   case nir_intrinsic_load_instance_id:
+      return SYSTEM_VALUE_INSTANCE_ID;
+   case nir_intrinsic_load_vertex_id_zero_base:
+      return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
+   case nir_intrinsic_load_base_vertex:
+      return SYSTEM_VALUE_BASE_VERTEX;
+   case nir_intrinsic_load_invocation_id:
+      return SYSTEM_VALUE_INVOCATION_ID;
+   case nir_intrinsic_load_front_face:
+      return SYSTEM_VALUE_FRONT_FACE;
+   case nir_intrinsic_load_sample_id:
+      return SYSTEM_VALUE_SAMPLE_ID;
+   case nir_intrinsic_load_sample_pos:
+      return SYSTEM_VALUE_SAMPLE_POS;
+   case nir_intrinsic_load_sample_mask_in:
+      return SYSTEM_VALUE_SAMPLE_MASK_IN;
+   /* FINISHME: Add tessellation intrinsics.
+      return SYSTEM_VALUE_TESS_COORD;
+      return SYSTEM_VALUE_VERTICES_IN;
+      return SYSTEM_VALUE_PRIMITIVE_ID;
+      return SYSTEM_VALUE_TESS_LEVEL_OUTER;
+      return SYSTEM_VALUE_TESS_LEVEL_INNER;
+    */
+   default:
+      unreachable("intrinsic doesn't produce a system value");
+   }
+}
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index e73b7fbc4e1..3c375f30c53 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1842,6 +1842,8 @@ bool nir_opt_undef(nir_shader *shader);
 
 void nir_sweep(nir_shader *shader);
 
+gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
-- 
cgit v1.2.3


From 0cc331dddd1a99c7af3619c92c48b5c32e17f6b3 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 3 Aug 2015 16:10:21 -0700
Subject: i965/nir: Use nir_system_value_from_intrinsic to reduce duplication.

This code is all pretty much identical.  We just needed the translation
from one enum value to the other.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 47 +++++++-----------------------
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 30 +++++--------------
 2 files changed, 17 insertions(+), 60 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 9929dd6a42f..a6c6a2fa8db 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1444,35 +1444,16 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
    case nir_intrinsic_load_vertex_id:
       unreachable("should be lowered by lower_vertex_id()");
 
-   case nir_intrinsic_load_vertex_id_zero_base: {
-      fs_reg vertex_id = nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
-      assert(vertex_id.file != BAD_FILE);
-      dest.type = vertex_id.type;
-      bld.MOV(dest, vertex_id);
-      break;
-   }
-
-   case nir_intrinsic_load_base_vertex: {
-      fs_reg base_vertex = nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
-      assert(base_vertex.file != BAD_FILE);
-      dest.type = base_vertex.type;
-      bld.MOV(dest, base_vertex);
-      break;
-   }
-
-   case nir_intrinsic_load_instance_id: {
-      fs_reg instance_id = nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
-      assert(instance_id.file != BAD_FILE);
-      dest.type = instance_id.type;
-      bld.MOV(dest, instance_id);
-      break;
-   }
-
-   case nir_intrinsic_load_sample_mask_in: {
-      fs_reg sample_mask_in = nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN];
-      assert(sample_mask_in.file != BAD_FILE);
-      dest.type = sample_mask_in.type;
-      bld.MOV(dest, sample_mask_in);
+   case nir_intrinsic_load_vertex_id_zero_base:
+   case nir_intrinsic_load_base_vertex:
+   case nir_intrinsic_load_instance_id:
+   case nir_intrinsic_load_sample_mask_in:
+   case nir_intrinsic_load_sample_id: {
+      gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+      fs_reg val = nir_system_values[sv];
+      assert(val.file != BAD_FILE);
+      dest.type = val.type;
+      bld.MOV(dest, val);
       break;
    }
 
@@ -1485,14 +1466,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
-   case nir_intrinsic_load_sample_id: {
-      fs_reg sample_id = nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
-      assert(sample_id.file != BAD_FILE);
-      dest.type = sample_id.type;
-      bld.MOV(dest, sample_id);
-      break;
-   }
-
    case nir_intrinsic_load_uniform_indirect:
       has_indirect = true;
       /* fallthrough */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index f3dc1123942..751ec73f709 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -533,30 +533,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    case nir_intrinsic_load_vertex_id:
       unreachable("should be lowered by lower_vertex_id()");
 
-   case nir_intrinsic_load_vertex_id_zero_base: {
-      src_reg vertex_id =
-         src_reg(nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]);
-      assert(vertex_id.file != BAD_FILE);
-      dest = get_nir_dest(instr->dest, vertex_id.type);
-      emit(MOV(dest, vertex_id));
-      break;
-   }
-
-   case nir_intrinsic_load_base_vertex: {
-      src_reg base_vertex =
-         src_reg(nir_system_values[SYSTEM_VALUE_BASE_VERTEX]);
-      assert(base_vertex.file != BAD_FILE);
-      dest = get_nir_dest(instr->dest, base_vertex.type);
-      emit(MOV(dest, base_vertex));
-      break;
-   }
-
+   case nir_intrinsic_load_vertex_id_zero_base:
+   case nir_intrinsic_load_base_vertex:
    case nir_intrinsic_load_instance_id: {
-      src_reg instance_id =
-         src_reg(nir_system_values[SYSTEM_VALUE_INSTANCE_ID]);
-      assert(instance_id.file != BAD_FILE);
-      dest = get_nir_dest(instr->dest, instance_id.type);
-      emit(MOV(dest, instance_id));
+      gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+      src_reg val = src_reg(nir_system_values[sv]);
+      assert(val.file != BAD_FILE);
+      dest = get_nir_dest(instr->dest, val.type);
+      emit(MOV(dest, val));
       break;
    }
 
-- 
cgit v1.2.3


From ea69ae04dba501016e9faf5f71095908f011d29d Mon Sep 17 00:00:00 2001
From: Marta Lofstedt <marta.lofstedt@intel.com>
Date: Mon, 10 Aug 2015 13:24:00 +0200
Subject: mesa/es3.1: Enable GL_MAX_VERTEX_ATTRIB enums for GLES 3.1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Marta Lofstedt <marta.lofstedt@linux.intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/mesa/main/get_hash_params.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index dc5ba6fc904..09e1f0e7986 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -458,6 +458,13 @@ descriptor=[
 
 # GL_ARB_explicit_uniform_location / GLES 3.1
   [ "MAX_UNIFORM_LOCATIONS", "CONTEXT_INT(Const.MaxUserAssignableUniformLocations), extra_ARB_explicit_uniform_location" ],
+
+# GL_ARB_vertex_attrib_binding / GLES 3.1
+  [ "MAX_VERTEX_ATTRIB_RELATIVE_OFFSET", "CONTEXT_ENUM(Const.MaxVertexAttribRelativeOffset), NO_EXTRA" ],
+  [ "MAX_VERTEX_ATTRIB_BINDINGS", "CONTEXT_ENUM(Const.MaxVertexAttribBindings), NO_EXTRA" ],
+
+# GL 4.4 / GLES 3.1
+  [ "MAX_VERTEX_ATTRIB_STRIDE", "CONTEXT_ENUM(Const.MaxVertexAttribStride), NO_EXTRA" ],
 ]},
 
 # Enums in OpenGL Core profile and ES 3.1
@@ -766,9 +773,6 @@ descriptor=[
   [ "MAX_GEOMETRY_INPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents), extra_version_32" ],
   [ "MAX_GEOMETRY_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents), extra_version_32" ],
 
-# GL 4.4
-  [ "MAX_VERTEX_ATTRIB_STRIDE", "CONTEXT_ENUM(Const.MaxVertexAttribStride), NO_EXTRA" ],
-
 # GL_ARB_robustness
   [ "RESET_NOTIFICATION_STRATEGY_ARB", "CONTEXT_ENUM(Const.ResetStrategy), NO_EXTRA" ],
 
@@ -806,10 +810,6 @@ descriptor=[
   [ "MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
   [ "MAX_GEOMETRY_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
 
-# GL_ARB_vertex_attrib_binding
-  [ "MAX_VERTEX_ATTRIB_RELATIVE_OFFSET", "CONTEXT_ENUM(Const.MaxVertexAttribRelativeOffset), NO_EXTRA" ],
-  [ "MAX_VERTEX_ATTRIB_BINDINGS", "CONTEXT_ENUM(Const.MaxVertexAttribBindings), NO_EXTRA" ],
-
 # GL_ARB_shader_image_load_store
   [ "MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS", "CONTEXT_INT(Const.MaxCombinedShaderOutputResources), extra_ARB_shader_image_load_store" ],
   [ "MAX_IMAGE_SAMPLES", "CONTEXT_INT(Const.MaxImageSamples), extra_ARB_shader_image_load_store" ],
-- 
cgit v1.2.3


From b8d6de87f6e2d5fc621db573b9ce579b1a27ee56 Mon Sep 17 00:00:00 2001
From: Marta Lofstedt <marta.lofstedt@intel.com>
Date: Mon, 31 Aug 2015 15:04:18 +0200
Subject: mesa: Allow query of GL_VERTEX_BINDING_BUFFER
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

According to the OpenGL ES 3.1 specification (table 20.2) and the
OpenGL 4.4 specification (table 23.4), the glGetIntegeri_v
functions should report the name of the buffer bound
when called with GL_VERTEX_BINDING_BUFFER.

Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/mesa/main/get.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src')

diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 4855187aa6f..d5df53057d7 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -1992,6 +1992,14 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
       v->value_int = ctx->Array.VAO->VertexBinding[VERT_ATTRIB_GENERIC(index)].Stride;
       return TYPE_INT;
 
+   case GL_VERTEX_BINDING_BUFFER:
+      if (ctx->API == API_OPENGLES2 && ctx->Version < 31)
+         goto invalid_enum;
+      if (index >= ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs)
+         goto invalid_value;
+      v->value_int = ctx->Array.VAO->VertexBinding[VERT_ATTRIB_GENERIC(index)].BufferObj->Name;
+      return TYPE_INT;
+
    /* ARB_shader_image_load_store */
    case GL_IMAGE_BINDING_NAME: {
       struct gl_texture_object *t;
-- 
cgit v1.2.3


From b9ea608c1a0f3356f5c4e13bdd7307b726dc42ba Mon Sep 17 00:00:00 2001
From: Boyan Ding <boyan.j.ding@gmail.com>
Date: Tue, 21 Jul 2015 23:43:56 +0800
Subject: egl_dri2: Move filling context_attrib array in a separate function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2: Change return type of the new function from int to bool

Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
---
 src/egl/drivers/dri2/egl_dri2.c | 90 ++++++++++++++++++++++++-----------------
 1 file changed, 54 insertions(+), 36 deletions(-)

(limited to 'src')

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 461735fe9e3..91b5a7cf50f 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -27,6 +27,7 @@
 
 #define WL_HIDE_DEPRECATED
 
+#include <stdbool.h>
 #include <stdint.h>
 #include <stdbool.h>
 #include <stdlib.h>
@@ -902,6 +903,55 @@ dri2_create_context_attribs_error(int dri_error)
    _eglError(egl_error, "dri2_create_context");
 }
 
+static bool
+dri2_fill_context_attribs(struct dri2_egl_context *dri2_ctx,
+                          struct dri2_egl_display *dri2_dpy,
+                          uint32_t *ctx_attribs,
+                          unsigned *num_attribs)
+{
+   int pos = 0;
+
+   assert(*num_attribs >= 8);
+
+   ctx_attribs[pos++] = __DRI_CTX_ATTRIB_MAJOR_VERSION;
+   ctx_attribs[pos++] = dri2_ctx->base.ClientMajorVersion;
+   ctx_attribs[pos++] = __DRI_CTX_ATTRIB_MINOR_VERSION;
+   ctx_attribs[pos++] = dri2_ctx->base.ClientMinorVersion;
+
+   if (dri2_ctx->base.Flags != 0) {
+      /* If the implementation doesn't support the __DRI2_ROBUSTNESS
+       * extension, don't even try to send it the robust-access flag.
+       * It may explode.  Instead, generate the required EGL error here.
+       */
+      if ((dri2_ctx->base.Flags & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) != 0
+            && !dri2_dpy->robustness) {
+         _eglError(EGL_BAD_MATCH, "eglCreateContext");
+         return false;
+      }
+
+      ctx_attribs[pos++] = __DRI_CTX_ATTRIB_FLAGS;
+      ctx_attribs[pos++] = dri2_ctx->base.Flags;
+   }
+
+   if (dri2_ctx->base.ResetNotificationStrategy != EGL_NO_RESET_NOTIFICATION_KHR) {
+      /* If the implementation doesn't support the __DRI2_ROBUSTNESS
+       * extension, don't even try to send it a reset strategy.  It may
+       * explode.  Instead, generate the required EGL error here.
+       */
+      if (!dri2_dpy->robustness) {
+         _eglError(EGL_BAD_CONFIG, "eglCreateContext");
+         return false;
+      }
+
+      ctx_attribs[pos++] = __DRI_CTX_ATTRIB_RESET_STRATEGY;
+      ctx_attribs[pos++] = __DRI_CTX_RESET_LOSE_CONTEXT;
+   }
+
+   *num_attribs = pos;
+
+   return true;
+}
+
 /**
  * Called via eglCreateContext(), drv->API.CreateContext().
  */
@@ -987,44 +1037,12 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
    if (dri2_dpy->dri2) {
       if (dri2_dpy->dri2->base.version >= 3) {
          unsigned error;
-         unsigned num_attribs = 0;
+         unsigned num_attribs = 8;
          uint32_t ctx_attribs[8];
 
-         ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_MAJOR_VERSION;
-         ctx_attribs[num_attribs++] = dri2_ctx->base.ClientMajorVersion;
-         ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_MINOR_VERSION;
-         ctx_attribs[num_attribs++] = dri2_ctx->base.ClientMinorVersion;
-
-         if (dri2_ctx->base.Flags != 0) {
-            /* If the implementation doesn't support the __DRI2_ROBUSTNESS
-             * extension, don't even try to send it the robust-access flag.
-             * It may explode.  Instead, generate the required EGL error here.
-             */
-            if ((dri2_ctx->base.Flags & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) != 0
-                && !dri2_dpy->robustness) {
-               _eglError(EGL_BAD_MATCH, "eglCreateContext");
-               goto cleanup;
-            }
-
-            ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_FLAGS;
-            ctx_attribs[num_attribs++] = dri2_ctx->base.Flags;
-         }
-
-         if (dri2_ctx->base.ResetNotificationStrategy != EGL_NO_RESET_NOTIFICATION_KHR) {
-            /* If the implementation doesn't support the __DRI2_ROBUSTNESS
-             * extension, don't even try to send it a reset strategy.  It may
-             * explode.  Instead, generate the required EGL error here.
-             */
-            if (!dri2_dpy->robustness) {
-               _eglError(EGL_BAD_CONFIG, "eglCreateContext");
-               goto cleanup;
-            }
-
-            ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_RESET_STRATEGY;
-            ctx_attribs[num_attribs++] = __DRI_CTX_RESET_LOSE_CONTEXT;
-         }
-
-         assert(num_attribs <= ARRAY_SIZE(ctx_attribs));
+         if (!dri2_fill_context_attribs(dri2_ctx, dri2_dpy, ctx_attribs,
+                                        &num_attribs))
+            goto cleanup;
 
 	 dri2_ctx->dri_context =
 	    dri2_dpy->dri2->createContextAttribs(dri2_dpy->dri_screen,
-- 
cgit v1.2.3


From 6345d2da60eb6eace23bc9b5a6e606726c3caea2 Mon Sep 17 00:00:00 2001
From: Boyan Ding <boyan.j.ding@gmail.com>
Date: Tue, 21 Jul 2015 23:43:57 +0800
Subject: egl_dri2: Use createContextAttribs if swrast version >= 3

v2: Change return type of the new function from int to bool

Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
---
 src/egl/drivers/dri2/egl_dri2.c | 33 +++++++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 91b5a7cf50f..34348620a47 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -1064,12 +1064,33 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
       }
    } else {
       assert(dri2_dpy->swrast);
-      dri2_ctx->dri_context =
-         dri2_dpy->swrast->createNewContextForAPI(dri2_dpy->dri_screen,
-                                                  api,
-                                                  dri_config,
-                                                  shared,
-                                                  dri2_ctx);
+      if (dri2_dpy->swrast->base.version >= 3) {
+         unsigned error;
+         unsigned num_attribs = 8;
+         uint32_t ctx_attribs[8];
+
+         if (!dri2_fill_context_attribs(dri2_ctx, dri2_dpy, ctx_attribs,
+                                        &num_attribs))
+            goto cleanup;
+
+         dri2_ctx->dri_context =
+            dri2_dpy->swrast->createContextAttribs(dri2_dpy->dri_screen,
+                                                   api,
+                                                   dri_config,
+                                                   shared,
+                                                   num_attribs / 2,
+                                                   ctx_attribs,
+                                                   & error,
+                                                   dri2_ctx);
+         dri2_create_context_attribs_error(error);
+      } else {
+         dri2_ctx->dri_context =
+            dri2_dpy->swrast->createNewContextForAPI(dri2_dpy->dri_screen,
+                                                     api,
+                                                     dri_config,
+                                                     shared,
+                                                     dri2_ctx);
+      }
    }
 
    if (!dri2_ctx->dri_context)
-- 
cgit v1.2.3


From 63c4b7ee1e76efece745fc581ac4990d197824cc Mon Sep 17 00:00:00 2001
From: Boyan Ding <boyan.j.ding@gmail.com>
Date: Tue, 21 Jul 2015 23:43:58 +0800
Subject: egl_dri2: Add support for EGL_KHR_create_context when using swrast

This requires swrast version >= 3. Also EGL_EXT_create_context_robustness
is supported if the __DRI2_ROBUSTNESS extension is found.

Reference: https://bugs.freedesktop.org/show_bug.cgi?id=80821
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
---
 src/egl/drivers/dri2/egl_dri2.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 34348620a47..eb56113a4ea 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -589,7 +589,8 @@ dri2_setup_screen(_EGLDisplay *disp)
                                    __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB))
       disp->Extensions.KHR_gl_colorspace = EGL_TRUE;
 
-   if (dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) {
+   if ((dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) ||
+       (dri2_dpy->swrast && dri2_dpy->swrast->base.version >= 3)) {
       disp->Extensions.KHR_create_context = EGL_TRUE;
 
       if (dri2_dpy->robustness)
-- 
cgit v1.2.3


From c8984a7a4686c2045666d32fbe5733ff5a5c3bd8 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 15 Jul 2015 10:28:05 +0100
Subject: xmlpool: 'promote' LOCALEDIR variable

This is the only place in mesa that uses this construct, which seems
to be a GNUmake-ism. Attempting to build with POSIX make implementations
(bmake) would fail as below.

--- options.h ---
LOCALEDIR := .
sh: line 2: LOCALEDIR: command not found
*** [options.h] Error code 127

So let's keep things consistent and compatible by making the variable
non target specific.

v2:
 - Bring back LOCALEDIR.
 - Reword the commit message
 - Change mesa-stable tag 10.6 > 11.0

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Cc: Jonathan Gray <jsg@jsg.id.au>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/mesa/drivers/dri/common/xmlpool/Makefile.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/common/xmlpool/Makefile.am b/src/mesa/drivers/dri/common/xmlpool/Makefile.am
index a6f1652d105..dfd8fb8dc6d 100644
--- a/src/mesa/drivers/dri/common/xmlpool/Makefile.am
+++ b/src/mesa/drivers/dri/common/xmlpool/Makefile.am
@@ -67,7 +67,7 @@ CLEANFILES = \
 	$(MOS)
 
 # Default target options.h
-options.h: LOCALEDIR := .
+LOCALEDIR := .
 options.h: t_options.h $(MOS)
 	$(AM_V_GEN) $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/gen_xmlpool.py $(srcdir)/t_options.h $(LOCALEDIR) $(LANGS) > options.h
 
-- 
cgit v1.2.3


From d65bd7a7be48d7805f68cd45218794f3e4590408 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Fri, 17 Jul 2015 10:21:06 +0100
Subject: util: automake: rework the format_srgb.c rule

A handful of changes/cleanups paving the way to bmake support:
 - Remove optional $(srcdir)/ prefix for files in the prereq list.
 - Drop the space after the AM_V_GEN variable.
 - Using $< in a non-suffix rule is a GNU make idiom.
 - Use $(@D) over $(dir $@). The former is a POSIX standard.

v2: Cosmetic tweaks in the commit summary.

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Matt Turner <mattst88@gmail.com> (v1)
---
 src/util/Makefile.am | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/util/Makefile.am b/src/util/Makefile.am
index 761023d186f..e05a2c5958c 100644
--- a/src/util/Makefile.am
+++ b/src/util/Makefile.am
@@ -55,5 +55,7 @@ BUILT_SOURCES = $(MESA_UTIL_GENERATED_FILES)
 CLEANFILES = $(BUILT_SOURCES)
 EXTRA_DIST = format_srgb.py SConscript
 
-format_srgb.c: $(srcdir)/format_srgb.py
-	$(AM_V_GEN) $(PYTHON2) $< > $@
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
+format_srgb.c: format_srgb.py
+	$(PYTHON_GEN) $(srcdir)/format_srgb.py > $@
-- 
cgit v1.2.3


From 449ce5d64f3d0e5840287040755df23e86ce6bb2 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 9 Sep 2015 12:14:00 +0100
Subject: mapi: automake: rework the *api/glapi_mapi_tmp.h rules

Same logic as previous commit applies.

v2: Merge with "inline glapi_gen_mapi define" (Matt)

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mapi/Makefile.am | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'src')

diff --git a/src/mapi/Makefile.am b/src/mapi/Makefile.am
index 160a255af6a..307e05d503f 100644
--- a/src/mapi/Makefile.am
+++ b/src/mapi/Makefile.am
@@ -50,19 +50,14 @@ AM_CPPFLAGS =							\
 
 include Makefile.sources
 
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
 glapi_gen_mapi_deps := \
 	mapi_abi.py \
 	$(wildcard glapi/gen/*.xml) \
 	$(wildcard glapi/gen/*.py)
 
-# $(1): path to an XML file
-# $(2): name of the printer
-define glapi_gen_mapi
-@$(MKDIR_P) $(dir $@)
-$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/mapi_abi.py \
-	--mode lib --printer $(2) $(1) > $@
-endef
-
 if HAVE_SHARED_GLAPI
 BUILT_SOURCES += shared-glapi/glapi_mapi_tmp.h
 
@@ -93,7 +88,9 @@ shared_glapi_test_LDADD = \
 endif
 
 shared-glapi/glapi_mapi_tmp.h : glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
-	$(call glapi_gen_mapi,$<,shared-glapi)
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/mapi_abi.py --mode lib --printer shared-glapi \
+		$(srcdir)/glapi/gen/gl_and_es_API.xml > $@
 
 if HAVE_OPENGL
 noinst_LTLIBRARIES = glapi/libglapi.la
@@ -185,7 +182,9 @@ endif
 endif
 
 es1api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
-	$(call glapi_gen_mapi,$<,es1api)
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/mapi_abi.py --mode lib --printer es1api \
+		$(srcdir)/glapi/gen/gl_and_es_API.xml > $@
 
 if HAVE_OPENGL_ES2
 TESTS += es2api/ABI-check
@@ -229,6 +228,8 @@ endif
 endif
 
 es2api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
-	$(call glapi_gen_mapi,$<,es2api)
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/mapi_abi.py --mode lib --printer es2api \
+		$(srcdir)/glapi/gen/gl_and_es_API.xml > $@
 
 include $(top_srcdir)/install-lib-links.mk
-- 
cgit v1.2.3


From 96509aa80429db1884a78fae95c169aa40641e84 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Fri, 17 Jul 2015 10:27:29 +0100
Subject: mapi: automake: rework the source generation rules

Same logic as previous commit applies. Also fix bogus MESA_MAPI_DIR -
the sources are located in the source dir (duh).

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mapi/glapi/gen/Makefile.am | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

(limited to 'src')

diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am
index 9224de2b9aa..7d8dfcb7e2e 100644
--- a/src/mapi/glapi/gen/Makefile.am
+++ b/src/mapi/glapi/gen/Makefile.am
@@ -20,7 +20,7 @@ XORG_INDENT_FLAGS = -linux -bad -bap -blf -bli0 -cbi0 -cdw -nce -cs -i4 -lc80 -p
 
 MESA_DIR = $(top_builddir)/src/mesa
 MESA_GLAPI_DIR = $(top_builddir)/src/mapi/glapi
-MESA_MAPI_DIR = $(top_builddir)/src/mapi
+MESA_MAPI_DIR = $(top_srcdir)/src/mapi
 MESA_GLX_DIR = $(top_builddir)/src/glx
 
 MESA_GLAPI_OUTPUTS = \
@@ -210,7 +210,7 @@ COMMON = $(API_XML) \
 
 COMMON_GLX = $(COMMON) glX_API.xml glX_XML.py glX_proto_common.py
 
-PYTHON_GEN = $(AM_V_GEN) $(PYTHON2) $(PYTHON_FLAGS)
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
 
 ######################################################################
 
@@ -242,65 +242,65 @@ $(XORG_GLAPI_DIR)/%.h: $(MESA_GLAPI_DIR)/%.h
 ######################################################################
 
 $(MESA_GLAPI_DIR)/glapi_mapi_tmp.h: $(MESA_MAPI_DIR)/mapi_abi.py $(COMMON)
-	$(PYTHON_GEN) $< \
+	$(PYTHON_GEN) $(MESA_MAPI_DIR)/mapi_abi.py \
 		--printer glapi --mode lib $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_GLAPI_DIR)/glprocs.h: gl_procs.py $(COMMON)
-	$(PYTHON_GEN) $< -c -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_procs.py -c -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_GLAPI_DIR)/glapitemp.h: gl_apitemp.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_apitemp.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_GLAPI_DIR)/glapitable.h: gl_table.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_table.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_GLAPI_DIR)/glapi_gentable.c: gl_gentable.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_gentable.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 ######################################################################
 
 $(MESA_GLAPI_DIR)/glapi_x86.S: gl_x86_asm.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_x86_asm.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_GLAPI_DIR)/glapi_x86-64.S: gl_x86-64_asm.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_x86-64_asm.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_GLAPI_DIR)/glapi_sparc.S: gl_SPARC_asm.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_SPARC_asm.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 ######################################################################
 
 $(MESA_DIR)/main/enums.c: gl_enums.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_enums.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_DIR)/main/api_exec.c: gl_genexec.py apiexec.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_genexec.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_DIR)/main/dispatch.h: gl_table.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml -m remap_table > $@
+	$(PYTHON_GEN) $(srcdir)/gl_table.py -f $(srcdir)/gl_and_es_API.xml -m remap_table > $@
 
 $(MESA_DIR)/main/remap_helper.h: remap_helper.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/remap_helper.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 ######################################################################
 
 $(MESA_GLX_DIR)/indirect.c: glX_proto_send.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m proto \
+	$(PYTHON_GEN) $(srcdir)/glX_proto_send.py -f $(srcdir)/gl_API.xml -m proto \
 	  | $(INDENT) $(INDENT_FLAGS) > $@
 
 $(MESA_GLX_DIR)/indirect.h: glX_proto_send.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m init_h > $@
+	$(PYTHON_GEN) $(srcdir)/glX_proto_send.py -f $(srcdir)/gl_API.xml -m init_h > $@
 
 $(MESA_GLX_DIR)/indirect_init.c: glX_proto_send.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m init_c > $@
+	$(PYTHON_GEN) $(srcdir)/glX_proto_send.py -f $(srcdir)/gl_API.xml -m init_c > $@
 
 $(MESA_GLX_DIR)/indirect_size.h $(XORG_GLX_DIR)/indirect_size.h: glX_proto_size.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m size_h --only-set \
+	$(PYTHON_GEN) $(srcdir)/glX_proto_size.py -f $(srcdir)/gl_API.xml -m size_h --only-set \
 	    --header-tag _INDIRECT_SIZE_H_ \
 	  | $(INDENT) $(INDENT_FLAGS) > $@
 
 $(MESA_GLX_DIR)/indirect_size.c: glX_proto_size.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m size_c --only-set \
+	$(PYTHON_GEN) $(srcdir)/glX_proto_size.py -f $(srcdir)/gl_API.xml -m size_c --only-set \
 	  | $(INDENT) $(INDENT_FLAGS) > $@
 
 ######################################################################
-- 
cgit v1.2.3


From fd913f47b7fcc724d8d191f2752f328d037abb20 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Fri, 17 Jul 2015 10:44:30 +0100
Subject: mesa: automake: rework the source generation rules

Same logic as previous commit applies.

Additionally remove the odd (set -e/mv/INDENT) from the rules.
The last one is the only one we remotely care about, if reading the
generated sources.

Upcoming work from DylanB will replace the existing python
scripts with ones that produce more readable output anyway.

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/Makefile.am | 45 ++++++++++++++++++---------------------------
 1 file changed, 18 insertions(+), 27 deletions(-)

(limited to 'src')

diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am
index eb4a3da3c84..69506f2a3fd 100644
--- a/src/mesa/Makefile.am
+++ b/src/mesa/Makefile.am
@@ -90,37 +90,24 @@ CLEANFILES = \
 	program/program_parse.tab.h \
 	main/git_sha1.h.tmp
 
-GET_HASH_GEN = main/get_hash_generator.py
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
 
-main/get_hash.h: ../mapi/glapi/gen/gl_and_es_API.xml main/get_hash_params.py 	\
-		 $(GET_HASH_GEN)
-	$(AM_V_GEN)set -e;						\
-	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/$(GET_HASH_GEN)		\
-		-f $< > $@.tmp;						\
-	mv $@.tmp $@;
+main/get_hash.h: ../mapi/glapi/gen/gl_and_es_API.xml main/get_hash_params.py \
+                 main/get_hash_generator.py
+	$(PYTHON_GEN) $(srcdir)/main/get_hash_generator.py \
+		-f $(srcdir)/../mapi/glapi/gen/gl_and_es_API.xml > $@
 
-main/format_info.h: main/formats.csv                                    \
+main/format_info.h: main/formats.csv \
                     main/format_parser.py main/format_info.py
-	$(AM_V_GEN)set -e;						\
-	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/main/format_info.py        \
-                   $< > $@.tmp;                                         \
-	mv $@.tmp $@;
+	$(PYTHON_GEN) $(srcdir)/main/format_info.py $(srcdir)/main/formats.csv > $@
 
-main/format_pack.c: main/format_pack.py main/formats.csv		\
+main/format_pack.c: main/format_pack.py main/formats.csv \
                     main/format_parser.py
-	$(AM_V_GEN)set -e;						\
-	$(PYTHON2) $(PYTHON_FLAGS)					\
-			$(srcdir)/main/format_pack.py			\
-			$(srcdir)/main/formats.csv			\
-		| $(INDENT) $(INDENT_FLAGS) > $@;
+	$(PYTHON_GEN) $(srcdir)/main/format_pack.py $(srcdir)/main/formats.csv > $@
 
 main/format_unpack.c: main/format_unpack.py main/formats.csv	\
                       main/format_parser.py
-	$(AM_V_GEN)set -e;						\
-	$(PYTHON2) $(PYTHON_FLAGS)					\
-			$(srcdir)/main/format_unpack.py			\
-			$(srcdir)/main/formats.csv			\
-		| $(INDENT) $(INDENT_FLAGS) > $@;
+	$(PYTHON_GEN) $(srcdir)/main/format_unpack.py $(srcdir)/main/formats.csv > $@
 
 main/formats.c: main/format_info.h
 
@@ -201,13 +188,17 @@ libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_CFLAGS)
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = gl.pc
 
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+YACC_GEN = $(AM_V_GEN)$(YACC) $(YFLAGS)
+LEX_GEN = $(AM_V_GEN)$(LEX) $(LFLAGS)
+
 program/lex.yy.c: program/program_lexer.l
-	$(AM_V_at)$(MKDIR_P) program
-	$(AM_V_GEN) $(LEX) --never-interactive --outfile=$@ $<
+	$(MKDIR_GEN)
+	$(LEX_GEN) -o $@ $(srcdir)/program/program_lexer.l
 
 program/program_parse.tab.c program/program_parse.tab.h: program/program_parse.y
-	$(AM_V_at)$(MKDIR_P) program
-	$(AM_V_GEN) $(YACC) -p "_mesa_program_" -v -d --output=program/program_parse.tab.c $<
+	$(MKDIR_GEN)
+	$(YACC_GEN) -o $@ -p "_mesa_program_" --defines=$(builddir)/program/program_parse.tab.h $(srcdir)/program/program_parse.y
 
 if GEN_ASM_OFFSETS
 matypes.h: $(gen_matypes_SOURCES)
-- 
cgit v1.2.3


From 9e0594418d8fa47e19bfe57450198d3fa7d087a0 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 9 Sep 2015 12:28:37 +0100
Subject: glsl: automake: rework the sources generation rules

The glsl equivalent of "mesa: automake: rework the source generation
rules". Plus let's make things consistent and always explicitly provide
the header name.

v2: Rebase on top of reverted "remove custom AM_V_LEX/YACC" (Matt)

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/glsl/Makefile.am | 38 ++++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

(limited to 'src')

diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am
index 98dcb37fc74..9a7f33d7134 100644
--- a/src/glsl/Makefile.am
+++ b/src/glsl/Makefile.am
@@ -197,19 +197,23 @@ am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT_VERBOSITY))
 am__v_YACC_0 = @echo "  YACC    " $@;
 am__v_YACC_1 =
 
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
+LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
+
 glsl_parser.cpp glsl_parser.h: glsl_parser.yy
-	$(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $<
+	$(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $(srcdir)/glsl_parser.yy
 
 glsl_lexer.cpp: glsl_lexer.ll
-	$(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
+	$(LEX_GEN) -o $@ $(srcdir)/glsl_lexer.ll
 
 glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y
-	$(AM_V_at)$(MKDIR_P) glcpp
-	$(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $<
+	$(MKDIR_GEN)
+	$(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $(srcdir)/glcpp/glcpp-parse.y
 
 glcpp/glcpp-lex.c: glcpp/glcpp-lex.l
-	$(AM_V_at)$(MKDIR_P) glcpp
-	$(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
+	$(MKDIR_GEN)
+	$(LEX_GEN) -o $@ $(srcdir)/glcpp/glcpp-lex.l
 
 # Only the parsers (specifically the header files generated at the same time)
 # need to be in BUILT_SOURCES. Though if we list the parser headers YACC is
@@ -239,22 +243,24 @@ dist-hook:
 	$(RM) glcpp/tests/*.out
 	$(RM) glcpp/tests/subtest*/*.out
 
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
 nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
-	$(AM_V_at)$(MKDIR_P) nir
-	$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
 
 nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py nir/nir_constant_expressions.h
-	$(AM_V_at)$(MKDIR_P) nir
-	$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_constant_expressions.py > $@
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@
 
 nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
-	$(AM_V_at)$(MKDIR_P) nir
-	$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_h.py > $@
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_h.py > $@
 
 nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
-	$(AM_V_at)$(MKDIR_P) nir
-	$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_c.py > $@
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_c.py > $@
 
 nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py
-	$(AM_V_at)$(MKDIR_P) nir
-	$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opt_algebraic.py > $@
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@
-- 
cgit v1.2.3


From da5e4559ee3b239d2483645ed54b35aa6628fbaf Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Fri, 17 Jul 2015 13:28:00 +0100
Subject: glsl: automake: reuse $(NIR_GENERATED_FILES) where possible

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/glsl/Makefile.am | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am
index 9a7f33d7134..831849859ac 100644
--- a/src/glsl/Makefile.am
+++ b/src/glsl/Makefile.am
@@ -226,11 +226,7 @@ BUILT_SOURCES =						\
 	glsl_lexer.cpp					\
 	glcpp/glcpp-parse.c				\
 	glcpp/glcpp-lex.c				\
-	nir/nir_builder_opcodes.h				\
-	nir/nir_constant_expressions.c			\
-	nir/nir_opcodes.c				\
-	nir/nir_opcodes.h				\
-	nir/nir_opt_algebraic.c
+	$(NIR_GENERATED_FILES)
 CLEANFILES =						\
 	glcpp/glcpp-parse.h				\
 	glsl_parser.h					\
-- 
cgit v1.2.3


From a3b05e04921a4fcc05cfc994e415e3ceb39fd184 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Fri, 17 Jul 2015 13:30:51 +0100
Subject: glsl: build: use makefile.sources variables when possible

Rather than folding one variable within the other only to unwrap them,
just use the ones we need.

v2: bring back LOCAL_PATH prefix for nir_constant_expressions.h

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Matt Turner <mattst88@gmail.com> (v1)
---
 src/glsl/Android.gen.mk   | 19 +++++--------------
 src/glsl/Makefile.am      |  7 +++++--
 src/glsl/Makefile.sources |  3 +--
 3 files changed, 11 insertions(+), 18 deletions(-)

(limited to 'src')

diff --git a/src/glsl/Android.gen.mk b/src/glsl/Android.gen.mk
index 0835871f912..798884f2602 100644
--- a/src/glsl/Android.gen.mk
+++ b/src/glsl/Android.gen.mk
@@ -29,18 +29,7 @@ endif
 
 intermediates := $(call local-generated-sources-dir)
 
-sources := \
-	glsl_lexer.cpp \
-	glsl_parser.cpp \
-	glcpp/glcpp-lex.c \
-	glcpp/glcpp-parse.c \
-	nir/nir_builder_opcodes.h \
-	nir/nir_constant_expressions.c \
-	nir/nir_opcodes.c \
-	nir/nir_opcodes.h \
-	nir/nir_opt_algebraic.c
-
-LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES))
+LOCAL_SRC_FILES := $(LOCAL_SRC_FILES)
 
 LOCAL_C_INCLUDES += \
 	$(intermediates)/glcpp \
@@ -51,8 +40,10 @@ LOCAL_C_INCLUDES += \
 LOCAL_EXPORT_C_INCLUDE_DIRS += \
 	$(intermediates)/nir
 
-sources := $(addprefix $(intermediates)/, $(sources))
-LOCAL_GENERATED_SOURCES += $(sources)
+LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \
+	$(LIBGLCPP_GENERATED_FILES) \
+	$(NIR_GENERATED_FILES) \
+	$(LIBGLSL_GENERATED_CXX_FILES))
 
 define local-l-or-ll-to-c-or-cpp
 	@mkdir -p $(dir $@)
diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am
index 831849859ac..7dfd60404ee 100644
--- a/src/glsl/Makefile.am
+++ b/src/glsl/Makefile.am
@@ -140,13 +140,16 @@ libglsl_la_SOURCES =					\
 	glsl_parser.cpp					\
 	glsl_parser.h					\
 	$(LIBGLSL_FILES)				\
-	$(NIR_FILES)
+	$(NIR_FILES)					\
+	$(NIR_GENERATED_FILES)
+
 
 libnir_la_SOURCES =					\
 	glsl_types.cpp					\
 	builtin_types.cpp				\
 	glsl_symbol_table.cpp				\
-	$(NIR_FILES)
+	$(NIR_FILES)					\
+	$(NIR_GENERATED_FILES)
 
 glsl_compiler_SOURCES = \
 	$(GLSL_COMPILER_CXX_FILES)
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index c422303ddac..8f468e10ee3 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -73,8 +73,7 @@ NIR_FILES = \
 	nir/nir_vla.h \
 	nir/nir_worklist.c \
 	nir/nir_worklist.h \
-	nir/nir_types.cpp \
-	$(NIR_GENERATED_FILES)
+	nir/nir_types.cpp
 
 # libglsl
 
-- 
cgit v1.2.3


From c373eaedfc09ff2af7002b64ba0ae8ba71df86a1 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 9 Sep 2015 12:40:03 +0100
Subject: glsl: build: remove bogus dependency

v2: rebase on top of the previous commit - don't touch the LOCAL_PATH
prefix for nir_constant_expressions.h

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/glsl/Android.gen.mk | 3 +--
 src/glsl/Makefile.am    | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/glsl/Android.gen.mk b/src/glsl/Android.gen.mk
index 798884f2602..6898fb0d492 100644
--- a/src/glsl/Android.gen.mk
+++ b/src/glsl/Android.gen.mk
@@ -93,8 +93,7 @@ $(intermediates)/nir/nir_builder_opcodes.h: $(nir_builder_opcodes_deps)
 nir_constant_expressions_gen := $(LOCAL_PATH)/nir/nir_constant_expressions.py
 nir_constant_expressions_deps := \
 	$(LOCAL_PATH)/nir/nir_opcodes.py \
-	$(LOCAL_PATH)/nir/nir_constant_expressions.py \
-	$(LOCAL_PATH)/nir/nir_constant_expressions.h
+	$(LOCAL_PATH)/nir/nir_constant_expressions.py
 
 $(intermediates)/nir/nir_constant_expressions.c: $(nir_constant_expressions_deps)
 	@mkdir -p $(dir $@)
diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am
index 7dfd60404ee..1aa9caa3b5a 100644
--- a/src/glsl/Makefile.am
+++ b/src/glsl/Makefile.am
@@ -248,7 +248,7 @@ nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
 	$(MKDIR_GEN)
 	$(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
 
-nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py nir/nir_constant_expressions.h
+nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py
 	$(MKDIR_GEN)
 	$(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@
 
-- 
cgit v1.2.3


From 0d39279448bbda6e824bcfd4997b4583bc0481af Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Fri, 17 Jul 2015 10:52:35 +0100
Subject: auxiliary: rework the python generated sources rules

There are a few bits this commit aims to resolve:

One can generalise the mkdir rule to a simple MKDIR_P $(@D) which will
expand appropriately even if we change the subdir name, and/or add
new rules. We can also drop the explicit $(srcdir) prefix for the
dependency rules, as they are not strictly required, nor used
elsewhere in mesa.

Finally replace $< with explicit filename to be consistent through the
file, and honour PYTHON_FLAGS.

v2: Add comprehensive commit summary/message (Ian, Matt)

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/gallium/auxiliary/Makefile.am | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am
index 04f77d002c8..a728162bd9d 100644
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -38,18 +38,23 @@ libgallium_la_SOURCES += \
 
 endif
 
-indices/u_indices_gen.c: $(srcdir)/indices/u_indices_gen.py
-	$(AM_V_at)$(MKDIR_P) indices
-	$(AM_V_GEN) $(PYTHON2) $< > $@
-
-indices/u_unfilled_gen.c: $(srcdir)/indices/u_unfilled_gen.py
-	$(AM_V_at)$(MKDIR_P) indices
-	$(AM_V_GEN) $(PYTHON2) $< > $@
-
-util/u_format_table.c: $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format_pack.py $(srcdir)/util/u_format_parse.py $(srcdir)/util/u_format.csv
-	$(AM_V_at)$(MKDIR_P) util
-	$(AM_V_GEN) $(PYTHON2) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@
-
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+PYTHON_GEN =  $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
+indices/u_indices_gen.c: indices/u_indices_gen.py
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/indices/u_indices_gen.py > $@
+
+indices/u_unfilled_gen.c: indices/u_unfilled_gen.py
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/indices/u_unfilled_gen.py > $@
+
+util/u_format_table.c: util/u_format_table.py \
+                       util/u_format_pack.py \
+                       util/u_format_parse.py \
+                       util/u_format.csv
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@
 
 noinst_LTLIBRARIES += libgalliumvl_stub.la
 libgalliumvl_stub_la_SOURCES = \
-- 
cgit v1.2.3


From d9df8c2fa274629de689c7e364c3c1a65f0b7d0c Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 9 Sep 2015 13:22:23 +0100
Subject: svga: pick all the files into the tarball

Signed-off-by: Emil Velikov <emil.velikov@collabora.co.uk>
---
 src/gallium/drivers/svga/Makefile.sources | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/svga/Makefile.sources b/src/gallium/drivers/svga/Makefile.sources
index 0bd4f289781..5c022f437ad 100644
--- a/src/gallium/drivers/svga/Makefile.sources
+++ b/src/gallium/drivers/svga/Makefile.sources
@@ -14,6 +14,7 @@ C_SOURCES := \
 	svga_format.h \
 	svga_hw_reg.h \
 	svga_link.c \
+	svga_link.h \
 	svga_pipe_blend.c \
 	svga_pipe_blit.c \
 	svga_pipe_clear.c \
@@ -27,7 +28,7 @@ C_SOURCES := \
 	svga_pipe_query.c \
 	svga_pipe_rasterizer.c \
 	svga_pipe_sampler.c \
-        svga_pipe_streamout.c \
+	svga_pipe_streamout.c \
 	svga_pipe_vertex.c \
 	svga_pipe_vs.c \
 	svga_public.h \
@@ -51,15 +52,16 @@ C_SOURCES := \
 	svga_state.h \
 	svga_state_constants.c \
 	svga_state_framebuffer.c \
+	svga_state_fs.c \
+	svga_state_gs.c \
 	svga_state_need_swtnl.c \
 	svga_state_rss.c \
-	svga_state_tss.c \
-	svga_state_vdecl.c \
 	svga_state_sampler.c \
-	svga_state_fs.c \
-	svga_state_gs.c \
 	svga_state_tgsi_transform.c \
+	svga_state_tss.c \
+	svga_state_vdecl.c \
 	svga_state_vs.c \
+	svga_streamout.h \
 	svga_surface.c \
 	svga_surface.h \
 	svga_swtnl_backend.c \
@@ -82,3 +84,22 @@ C_SOURCES := \
 	svgadump/svga_shader.h \
 	svgadump/svga_shader_op.c \
 	svgadump/svga_shader_op.h
+
+SVGA_H_FILES := \
+	include/includeCheck.h \
+	include/svga3d_caps.h \
+	include/svga3d_cmd.h \
+	include/svga3d_devcaps.h \
+	include/svga3d_dx.h \
+	include/svga3d_limits.h \
+	include/svga3d_reg.h \
+	include/svga3d_shaderdefs.h \
+	include/svga3d_surfacedefs.h \
+	include/svga3d_types.h \
+	include/svga_escape.h \
+	include/svga_overlay.h \
+	include/svga_reg.h \
+	include/svga_types.h \
+	include/VGPU10ShaderTokens.h \
+	include/vmware_pack_begin.h \
+	include/vmware_pack_end.h
-- 
cgit v1.2.3


From e838d91b94c3d1d20db62a61bfd9163f675d3139 Mon Sep 17 00:00:00 2001
From: Mauro Rossi <issor.oruam@gmail.com>
Date: Fri, 21 Aug 2015 23:46:27 +0200
Subject: nouveau: android: add space before PRIx64 macro
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Otherwise the android build fails with

   error : unable to find string literal operator ‘operator"" PRIx64’

There are several resources referring to the problem, which is related
to c++11, in our case used when building mesa for lollipop.

http://comments.gmane.org/gmane.comp.graphics.opensg.user/5883

I've not investigated all the semantics, some people even suggested a
bug in the gcc compiler,
I just saw the build error was solved with one little space for
lollipop, with no side effect when c++11 is not used.

v2: [Emil Velikov] add an alternative commit message from Mauro.

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 9ebdc6586db..5f30f3d354b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -411,7 +411,7 @@ int ImmediateValue::print(char *buf, size_t size, DataType ty) const
    case TYPE_U64:
    case TYPE_S64:
    default:
-      PRINT("0x%016"PRIx64, reg.data.u64);
+      PRINT("0x%016" PRIx64, reg.data.u64);
       break;
    }
    return pos;
-- 
cgit v1.2.3


From c12ffb30b4a53eda55ef8f541b760c309c488e66 Mon Sep 17 00:00:00 2001
From: Mauro Rossi <issor.oruam@gmail.com>
Date: Sun, 6 Sep 2015 14:15:38 +0200
Subject: android: radeonsi: add support for sid_tables.h generated sources

This patch is necessary to avoid building error on android,
due to missing sid_tables.h generated sources

v2:[Emil Velikov] Correctly split the lists.

Fixes: fbbebeae10f(radeonsi: inline si_cmd_context_control)
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/gallium/drivers/radeonsi/Android.mk       | 12 +++++++++++-
 src/gallium/drivers/radeonsi/Makefile.am      |  2 +-
 src/gallium/drivers/radeonsi/Makefile.sources |  4 +++-
 3 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/Android.mk b/src/gallium/drivers/radeonsi/Android.mk
index 57f3bef36b0..b469aca946e 100644
--- a/src/gallium/drivers/radeonsi/Android.mk
+++ b/src/gallium/drivers/radeonsi/Android.mk
@@ -23,7 +23,7 @@
 
 LOCAL_PATH := $(call my-dir)
 
-# get C_SOURCES
+# get C_SOURCES and GENERATED_SOURCES
 include $(LOCAL_PATH)/Makefile.sources
 
 include $(CLEAR_VARS)
@@ -33,5 +33,15 @@ LOCAL_SRC_FILES := $(C_SOURCES)
 LOCAL_SHARED_LIBRARIES := libdrm libdrm_radeon
 LOCAL_MODULE := libmesa_pipe_radeonsi
 
+# generate sources
+intermediates := $(call local-generated-sources-dir)
+LOCAL_GENERATED_SOURCES := $(addprefix $(intermediates)/, $(GENERATED_SOURCES))
+
+$(LOCAL_GENERATED_SOURCES): PRIVATE_PYTHON := $(MESA_PYTHON2)
+$(LOCAL_GENERATED_SOURCES): PRIVATE_CUSTOM_TOOL = $(PRIVATE_PYTHON) $^ > $@
+
+$(intermediates)/sid_tables.h:  $(intermediates)/%.h: $(LOCAL_PATH)/%.py $(LOCAL_PATH)/sid.h
+	$(transform-generated-source)
+
 include $(GALLIUM_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)
diff --git a/src/gallium/drivers/radeonsi/Makefile.am b/src/gallium/drivers/radeonsi/Makefile.am
index ae5035a8937..c506666e3c8 100644
--- a/src/gallium/drivers/radeonsi/Makefile.am
+++ b/src/gallium/drivers/radeonsi/Makefile.am
@@ -30,7 +30,7 @@ AM_CFLAGS = \
 
 noinst_LTLIBRARIES = libradeonsi.la
 
-libradeonsi_la_SOURCES = $(C_SOURCES)
+libradeonsi_la_SOURCES = $(C_SOURCES) $(GENERATED_SOURCES)
 
 sid_tables.h: $(srcdir)/sid_tables.py $(srcdir)/sid.h
 	$(AM_V_GEN) $(PYTHON2) $(srcdir)/sid_tables.py $(srcdir)/sid.h > $@
diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources
index 5f5eac12be0..7e997c6d526 100644
--- a/src/gallium/drivers/radeonsi/Makefile.sources
+++ b/src/gallium/drivers/radeonsi/Makefile.sources
@@ -6,7 +6,6 @@ C_SOURCES := \
 	si_debug.c \
 	si_descriptors.c \
 	sid.h \
-	sid_tables.h \
 	si_dma.c \
 	si_hw_context.c \
 	si_pipe.c \
@@ -21,3 +20,6 @@ C_SOURCES := \
 	si_state_shaders.c \
 	si_state.h \
 	si_uvd.c
+
+GENERATED_SOURCES := \
+	sid_tables.h
-- 
cgit v1.2.3


From 30a915bd174a4a4dd950870eca19dde45bbd524b Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 3 Sep 2015 14:53:03 -0400
Subject: gallium/docs: clarify dmabuf fd ownership

Since debugging issues w/ fd's close()d at the wrong time can be quite
fun, this should probably be made more explicit in the docs.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/include/pipe/p_screen.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src')

diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
index a7b7b72ac89..a22fb938dbb 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -173,6 +173,10 @@ struct pipe_screen {
     * Create a texture from a winsys_handle. The handle is often created in
     * another process by first creating a pipe texture and then calling
     * resource_get_handle.
+    *
+    * NOTE: in the case of DRM_API_HANDLE_TYPE_FD handles, the caller
+    * retains ownership of the FD.  (This is consistent with
+    * EGL_EXT_image_dma_buf_import)
     */
    struct pipe_resource * (*resource_from_handle)(struct pipe_screen *,
 						  const struct pipe_resource *templat,
@@ -190,6 +194,10 @@ struct pipe_screen {
     * Get a winsys_handle from a texture. Some platforms/winsys requires
     * that the texture is created with a special usage flag like
     * DISPLAYTARGET or PRIMARY.
+    *
+    * NOTE: in the case of DRM_API_HANDLE_TYPE_FD handles, the caller
+    * takes ownership of the FD.  (This is consistent with
+    * EGL_MESA_image_dma_buf_export)
     */
    boolean (*resource_get_handle)(struct pipe_screen *,
 				  struct pipe_resource *tex,
-- 
cgit v1.2.3


From ac066bf65cb585a4f6b4a2fb1d055b033f2b94ae Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 9 Sep 2015 15:52:08 +0200
Subject: nv30: Fix color resolving for nv3x cards

We do not have a generic blitter on nv3x cards, so we must use the
sifm object for color resolving.

This commit divides the source and dest surfaces into tiles which
match the constraints of the sifm object, so that color resolving
will work properly on nv3x cards.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/nouveau/nv30/nv30_miptree.c | 38 ++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
index 76bb8b817fc..735c718c4b3 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
@@ -149,14 +149,50 @@ static void
 nv30_resource_resolve(struct nv30_context *nv30,
                       const struct pipe_blit_info *info)
 {
+   struct nv30_miptree *src_mt = nv30_miptree(info->src.resource);
    struct nv30_rect src, dst;
+   unsigned x, x0, x1, y, y1, w, h;
 
    define_rect(info->src.resource, 0, info->src.box.z, info->src.box.x,
       info->src.box.y, info->src.box.width, info->src.box.height, &src);
    define_rect(info->dst.resource, 0, info->dst.box.z, info->dst.box.x,
       info->dst.box.y, info->dst.box.width, info->dst.box.height, &dst);
 
-   nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
+   x0 = src.x0;
+   x1 = src.x1;
+   y1 = src.y1;
+
+   /* On nv3x we must use sifm which is restricted to 1024x1024 tiles */
+   for (y = src.y0; y < y1; y += h) {
+      h = y1 - y;
+      if (h > 1024)
+         h = 1024;
+
+      src.y0 = 0;
+      src.y1 = h;
+      src.h = h;
+
+      dst.y1 = dst.y0 + (h >> src_mt->ms_y);
+      dst.h = h >> src_mt->ms_y;
+
+      for (x = x0; x < x1; x += w) {
+         w = x1 - x;
+         if (w > 1024)
+            w = 1024;
+
+         src.offset = y * src.pitch + x * src.cpp;
+         src.x0 = 0;
+         src.x1 = w;
+         src.w = w;
+
+         dst.offset = (y >> src_mt->ms_y) * dst.pitch +
+                      (x >> src_mt->ms_x) * dst.cpp;
+         dst.x1 = dst.x0 + (w >> src_mt->ms_x);
+         dst.w = w >> src_mt->ms_x;
+
+         nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
+      }
+   }
 }
 
 void
-- 
cgit v1.2.3


From 3e9df0e3af7a8a84147ae48f588e9c435bf65b98 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 9 Sep 2015 15:52:09 +0200
Subject: nv30: Disable msaa unless requested from the env by NV30_MAX_MSAA

Some modern apps try to use msaa without keeping in mind the
restrictions on videomem of older cards. Resulting in dmesg saying:

 [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate
 [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list
 [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12

Because we are running out of video memory, after which the program
using the msaa visual freezes, and eventually the entire system freezes.

To work around this we do not allow msaa visuals by default and allow
the user to override this via NV30_MAX_MSAA.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
[imirkin: move env var lookup to screen so that it's only done once]
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/nouveau/nv30/nv30_screen.c | 20 +++++++++++++++++++-
 src/gallium/drivers/nouveau/nv30/nv30_screen.h |  2 ++
 2 files changed, 21 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 7aad26ba18b..efa3a59f450 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -319,8 +319,9 @@ nv30_screen_is_format_supported(struct pipe_screen *pscreen,
                                 unsigned sample_count,
                                 unsigned bindings)
 {
-   if (sample_count > 4)
+   if (sample_count > nv30_screen(pscreen)->max_sample_count)
       return false;
+
    if (!(0x00000017 & (1 << sample_count)))
       return false;
 
@@ -450,6 +451,23 @@ nv30_screen_create(struct nouveau_device *dev)
       return NULL;
    }
 
+   /*
+    * Some modern apps try to use msaa without keeping in mind the
+    * restrictions on videomem of older cards. Resulting in dmesg saying:
+    * [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate
+    * [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list
+    * [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12
+    *
+    * Because we are running out of video memory, after which the program
+    * using the msaa visual freezes, and eventually the entire system freezes.
+    *
+    * To work around this we do not allow msaa visuals by default and allow
+    * the user to override this via NV30_MAX_MSAA.
+    */
+   screen->max_sample_count = debug_get_num_option("NV30_MAX_MSAA", 0);
+   if (screen->max_sample_count > 4)
+      screen->max_sample_count = 4;
+
    pscreen = &screen->base.base;
    pscreen->destroy = nv30_screen_destroy;
    pscreen->get_param = nv30_screen_get_param;
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.h b/src/gallium/drivers/nouveau/nv30/nv30_screen.h
index 7b17b88097c..df11233d07a 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.h
@@ -38,6 +38,8 @@ struct nv30_screen {
    /*XXX: nvfx state */
    struct nouveau_heap *vp_exec_heap;
    struct nouveau_heap *vp_data_heap;
+
+   unsigned max_sample_count;
 };
 
 static inline struct nv30_screen *
-- 
cgit v1.2.3


From f5e08ab6b1351c41000fd2f1a16c1273d2f74d40 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Fri, 28 Aug 2015 17:17:39 -0700
Subject: nir/cursor: Add a constructor for the end of a block but before the
 jump

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/glsl/nir/nir.h          | 11 +++++++++++
 src/glsl/nir/nir_from_ssa.c |  7 +------
 2 files changed, 12 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 3c375f30c53..2bcd18aabe5 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1607,6 +1607,17 @@ nir_after_instr(nir_instr *instr)
    return cursor;
 }
 
+static inline nir_cursor
+nir_after_block_before_jump(nir_block *block)
+{
+   nir_instr *last_instr = nir_block_last_instr(block);
+   if (last_instr && last_instr->type == nir_instr_type_jump) {
+      return nir_before_instr(last_instr);
+   } else {
+      return nir_after_block(block);
+   }
+}
+
 static inline nir_cursor
 nir_before_cf_node(nir_cf_node *node)
 {
diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c
index 1fd8b24d33d..94002f18cd7 100644
--- a/src/glsl/nir/nir_from_ssa.c
+++ b/src/glsl/nir/nir_from_ssa.c
@@ -249,12 +249,7 @@ add_parallel_copy_to_end_of_block(nir_block *block, void *void_state)
       nir_parallel_copy_instr *pcopy =
          nir_parallel_copy_instr_create(state->dead_ctx);
 
-      nir_instr *last_instr = nir_block_last_instr(block);
-      if (last_instr && last_instr->type == nir_instr_type_jump) {
-         nir_instr_insert_before(last_instr, &pcopy->instr);
-      } else {
-         nir_instr_insert_after_block(block, &pcopy->instr);
-      }
+      nir_instr_insert(nir_after_block_before_jump(block), &pcopy->instr);
    }
 
    return true;
-- 
cgit v1.2.3


From 1dbe4af9c9e318525fc082b542b93fb7f1e5efba Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Fri, 28 Aug 2015 17:09:02 -0700
Subject: nir: Add a pass to lower outputs to temporary variables

This pass can be used as a helper for NIR producers so they don't have to
worry about creating the temporaries themselves.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/glsl/Makefile.sources                       |   1 +
 src/glsl/nir/nir.h                              |   2 +
 src/glsl/nir/nir_lower_outputs_to_temporaries.c | 127 ++++++++++++++++++++++++
 3 files changed, 130 insertions(+)
 create mode 100644 src/glsl/nir/nir_lower_outputs_to_temporaries.c

(limited to 'src')

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 8f468e10ee3..da7fdf95902 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -40,6 +40,7 @@ NIR_FILES = \
 	nir/nir_lower_locals_to_regs.c \
 	nir/nir_lower_idiv.c \
 	nir/nir_lower_io.c \
+	nir/nir_lower_outputs_to_temporaries.c \
 	nir/nir_lower_phis_to_scalar.c \
 	nir/nir_lower_samplers.cpp \
 	nir/nir_lower_system_values.c \
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 2bcd18aabe5..2ba7731c1ff 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1786,6 +1786,8 @@ void nir_lower_global_vars_to_local(nir_shader *shader);
 
 void nir_lower_locals_to_regs(nir_shader *shader);
 
+void nir_lower_outputs_to_temporaries(nir_shader *shader);
+
 void nir_assign_var_locations(struct exec_list *var_list,
                               unsigned *size,
                               int (*type_size)(const struct glsl_type *));
diff --git a/src/glsl/nir/nir_lower_outputs_to_temporaries.c b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
new file mode 100644
index 00000000000..b730cad0020
--- /dev/null
+++ b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * Implements a pass that lowers output variables to a temporary plus an
+ * output variable with a single copy at each exit point of the shader.
+ * This way the output variable is only ever written.
+ *
+ * Because valid NIR requires that output variables are never read, this
+ * pass is more of a helper for NIR producers and must be run before the
+ * shader is ever validated.
+ */
+
+#include "nir.h"
+
+struct lower_outputs_state {
+   nir_shader *shader;
+   struct exec_list old_outputs;
+};
+
+static void
+emit_output_copies(nir_cursor cursor, struct lower_outputs_state *state)
+{
+   assert(exec_list_length(&state->shader->outputs) ==
+          exec_list_length(&state->old_outputs));
+
+   foreach_two_lists(out_node, &state->shader->outputs,
+                     temp_node, &state->old_outputs) {
+      nir_variable *output = exec_node_data(nir_variable, out_node, node);
+      nir_variable *temp = exec_node_data(nir_variable, temp_node, node);
+
+      nir_intrinsic_instr *copy =
+         nir_intrinsic_instr_create(state->shader, nir_intrinsic_copy_var);
+      copy->variables[0] = nir_deref_var_create(copy, output);
+      copy->variables[1] = nir_deref_var_create(copy, temp);
+
+      nir_instr_insert(cursor, &copy->instr);
+   }
+}
+
+static bool
+emit_output_copies_block(nir_block *block, void *state)
+{
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      if (intrin->intrinsic == nir_intrinsic_emit_vertex)
+         emit_output_copies(nir_before_instr(&intrin->instr), state);
+   }
+
+   return true;
+}
+
+void
+nir_lower_outputs_to_temporaries(nir_shader *shader)
+{
+   struct lower_outputs_state state;
+
+   state.shader = shader;
+   exec_list_move_nodes_to(&shader->outputs, &state.old_outputs);
+
+   /* Walk over all of the outputs, turning each output into a temporary and
+    * making a new variable for the actual output.
+    */
+   foreach_list_typed(nir_variable, var, node, &state.old_outputs) {
+      nir_variable *output = ralloc(shader, nir_variable);
+      memcpy(output, var, sizeof *output);
+
+      /* The original is now the temporary */
+      nir_variable *temp = var;
+
+      /* Give the temporary a new name with @out-temp appended */
+      temp->name = ralloc_asprintf(var, "%s@out-temp", output->name);
+      temp->data.mode = nir_var_global;
+      temp->constant_initializer = NULL;
+
+      exec_list_push_tail(&shader->outputs, &output->node);
+   }
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl == NULL)
+         continue;
+
+      if (shader->stage == MESA_SHADER_GEOMETRY) {
+         /* For geometry shaders, we have to emit the output copies right
+          * before each EmitVertex call.
+          */
+         nir_foreach_block(overload->impl, emit_output_copies_block, &state);
+      } else if (strcmp(overload->function->name, "main") == 0) {
+         /* For all other shader types, we need to do the copies right before
+          * the jumps to the end block.
+          */
+         struct set_entry *block_entry;
+         set_foreach(overload->impl->end_block->predecessors, block_entry) {
+            struct nir_block *block = (void *)block_entry->key;
+            emit_output_copies(nir_after_block_before_jump(block), &state);
+         }
+      }
+
+      nir_metadata_preserve(overload->impl, nir_metadata_block_index |
+                                            nir_metadata_dominance);
+   }
+
+   exec_list_append(&shader->globals, &state.old_outputs);
+}
-- 
cgit v1.2.3


From b828f7a27b2b8d6b90e646c08dfa25e1d737a9bb Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Tue, 8 Sep 2015 14:31:11 -0700
Subject: nir/glsl: Use lower_outputs_to_temporaries instead of relying on GLSL
 IR

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/glsl/nir/glsl_to_nir.cpp             | 2 ++
 src/mesa/drivers/dri/i965/brw_shader.cpp | 3 ---
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 0712908eee8..ad7d7dddeaf 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -138,6 +138,8 @@ glsl_to_nir(struct gl_shader *sh, const nir_shader_compiler_options *options)
    v2.run(sh->ir);
    visit_exec_list(sh->ir, &v1);
 
+   nir_lower_outputs_to_temporaries(shader);
+
    return shader;
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 445764d3d06..de1a7fe81a8 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -322,9 +322,6 @@ process_glsl_ir(gl_shader_stage stage,
                                         options, ctx->Const.NativeIntegers) || progress;
    } while (progress);
 
-   if (options->NirOptions != NULL)
-      lower_output_reads(stage, shader->ir);
-
    validate_ir_tree(shader->ir);
 
    /* Now that we've finished altering the linked IR, reparent any live IR back
-- 
cgit v1.2.3


From e50c01d5af305e07110cb4a38d5a655437058f04 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Wed, 9 Sep 2015 03:17:38 -0400
Subject: nvc0: keep track of cb bindings per buffer, use for upload settings

CB updates to bound buffers need to go through the CB_DATA endpoints,
otherwise the shader may not notice that the updates happened.
Furthermore, these updates have to go in to the same address as the
bound buffer, otherwise, again, the shader may not notice updates.

So we keep track of all the places where a constbuf is bound, and
iterate over all of them when updating data. If a binding is found that
encompasses the region to be updated, then we use the settings of that
binding for the upload. Otherwise we upload as a regular data update.

This fixes piglit 'arb_uniform_buffer_object-rendering offset' as well
as blurriness in Witcher2.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91890
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/nouveau/nouveau_buffer.c       |  4 +-
 src/gallium/drivers/nouveau/nouveau_buffer.h       |  2 +
 src/gallium/drivers/nouveau/nouveau_context.h      |  5 ++-
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h    |  8 ++--
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c      |  2 +
 .../drivers/nouveau/nvc0/nvc0_state_validate.c     |  3 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c   | 46 ++++++++++++++++++++--
 7 files changed, 58 insertions(+), 12 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 912b7789006..4937dae8b06 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -206,8 +206,8 @@ nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
       nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
                     tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
    else
-   if ((buf->base.bind & PIPE_BIND_CONSTANT_BUFFER) && nv->push_cb && can_cb)
-      nv->push_cb(nv, buf->bo, buf->domain, buf->offset, buf->base.width0,
+   if (nv->push_cb && can_cb)
+      nv->push_cb(nv, buf,
                   base, size / 4, (const uint32_t *)data);
    else
       nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data);
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.h b/src/gallium/drivers/nouveau/nouveau_buffer.h
index 7e6a6cc804b..d45bf7aebcf 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.h
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.h
@@ -41,6 +41,8 @@ struct nv04_resource {
    uint8_t status;
    uint8_t domain;
 
+   uint16_t cb_bindings[6]; /* per-shader per-slot bindings */
+
    struct nouveau_fence *fence;
    struct nouveau_fence *fence_wr;
 
diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h
index 24deb7ee4c0..decb2714ede 100644
--- a/src/gallium/drivers/nouveau/nouveau_context.h
+++ b/src/gallium/drivers/nouveau/nouveau_context.h
@@ -6,6 +6,8 @@
 
 #define NOUVEAU_MAX_SCRATCH_BUFS 4
 
+struct nv04_resource;
+
 struct nouveau_context {
    struct pipe_context pipe;
    struct nouveau_screen *screen;
@@ -23,8 +25,7 @@ struct nouveau_context {
                      unsigned, const void *);
    /* base, size refer to the whole constant buffer */
    void (*push_cb)(struct nouveau_context *,
-                   struct nouveau_bo *, unsigned domain,
-                   unsigned base, unsigned size,
+                   struct nv04_resource *,
                    unsigned offset, unsigned words, const uint32_t *);
 
    /* @return: @ref reduced by nr of references found in context */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 6ed79cf9553..30bee3a0f8c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -299,10 +299,10 @@ nve4_p2mf_push_linear(struct nouveau_context *nv,
                       struct nouveau_bo *dst, unsigned offset, unsigned domain,
                       unsigned size, const void *data);
 void
-nvc0_cb_push(struct nouveau_context *,
-             struct nouveau_bo *bo, unsigned domain,
-             unsigned base, unsigned size,
-             unsigned offset, unsigned words, const uint32_t *data);
+nvc0_cb_bo_push(struct nouveau_context *,
+                struct nouveau_bo *bo, unsigned domain,
+                unsigned base, unsigned size,
+                unsigned offset, unsigned words, const uint32_t *data);
 
 /* nvc0_vbo.c */
 void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index ee29912eb40..c5bfd03956d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -831,6 +831,8 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
    }
    nvc0->constbuf_dirty[s] |= 1 << i;
 
+   if (nvc0->constbuf[s][i].u.buf)
+      nv04_resource(nvc0->constbuf[s][i].u.buf)->cb_bindings[s] &= ~(1 << i);
    pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, res);
 
    nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? true : false;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index 47bd66d1e35..aec06097bbd 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -440,7 +440,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
                BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
                PUSH_DATA (push, (0 << 4) | 1);
             }
-            nvc0_cb_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
+            nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
                          base, nvc0->state.uniform_buffer_bound[s],
                          0, (size + 3) / 4,
                          nvc0->constbuf[s][0].u.data);
@@ -458,6 +458,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
                BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD);
 
                nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
+               res->cb_bindings[s] |= 1 << i;
             } else {
                BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
                PUSH_DATA (push, (i << 4) | 0);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
index 7cc5b4b1f48..d4831bfd9d4 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -506,11 +506,48 @@ nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
 }
 
 /* This happens rather often with DTD9/st. */
-void
+static void
 nvc0_cb_push(struct nouveau_context *nv,
-             struct nouveau_bo *bo, unsigned domain,
-             unsigned base, unsigned size,
+             struct nv04_resource *res,
              unsigned offset, unsigned words, const uint32_t *data)
+{
+   struct nvc0_context *nvc0 = nvc0_context(&nv->pipe);
+   struct nvc0_constbuf *cb = NULL;
+   int s;
+
+   /* Go through all the constbuf binding points of this buffer and try to
+    * find one which contains the region to be updated.
+    */
+   for (s = 0; s < 6 && !cb; s++) {
+      uint16_t bindings = res->cb_bindings[s];
+      while (bindings) {
+         int i = ffs(bindings) - 1;
+         uint32_t cb_offset = nvc0->constbuf[s][i].offset;
+
+         bindings &= ~(1 << i);
+         if (cb_offset <= offset &&
+             cb_offset + nvc0->constbuf[s][i].size >= offset + words * 4) {
+            cb = &nvc0->constbuf[s][i];
+            break;
+         }
+      }
+   }
+
+   if (cb) {
+      nvc0_cb_bo_push(nv, res->bo, res->domain,
+                      res->offset + cb->offset, cb->size,
+                      offset - cb->offset, words, data);
+   } else {
+      nv->push_data(nv, res->bo, res->offset + offset, res->domain,
+                    words * 4, data);
+   }
+}
+
+void
+nvc0_cb_bo_push(struct nouveau_context *nv,
+                struct nouveau_bo *bo, unsigned domain,
+                unsigned base, unsigned size,
+                unsigned offset, unsigned words, const uint32_t *data)
 {
    struct nouveau_pushbuf *push = nv->pushbuf;
 
@@ -520,6 +557,9 @@ nvc0_cb_push(struct nouveau_context *nv,
    assert(!(offset & 3));
    size = align(size, 0x100);
 
+   assert(offset < size);
+   assert(offset + words * 4 <= size);
+
    BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
    PUSH_DATA (push, size);
    PUSH_DATAh(push, bo->offset + base);
-- 
cgit v1.2.3


From 9ce2e3072696428277988040908c9ae7a0abf9ad Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Wed, 9 Sep 2015 17:27:17 -0400
Subject: gallium/ttn: fix cursor handling vs builder

After inserting instructions the cursor.option becomes _after_instr
(even if it started life as an _after_block).  So we cannot simply stash
the current cursor on the if/loop_stack.  Otherwise we end up inserting
instructions after the endif/endloop in the block preceding the if/
loop.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/gallium/auxiliary/nir/tgsi_to_nir.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index db50734efd5..cccc5606ccf 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -921,10 +921,6 @@ ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
 {
    nir_builder *b = &c->build;
 
-   /* Save the outside-of-the-if-statement node list. */
-   c->if_stack[c->if_stack_pos] = b->cursor;
-   c->if_stack_pos++;
-
    src = ttn_channel(b, src, X);
 
    nir_if *if_stmt = nir_if_create(b->shader);
@@ -935,6 +931,9 @@ ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
    }
    nir_builder_cf_insert(b, &if_stmt->cf_node);
 
+   c->if_stack[c->if_stack_pos] = nir_after_cf_node(&if_stmt->cf_node);
+   c->if_stack_pos++;
+
    b->cursor = nir_after_cf_list(&if_stmt->then_list);
 
    c->if_stack[c->if_stack_pos] = nir_after_cf_list(&if_stmt->else_list);
@@ -963,13 +962,12 @@ ttn_bgnloop(struct ttn_compile *c)
 {
    nir_builder *b = &c->build;
 
-   /* Save the outside-of-the-loop node list. */
-   c->loop_stack[c->loop_stack_pos] = b->cursor;
-   c->loop_stack_pos++;
-
    nir_loop *loop = nir_loop_create(b->shader);
    nir_builder_cf_insert(b, &loop->cf_node);
 
+   c->loop_stack[c->loop_stack_pos] = nir_after_cf_node(&loop->cf_node);
+   c->loop_stack_pos++;
+
    b->cursor = nir_after_cf_list(&loop->body);
 }
 
-- 
cgit v1.2.3


From 342e68dc60eebb20ac1be9f47800ee9e604354f0 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Wed, 9 Sep 2015 21:50:03 -0400
Subject: nvc0: remove BGRA4 format support

Something is wrong with the support somewhere. I couldn't get the blob
driver to use it either, although it happily used RGB5_A1.
teximage-colors works, but WoW seems to fail in the menus for drawing
text.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91526
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/nouveau/nv50/nv50_formats.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
index 49a93bf1d91..80f92be682d 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
@@ -203,8 +203,10 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
    F3B(B5G6R5_UNORM, B5G6R5_UNORM, C2, C1, C0, xx, UNORM, 5_6_5, TD),
    C4B(B5G5R5A1_UNORM, BGR5_A1_UNORM, C2, C1, C0, C3, UNORM, 5_5_5_1, TD),
    F3B(B5G5R5X1_UNORM, BGR5_X1_UNORM, C2, C1, C0, xx, UNORM, 5_5_5_1, TD),
+#if NOUVEAU_DRIVER != 0xc0
    C4B(B4G4R4A4_UNORM, NONE, C2, C1, C0, C3, UNORM, 4_4_4_4, T),
    F3B(B4G4R4X4_UNORM, NONE, C2, C1, C0, xx, UNORM, 4_4_4_4, T),
+#endif
    F3B(R9G9B9E5_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 9_9_9_E5, T),
 
    C4A(R10G10B10A2_UNORM, RGB10_A2_UNORM, C0, C1, C2, C3, UNORM, 10_10_10_2,
-- 
cgit v1.2.3


From bd0245b8b2fba3852178d635f4e9602845ea6dd3 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 26 Aug 2015 13:38:49 +0100
Subject: glsl: Silence unused parameter warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

builtin_variables.cpp:1062:53: warning: unused parameter 'name_as_gs_input' [-Wunused-parameter]
                                         const char *name_as_gs_input)
                                                     ^
builtin_functions.cpp:4774:47: warning: unused parameter 'intrinsic_name' [-Wunused-parameter]
                                   const char *intrinsic_name,
                                               ^
builtin_functions.cpp:4907:66: warning: unused parameter 'state' [-Wunused-parameter]
 _mesa_glsl_find_builtin_function_by_name(_mesa_glsl_parse_state *state,
                                                                  ^
builtin_functions.cpp:4915:49: warning: unused parameter 'num_arguments' [-Wunused-parameter]
                                        unsigned num_arguments,
                                                 ^
builtin_functions.cpp:4916:49: warning: unused parameter 'flags' [-Wunused-parameter]
                                        unsigned flags)
                                                 ^
ir_print_visitor.cpp:589:37: warning: unused parameter 'ir' [-Wunused-parameter]
 ir_print_visitor::visit(ir_barrier *ir)
                                     ^
linker.cpp:3212:48: warning: unused parameter 'ctx' [-Wunused-parameter]
 build_program_resource_list(struct gl_context *ctx,
                                                ^
standalone_scaffolding.cpp:65:57: warning: unused parameter ‘id’ [-Wunused-parameter]
 _mesa_shader_debug(struct gl_context *, GLenum, GLuint *id,
                                                         ^

v2: Rebase on top of GL_ARB_shader_image_size work (especially
58a86897).  Silence more warnings added by that work.

v3: Remove mention of the removed parameter from comments.  Suggested by
Iago.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu> [v1]
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Cc: "Martin Peres <martin.peres@linux.intel.com>"
---
 src/glsl/ast_to_hir.cpp             |  2 +-
 src/glsl/builtin_functions.cpp      | 14 ++++----------
 src/glsl/builtin_variables.cpp      | 11 ++++-------
 src/glsl/ir.h                       |  3 +--
 src/glsl/ir_print_visitor.cpp       |  2 +-
 src/glsl/linker.cpp                 |  3 +--
 src/glsl/program.h                  |  3 +--
 src/glsl/standalone_scaffolding.cpp |  2 +-
 src/mesa/program/ir_to_mesa.cpp     |  2 +-
 9 files changed, 15 insertions(+), 27 deletions(-)

(limited to 'src')

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 517841c99f8..72c6459da3c 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -4578,7 +4578,7 @@ ast_function::hir(exec_list *instructions,
    if (state->es_shader && state->language_version >= 300) {
       /* Local shader has no exact candidates; check the built-ins. */
       _mesa_glsl_initialize_builtin_functions();
-      if (_mesa_glsl_find_builtin_function_by_name(state, name)) {
+      if (_mesa_glsl_find_builtin_function_by_name(name)) {
          YYLTYPE loc = this->get_location();
          _mesa_glsl_error(& loc, state,
                           "A shader cannot redefine or overload built-in "
diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 5e051996758..3b4a9df808c 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -522,7 +522,6 @@ private:
    void add_function(const char *name, ...);
 
    typedef ir_function_signature *(builtin_builder::*image_prototype_ctr)(const glsl_type *image_type,
-                                                                          const char *intrinsic_name,
                                                                           unsigned num_arguments,
                                                                           unsigned flags);
 
@@ -738,11 +737,9 @@ private:
    B1(mid3)
 
    ir_function_signature *_image_prototype(const glsl_type *image_type,
-                                           const char *intrinsic_name,
                                            unsigned num_arguments,
                                            unsigned flags);
    ir_function_signature *_image_size_prototype(const glsl_type *image_type,
-                                                const char *intrinsic_name,
                                                 unsigned num_arguments,
                                                 unsigned flags);
    ir_function_signature *_image(image_prototype_ctr prototype,
@@ -4866,7 +4863,6 @@ builtin_builder::_mid3(const glsl_type *type)
 
 ir_function_signature *
 builtin_builder::_image_prototype(const glsl_type *image_type,
-                                  const char *intrinsic_name,
                                   unsigned num_arguments,
                                   unsigned flags)
 {
@@ -4916,9 +4912,8 @@ builtin_builder::_image_prototype(const glsl_type *image_type,
 
 ir_function_signature *
 builtin_builder::_image_size_prototype(const glsl_type *image_type,
-                                       const char *intrinsic_name,
-                                       unsigned num_arguments,
-                                       unsigned flags)
+                                       unsigned /* num_arguments */,
+                                       unsigned /* flags */)
 {
    const glsl_type *ret_type;
    unsigned num_components = image_type->coordinate_components();
@@ -4962,7 +4957,7 @@ builtin_builder::_image(image_prototype_ctr prototype,
                         unsigned num_arguments,
                         unsigned flags)
 {
-   ir_function_signature *sig = (this->*prototype)(image_type, intrinsic_name,
+   ir_function_signature *sig = (this->*prototype)(image_type,
                                                    num_arguments, flags);
 
    if (flags & IMAGE_FUNCTION_EMIT_STUB) {
@@ -5043,8 +5038,7 @@ _mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state,
 }
 
 ir_function *
-_mesa_glsl_find_builtin_function_by_name(_mesa_glsl_parse_state *state,
-                                         const char *name)
+_mesa_glsl_find_builtin_function_by_name(const char *name)
 {
    ir_function *f;
    mtx_lock(&builtins_lock);
diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index dd7804f7b8c..560155aa761 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -383,8 +383,7 @@ private:
    ir_variable *add_uniform(const glsl_type *type, const char *name);
    ir_variable *add_const(const char *name, int value);
    ir_variable *add_const_ivec3(const char *name, int x, int y, int z);
-   void add_varying(int slot, const glsl_type *type, const char *name,
-                    const char *name_as_gs_input);
+   void add_varying(int slot, const glsl_type *type, const char *name);
 
    exec_list * const instructions;
    struct _mesa_glsl_parse_state * const state;
@@ -1059,13 +1058,11 @@ builtin_variable_generator::generate_cs_special_vars()
 /**
  * Add a single "varying" variable.  The variable's type and direction (input
  * or output) are adjusted as appropriate for the type of shader being
- * compiled.  For geometry shaders using {ARB,EXT}_geometry_shader4,
- * name_as_gs_input is used for the input (to avoid ambiguity).
+ * compiled.
  */
 void
 builtin_variable_generator::add_varying(int slot, const glsl_type *type,
-                                        const char *name,
-                                        const char *name_as_gs_input)
+                                        const char *name)
 {
    switch (state->stage) {
    case MESA_SHADER_TESS_CTRL:
@@ -1094,7 +1091,7 @@ void
 builtin_variable_generator::generate_varyings()
 {
 #define ADD_VARYING(loc, type, name) \
-   add_varying(loc, type, name, name "In")
+   add_varying(loc, type, name)
 
    /* gl_Position and gl_PointSize are not visible from fragment shaders. */
    if (state->stage != MESA_SHADER_FRAGMENT) {
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index ede8caa6e47..7cdea017af8 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -2523,8 +2523,7 @@ _mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state,
                                  const char *name, exec_list *actual_parameters);
 
 extern ir_function *
-_mesa_glsl_find_builtin_function_by_name(_mesa_glsl_parse_state *state,
-                                         const char *name);
+_mesa_glsl_find_builtin_function_by_name(const char *name);
 
 extern gl_shader *
 _mesa_glsl_get_builtin_function_shader(void);
diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp
index 8dbd938c58b..b6832692419 100644
--- a/src/glsl/ir_print_visitor.cpp
+++ b/src/glsl/ir_print_visitor.cpp
@@ -586,7 +586,7 @@ ir_print_visitor::visit(ir_end_primitive *ir)
 }
 
 void
-ir_print_visitor::visit(ir_barrier *ir)
+ir_print_visitor::visit(ir_barrier *)
 {
    fprintf(f, "(barrier)\n");
 }
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 4838ab44339..94f847e1e98 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -3216,8 +3216,7 @@ add_interface_variables(struct gl_shader_program *shProg,
  * resource data.
  */
 void
-build_program_resource_list(struct gl_context *ctx,
-                            struct gl_shader_program *shProg)
+build_program_resource_list(struct gl_shader_program *shProg)
 {
    /* Rebuild resource list. */
    if (shProg->ProgramResourceList) {
diff --git a/src/glsl/program.h b/src/glsl/program.h
index c06541a6105..64f54635f62 100644
--- a/src/glsl/program.h
+++ b/src/glsl/program.h
@@ -40,8 +40,7 @@ extern void
 link_shaders(struct gl_context *ctx, struct gl_shader_program *prog);
 
 extern void
-build_program_resource_list(struct gl_context *ctx,
-                            struct gl_shader_program *shProg);
+build_program_resource_list(struct gl_shader_program *shProg);
 
 extern void
 linker_error(struct gl_shader_program *prog, const char *fmt, ...)
diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp
index 6033364afc5..e52869f288e 100644
--- a/src/glsl/standalone_scaffolding.cpp
+++ b/src/glsl/standalone_scaffolding.cpp
@@ -62,7 +62,7 @@ _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
 }
 
 void
-_mesa_shader_debug(struct gl_context *, GLenum, GLuint *id,
+_mesa_shader_debug(struct gl_context *, GLenum, GLuint *,
                    const char *, int)
 {
 }
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 8f58f3edf98..0defed83207 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2979,7 +2979,7 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
       if (!ctx->Driver.LinkShader(ctx, prog)) {
 	 prog->LinkStatus = GL_FALSE;
       } else {
-         build_program_resource_list(ctx, prog);
+         build_program_resource_list(prog);
       }
    }
 
-- 
cgit v1.2.3


From 13a974f9aea03a538c2a67417b5bee8bc732cca2 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 26 Aug 2015 13:45:52 +0100
Subject: glsl: Remove ADD_VARYING macro

The purpose of the macro was to create the name_as_gs_input from name.
The previous commit removed the name_as_gs_input from add_varying, so
the macro is unnecessary.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/glsl/builtin_variables.cpp | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

(limited to 'src')

diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index 560155aa761..cf1be037525 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -1090,32 +1090,29 @@ builtin_variable_generator::add_varying(int slot, const glsl_type *type,
 void
 builtin_variable_generator::generate_varyings()
 {
-#define ADD_VARYING(loc, type, name) \
-   add_varying(loc, type, name)
-
    /* gl_Position and gl_PointSize are not visible from fragment shaders. */
    if (state->stage != MESA_SHADER_FRAGMENT) {
-      ADD_VARYING(VARYING_SLOT_POS, vec4_t, "gl_Position");
-      ADD_VARYING(VARYING_SLOT_PSIZ, float_t, "gl_PointSize");
+      add_varying(VARYING_SLOT_POS, vec4_t, "gl_Position");
+      add_varying(VARYING_SLOT_PSIZ, float_t, "gl_PointSize");
    }
 
    if (state->is_version(130, 0)) {
-       ADD_VARYING(VARYING_SLOT_CLIP_DIST0, array(float_t, 0),
+       add_varying(VARYING_SLOT_CLIP_DIST0, array(float_t, 0),
                    "gl_ClipDistance");
    }
 
    if (compatibility) {
-      ADD_VARYING(VARYING_SLOT_TEX0, array(vec4_t, 0), "gl_TexCoord");
-      ADD_VARYING(VARYING_SLOT_FOGC, float_t, "gl_FogFragCoord");
+      add_varying(VARYING_SLOT_TEX0, array(vec4_t, 0), "gl_TexCoord");
+      add_varying(VARYING_SLOT_FOGC, float_t, "gl_FogFragCoord");
       if (state->stage == MESA_SHADER_FRAGMENT) {
-         ADD_VARYING(VARYING_SLOT_COL0, vec4_t, "gl_Color");
-         ADD_VARYING(VARYING_SLOT_COL1, vec4_t, "gl_SecondaryColor");
+         add_varying(VARYING_SLOT_COL0, vec4_t, "gl_Color");
+         add_varying(VARYING_SLOT_COL1, vec4_t, "gl_SecondaryColor");
       } else {
-         ADD_VARYING(VARYING_SLOT_CLIP_VERTEX, vec4_t, "gl_ClipVertex");
-         ADD_VARYING(VARYING_SLOT_COL0, vec4_t, "gl_FrontColor");
-         ADD_VARYING(VARYING_SLOT_BFC0, vec4_t, "gl_BackColor");
-         ADD_VARYING(VARYING_SLOT_COL1, vec4_t, "gl_FrontSecondaryColor");
-         ADD_VARYING(VARYING_SLOT_BFC1, vec4_t, "gl_BackSecondaryColor");
+         add_varying(VARYING_SLOT_CLIP_VERTEX, vec4_t, "gl_ClipVertex");
+         add_varying(VARYING_SLOT_COL0, vec4_t, "gl_FrontColor");
+         add_varying(VARYING_SLOT_BFC0, vec4_t, "gl_BackColor");
+         add_varying(VARYING_SLOT_COL1, vec4_t, "gl_FrontSecondaryColor");
+         add_varying(VARYING_SLOT_BFC1, vec4_t, "gl_BackSecondaryColor");
       }
    }
 
-- 
cgit v1.2.3


From a072ef8748a65d286e9b542bb9ea6e020fdcc7f8 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 10 Sep 2015 01:54:30 -0400
Subject: nv50/ir: make edge splitting fix up phi node sources

Unfortunately nv50_ir phi nodes aren't directly connected to the CFG, so
the mapping between source and the actual BB is by inbound edge order.
So when manipulating edges one has to be extremely careful. We were
insufficiently careful when splitting critical edges which resulted in
the phi nodes being confused as to where their sources were coming from.

This primarily manifests itself with the TXL-lowering logic on nv50,
when it is inside of a conditional. I've been unable to trigger the
issue anywhere else so far. This resolves rendering failures
in a number of games like Two Worlds 2, Trine: Enchanted Edition, Trine 2,
XCOM: Enemy Unknown, Stacking. It also improves the situation in
Hearthstone, Sonic Generations, and The Raven: Legacy of a Master Thief.
However more work needs to be done there (splitting a lot more edges
solves it, so it's some other sort of RA-related issue).

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90887
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 90 ++++++++++++++++++----
 1 file changed, 77 insertions(+), 13 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 0cd21cf47f5..400b9f09e51 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -25,6 +25,7 @@
 
 #include <stack>
 #include <limits>
+#include <tr1/unordered_map>
 
 namespace nv50_ir {
 
@@ -222,6 +223,7 @@ private:
    private:
       virtual bool visit(BasicBlock *);
       inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
+      inline void splitEdges(BasicBlock *b);
    };
 
    class ArgumentMovesPass : public Pass {
@@ -345,28 +347,55 @@ RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
    return (n == 2);
 }
 
-// For each operand of each PHI in b, generate a new value by inserting a MOV
-// at the end of the block it is coming from and replace the operand with its
-// result. This eliminates liveness conflicts and enables us to let values be
-// copied to the right register if such a conflict exists nonetheless.
+struct PhiMapHash {
+   size_t operator()(const std::pair<Instruction *, BasicBlock *>& val) const {
+      return std::tr1::hash<Instruction*>()(val.first) * 31 +
+         std::tr1::hash<BasicBlock*>()(val.second);
+   }
+};
+
+typedef std::tr1::unordered_map<
+   std::pair<Instruction *, BasicBlock *>, Value *, PhiMapHash> PhiMap;
+
+// Critical edges need to be split up so that work can be inserted along
+// specific edge transitions. Unfortunately manipulating incident edges into a
+// BB invalidates all the PHI nodes since their sources are implicitly ordered
+// by incident edge order.
 //
-// These MOVs are also crucial in making sure the live intervals of phi srces
-// are extended until the end of the loop, since they are not included in the
-// live-in sets.
-bool
-RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
+// TODO: Make it so that that is not the case, and PHI nodes store pointers to
+// the original BBs.
+void
+RegAlloc::PhiMovesPass::splitEdges(BasicBlock *bb)
 {
-   Instruction *phi, *mov;
    BasicBlock *pb, *pn;
-
+   Instruction *phi;
+   Graph::EdgeIterator ei;
    std::stack<BasicBlock *> stack;
+   int j = 0;
 
-   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
+   for (ei = bb->cfg.incident(); !ei.end(); ei.next()) {
       pb = BasicBlock::get(ei.getNode());
       assert(pb);
       if (needNewElseBlock(bb, pb))
          stack.push(pb);
    }
+
+   // No critical edges were found, no need to perform any work.
+   if (stack.empty())
+      return;
+
+   // We're about to, potentially, reorder the inbound edges. This means that
+   // we need to hold on to the (phi, bb) -> src mapping, and fix up the phi
+   // nodes after the graph has been modified.
+   PhiMap phis;
+
+   j = 0;
+   for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) {
+      pb = BasicBlock::get(ei.getNode());
+      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next)
+         phis.insert(std::make_pair(std::make_pair(phi, pb), phi->getSrc(j)));
+   }
+
    while (!stack.empty()) {
       pb = stack.top();
       pn = new BasicBlock(func);
@@ -379,12 +408,47 @@ RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
       assert(pb->getExit()->op != OP_CALL);
       if (pb->getExit()->asFlow()->target.bb == bb)
          pb->getExit()->asFlow()->target.bb = pn;
+
+      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
+         PhiMap::iterator it = phis.find(std::make_pair(phi, pb));
+         assert(it != phis.end());
+         phis.insert(std::make_pair(std::make_pair(phi, pn), it->second));
+         phis.erase(it);
+      }
    }
 
+   // Now go through and fix up all of the phi node sources.
+   j = 0;
+   for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) {
+      pb = BasicBlock::get(ei.getNode());
+      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
+         PhiMap::const_iterator it = phis.find(std::make_pair(phi, pb));
+         assert(it != phis.end());
+
+         phi->setSrc(j, it->second);
+      }
+   }
+}
+
+// For each operand of each PHI in b, generate a new value by inserting a MOV
+// at the end of the block it is coming from and replace the operand with its
+// result. This eliminates liveness conflicts and enables us to let values be
+// copied to the right register if such a conflict exists nonetheless.
+//
+// These MOVs are also crucial in making sure the live intervals of phi srces
+// are extended until the end of the loop, since they are not included in the
+// live-in sets.
+bool
+RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
+{
+   Instruction *phi, *mov;
+
+   splitEdges(bb);
+
    // insert MOVs (phi->src(j) should stem from j-th in-BB)
    int j = 0;
    for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
-      pb = BasicBlock::get(ei.getNode());
+      BasicBlock *pb = BasicBlock::get(ei.getNode());
       if (!pb->isTerminated())
          pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));
 
-- 
cgit v1.2.3


From 641eda0c792e10c2792730b1833353564479a557 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 10 Sep 2015 03:49:36 -0400
Subject: nv50/ir: r63 is only 0 if we are using less than 63 registers

It is advantageous to use r63 instead of r127 since r63 can fit into the
shorter encoding. However if we've RA'd over 63 registers, we must use
r127 as the replacement instead.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index bea293bac99..d87cdfff851 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -202,7 +202,10 @@ NV50LegalizePostRA::visit(Function *fn)
    Program *prog = fn->getProgram();
 
    r63 = new_LValue(fn, FILE_GPR);
-   r63->reg.data.id = 63;
+   if (prog->maxGPR < 63)
+      r63->reg.data.id = 63;
+   else
+      r63->reg.data.id = 127;
 
    // this is actually per-program, but we can do it all on visiting main()
    std::list<Instruction *> *outWrites =
-- 
cgit v1.2.3


From ce28ca713364dbe83cb3c371ca034bc2c2947616 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 10 Sep 2015 03:55:06 -0400
Subject: nv50/ir: fix emission of 8-byte wide interp instruction

This can come up if the target register number is > 63, which is fairly
rare.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91551
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 67ea6df773c..90147668c91 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -884,7 +884,7 @@ CodeEmitterNV50::emitINTERP(const Instruction *i)
    defId(i->def(0), 2);
    srcAddr8(i->src(0), 16);
 
-   if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
+   if (i->encSize != 8 && i->getInterpMode() == NV50_IR_INTERP_FLAT) {
       code[0] |= 1 << 8;
    } else {
       if (i->op == OP_PINTERP) {
@@ -896,10 +896,11 @@ CodeEmitterNV50::emitINTERP(const Instruction *i)
    }
 
    if (i->encSize == 8) {
-      code[1] =
-         (code[0] & (3 << 24)) >> (24 - 16) |
-         (code[0] & (1 <<  8)) << (18 -  8);
-      code[0] &= ~0x03000100;
+      if (i->getInterpMode() == NV50_IR_INTERP_FLAT)
+         code[1] = 4 << 16;
+      else
+         code[1] = (code[0] & (3 << 24)) >> (24 - 16);
+      code[0] &= ~0x03000000;
       code[0] |= 1;
       emitFlagsRd(i);
    }
-- 
cgit v1.2.3


From 74b86b971f3bf9b0482341b07c1cbc2e520fb1d0 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 10 Sep 2015 05:02:26 -0400
Subject: nv50/ir: don't fold immediate into mad if registers are too high

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91551
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index b01ef4128cd..44f74c61304 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2602,6 +2602,10 @@ NV50PostRaConstantFolding::visit(BasicBlock *bb)
              !isFloatType(i->dType))
             break;
 
+         if (i->getDef(0)->reg.data.id >= 64 ||
+             i->getSrc(0)->reg.data.id >= 64)
+            break;
+
          def = i->getSrc(1)->getInsn();
          if (def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
             vtmp = i->getSrc(1);
-- 
cgit v1.2.3


From bf58a2c362d5afdba512f40b3eb300154201c7f0 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 8 Sep 2015 15:41:11 -0700
Subject: i965: Advertise 65536 for GL_MAX_UNIFORM_BLOCK_SIZE.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Our old value of 16384 is the minimum value.  DirectX apparently
requires 65536 at a minimum; that's also what nVidia and the Intel
Windows driver advertise.  AMD advertises MAX_INT.

Ilia Mirkin noticed that "Shadow Warrior" uses UBOs larger than 16k
on Nouveau, which advertises 65536 bytes for this limit.  Traces
captured on Nouveau don't work on i965 because our lower limit causes
the GLSL linker to reject the captured shaders.  While this isn't
important in and of itself, it does suggest that raising the limit
would be beneficial.

We can read linear buffers up to 2^27 bytes in size, so raising this
should be safe; we could probably even go larger.  For now, matching
nVidia and Intel/Windows seems like a good plan.

We have to reinitialize MaxCombinedUniformComponents as core Mesa will
have set it based on a stale value for MaxUniformBlockSize.

According to Tapani, there's an unreleased game that asserts on this.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Cc: "11.0" <mesa-stable@lists.freedesktop.org>
---
 src/mesa/drivers/dri/i965/brw_context.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 907b2a07353..7c1c13300dc 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -323,6 +323,15 @@ brw_initialize_context_constants(struct brw_context *brw)
 
    ctx->Const.StripTextureBorder = true;
 
+   ctx->Const.MaxUniformBlockSize = 65536;
+   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+      struct gl_program_constants *prog = &ctx->Const.Program[i];
+      prog->MaxUniformBlocks = 12;
+      prog->MaxCombinedUniformComponents =
+         prog->MaxUniformComponents +
+         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
+   }
+
    ctx->Const.MaxDualSourceDrawBuffers = 1;
    ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
-- 
cgit v1.2.3


From 1691ead1b8ae4018a805af58977a43ef90af4203 Mon Sep 17 00:00:00 2001
From: Albert Freeman <albertwdfreeman@gmail.com>
Date: Tue, 8 Sep 2015 13:06:40 +0000
Subject: clover: Avoid using typename to allow compilation of clover by clang

When parsing a variable declaration qualified with the typename
keyword, clang attempted to declare a variable with the type of the
non-type member "enum type type" of module::argument (within the header
file clover/core/module.hpp) instead of the type member "enum type" of
module::argument.

Replaced "typename" with "enum" to force clang to declare the variable
marg_type with type "enum type" of module::argument.

CC: "11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
Signed-off-by: Albert Freeman <albertwdfreeman@gmail.com>
---
 src/gallium/state_trackers/clover/llvm/invocation.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index 7c23a27150b..d74b50df45a 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -465,7 +465,7 @@ namespace {
             const bool is_write_only = access_qual == "write_only";
             const bool is_read_only = access_qual == "read_only";
 
-            typename module::argument::type marg_type;
+            enum module::argument::type marg_type;
             if (is_image2d && is_read_only) {
                marg_type = module::argument::image2d_rd;
             } else if (is_image2d && is_write_only) {
-- 
cgit v1.2.3


From 548bf70fd22ca862692abc83700ff5010f92b9b6 Mon Sep 17 00:00:00 2001
From: Rhys Kidd <rhyskidd@gmail.com>
Date: Tue, 8 Sep 2015 23:15:27 +0800
Subject: mesa: Resolve GCC missing field initializer warning.

Resolve a series of missing field initializer warnings within get_hash_params.py

Of the form:
In file included from mesa/src/mesa/main/get.c:495:0:
mesa/src/mesa/main/get_hash.h:180:5: warning: missing initializer for field
'extra' of 'const struct value_desc' [-Wmissing-field-initializers]
     { GL_POINT_SIZE_ARRAY_BUFFER_BINDING_OES, LOC_CUSTOM, TYPE_INT, 0 },
     ^
mesa/src/mesa/main/get.c:165:15: note: 'extra' declared here
    const int *extra;
               ^

This patch addresses some likely code rot around the *extra field, where the
initialization is via C code generated indirectly from a Python script.
It resolves a number of warnings reported by GCC when configured to be pedantic.

$ gcc --version
gcc (Ubuntu 4.9.2-10ubuntu13) 4.9.2

No piglit regressions on Ironlake.

v2:
- Squash series into a single patch.

Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Jan Vesely <jan.vesely@rutgers.edu>
Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/mesa/main/get_hash_params.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index 09e1f0e7986..c06835a1e64 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -234,10 +234,10 @@ descriptor=[
 
 { "apis": ["GLES"], "params": [
 # OES_point_size_array
-  [ "POINT_SIZE_ARRAY_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled, TYPE_BOOLEAN)" ],
-  [ "POINT_SIZE_ARRAY_TYPE_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Type, TYPE_ENUM)" ],
-  [ "POINT_SIZE_ARRAY_STRIDE_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Stride, TYPE_INT)" ],
-  [ "POINT_SIZE_ARRAY_BUFFER_BINDING_OES", "LOC_CUSTOM, TYPE_INT, 0" ],
+  [ "POINT_SIZE_ARRAY_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled, TYPE_BOOLEAN), NO_EXTRA" ],
+  [ "POINT_SIZE_ARRAY_TYPE_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Type, TYPE_ENUM), NO_EXTRA" ],
+  [ "POINT_SIZE_ARRAY_STRIDE_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Stride, TYPE_INT), NO_EXTRA" ],
+  [ "POINT_SIZE_ARRAY_BUFFER_BINDING_OES", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ],
 ]},
 
 { "apis": ["GL", "GL_CORE", "GLES2"], "params": [
-- 
cgit v1.2.3


From 32cdb49fe2f0211040bfb16e668169097199bfcc Mon Sep 17 00:00:00 2001
From: Rhys Kidd <rhyskidd@gmail.com>
Date: Tue, 8 Sep 2015 23:52:48 +0800
Subject: glsl: Resolve GCC sign-compare warning.

mesa/src/glsl/nir/nir_lower_tex_projector.c: In function 'nir_lower_tex_projector_block':
mesa/src/glsl/nir/nir_lower_tex_projector.c:63:25: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       for (int i = 0; i < tex->num_srcs; i++) {
                         ^
mesa/src/glsl/nir/nir_lower_tex_projector.c: In function 'nir_lower_tex_projector_block':
mesa/src/glsl/nir/nir_lower_tex_projector.c:114:38: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       for (int i = proj_index + 1; i < tex->num_srcs; i++) {
                                      ^
mesa/src/glsl/nir/nir_lower_tex_projector.c: In function 'nir_lower_tex_projector_block':
mesa/src/glsl/nir/nir_lower_tex_projector.c:53:39: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
                                       ^
mesa/src/glsl/nir/nir_lower_tex_projector.c:57:22: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       if (proj_index == tex->num_srcs)
                      ^
mesa/src/glsl/nir/nir_search.c: In function 'match_value':
mesa/src/glsl/nir/nir_search.c:84:22: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
    for (int i = 0; i < num_components; ++i)
                      ^
mesa/src/glsl/nir/nir_search.c: In function 'match_value':
mesa/src/glsl/nir/nir_search.c:110:28: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
          for (int i = 0; i < num_components; ++i) {
                            ^
mesa/src/glsl/nir/nir_search.c: In function 'match_value':
mesa/src/glsl/nir/nir_search.c:139:19: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
             if (i < num_components)
                   ^
mesa/src/glsl/nir/nir_opt_peephole_ffma.c: In function 'get_mul_for_src':
mesa/src/glsl/nir/nir_opt_peephole_ffma.c:130:27: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
    for (unsigned i = 0; i < num_components; i++)
                           ^

Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
Reviewed-by: Jan Vesely <jan.vesely@rutgers.edu>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/glsl/nir/nir_lower_tex_projector.c | 6 +++---
 src/glsl/nir/nir_opt_peephole_ffma.c   | 2 +-
 src/glsl/nir/nir_search.c              | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_lower_tex_projector.c b/src/glsl/nir/nir_lower_tex_projector.c
index 8a482b182a9..6530021c8b7 100644
--- a/src/glsl/nir/nir_lower_tex_projector.c
+++ b/src/glsl/nir/nir_lower_tex_projector.c
@@ -49,7 +49,7 @@ nir_lower_tex_projector_block(nir_block *block, void *void_state)
       b->cursor = nir_before_instr(&tex->instr);
 
       /* Find the projector in the srcs list, if present. */
-      int proj_index;
+      unsigned proj_index;
       for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
          if (tex->src[proj_index].src_type == nir_tex_src_projector)
             break;
@@ -60,7 +60,7 @@ nir_lower_tex_projector_block(nir_block *block, void *void_state)
          nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
 
       /* Walk through the sources projecting the arguments. */
-      for (int i = 0; i < tex->num_srcs; i++) {
+      for (unsigned i = 0; i < tex->num_srcs; i++) {
          switch (tex->src[i].src_type) {
          case nir_tex_src_coord:
          case nir_tex_src_comparitor:
@@ -111,7 +111,7 @@ nir_lower_tex_projector_block(nir_block *block, void *void_state)
        */
       nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src,
                             NIR_SRC_INIT);
-      for (int i = proj_index + 1; i < tex->num_srcs; i++) {
+      for (unsigned i = proj_index + 1; i < tex->num_srcs; i++) {
          tex->src[i-1].src_type = tex->src[i].src_type;
          nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
       }
diff --git a/src/glsl/nir/nir_opt_peephole_ffma.c b/src/glsl/nir/nir_opt_peephole_ffma.c
index a823adbb465..97538e5e64a 100644
--- a/src/glsl/nir/nir_opt_peephole_ffma.c
+++ b/src/glsl/nir/nir_opt_peephole_ffma.c
@@ -127,7 +127,7 @@ get_mul_for_src(nir_alu_src *src, int num_components,
     *   If we reuse swizzle in the loop, then output swizzle would be zyzz.
     */
    memcpy(swizzle_tmp, swizzle, 4*sizeof(uint8_t));
-   for (unsigned i = 0; i < num_components; i++)
+   for (int i = 0; i < num_components; i++)
       swizzle[i] = swizzle_tmp[src->swizzle[i]];
 
    return alu;
diff --git a/src/glsl/nir/nir_search.c b/src/glsl/nir/nir_search.c
index c33d6c3eb84..51e69b06d8c 100644
--- a/src/glsl/nir/nir_search.c
+++ b/src/glsl/nir/nir_search.c
@@ -81,7 +81,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
       swizzle = identity_swizzle;
    }
 
-   for (int i = 0; i < num_components; ++i)
+   for (unsigned i = 0; i < num_components; ++i)
       new_swizzle[i] = instr->src[src].swizzle[swizzle[i]];
 
    switch (value->type) {
@@ -107,7 +107,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
 
          assert(!instr->src[src].abs && !instr->src[src].negate);
 
-         for (int i = 0; i < num_components; ++i) {
+         for (unsigned i = 0; i < num_components; ++i) {
             if (state->variables[var->variable].swizzle[i] != new_swizzle[i])
                return false;
          }
@@ -135,7 +135,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
          state->variables[var->variable].abs = false;
          state->variables[var->variable].negate = false;
 
-         for (int i = 0; i < 4; ++i) {
+         for (unsigned i = 0; i < 4; ++i) {
             if (i < num_components)
                state->variables[var->variable].swizzle[i] = new_swizzle[i];
             else
-- 
cgit v1.2.3


From 1c194840fda7b1a6344aa40def9879b418fa977d Mon Sep 17 00:00:00 2001
From: Rhys Kidd <rhyskidd@gmail.com>
Date: Tue, 8 Sep 2015 23:52:49 +0800
Subject: mesa: Resolve GCC sign-compare warning.

mesa/src/mesa/program/prog_to_nir.c: In function 'setup_registers_and_variables':
/mesa/src/mesa/program/prog_to_nir.c:1059:22: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
    for (int i = 0; i < c->prog->NumTemporaries; i++) {
                      ^

Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
Reviewed-by: Jan Vesely <jan.vesely@rutgers.edu>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/mesa/program/prog_to_nir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c
index fccd16fc8c0..ec61100356a 100644
--- a/src/mesa/program/prog_to_nir.c
+++ b/src/mesa/program/prog_to_nir.c
@@ -1056,7 +1056,7 @@ setup_registers_and_variables(struct ptn_compile *c)
    c->temp_regs = rzalloc_array(c, nir_register *, c->prog->NumTemporaries);
 
    nir_register *reg;
-   for (int i = 0; i < c->prog->NumTemporaries; i++) {
+   for (unsigned i = 0; i < c->prog->NumTemporaries; i++) {
       reg = nir_local_reg_create(b->impl);
       if (!reg) {
          c->error = true;
-- 
cgit v1.2.3


From 2c3007652d5177fe6d018de67be8209a1c3f1b83 Mon Sep 17 00:00:00 2001
From: Rhys Kidd <rhyskidd@gmail.com>
Date: Tue, 8 Sep 2015 23:52:50 +0800
Subject: i965: Resolve GCC sign-compare warning.

mesa/src/mesa/drivers/dri/i965/brw_eu_compact.c: In function 'set_3src_control_index':
mesa/src/mesa/drivers/dri/i965/brw_eu_compact.c:805:22: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
    for (int i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
                      ^
mesa/src/mesa/drivers/dri/i965/brw_eu_compact.c: In function 'set_3src_source_index':
mesa/src/mesa/drivers/dri/i965/brw_eu_compact.c:839:22: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
    for (int i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
                      ^
mesa/src/mesa/drivers/dri/i965/brw_state_dump.c: In function 'dump_sampler_state':
mesa/src/mesa/drivers/dri/i965/brw_state_dump.c:382:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
    for (i = 0; i < size / 16; i++) {
                  ^
mesa/src/mesa/drivers/dri/i965/brw_state_upload.c: In function 'brw_pipeline_state_finished':
mesa/src/mesa/drivers/dri/i965/brw_state_upload.c:801:13: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       if (i != pipeline) {
             ^
mesa/src/mesa/drivers/dri/i965/intel_mipmap_tree.c: In function 'intel_gen7_hiz_buf_create':
mesa/src/mesa/drivers/dri/i965/intel_mipmap_tree.c:1544:47: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       for (int level = mt->first_level; level <= mt->last_level; ++level) {
                                               ^
mesa/src/mesa/drivers/dri/i965/intel_mipmap_tree.c: In function 'intel_gen8_hiz_buf_create':
mesa/src/mesa/drivers/dri/i965/intel_mipmap_tree.c:1638:44: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
    for (int level = mt->first_level; level <= mt->last_level; ++level) {
                                            ^
mesa/src/mesa/drivers/dri/i965/intel_mipmap_tree.c: In function 'intel_miptree_alloc_hiz':
mesa/src/mesa/drivers/dri/i965/intel_mipmap_tree.c:1771:44: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
    for (int level = mt->first_level; level <= mt->last_level; ++level) {
                                            ^
mesa/src/mesa/drivers/dri/i965/intel_mipmap_tree.c:1775:33: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       for (int layer = 0; layer < mt->level[level].depth; ++layer) {
                                 ^

Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_eu_compact.c    | 4 ++--
 src/mesa/drivers/dri/i965/brw_state_dump.c    | 4 ++--
 src/mesa/drivers/dri/i965/brw_state_upload.c  | 2 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 8 ++++----
 4 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c
index 67f0b45ac04..b798931140f 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -802,7 +802,7 @@ set_3src_control_index(const struct brw_device_info *devinfo,
    if (devinfo->gen >= 9 || devinfo->is_cherryview)
       uncompacted |= brw_inst_bits(src, 36, 35) << 24; /* 2b */
 
-   for (int i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
+   for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
       if (gen8_3src_control_index_table[i] == uncompacted) {
          brw_compact_inst_set_3src_control_index(dst, i);
 	 return true;
@@ -836,7 +836,7 @@ set_3src_source_index(const struct brw_device_info *devinfo,
          (brw_inst_bits(src, 104, 104) << 44);  /* 1b */
    }
 
-   for (int i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
+   for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
       if (gen8_3src_source_index_table[i] == uncompacted) {
          brw_compact_inst_set_3src_source_index(dst, i);
 	 return true;
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index b6f4d598e1d..0c974c4c807 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -376,13 +376,13 @@ dump_sdc(struct brw_context *brw, uint32_t offset)
 static void dump_sampler_state(struct brw_context *brw,
 			       uint32_t offset, uint32_t size)
 {
-   int i;
+   unsigned i;
    uint32_t *samp = brw->batch.bo->virtual + offset;
 
    for (i = 0; i < size / 16; i++) {
       char name[20];
 
-      sprintf(name, "WM SAMP%d", i);
+      sprintf(name, "WM SAMP%u", i);
       batch_out(brw, name, offset, 0, "filtering\n");
       batch_out(brw, name, offset, 1, "wrapping, lod\n");
       batch_out(brw, name, offset, 2, "default color pointer\n");
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index b2ca9c2c0e7..01c090014e4 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -797,7 +797,7 @@ brw_pipeline_state_finished(struct brw_context *brw,
                             enum brw_pipeline pipeline)
 {
    /* Save all dirty state into the other pipelines */
-   for (int i = 0; i < BRW_NUM_PIPELINES; i++) {
+   for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
       if (i != pipeline) {
          brw->state.pipelines[i].mesa |= brw->NewGLState;
          brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 19f66b70a59..1259664de2a 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1539,7 +1539,7 @@ intel_gen7_hiz_buf_create(struct brw_context *brw,
       unsigned H_i = H0;
       unsigned Z_i = Z0;
       hz_height = 0;
-      for (int level = mt->first_level; level <= mt->last_level; ++level) {
+      for (unsigned level = mt->first_level; level <= mt->last_level; ++level) {
          unsigned h_i = ALIGN(H_i, vertical_align);
          /* sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
          hz_height += h_i * Z_i;
@@ -1635,7 +1635,7 @@ intel_gen8_hiz_buf_create(struct brw_context *brw,
    unsigned Z_i = Z0;
    unsigned sum_h_i = 0;
    unsigned hz_height_3d_sum = 0;
-   for (int level = mt->first_level; level <= mt->last_level; ++level) {
+   for (unsigned level = mt->first_level; level <= mt->last_level; ++level) {
       unsigned i = level - mt->first_level;
       unsigned h_i = ALIGN(H_i, vertical_align);
       /* sum(i=2 to m; h_i) */
@@ -1768,11 +1768,11 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
       return false;
 
    /* Mark that all slices need a HiZ resolve. */
-   for (int level = mt->first_level; level <= mt->last_level; ++level) {
+   for (unsigned level = mt->first_level; level <= mt->last_level; ++level) {
       if (!intel_miptree_level_enable_hiz(brw, mt, level))
          continue;
 
-      for (int layer = 0; layer < mt->level[level].depth; ++layer) {
+      for (unsigned layer = 0; layer < mt->level[level].depth; ++layer) {
          struct intel_resolve_map *m = malloc(sizeof(struct intel_resolve_map));
          exec_node_init(&m->link);
          m->level = level;
-- 
cgit v1.2.3


From c6502e880bba00f8a68f004fe6be7a4bc275494a Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Wed, 2 Sep 2015 19:04:25 +0200
Subject: winsys/amdgpu: calculate the maximum number of compute units

Required for register spilling.

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index 875dcd09c6b..c8772490e74 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -110,7 +110,7 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
    struct amdgpu_heap_info vram, gtt;
    struct drm_amdgpu_info_hw_ip dma = {}, uvd = {}, vce = {};
    uint32_t vce_version = 0, vce_feature = 0;
-   int r;
+   int r, i, j;
 
    /* Query hardware and driver information. */
    r = amdgpu_query_gpu_info(ws->dev, &ws->amdinfo);
@@ -248,7 +248,6 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
    ws->info.vram_size = vram.heap_size;
    /* convert the shader clock from KHz to MHz */
    ws->info.max_sclk = ws->amdinfo.max_engine_clk / 1000;
-   ws->info.max_compute_units = 1; /* TODO */
    ws->info.max_se = ws->amdinfo.num_shader_engines;
    ws->info.max_sh_per_se = ws->amdinfo.num_shader_arrays_per_engine;
    ws->info.has_uvd = uvd.available_rings != 0;
@@ -263,6 +262,18 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
    ws->info.r600_virtual_address = TRUE;
    ws->info.r600_has_dma = dma.available_rings != 0;
 
+   /* Guess what the maximum compute unit number is by looking at the mask
+    * of enabled CUs.
+    */
+   for (i = 0; i < ws->info.max_se; i++)
+      for (j = 0; j < ws->info.max_sh_per_se; j++) {
+         unsigned max = util_last_bit(ws->amdinfo.cu_bitmap[i][j]);
+
+         if (ws->info.max_compute_units < max)
+            ws->info.max_compute_units = max;
+      }
+   ws->info.max_compute_units *= ws->info.max_se * ws->info.max_sh_per_se;
+
    memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode,
           sizeof(ws->amdinfo.gb_tile_mode));
    ws->info.si_tile_mode_array_valid = TRUE;
-- 
cgit v1.2.3


From 7956eae1c76e298ca1ded46679c1a9bf875ec4ee Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Wed, 2 Sep 2015 19:05:09 +0200
Subject: radeonsi: enable VGPR spilling on VI

This fixes corruption in Unigine Heaven on VI

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/radeonsi/si_pipe.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 7dbb2e30422..85ade311c9c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -205,9 +205,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	r600_target = radeon_llvm_get_r600_target(triple);
 	sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
 					   r600_get_llvm_processor_name(sscreen->b.family),
-					   sctx->b.chip_class >= VI ?
-						   "+DumpCode" :
-						   "+DumpCode,+vgpr-spilling",
+					   "+DumpCode,+vgpr-spilling",
 					   LLVMCodeGenLevelDefault,
 					   LLVMRelocDefault,
 					   LLVMCodeModelDefault);
-- 
cgit v1.2.3


From 5c6c5b524649997805d0128d4df9dda5e8567cbb Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 6 Sep 2015 16:40:21 +0200
Subject: r600g: use pipe_resource::width0 instead of pb_buffer::size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

pb_buffer::size was aligned by 29aaab2b5f55cc6d9a84f58ce2bb8607e76a9dde,
which broke the CMASK code I think.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91881

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/r600/evergreen_state.c | 4 ++--
 src/gallium/drivers/r600/r600_state.c      | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 6f4cb55f887..0c54a3fe953 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1864,7 +1864,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
 		radeon_emit(cs, (resource_offset + buffer_index) * 8);
 		radeon_emit(cs, va); /* RESOURCEi_WORD0 */
-		radeon_emit(cs, rbuffer->buf->size - vb->buffer_offset - 1); /* RESOURCEi_WORD1 */
+		radeon_emit(cs, rbuffer->b.b.width0 - vb->buffer_offset - 1); /* RESOURCEi_WORD1 */
 		radeon_emit(cs, /* RESOURCEi_WORD2 */
 				 S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
 				 S_030008_STRIDE(vb->stride) |
@@ -1934,7 +1934,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
 		radeon_emit(cs, (buffer_id_base + buffer_index) * 8);
 		radeon_emit(cs, va); /* RESOURCEi_WORD0 */
-		radeon_emit(cs, rbuffer->buf->size - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */
+		radeon_emit(cs, rbuffer->b.b.width0 - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */
 		radeon_emit(cs, /* RESOURCEi_WORD2 */
 			    S_030008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) |
 			    S_030008_STRIDE(gs_ring_buffer ? 4 : 16) |
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 93a74f7c2ca..3464c382dc6 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1022,7 +1022,7 @@ static void r600_init_color_surface(struct r600_context *rctx,
 
 		/* CMASK. */
 		if (!rctx->dummy_cmask ||
-		    rctx->dummy_cmask->buf->size < cmask.size ||
+		    rctx->dummy_cmask->b.b.width0 < cmask.size ||
 		    rctx->dummy_cmask->buf->alignment % cmask.alignment != 0) {
 			struct pipe_transfer *transfer;
 			void *ptr;
@@ -1040,7 +1040,7 @@ static void r600_init_color_surface(struct r600_context *rctx,
 
 		/* FMASK. */
 		if (!rctx->dummy_fmask ||
-		    rctx->dummy_fmask->buf->size < fmask.size ||
+		    rctx->dummy_fmask->b.b.width0 < fmask.size ||
 		    rctx->dummy_fmask->buf->alignment % fmask.alignment != 0) {
 			pipe_resource_reference((struct pipe_resource**)&rctx->dummy_fmask, NULL);
 			rctx->dummy_fmask = r600_buffer_create_helper(rscreen, fmask.size, fmask.alignment);
@@ -1709,7 +1709,7 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
 		radeon_emit(cs, (320 + buffer_index) * 7);
 		radeon_emit(cs, offset); /* RESOURCEi_WORD0 */
-		radeon_emit(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
+		radeon_emit(cs, rbuffer->b.b.width0 - offset - 1); /* RESOURCEi_WORD1 */
 		radeon_emit(cs, /* RESOURCEi_WORD2 */
 				 S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
 				 S_038008_STRIDE(vb->stride));
@@ -1758,7 +1758,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
 		radeon_emit(cs, (buffer_id_base + buffer_index) * 7);
 		radeon_emit(cs, offset); /* RESOURCEi_WORD0 */
-		radeon_emit(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
+		radeon_emit(cs, rbuffer->b.b.width0 - offset - 1); /* RESOURCEi_WORD1 */
 		radeon_emit(cs, /* RESOURCEi_WORD2 */
 			    S_038008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) |
 			    S_038008_STRIDE(gs_ring_buffer ? 4 : 16));
-- 
cgit v1.2.3


From 5fbfd8dd231a70b921ff69677ce35ecc1305932c Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 6 Sep 2015 16:19:33 +0200
Subject: r600/llvm: remove dead code for LLVM 3.3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

LLVM 3.3 has been unsupported for quite a while.

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/r600/r600_llvm.c | 106 -----------------------------------
 1 file changed, 106 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index faf538ccbb5..3362fd00dba 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -77,22 +77,11 @@ static void llvm_load_system_value(
 	default: assert(!"unknown system value");
 	}
 
-#if HAVE_LLVM >= 0x0304
 	ctx->system_values[index] = LLVMBuildExtractElement(ctx->gallivm.builder,
 		LLVMGetParam(ctx->main_fn, 0), lp_build_const_int32(&(ctx->gallivm), chan),
 		"");
-#else
-	LLVMValueRef reg = lp_build_const_int32(
-			ctx->soa.bld_base.base.gallivm, chan);
-	ctx->system_values[index] = lp_build_intrinsic(
-			ctx->soa.bld_base.base.gallivm->builder,
-			"llvm.R600.load.input",
-			ctx->soa.bld_base.base.elem_type, &reg, 1,
-			LLVMReadNoneAttribute);
-#endif
 }
 
-#if HAVE_LLVM >= 0x0304
 static LLVMValueRef
 llvm_load_input_vector(
 	struct radeon_llvm_context * ctx, unsigned location, unsigned ijregs,
@@ -131,34 +120,7 @@ llvm_load_input_vector(
 				VecType, Args, ArgCount, LLVMReadNoneAttribute);
 		}
 }
-#else
-static LLVMValueRef
-llvm_load_input_helper(
-	struct radeon_llvm_context * ctx,
-	unsigned idx, int interp, int ij_index)
-{
-	const struct lp_build_context * bb = &ctx->soa.bld_base.base;
-	LLVMValueRef arg[2];
-	int arg_count;
-	const char * intrinsic;
-
-	arg[0] = lp_build_const_int32(bb->gallivm, idx);
-
-	if (interp) {
-		intrinsic = "llvm.R600.interp.input";
-		arg[1] = lp_build_const_int32(bb->gallivm, ij_index);
-		arg_count = 2;
-	} else {
-		intrinsic = "llvm.R600.load.input";
-		arg_count = 1;
-	}
-
-	return lp_build_intrinsic(bb->gallivm->builder, intrinsic,
-		bb->elem_type, &arg[0], arg_count, LLVMReadNoneAttribute);
-}
-#endif
 
-#if HAVE_LLVM >= 0x0304
 static LLVMValueRef
 llvm_face_select_helper(
 	struct radeon_llvm_context * ctx,
@@ -171,21 +133,6 @@ llvm_face_select_helper(
 	return LLVMBuildSelect(bb->gallivm->builder, is_front,
 		front_color, back_color, "");
 }
-#else
-static LLVMValueRef
-llvm_face_select_helper(
-	struct radeon_llvm_context * ctx,
-	unsigned face_loc, LLVMValueRef front_color, LLVMValueRef back_color)
-{
-	const struct lp_build_context * bb = &ctx->soa.bld_base.base;
-	LLVMValueRef face = llvm_load_input_helper(ctx, face_loc, 0, 0);
-	LLVMValueRef is_front = LLVMBuildFCmp(
-		bb->gallivm->builder, LLVMRealUGT, face,
-		lp_build_const_float(bb->gallivm, 0.0f),	"");
-	return LLVMBuildSelect(bb->gallivm->builder, is_front,
-		front_color, back_color, "");
-}
-#endif
 
 static void llvm_load_input(
 	struct radeon_llvm_context * ctx,
@@ -194,18 +141,11 @@ static void llvm_load_input(
 {
 	const struct r600_shader_io * input = &ctx->r600_inputs[input_index];
 	unsigned chan;
-#if HAVE_LLVM < 0x0304
-	unsigned interp = 0;
-	int ij_index;
-#endif
 	int two_side = (ctx->two_side && input->name == TGSI_SEMANTIC_COLOR);
 	LLVMValueRef v;
-#if HAVE_LLVM >= 0x0304
 	boolean require_interp_intrinsic = ctx->chip_class >= EVERGREEN &&
 		ctx->type == TGSI_PROCESSOR_FRAGMENT;
-#endif
 
-#if HAVE_LLVM >= 0x0304
 	if (require_interp_intrinsic && input->spi_sid) {
 		v = llvm_load_input_vector(ctx, input->lds_pos, input->ij_index,
 			(input->interpolate > 0));
@@ -241,49 +181,7 @@ static void llvm_load_input(
 				lp_build_const_float(&(ctx->gallivm), 1.0f),
 				ctx->inputs[soa_index], "");
 	}
-}
-#else
-	if (ctx->chip_class >= EVERGREEN && ctx->type == TGSI_PROCESSOR_FRAGMENT &&
-			input->spi_sid) {
-		interp = 1;
-		ij_index = (input->interpolate > 0) ? input->ij_index : -1;
 	}
-
-	for (chan = 0; chan < 4; chan++) {
-		unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
-		int loc;
-
-		if (interp) {
-			loc = 4 * input->lds_pos + chan;
-		} else {
-			if (input->name == TGSI_SEMANTIC_FACE)
-				loc = 4 * ctx->face_gpr;
-			else
-				loc = 4 * input->gpr + chan;
-		}
-
-		v = llvm_load_input_helper(ctx, loc, interp, ij_index);
-
-		if (two_side) {
-			struct r600_shader_io * back_input =
-					&ctx->r600_inputs[input->back_color_input];
-			int back_loc = interp ? back_input->lds_pos : back_input->gpr;
-			LLVMValueRef v2;
-
-			back_loc = 4 * back_loc + chan;
-			v2 = llvm_load_input_helper(ctx, back_loc, interp, ij_index);
-			v = llvm_face_select_helper(ctx, 4 * ctx->face_gpr, v, v2);
-		} else if (input->name == TGSI_SEMANTIC_POSITION &&
-				ctx->type == TGSI_PROCESSOR_FRAGMENT && chan == 3) {
-			/* RCP for fragcoord.w */
-			v = LLVMBuildFDiv(ctx->gallivm.builder,
-					lp_build_const_float(&(ctx->gallivm), 1.0f),
-					v, "");
-		}
-
-		ctx->inputs[soa_index] = v;
-	}
-#endif
 }
 
 static void llvm_emit_prologue(struct lp_build_tgsi_context * bld_base)
@@ -887,7 +785,6 @@ LLVMModuleRef r600_tgsi_llvm(
 	struct tgsi_shader_info shader_info;
 	struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
 	radeon_llvm_context_init(ctx);
-#if HAVE_LLVM >= 0x0304
 	LLVMTypeRef Arguments[32];
 	unsigned ArgumentsCount = 0;
 	for (unsigned i = 0; i < ctx->inputs_count; i++)
@@ -897,9 +794,6 @@ LLVMModuleRef r600_tgsi_llvm(
 		LLVMValueRef P = LLVMGetParam(ctx->main_fn, i);
 		LLVMAddAttribute(P, LLVMInRegAttribute);
 	}
-#else
-	radeon_llvm_create_func(ctx, NULL, 0);
-#endif
 	tgsi_scan_shader(tokens, &shader_info);
 
 	bld_base->info = &shader_info;
-- 
cgit v1.2.3


From e6d3846dd0873a2ded19c6416648ad61f66fbd60 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 6 Sep 2015 16:26:21 +0200
Subject: gallium/radeon: drop support for LLVM 3.4
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This allows using the new tex intrinsics unconditionally.

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 configure.ac                                        | 2 +-
 src/gallium/drivers/radeon/r600_pipe_common.c       | 4 ----
 src/gallium/drivers/radeon/radeon_llvm_emit.c       | 6 ------
 src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 3 +--
 src/gallium/drivers/radeonsi/si_compute.c           | 5 +----
 src/gallium/drivers/radeonsi/si_pipe.c              | 9 +++------
 src/gallium/drivers/radeonsi/si_shader.c            | 5 ++---
 7 files changed, 8 insertions(+), 26 deletions(-)

(limited to 'src')

diff --git a/configure.ac b/configure.ac
index 1db5b2883bc..d8e56439a54 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2073,7 +2073,7 @@ radeon_llvm_check() {
     if test "x$enable_gallium_llvm" != "xyes"; then
         AC_MSG_ERROR([--enable-gallium-llvm is required when building $1])
     fi
-    llvm_check_version_for "3" "4" "2" $1 
+    llvm_check_version_for "3" "5" "0" $1
     if test true && $LLVM_CONFIG --targets-built | grep -iqvw $amdgpu_llvm_target_name ; then
         AC_MSG_ERROR([LLVM $amdgpu_llvm_target_name not enabled in your LLVM build.])
     fi
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index ed5d1dabdc3..1302b5a346c 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -531,11 +531,7 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
 	case CHIP_KAVERI: return "kaveri";
 	case CHIP_HAWAII: return "hawaii";
 	case CHIP_MULLINS:
-#if HAVE_LLVM >= 0x0305
 		return "mullins";
-#else
-		return "kabini";
-#endif
 	case CHIP_TONGA: return "tonga";
 	case CHIP_ICELAND: return "iceland";
 	case CHIP_CARRIZO: return "carrizo";
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c
index 00025590137..3acbd02643e 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -122,8 +122,6 @@ LLVMTargetRef radeon_llvm_get_r600_target(const char *triple)
 	return target;
 }
 
-#if HAVE_LLVM >= 0x0305
-
 static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context)
 {
 	if (LLVMGetDiagInfoSeverity(di) == LLVMDSError) {
@@ -136,8 +134,6 @@ static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context)
 	}
 }
 
-#endif
-
 /**
  * Compile an LLVM module to machine code.
  *
@@ -180,9 +176,7 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar
 	/* Setup Diagnostic Handler*/
 	llvm_ctx = LLVMGetModuleContext(M);
 
-#if HAVE_LLVM >= 0x0305
 	LLVMContextSetDiagnosticHandler(llvm_ctx, radeonDiagnosticHandler, &rval);
-#endif
 	rval = 0;
 
 	/* Compile IR*/
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 56694700a47..2e9a0135647 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -1520,8 +1520,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
 	bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
 	bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
 	bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest.";
-	bld_base->op_actions[TGSI_OPCODE_RSQ].intr_name =
-		HAVE_LLVM >= 0x0305 ? "llvm.AMDGPU.rsq.clamped.f32" : "llvm.AMDGPU.rsq";
+	bld_base->op_actions[TGSI_OPCODE_RSQ].intr_name = "llvm.AMDGPU.rsq.clamped.f32";
 	bld_base->op_actions[TGSI_OPCODE_RSQ].emit = build_tgsi_intrinsic_nomem;
 	bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_cmp;
 	bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_cmp;
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index e3caf5e0183..ed9147cc91e 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -33,14 +33,11 @@
 #include "sid.h"
 
 #define MAX_GLOBAL_BUFFERS 20
-#if HAVE_LLVM < 0x0305
-#define NUM_USER_SGPRS 2
-#else
+
 /* XXX: Even though we don't pass the scratch buffer via user sgprs any more
  * LLVM still expects that we specify 4 USER_SGPRS so it can remain compatible
  * with older mesa. */
 #define NUM_USER_SGPRS 4
-#endif
 
 struct si_compute {
 	struct si_context *ctx;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 85ade311c9c..9094427cef8 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -282,6 +282,8 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
 	case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
 	case PIPE_CAP_DEPTH_BOUNDS_TEST:
+	case PIPE_CAP_TEXTURE_QUERY_LOD:
+	case PIPE_CAP_TEXTURE_GATHER_SM5:
 		return 1;
 
 	case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
@@ -304,6 +306,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 
 	case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
 	case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+	case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
 		return 4;
 
 	case PIPE_CAP_GLSL_FEATURE_LEVEL:
@@ -312,12 +315,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
 		return MIN2(sscreen->b.info.vram_size, 0xFFFFFFFF);
 
-	case PIPE_CAP_TEXTURE_QUERY_LOD:
-	case PIPE_CAP_TEXTURE_GATHER_SM5:
-		return HAVE_LLVM >= 0x0305;
-	case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
-		return HAVE_LLVM >= 0x0305 ? 4 : 0;
-
 	/* Unsupported features. */
 	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
 	case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index ab5b3ee9ce9..9a216c7a8b0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2276,7 +2276,7 @@ static void tex_fetch_args(
 	unsigned sampler_src;
 	unsigned sampler_index;
 	unsigned num_deriv_channels = 0;
-	bool has_offset = HAVE_LLVM >= 0x0305 ? inst->Texture.NumOffsets > 0 : false;
+	bool has_offset = inst->Texture.NumOffsets > 0;
 	LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
 
 	sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
@@ -2682,8 +2682,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
 	unsigned opcode = emit_data->inst->Instruction.Opcode;
 	unsigned target = emit_data->inst->Texture.Texture;
 	char intr_name[127];
-	bool has_offset = HAVE_LLVM >= 0x0305 ?
-				emit_data->inst->Texture.NumOffsets > 0 : false;
+	bool has_offset = emit_data->inst->Texture.NumOffsets > 0;
 
 	if (target == TGSI_TEXTURE_BUFFER) {
 		emit_data->output[emit_data->chan] = lp_build_intrinsic(
-- 
cgit v1.2.3


From 2176b3b09f49c1fca9ab2dbcd6b821d5637230d0 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 3 Jul 2014 20:17:36 +0200
Subject: radeonsi: only use new versions of LLVM image and sample intrinsics

Just a cleanup I had made a long time ago and forgot about.

v2: use tgsi_is_shadow_target

Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.c | 469 ++++++++++++-------------------
 1 file changed, 186 insertions(+), 283 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 9a216c7a8b0..b108664f808 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2245,15 +2245,62 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
 				struct lp_build_tgsi_context * bld_base,
 				struct lp_build_emit_data * emit_data);
 
-static bool tgsi_is_shadow_sampler(unsigned target)
+static bool tgsi_is_array_sampler(unsigned target)
 {
-	return target == TGSI_TEXTURE_SHADOW1D ||
+	return target == TGSI_TEXTURE_1D_ARRAY ||
 	       target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
-	       target == TGSI_TEXTURE_SHADOW2D ||
+	       target == TGSI_TEXTURE_2D_ARRAY ||
 	       target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
-	       target == TGSI_TEXTURE_SHADOWCUBE ||
+	       target == TGSI_TEXTURE_CUBE_ARRAY ||
 	       target == TGSI_TEXTURE_SHADOWCUBE_ARRAY ||
-	       target == TGSI_TEXTURE_SHADOWRECT;
+	       target == TGSI_TEXTURE_2D_ARRAY_MSAA;
+}
+
+static void set_tex_fetch_args(struct gallivm_state *gallivm,
+			       struct lp_build_emit_data *emit_data,
+			       unsigned opcode, unsigned target,
+			       LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
+			       LLVMValueRef *param, unsigned count,
+			       unsigned dmask)
+{
+	unsigned num_args;
+	unsigned is_rect = target == TGSI_TEXTURE_RECT;
+	LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
+
+	/* Pad to power of two vector */
+	while (count < util_next_power_of_two(count))
+		param[count++] = LLVMGetUndef(i32);
+
+	/* Texture coordinates. */
+	if (count > 1)
+		emit_data->args[0] = lp_build_gather_values(gallivm, param, count);
+	else
+		emit_data->args[0] = param[0];
+
+	/* Resource. */
+	emit_data->args[1] = res_ptr;
+	num_args = 2;
+
+	if (opcode == TGSI_OPCODE_TXF || opcode == TGSI_OPCODE_TXQ)
+		emit_data->dst_type = LLVMVectorType(i32, 4);
+	else {
+		emit_data->dst_type = LLVMVectorType(
+			LLVMFloatTypeInContext(gallivm->context), 4);
+
+		emit_data->args[num_args++] = samp_ptr;
+	}
+
+	emit_data->args[num_args++] = lp_build_const_int32(gallivm, dmask);
+	emit_data->args[num_args++] = lp_build_const_int32(gallivm, is_rect); /* unorm */
+	emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* r128 */
+	emit_data->args[num_args++] = lp_build_const_int32(gallivm,
+					tgsi_is_array_sampler(target)); /* da */
+	emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* glc */
+	emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* slc */
+	emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* tfe */
+	emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* lwe */
+
+	emit_data->arg_count = num_args;
 }
 
 static const struct lp_build_tgsi_action tex_action;
@@ -2264,6 +2311,7 @@ static void tex_fetch_args(
 {
 	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
 	const struct tgsi_full_instruction * inst = emit_data->inst;
 	unsigned opcode = inst->Instruction.Opcode;
 	unsigned target = inst->Texture.Texture;
@@ -2278,6 +2326,8 @@ static void tex_fetch_args(
 	unsigned num_deriv_channels = 0;
 	bool has_offset = inst->Texture.NumOffsets > 0;
 	LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
+	LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
+	unsigned dmask = 0xf;
 
 	sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
 	sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
@@ -2308,6 +2358,43 @@ static void tex_fetch_args(
 		fmask_ptr = si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index];
 	}
 
+	if (opcode == TGSI_OPCODE_TXQ) {
+		if (target == TGSI_TEXTURE_BUFFER) {
+			LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);
+
+			/* Read the size from the buffer descriptor directly. */
+			LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, v8i32, "");
+			LLVMValueRef size = LLVMBuildExtractElement(builder, res,
+							lp_build_const_int32(gallivm, 6), "");
+
+			if (si_shader_ctx->screen->b.chip_class >= VI) {
+				/* On VI, the descriptor contains the size in bytes,
+				 * but TXQ must return the size in elements.
+				 * The stride is always non-zero for resources using TXQ.
+				 */
+				LLVMValueRef stride =
+					LLVMBuildExtractElement(builder, res,
+								lp_build_const_int32(gallivm, 5), "");
+				stride = LLVMBuildLShr(builder, stride,
+						       lp_build_const_int32(gallivm, 16), "");
+				stride = LLVMBuildAnd(builder, stride,
+						      lp_build_const_int32(gallivm, 0x3FFF), "");
+
+				size = LLVMBuildUDiv(builder, size, stride, "");
+			}
+
+			emit_data->args[0] = size;
+			return;
+		}
+
+		/* Textures - set the mip level. */
+		address[count++] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
+
+		set_tex_fetch_args(gallivm, emit_data, opcode, target, res_ptr,
+				   NULL, address, count, 0xf);
+		return;
+	}
+
 	if (target == TGSI_TEXTURE_BUFFER) {
 		LLVMTypeRef i128 = LLVMIntTypeInContext(gallivm->context, 128);
 		LLVMTypeRef v2i128 = LLVMVectorType(i128, 2);
@@ -2375,7 +2462,7 @@ static void tex_fetch_args(
 		address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
 
 	/* Pack depth comparison value */
-	if (tgsi_is_shadow_sampler(target) && opcode != TGSI_OPCODE_LODQ) {
+	if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
 		if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
 			address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
 		} else {
@@ -2457,9 +2544,7 @@ static void tex_fetch_args(
 
 	for (chan = 0; chan < count; chan++ ) {
 		address[chan] = LLVMBuildBitCast(gallivm->builder,
-						 address[chan],
-						 LLVMInt32TypeInContext(gallivm->context),
-						 "");
+						 address[chan], i32, "");
 	}
 
 	/* Adjust the sample index according to FMASK.
@@ -2491,22 +2576,14 @@ static void tex_fetch_args(
 		}
 		txf_address[3] = bld_base->uint_bld.zero;
 
-		/* Pad to a power-of-two size. */
-		while (txf_count < util_next_power_of_two(txf_count))
-			txf_address[txf_count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
-
 		/* Read FMASK using TXF. */
 		inst.Instruction.Opcode = TGSI_OPCODE_TXF;
-		inst.Texture.Texture = target == TGSI_TEXTURE_2D_MSAA ? TGSI_TEXTURE_2D : TGSI_TEXTURE_2D_ARRAY;
+		inst.Texture.Texture = target;
 		txf_emit_data.inst = &inst;
 		txf_emit_data.chan = 0;
-		txf_emit_data.dst_type = LLVMVectorType(
-			LLVMInt32TypeInContext(gallivm->context), 4);
-		txf_emit_data.args[0] = lp_build_gather_values(gallivm, txf_address, txf_count);
-		txf_emit_data.args[1] = fmask_ptr;
-		txf_emit_data.args[2] = lp_build_const_int32(gallivm, inst.Texture.Texture);
-		txf_emit_data.arg_count = 3;
-
+		set_tex_fetch_args(gallivm, &txf_emit_data, TGSI_OPCODE_TXF,
+				   target, fmask_ptr, NULL,
+				   txf_address, txf_count, 0xf);
 		build_tex_intrinsic(&tex_action, bld_base, &txf_emit_data);
 
 		/* Initialize some constants. */
@@ -2551,9 +2628,6 @@ static void tex_fetch_args(
 					final_sample, address[sample_chan], "");
 	}
 
-	/* Resource */
-	emit_data->args[1] = res_ptr;
-
 	if (opcode == TGSI_OPCODE_TXF) {
 		/* add tex offsets */
 		if (inst->Texture.NumOffsets) {
@@ -2589,89 +2663,37 @@ static void tex_fetch_args(
 				/* texture offsets do not apply to other texture targets */
 			}
 		}
+	}
 
-		emit_data->args[2] = lp_build_const_int32(gallivm, target);
-		emit_data->arg_count = 3;
-
-		emit_data->dst_type = LLVMVectorType(
-			LLVMInt32TypeInContext(gallivm->context),
-			4);
-	} else if (opcode == TGSI_OPCODE_TG4 ||
-		   opcode == TGSI_OPCODE_LODQ ||
-		   has_offset) {
-		unsigned is_array = target == TGSI_TEXTURE_1D_ARRAY ||
-				    target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
-				    target == TGSI_TEXTURE_2D_ARRAY ||
-				    target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
-				    target == TGSI_TEXTURE_CUBE_ARRAY ||
-				    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY;
-		unsigned is_rect = target == TGSI_TEXTURE_RECT;
-		unsigned dmask = 0xf;
-
-		if (opcode == TGSI_OPCODE_TG4) {
-			unsigned gather_comp = 0;
-
-			/* DMASK was repurposed for GATHER4. 4 components are always
-			 * returned and DMASK works like a swizzle - it selects
-			 * the component to fetch. The only valid DMASK values are
-			 * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
-			 * (red,red,red,red) etc.) The ISA document doesn't mention
-			 * this.
-			 */
+	if (opcode == TGSI_OPCODE_TG4) {
+		unsigned gather_comp = 0;
 
-			/* Get the component index from src1.x for Gather4. */
-			if (!tgsi_is_shadow_sampler(target)) {
-				LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
-				LLVMValueRef comp_imm;
-				struct tgsi_src_register src1 = inst->Src[1].Register;
+		/* DMASK was repurposed for GATHER4. 4 components are always
+		 * returned and DMASK works like a swizzle - it selects
+		 * the component to fetch. The only valid DMASK values are
+		 * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
+		 * (red,red,red,red) etc.) The ISA document doesn't mention
+		 * this.
+		 */
 
-				assert(src1.File == TGSI_FILE_IMMEDIATE);
+		/* Get the component index from src1.x for Gather4. */
+		if (!tgsi_is_shadow_target(target)) {
+			LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
+			LLVMValueRef comp_imm;
+			struct tgsi_src_register src1 = inst->Src[1].Register;
 
-				comp_imm = imms[src1.Index][src1.SwizzleX];
-				gather_comp = LLVMConstIntGetZExtValue(comp_imm);
-				gather_comp = CLAMP(gather_comp, 0, 3);
-			}
+			assert(src1.File == TGSI_FILE_IMMEDIATE);
 
-			dmask = 1 << gather_comp;
+			comp_imm = imms[src1.Index][src1.SwizzleX];
+			gather_comp = LLVMConstIntGetZExtValue(comp_imm);
+			gather_comp = CLAMP(gather_comp, 0, 3);
 		}
 
-		emit_data->args[2] = samp_ptr;
-		emit_data->args[3] = lp_build_const_int32(gallivm, dmask);
-		emit_data->args[4] = lp_build_const_int32(gallivm, is_rect); /* unorm */
-		emit_data->args[5] = lp_build_const_int32(gallivm, 0); /* r128 */
-		emit_data->args[6] = lp_build_const_int32(gallivm, is_array); /* da */
-		emit_data->args[7] = lp_build_const_int32(gallivm, 0); /* glc */
-		emit_data->args[8] = lp_build_const_int32(gallivm, 0); /* slc */
-		emit_data->args[9] = lp_build_const_int32(gallivm, 0); /* tfe */
-		emit_data->args[10] = lp_build_const_int32(gallivm, 0); /* lwe */
-
-		emit_data->arg_count = 11;
-
-		emit_data->dst_type = LLVMVectorType(
-			LLVMFloatTypeInContext(gallivm->context),
-			4);
-	} else {
-		emit_data->args[2] = samp_ptr;
-		emit_data->args[3] = lp_build_const_int32(gallivm, target);
-		emit_data->arg_count = 4;
-
-		emit_data->dst_type = LLVMVectorType(
-			LLVMFloatTypeInContext(gallivm->context),
-			4);
+		dmask = 1 << gather_comp;
 	}
 
-	/* The fetch opcode has been converted to a 2D array fetch.
-	 * This simplifies the LLVM backend. */
-	if (target == TGSI_TEXTURE_CUBE_ARRAY)
-		target = TGSI_TEXTURE_2D_ARRAY;
-	else if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
-		target = TGSI_TEXTURE_SHADOW2D_ARRAY;
-
-	/* Pad to power of two vector */
-	while (count < util_next_power_of_two(count))
-		address[count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
-
-	emit_data->args[0] = lp_build_gather_values(gallivm, address, count);
+	set_tex_fetch_args(gallivm, emit_data, opcode, target, res_ptr,
+			   samp_ptr, address, count, dmask);
 }
 
 static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
@@ -2683,6 +2705,16 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
 	unsigned target = emit_data->inst->Texture.Texture;
 	char intr_name[127];
 	bool has_offset = emit_data->inst->Texture.NumOffsets > 0;
+	bool is_shadow = tgsi_is_shadow_target(target);
+	char type[64];
+	const char *name = "llvm.SI.image.sample";
+	const char *infix = "";
+
+	if (opcode == TGSI_OPCODE_TXQ && target == TGSI_TEXTURE_BUFFER) {
+		/* Just return the buffer size. */
+		emit_data->output[emit_data->chan] = emit_data->args[0];
+		return;
+	}
 
 	if (target == TGSI_TEXTURE_BUFFER) {
 		emit_data->output[emit_data->chan] = lp_build_intrinsic(
@@ -2693,191 +2725,68 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
 		return;
 	}
 
-	if (opcode == TGSI_OPCODE_TG4 ||
-	    opcode == TGSI_OPCODE_LODQ ||
-	    (opcode != TGSI_OPCODE_TXF && has_offset)) {
-		bool is_shadow = tgsi_is_shadow_sampler(target);
-		const char *name = "llvm.SI.image.sample";
-		const char *infix = "";
-
-		switch (opcode) {
-		case TGSI_OPCODE_TEX:
-		case TGSI_OPCODE_TEX2:
-		case TGSI_OPCODE_TXP:
-			break;
-		case TGSI_OPCODE_TXB:
-		case TGSI_OPCODE_TXB2:
-			infix = ".b";
-			break;
-		case TGSI_OPCODE_TXL:
-		case TGSI_OPCODE_TXL2:
-			infix = ".l";
-			break;
-		case TGSI_OPCODE_TXD:
-			infix = ".d";
-			break;
-		case TGSI_OPCODE_TG4:
-			name = "llvm.SI.gather4";
-			break;
-		case TGSI_OPCODE_LODQ:
-			name = "llvm.SI.getlod";
-			is_shadow = false;
-			has_offset = false;
-			break;
-		default:
-			assert(0);
-			return;
-		}
-
-		/* Add the type and suffixes .c, .o if needed. */
-		sprintf(intr_name, "%s%s%s%s.v%ui32", name,
-			is_shadow ? ".c" : "", infix, has_offset ? ".o" : "",
-			LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
-
-		emit_data->output[emit_data->chan] = lp_build_intrinsic(
-			base->gallivm->builder, intr_name, emit_data->dst_type,
-			emit_data->args, emit_data->arg_count,
-			LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
-	} else {
-		LLVMTypeRef i8, v16i8, v32i8;
-		const char *name;
-
-		switch (opcode) {
-		case TGSI_OPCODE_TEX:
-		case TGSI_OPCODE_TEX2:
-		case TGSI_OPCODE_TXP:
-			name = "llvm.SI.sample";
-			break;
-		case TGSI_OPCODE_TXB:
-		case TGSI_OPCODE_TXB2:
-			name = "llvm.SI.sampleb";
-			break;
-		case TGSI_OPCODE_TXD:
-			name = "llvm.SI.sampled";
-			break;
-		case TGSI_OPCODE_TXF:
-			name = "llvm.SI.imageload";
-			break;
-		case TGSI_OPCODE_TXL:
-		case TGSI_OPCODE_TXL2:
-			name = "llvm.SI.samplel";
-			break;
-		default:
-			assert(0);
-			return;
-		}
-
-		i8 = LLVMInt8TypeInContext(base->gallivm->context);
-		v16i8 = LLVMVectorType(i8, 16);
-		v32i8 = LLVMVectorType(i8, 32);
-
-		emit_data->args[1] = LLVMBuildBitCast(base->gallivm->builder,
-						emit_data->args[1], v32i8, "");
-		if (opcode != TGSI_OPCODE_TXF) {
-			emit_data->args[2] = LLVMBuildBitCast(base->gallivm->builder,
-						emit_data->args[2], v16i8, "");
-		}
-
-		sprintf(intr_name, "%s.v%ui32", name,
-			LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
-
-		emit_data->output[emit_data->chan] = lp_build_intrinsic(
-			base->gallivm->builder, intr_name, emit_data->dst_type,
-			emit_data->args, emit_data->arg_count,
-			LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
-	}
-}
-
-static void txq_fetch_args(
-	struct lp_build_tgsi_context * bld_base,
-	struct lp_build_emit_data * emit_data)
-{
-	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
-	const struct tgsi_full_instruction *inst = emit_data->inst;
-	struct gallivm_state *gallivm = bld_base->base.gallivm;
-	LLVMBuilderRef builder = gallivm->builder;
-	unsigned target = inst->Texture.Texture;
-	LLVMValueRef res_ptr;
-
-	if (inst->Src[1].Register.Indirect) {
-		const struct tgsi_full_src_register *reg = &inst->Src[1];
-		LLVMValueRef ind_index;
-
-		ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect, reg->Register.Index);
-
-		res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
-		res_ptr = build_indexed_load_const(si_shader_ctx, res_ptr,
-						   ind_index);
-	} else
-		res_ptr = si_shader_ctx->resources[inst->Src[1].Register.Index];
-
-	if (target == TGSI_TEXTURE_BUFFER) {
-		LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
-		LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);
-
-		/* Read the size from the buffer descriptor directly. */
-		LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, v8i32, "");
-		LLVMValueRef size = LLVMBuildExtractElement(builder, res,
-						lp_build_const_int32(gallivm, 6), "");
-
-		if (si_shader_ctx->screen->b.chip_class >= VI) {
-			/* On VI, the descriptor contains the size in bytes,
-			 * but TXQ must return the size in elements.
-			 * The stride is always non-zero for resources using TXQ.
-			 */
-			LLVMValueRef stride =
-				LLVMBuildExtractElement(builder, res,
-							lp_build_const_int32(gallivm, 5), "");
-			stride = LLVMBuildLShr(builder, stride,
-					       lp_build_const_int32(gallivm, 16), "");
-			stride = LLVMBuildAnd(builder, stride,
-					      lp_build_const_int32(gallivm, 0x3FFF), "");
-
-			size = LLVMBuildUDiv(builder, size, stride, "");
-		}
-
-		emit_data->args[0] = size;
+	switch (opcode) {
+	case TGSI_OPCODE_TXF:
+		name = target == TGSI_TEXTURE_2D_MSAA ||
+		       target == TGSI_TEXTURE_2D_ARRAY_MSAA ?
+			       "llvm.SI.image.load" :
+			       "llvm.SI.image.load.mip";
+		is_shadow = false;
+		has_offset = false;
+		break;
+	case TGSI_OPCODE_TXQ:
+		name = "llvm.SI.getresinfo";
+		is_shadow = false;
+		has_offset = false;
+		break;
+	case TGSI_OPCODE_LODQ:
+		name = "llvm.SI.getlod";
+		is_shadow = false;
+		has_offset = false;
+		break;
+	case TGSI_OPCODE_TEX:
+	case TGSI_OPCODE_TEX2:
+	case TGSI_OPCODE_TXP:
+		break;
+	case TGSI_OPCODE_TXB:
+	case TGSI_OPCODE_TXB2:
+		infix = ".b";
+		break;
+	case TGSI_OPCODE_TXL:
+	case TGSI_OPCODE_TXL2:
+		infix = ".l";
+		break;
+	case TGSI_OPCODE_TXD:
+		infix = ".d";
+		break;
+	case TGSI_OPCODE_TG4:
+		name = "llvm.SI.gather4";
+		break;
+	default:
+		assert(0);
 		return;
 	}
 
-	/* Mip level */
-	emit_data->args[0] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
-
-	/* Resource */
-	emit_data->args[1] = res_ptr;
-
-	/* Texture target */
-	if (target == TGSI_TEXTURE_CUBE_ARRAY ||
-	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
-		target = TGSI_TEXTURE_2D_ARRAY;
-
-	emit_data->args[2] = lp_build_const_int32(bld_base->base.gallivm,
-						  target);
-
-	emit_data->arg_count = 3;
-
-	emit_data->dst_type = LLVMVectorType(
-		LLVMInt32TypeInContext(bld_base->base.gallivm->context),
-		4);
-}
-
-static void build_txq_intrinsic(const struct lp_build_tgsi_action * action,
-				struct lp_build_tgsi_context * bld_base,
-				struct lp_build_emit_data * emit_data)
-{
-	unsigned target = emit_data->inst->Texture.Texture;
+	if (LLVMGetTypeKind(LLVMTypeOf(emit_data->args[0])) == LLVMVectorTypeKind)
+		sprintf(type, ".v%ui32",
+			LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
+	else
+		strcpy(type, ".i32");
 
-	if (target == TGSI_TEXTURE_BUFFER) {
-		/* Just return the buffer size. */
-		emit_data->output[emit_data->chan] = emit_data->args[0];
-		return;
-	}
+	/* Add the type and suffixes .c, .o if needed. */
+	sprintf(intr_name, "%s%s%s%s%s",
+		name, is_shadow ? ".c" : "", infix,
+		has_offset ? ".o" : "", type);
 
-	build_tgsi_intrinsic_nomem(action, bld_base, emit_data);
+	emit_data->output[emit_data->chan] = lp_build_intrinsic(
+		base->gallivm->builder, intr_name, emit_data->dst_type,
+		emit_data->args, emit_data->arg_count,
+		LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
 
 	/* Divide the number of layers by 6 to get the number of cubes. */
-	if (target == TGSI_TEXTURE_CUBE_ARRAY ||
-	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
+	if (opcode == TGSI_OPCODE_TXQ &&
+	    (target == TGSI_TEXTURE_CUBE_ARRAY ||
+	     target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)) {
 		LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 		LLVMValueRef two = lp_build_const_int32(bld_base->base.gallivm, 2);
 		LLVMValueRef six = lp_build_const_int32(bld_base->base.gallivm, 6);
@@ -3354,12 +3263,6 @@ static const struct lp_build_tgsi_action tex_action = {
 	.emit = build_tex_intrinsic,
 };
 
-static const struct lp_build_tgsi_action txq_action = {
-	.fetch_args = txq_fetch_args,
-	.emit = build_txq_intrinsic,
-	.intr_name = "llvm.SI.resinfo"
-};
-
 static const struct lp_build_tgsi_action interp_action = {
 	.fetch_args = interp_fetch_args,
 	.emit = build_interp_intrinsic,
@@ -4069,7 +3972,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 	bld_base->op_actions[TGSI_OPCODE_TXL] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TXL2] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
-	bld_base->op_actions[TGSI_OPCODE_TXQ] = txq_action;
+	bld_base->op_actions[TGSI_OPCODE_TXQ] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TG4] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action;
 
-- 
cgit v1.2.3


From 6a684ff67e650d2da5ccbd7ba72ade0d0abb7ea7 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 6 Sep 2015 15:49:55 +0200
Subject: radeonsi/compute: add buffers to the CS directly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Packets are emitted immediately anyway.

Acked-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_compute.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index ed9147cc91e..e1849bad933 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -294,9 +294,10 @@ static void si_launch_grid(
 			    shader->scratch_bytes_per_wave *
 			    num_waves_for_scratch);
 
-		si_pm4_add_bo(pm4, shader->scratch_bo,
-				RADEON_USAGE_READWRITE,
-				RADEON_PRIO_SHADER_RESOURCE_RW);
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+					  shader->scratch_bo,
+					  RADEON_USAGE_READWRITE,
+					  RADEON_PRIO_SHADER_RESOURCE_RW);
 
 		scratch_buffer_va = shader->scratch_bo->gpu_address;
 	}
@@ -309,8 +310,8 @@ static void si_launch_grid(
 	kernel_args_va = input_buffer->gpu_address;
 	kernel_args_va += kernel_args_offset;
 
-	si_pm4_add_bo(pm4, input_buffer, RADEON_USAGE_READ,
-		RADEON_PRIO_SHADER_DATA);
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, input_buffer,
+				  RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
 
 	si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
 	si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) | S_008F04_STRIDE(0));
@@ -337,7 +338,9 @@ static void si_launch_grid(
 		if (!buffer) {
 			continue;
 		}
-		si_pm4_add_bo(pm4, buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, buffer,
+					  RADEON_USAGE_READWRITE,
+					  RADEON_PRIO_SHADER_RESOURCE_RW);
 	}
 
 	/* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
@@ -358,7 +361,8 @@ static void si_launch_grid(
 #if HAVE_LLVM >= 0x0306
 	shader_va += pc;
 #endif
-	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, shader->bo,
+				  RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
 	si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
 	si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
 
-- 
cgit v1.2.3


From afa752d3f03ac6697581ff5d324e8ac0512ef513 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 6 Sep 2015 15:59:33 +0200
Subject: radeonsi: decrease the size of si_pm4_state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Acked-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_pm4.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
index 5282d00fe46..309a5969368 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -29,9 +29,8 @@
 
 #include "radeon/radeon_winsys.h"
 
-#define SI_PM4_MAX_DW		256
-#define SI_PM4_MAX_BO		32
-#define SI_PM4_MAX_RELOCS	4
+#define SI_PM4_MAX_DW		160
+#define SI_PM4_MAX_BO		1
 
 // forward defines
 struct si_context;
-- 
cgit v1.2.3


From 60ec8fb448f292b8aac08f74c26da8171b2b6a8f Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 3 Sep 2015 19:34:58 +0200
Subject: radeonsi: don't update polygon offset state if it has no effect
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_state.c | 4 +++-
 src/gallium/drivers/radeonsi/si_state.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index f698c59d87a..d74f6e896c4 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -636,7 +636,7 @@ static void si_update_poly_offset_state(struct si_context *sctx)
 {
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 
-	if (!rs || !sctx->framebuffer.state.zsbuf)
+	if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
 		return;
 
 	switch (sctx->framebuffer.state.zsbuf->texture->format) {
@@ -691,6 +691,8 @@ static void *si_create_rs_state(struct pipe_context *ctx,
 	rs->poly_stipple_enable = state->poly_stipple_enable;
 	rs->line_smooth = state->line_smooth;
 	rs->poly_smooth = state->poly_smooth;
+	rs->uses_poly_offset = state->offset_point || state->offset_line ||
+			       state->offset_tri;
 
 	rs->flatshade = state->flatshade;
 	rs->sprite_coord_enable = state->sprite_coord_enable;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 3718e05f3b3..900b70f804c 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -58,6 +58,7 @@ struct si_state_rasterizer {
 	bool			poly_stipple_enable;
 	bool			line_smooth;
 	bool			poly_smooth;
+	bool			uses_poly_offset;
 };
 
 struct si_dsa_stencil_ref_part {
-- 
cgit v1.2.3


From b409524fef1b02c717b4cc1fef70d5710781f824 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 6 Sep 2015 15:41:35 +0200
Subject: gallium/radeon: handle PIPE_TRANSFER_FLUSH_EXPLICIT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Basically, do the same thing as for buffer_unmap, but use the explicit range
instead. It's for apps which want to map a whole buffer and mark touched
ranges explicitly.

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeon/r600_buffer_common.c | 62 +++++++++++++++++--------
 src/gallium/drivers/radeon/r600_pipe_common.c   |  2 +-
 src/gallium/drivers/radeon/r600_texture.c       |  2 +-
 3 files changed, 44 insertions(+), 22 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index cb9809f2449..4adcccbb8ed 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -346,37 +346,59 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 					ptransfer, data, NULL, 0);
 }
 
-static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
-				       struct pipe_transfer *transfer)
+static void r600_buffer_do_flush_region(struct pipe_context *ctx,
+					struct pipe_transfer *transfer,
+				        const struct pipe_box *box)
 {
 	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
 	struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
 	struct r600_resource *rbuffer = r600_resource(transfer->resource);
 
 	if (rtransfer->staging) {
-		if (rtransfer->transfer.usage & PIPE_TRANSFER_WRITE) {
-			struct pipe_resource *dst, *src;
-			unsigned soffset, doffset, size;
-			struct pipe_box box;
+		struct pipe_resource *dst, *src;
+		unsigned soffset;
+		struct pipe_box dma_box;
 
-			dst = transfer->resource;
-			src = &rtransfer->staging->b.b;
-			size = transfer->box.width;
-			doffset = transfer->box.x;
-			soffset = rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT;
+		dst = transfer->resource;
+		src = &rtransfer->staging->b.b;
+		soffset = rtransfer->offset + box->x % R600_MAP_BUFFER_ALIGNMENT;
 
-			u_box_1d(soffset, size, &box);
+		u_box_1d(soffset, box->width, &dma_box);
 
-			/* Copy the staging buffer into the original one. */
-			rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box);
-		}
-		pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
+		/* Copy the staging buffer into the original one. */
+		rctx->dma_copy(ctx, dst, 0, box->x, 0, 0, src, 0, &dma_box);
 	}
 
-	if (transfer->usage & PIPE_TRANSFER_WRITE) {
-		util_range_add(&rbuffer->valid_buffer_range, transfer->box.x,
-			       transfer->box.x + transfer->box.width);
+	util_range_add(&rbuffer->valid_buffer_range, box->x,
+		       box->x + box->width);
+}
+
+static void r600_buffer_flush_region(struct pipe_context *ctx,
+				     struct pipe_transfer *transfer,
+				     const struct pipe_box *rel_box)
+{
+	if (transfer->usage & (PIPE_TRANSFER_WRITE |
+			       PIPE_TRANSFER_FLUSH_EXPLICIT)) {
+		struct pipe_box box;
+
+		u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
+		r600_buffer_do_flush_region(ctx, transfer, &box);
 	}
+}
+
+static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
+				       struct pipe_transfer *transfer)
+{
+	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+	struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
+
+	if (transfer->usage & PIPE_TRANSFER_WRITE &&
+	    !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
+		r600_buffer_do_flush_region(ctx, transfer, &transfer->box);
+
+	if (rtransfer->staging)
+		pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
+
 	util_slab_free(&rctx->pool_transfers, transfer);
 }
 
@@ -385,7 +407,7 @@ static const struct u_resource_vtbl r600_buffer_vtbl =
 	NULL,				/* get_handle */
 	r600_buffer_destroy,		/* resource_destroy */
 	r600_buffer_transfer_map,	/* transfer_map */
-	NULL,				/* transfer_flush_region */
+	r600_buffer_flush_region,	/* transfer_flush_region */
 	r600_buffer_transfer_unmap,	/* transfer_unmap */
 	NULL				/* transfer_inline_write */
 };
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 1302b5a346c..836da76c0bc 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -231,7 +231,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
 		rctx->max_db = 4;
 
 	rctx->b.transfer_map = u_transfer_map_vtbl;
-	rctx->b.transfer_flush_region = u_default_transfer_flush_region;
+	rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl;
 	rctx->b.transfer_unmap = u_transfer_unmap_vtbl;
 	rctx->b.transfer_inline_write = u_default_transfer_inline_write;
         rctx->b.memory_barrier = r600_memory_barrier;
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 89f18fb106f..a0259160f8c 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1092,7 +1092,7 @@ static const struct u_resource_vtbl r600_texture_vtbl =
 	NULL,				/* get_handle */
 	r600_texture_destroy,		/* resource_destroy */
 	r600_texture_transfer_map,	/* transfer_map */
-	NULL,				/* transfer_flush_region */
+	u_default_transfer_flush_region, /* transfer_flush_region */
 	r600_texture_transfer_unmap,	/* transfer_unmap */
 	NULL				/* transfer_inline_write */
 };
-- 
cgit v1.2.3


From 85500fe2e154c1664dc3a89f1e58e9a9c0dff012 Mon Sep 17 00:00:00 2001
From: Krzesimir Nowak <krzesimir@kinvolk.io>
Date: Thu, 10 Sep 2015 14:15:51 +0200
Subject: tgsi: Remove trailing backslash in comment

It clearly is here by accident.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 75cd0d53c5a..9544623e90c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2021,7 +2021,7 @@ fetch_sampler_unit(struct tgsi_exec_machine *mach,
 /*
  * execute a texture instruction.
  *
- * modifier is used to control the channel routing for the\
+ * modifier is used to control the channel routing for the
  * instruction variants like proj, lod, and texture with lod bias.
  * sampler indicates which src register the sampler is contained in.
  */
-- 
cgit v1.2.3


From bdc69552ca0268fa489daa215dff7db96fdbeb1d Mon Sep 17 00:00:00 2001
From: Krzesimir Nowak <krzesimir@kinvolk.io>
Date: Thu, 10 Sep 2015 14:15:52 +0200
Subject: softpipe: Fix textureLod with nonzero GL_TEXTURE_LOD_BIAS value

The level-of-detail bias simply wasn't added in the explicit LOD case.
This case seems to be tested only in piglit's
fs-texturequerylod-nearest-biased test, which is currently skipped, as
softpipe does not support textureQueryLod at the moment.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 565fca632c6..19188b03bb5 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1892,7 +1892,7 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview,
       break;
    case tgsi_sampler_lod_explicit:
       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
-         lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
+         lod[i] = CLAMP(lod_in[i] + lod_bias, min_lod, max_lod);
       }
       break;
    case tgsi_sampler_lod_zero:
-- 
cgit v1.2.3


From 16084cd2cf055849933e19047e604d384da81f8e Mon Sep 17 00:00:00 2001
From: Krzesimir Nowak <krzesimir@kinvolk.io>
Date: Thu, 10 Sep 2015 14:15:53 +0200
Subject: softpipe: Split compute_lambda_lod into two functions

textureQueryLod returns a vec2 with mipmap information and a
LOD. The latter must not be clamped.

v2:
  - changed the "not_clamped" part to "unclamped"
  - corrected "clamp into" to "clamp to"
  - split overly long lines

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 57 +++++++++++++++++++---------
 1 file changed, 40 insertions(+), 17 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 19188b03bb5..30c9cb042ea 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1855,24 +1855,23 @@ compute_lod(const struct pipe_sampler_state *sampler,
 }
 
 
-/* Calculate level of detail for every fragment.
+/* Calculate level of detail for every fragment. The computed value is not
+ * clamped to lod_min and lod_max.
  * \param lod_in per-fragment lod_bias or explicit_lod.
  * \param lod results per-fragment lod.
  */
 static inline void
-compute_lambda_lod(struct sp_sampler_view *sp_sview,
-                   struct sp_sampler *sp_samp,
-                   const float s[TGSI_QUAD_SIZE],
-                   const float t[TGSI_QUAD_SIZE],
-                   const float p[TGSI_QUAD_SIZE],
-                   const float lod_in[TGSI_QUAD_SIZE],
-                   enum tgsi_sampler_control control,
-                   float lod[TGSI_QUAD_SIZE])
+compute_lambda_lod_unclamped(struct sp_sampler_view *sp_sview,
+                             struct sp_sampler *sp_samp,
+                             const float s[TGSI_QUAD_SIZE],
+                             const float t[TGSI_QUAD_SIZE],
+                             const float p[TGSI_QUAD_SIZE],
+                             const float lod_in[TGSI_QUAD_SIZE],
+                             enum tgsi_sampler_control control,
+                             float lod[TGSI_QUAD_SIZE])
 {
    const struct pipe_sampler_state *sampler = &sp_samp->base;
-   float lod_bias = sampler->lod_bias;
-   float min_lod = sampler->min_lod;
-   float max_lod = sampler->max_lod;
+   const float lod_bias = sampler->lod_bias;
    float lambda;
    uint i;
 
@@ -1881,24 +1880,22 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview,
       /* XXX FIXME */
    case tgsi_sampler_derivs_explicit:
       lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
-      lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lambda, min_lod, max_lod);
+      lod[0] = lod[1] = lod[2] = lod[3] = lambda;
       break;
    case tgsi_sampler_lod_bias:
       lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
          lod[i] = lambda + lod_in[i];
-         lod[i] = CLAMP(lod[i], min_lod, max_lod);
       }
       break;
    case tgsi_sampler_lod_explicit:
       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
-         lod[i] = CLAMP(lod_in[i] + lod_bias, min_lod, max_lod);
+         lod[i] = lod_in[i] + lod_bias;
       }
       break;
    case tgsi_sampler_lod_zero:
    case tgsi_sampler_gather:
-      /* this is all static state in the sampler really need clamp here? */
-      lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lod_bias, min_lod, max_lod);
+      lod[0] = lod[1] = lod[2] = lod[3] = lod_bias;
       break;
    default:
       assert(0);
@@ -1906,6 +1903,32 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview,
    }
 }
 
+/* Calculate level of detail for every fragment.
+ * \param lod_in per-fragment lod_bias or explicit_lod.
+ * \param lod results per-fragment lod.
+ */
+static inline void
+compute_lambda_lod(struct sp_sampler_view *sp_sview,
+                   struct sp_sampler *sp_samp,
+                   const float s[TGSI_QUAD_SIZE],
+                   const float t[TGSI_QUAD_SIZE],
+                   const float p[TGSI_QUAD_SIZE],
+                   const float lod_in[TGSI_QUAD_SIZE],
+                   enum tgsi_sampler_control control,
+                   float lod[TGSI_QUAD_SIZE])
+{
+   const struct pipe_sampler_state *sampler = &sp_samp->base;
+   const float min_lod = sampler->min_lod;
+   const float max_lod = sampler->max_lod;
+   int i;
+
+   compute_lambda_lod_unclamped(sp_sview, sp_samp,
+                                s, t, p, lod_in, control, lod);
+   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+      lod[i] = CLAMP(lod[i], min_lod, max_lod);
+   }
+}
+
 static inline unsigned
 get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
 {
-- 
cgit v1.2.3


From b9bc6c42c96773a5784897c55da5387045c0e9b3 Mon Sep 17 00:00:00 2001
From: Krzesimir Nowak <krzesimir@kinvolk.io>
Date: Thu, 10 Sep 2015 14:15:54 +0200
Subject: softpipe: Put mip_filter_func inside a struct

Putting this function pointer into a struct enables grouping of
several related functions in a single place. For now it is just a
single function, but the struct will be later extended with a
mip_level_func for returning relative mip level.

v2:
  - renamed sp_mip struct to sp_filter_funcs
  - renamed sp_filter_funcs instances from mip_foo to funcs_foo
  - split overly long lines
  - sp_sampler now holds a pointer to sp_filter_funcs instead of an
    instance of it
  - some const fixes

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 45 +++++++++++++++++++++-------
 src/gallium/drivers/softpipe/sp_tex_sample.h |  5 +++-
 2 files changed, 38 insertions(+), 12 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 30c9cb042ea..8f7cb1a3972 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2515,6 +2515,29 @@ mip_filter_linear_2d_linear_repeat_POT(
    }
 }
 
+static const struct sp_filter_funcs funcs_linear = {
+   mip_filter_linear
+};
+
+static const struct sp_filter_funcs funcs_nearest = {
+   mip_filter_nearest
+};
+
+static const struct sp_filter_funcs funcs_none = {
+   mip_filter_none
+};
+
+static const struct sp_filter_funcs funcs_none_no_filter_select = {
+   mip_filter_none_no_filter_select
+};
+
+static const struct sp_filter_funcs funcs_linear_aniso = {
+   mip_filter_linear_aniso
+};
+
+static const struct sp_filter_funcs funcs_linear_2d_linear_repeat_POT = {
+   mip_filter_linear_2d_linear_repeat_POT
+};
 
 /**
  * Do shadow/depth comparisons.
@@ -2918,18 +2941,18 @@ sample_mip(struct sp_sampler_view *sp_sview,
            const struct filter_args *filt_args,
            float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
-   mip_filter_func mip_filter;
+   const struct sp_filter_funcs *funcs = NULL;
    img_filter_func min_img_filter = NULL;
    img_filter_func mag_img_filter = NULL;
 
    if (filt_args->control == tgsi_sampler_gather) {
-      mip_filter = mip_filter_nearest;
+      funcs = &funcs_nearest;
       min_img_filter = get_img_filter(sp_sview, &sp_samp->base, PIPE_TEX_FILTER_LINEAR, true);
    } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
-      mip_filter = mip_filter_linear_2d_linear_repeat_POT;
+      funcs = &funcs_linear_2d_linear_repeat_POT;
    }
    else {
-      mip_filter = sp_samp->mip_filter;
+      funcs = sp_samp->filter_funcs;
       min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter, false);
       if (sp_samp->min_mag_equal) {
          mag_img_filter = min_img_filter;
@@ -2939,8 +2962,8 @@ sample_mip(struct sp_sampler_view *sp_sview,
       }
    }
 
-   mip_filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
-              s, t, p, c0, lod, filt_args, rgba);
+   funcs->filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
+                 s, t, p, c0, lod, filt_args, rgba);
 
    if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE) {
       sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, filt_args->control, rgba);
@@ -3239,13 +3262,13 @@ softpipe_create_sampler_state(struct pipe_context *pipe,
    switch (sampler->min_mip_filter) {
    case PIPE_TEX_MIPFILTER_NONE:
       if (sampler->min_img_filter == sampler->mag_img_filter)
-         samp->mip_filter = mip_filter_none_no_filter_select;
+         samp->filter_funcs = &funcs_none_no_filter_select;
       else
-         samp->mip_filter = mip_filter_none;
+         samp->filter_funcs = &funcs_none;
       break;
 
    case PIPE_TEX_MIPFILTER_NEAREST:
-      samp->mip_filter = mip_filter_nearest;
+      samp->filter_funcs = &funcs_nearest;
       break;
 
    case PIPE_TEX_MIPFILTER_LINEAR:
@@ -3257,11 +3280,11 @@ softpipe_create_sampler_state(struct pipe_context *pipe,
           sampler->max_anisotropy <= 1) {
          samp->min_mag_equal_repeat_linear = TRUE;
       }
-      samp->mip_filter = mip_filter_linear;
+      samp->filter_funcs = &funcs_linear;
 
       /* Anisotropic filtering extension. */
       if (sampler->max_anisotropy > 1) {
-         samp->mip_filter = mip_filter_linear_aniso;
+         samp->filter_funcs = &funcs_linear_aniso;
 
          /* Override min_img_filter:
           * min_img_filter needs to be set to NEAREST since we need to access
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index 7d1aafc4473..cee545d545a 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -128,6 +128,9 @@ struct sp_sampler_view
 
 };
 
+struct sp_filter_funcs {
+   mip_filter_func filter;
+};
 
 struct sp_sampler {
    struct pipe_sampler_state base;
@@ -144,7 +147,7 @@ struct sp_sampler {
    wrap_linear_func linear_texcoord_t;
    wrap_linear_func linear_texcoord_p;
 
-   mip_filter_func mip_filter;
+   const struct sp_filter_funcs *filter_funcs;
 };
 
 
-- 
cgit v1.2.3


From 380a3c08049e5a3b0b1a891e3288b001c535d62f Mon Sep 17 00:00:00 2001
From: Krzesimir Nowak <krzesimir@kinvolk.io>
Date: Thu, 10 Sep 2015 14:15:55 +0200
Subject: softpipe: Split code getting a filter into separate function

This function will later be used by textureQueryLod. The
img_filter_func pointers are optional, because textureQueryLod will not
need them.

v2:
  - adapted to changes in previous commit (renames)
  - simplified conditions a bit
  - updated docs
  - split overly long lines

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 58 ++++++++++++++++++++--------
 1 file changed, 41 insertions(+), 17 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 8f7cb1a3972..4bfb300e08d 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2929,6 +2929,43 @@ get_img_filter(const struct sp_sampler_view *sp_sview,
    }
 }
 
+/**
+ * Get mip filter funcs, and optionally both img min filter and img mag
+ * filter. Note that the two img filter function pointers must be either both
+ * non-NULL or both NULL.
+ */
+static void
+get_filters(struct sp_sampler_view *sp_sview,
+            struct sp_sampler *sp_samp,
+            enum tgsi_sampler_control control,
+            const struct sp_filter_funcs **funcs,
+            img_filter_func *min,
+            img_filter_func *mag)
+{
+   assert(funcs);
+   if (control == tgsi_sampler_gather) {
+      *funcs = &funcs_nearest;
+      if (min) {
+         *min = get_img_filter(sp_sview, &sp_samp->base,
+                               PIPE_TEX_FILTER_LINEAR, true);
+      }
+   } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
+      *funcs = &funcs_linear_2d_linear_repeat_POT;
+   } else {
+      *funcs = sp_samp->filter_funcs;
+      if (min) {
+         assert(mag);
+         *min = get_img_filter(sp_sview, &sp_samp->base,
+                               sp_samp->min_img_filter, false);
+         if (sp_samp->min_mag_equal) {
+            *mag = *min;
+         } else {
+            *mag = get_img_filter(sp_sview, &sp_samp->base,
+                                  sp_samp->base.mag_img_filter, false);
+         }
+      }
+   }
+}
 
 static void
 sample_mip(struct sp_sampler_view *sp_sview,
@@ -2945,28 +2982,15 @@ sample_mip(struct sp_sampler_view *sp_sview,
    img_filter_func min_img_filter = NULL;
    img_filter_func mag_img_filter = NULL;
 
-   if (filt_args->control == tgsi_sampler_gather) {
-      funcs = &funcs_nearest;
-      min_img_filter = get_img_filter(sp_sview, &sp_samp->base, PIPE_TEX_FILTER_LINEAR, true);
-   } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
-      funcs = &funcs_linear_2d_linear_repeat_POT;
-   }
-   else {
-      funcs = sp_samp->filter_funcs;
-      min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter, false);
-      if (sp_samp->min_mag_equal) {
-         mag_img_filter = min_img_filter;
-      }
-      else {
-         mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->base.mag_img_filter, false);
-      }
-   }
+   get_filters(sp_sview, sp_samp, filt_args->control,
+               &funcs, &min_img_filter, &mag_img_filter);
 
    funcs->filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
                  s, t, p, c0, lod, filt_args, rgba);
 
    if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE) {
-      sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, filt_args->control, rgba);
+      sample_compare(sp_sview, sp_samp, s, t, p, c0,
+                     lod, filt_args->control, rgba);
    }
 
    if (sp_sview->need_swizzle && filt_args->control != tgsi_sampler_gather) {
-- 
cgit v1.2.3


From ac3637dda04fe1315f19099bd142e4f8f6754b1d Mon Sep 17 00:00:00 2001
From: Krzesimir Nowak <krzesimir@kinvolk.io>
Date: Thu, 10 Sep 2015 14:15:56 +0200
Subject: softpipe: Split 3D to 2D coords conversion into separate function

This is to avoid tying the conversion to the sampling -
textureQueryLod will need to do the conversion too, but it does not do
any sampling.

So instead of a "get_samples" vfunc, there is just a bool saying
whether the conversion is needed or not. This solution keeps a nice
property of not adding any overhead for the common case (2D textures).

v2:
  - replaced the "convert_coords" vfunc with a "need_cube_convert"
    boolean to avoid overhead of copying arrays in common case
  - removed an unused typedef
  - split too-long lines in convert_cube
  - const fixes in convert_cube

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 81 +++++++++++++++-------------
 src/gallium/drivers/softpipe/sp_tex_sample.h | 15 +-----
 2 files changed, 45 insertions(+), 51 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 4bfb300e08d..d7b656e4c97 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -3003,27 +3003,22 @@ sample_mip(struct sp_sampler_view *sp_sview,
 
 
 /**
- * Use 3D texcoords to choose a cube face, then sample the 2D cube faces.
- * Put face info into the sampler faces[] array.
+ * This function uses cube texture coordinates to choose a face of a cube and
+ * computes the 2D cube face coordinates. Puts face info into the sampler
+ * faces[] array.
  */
 static void
-sample_cube(struct sp_sampler_view *sp_sview,
-            struct sp_sampler *sp_samp,
-            const float s[TGSI_QUAD_SIZE],
-            const float t[TGSI_QUAD_SIZE],
-            const float p[TGSI_QUAD_SIZE],
-            const float c0[TGSI_QUAD_SIZE],
-            const float c1[TGSI_QUAD_SIZE],
-            const struct filter_args *filt_args,
-            float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
+convert_cube(struct sp_sampler_view *sp_sview,
+             struct sp_sampler *sp_samp,
+             const float s[TGSI_QUAD_SIZE],
+             const float t[TGSI_QUAD_SIZE],
+             const float p[TGSI_QUAD_SIZE],
+             const float c0[TGSI_QUAD_SIZE],
+             float ssss[TGSI_QUAD_SIZE],
+             float tttt[TGSI_QUAD_SIZE],
+             float pppp[TGSI_QUAD_SIZE])
 {
    unsigned j;
-   float ssss[4], tttt[4];
-
-   /* Not actually used, but the intermediate steps that do the
-    * dereferencing don't know it.
-    */
-   static float pppp[4] = { 0, 0, 0, 0 };
 
    pppp[0] = c0[0];
    pppp[1] = c0[1];
@@ -3061,8 +3056,9 @@ sample_cube(struct sp_sampler_view *sp_sview,
       const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
 
       if (arx >= ary && arx >= arz) {
-         float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
-         uint face = (rx >= 0.0F) ? PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
+         const float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
+         const uint face = (rx >= 0.0F) ?
+            PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
             const float ima = -0.5F / fabsf(s[j]);
             ssss[j] = sign *  p[j] * ima + 0.5F;
@@ -3071,8 +3067,9 @@ sample_cube(struct sp_sampler_view *sp_sview,
          }
       }
       else if (ary >= arx && ary >= arz) {
-         float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
-         uint face = (ry >= 0.0F) ? PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
+         const float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
+         const uint face = (ry >= 0.0F) ?
+            PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
             const float ima = -0.5F / fabsf(t[j]);
             ssss[j] =        -s[j] * ima + 0.5F;
@@ -3081,8 +3078,9 @@ sample_cube(struct sp_sampler_view *sp_sview,
          }
       }
       else {
-         float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
-         uint face = (rz >= 0.0F) ? PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
+         const float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
+         const uint face = (rz >= 0.0F) ?
+            PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
             const float ima = -0.5F / fabsf(p[j]);
             ssss[j] = sign * -s[j] * ima + 0.5F;
@@ -3091,8 +3089,6 @@ sample_cube(struct sp_sampler_view *sp_sview,
          }
       }
    }
-
-   sample_mip(sp_sview, sp_samp, ssss, tttt, pppp, c0, c1, filt_args, rgba);
 }
 
 
@@ -3411,12 +3407,8 @@ softpipe_create_sampler_view(struct pipe_context *pipe,
          sview->need_swizzle = TRUE;
       }
 
-      if (view->target == PIPE_TEXTURE_CUBE ||
-          view->target == PIPE_TEXTURE_CUBE_ARRAY)
-         sview->get_samples = sample_cube;
-      else {
-         sview->get_samples = sample_mip;
-      }
+      sview->need_cube_convert = (view->target == PIPE_TEXTURE_CUBE ||
+                                  view->target == PIPE_TEXTURE_CUBE_ARRAY);
       sview->pot2d = spr->pot &&
                      (view->target == PIPE_TEXTURE_2D ||
                       view->target == PIPE_TEXTURE_RECT);
@@ -3460,13 +3452,19 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
                     enum tgsi_sampler_control control,
                     float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
-   struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
+   struct sp_tgsi_sampler *sp_tgsi_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
+   struct sp_sampler_view *sp_sview;
+   struct sp_sampler *sp_samp;
    struct filter_args filt_args;
+
    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
    assert(sampler_index < PIPE_MAX_SAMPLERS);
-   assert(sp_samp->sp_sampler[sampler_index]);
+   assert(sp_tgsi_samp->sp_sampler[sampler_index]);
+
+   sp_sview = &sp_tgsi_samp->sp_sview[sview_index];
+   sp_samp = sp_tgsi_samp->sp_sampler[sampler_index];
    /* always have a view here but texture is NULL if no sampler view was set. */
-   if (!sp_samp->sp_sview[sview_index].base.texture) {
+   if (!sp_sview->base.texture) {
       int i, j;
       for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
          for (i = 0; i < TGSI_QUAD_SIZE; i++) {
@@ -3478,9 +3476,18 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
 
    filt_args.control = control;
    filt_args.offset = offset;
-   sp_samp->sp_sview[sview_index].get_samples(&sp_samp->sp_sview[sview_index],
-                                              sp_samp->sp_sampler[sampler_index],
-                                              s, t, p, c0, lod, &filt_args, rgba);
+
+   if (sp_sview->need_cube_convert) {
+      float cs[TGSI_QUAD_SIZE];
+      float ct[TGSI_QUAD_SIZE];
+      float cp[TGSI_QUAD_SIZE];
+
+      convert_cube(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp);
+
+      sample_mip(sp_sview, sp_samp, cs, ct, cp, c0, lod, &filt_args, rgba);
+   } else {
+      sample_mip(sp_sview, sp_samp, s, t, p, c0, lod, &filt_args, rgba);
+   }
 }
 
 
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index cee545d545a..2e3976d8034 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -86,18 +86,6 @@ typedef void (*mip_filter_func)(struct sp_sampler_view *sp_sview,
                                 const struct filter_args *args,
                                 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
 
-
-typedef void (*filter_func)(struct sp_sampler_view *sp_sview,
-                            struct sp_sampler *sp_samp,
-                            const float s[TGSI_QUAD_SIZE],
-                            const float t[TGSI_QUAD_SIZE],
-                            const float p[TGSI_QUAD_SIZE],
-                            const float c0[TGSI_QUAD_SIZE],
-                            const float lod[TGSI_QUAD_SIZE],
-                            const struct filter_args *args,
-                            float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
-
-
 typedef void (*fetch_func)(struct sp_sampler_view *sp_sview,
                            const int i[TGSI_QUAD_SIZE],
                            const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
@@ -116,8 +104,7 @@ struct sp_sampler_view
 
    boolean need_swizzle;
    boolean pot2d;
-
-   filter_func get_samples;
+   boolean need_cube_convert;
 
    /* this is just abusing the sampler_view object as local storage */
    unsigned faces[TGSI_QUAD_SIZE];
-- 
cgit v1.2.3


From d71a3be86008c275b5902de7759385643546a210 Mon Sep 17 00:00:00 2001
From: Krzesimir Nowak <krzesimir@kinvolk.io>
Date: Thu, 10 Sep 2015 14:15:57 +0200
Subject: softpipe: Add functions for computing relative mipmap level

These functions will be used by textureQueryLod.

v2:

  - renamed mip_level_* funcs to mip_rel_level_* to indicate that
    these functions return mip level relative to base level and
    documented them
  - renamed a level member in sp_filter_funcs struct to relative_level
  - changed mip_rel_level_none and mip_rel_level_nearest to return mip
    level relative to base level, mip_rel_level_linear already did
    that
  - documented clamp_lod function

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 113 +++++++++++++++++++++++++++
 src/gallium/drivers/softpipe/sp_tex_sample.h |   7 ++
 2 files changed, 120 insertions(+)

(limited to 'src')

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index d7b656e4c97..4a4cddfd308 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1936,6 +1936,43 @@ get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
    return (*(unsigned int *)lod_in) & 0x3;
 }
 
+/**
+ * Clamps given lod to both lod limits and mip level limits. Clamping to the
+ * latter limits is done so that lod is relative to the first (base) level.
+ */
+static void
+clamp_lod(const struct sp_sampler_view *sp_sview,
+          const struct sp_sampler *sp_samp,
+          const float lod[TGSI_QUAD_SIZE],
+          float clamped[TGSI_QUAD_SIZE])
+{
+   const float min_lod = sp_samp->base.min_lod;
+   const float max_lod = sp_samp->base.max_lod;
+   const float min_level = sp_sview->base.u.tex.first_level;
+   const float max_level = sp_sview->base.u.tex.last_level;
+   int i;
+
+   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+      float cl = lod[i];
+
+      cl = CLAMP(cl, min_lod, max_lod);
+      cl = CLAMP(cl, 0, max_level - min_level);
+      clamped[i] = cl;
+   }
+}
+
+/**
+ * Get mip level relative to base level for linear mip filter
+ */
+static void
+mip_rel_level_linear(struct sp_sampler_view *sp_sview,
+                     struct sp_sampler *sp_samp,
+                     const float lod[TGSI_QUAD_SIZE],
+                     float level[TGSI_QUAD_SIZE])
+{
+   clamp_lod(sp_sview, sp_samp, lod, level);
+}
+
 static void
 mip_filter_linear(struct sp_sampler_view *sp_sview,
                   struct sp_sampler *sp_samp,
@@ -1998,6 +2035,25 @@ mip_filter_linear(struct sp_sampler_view *sp_sview,
 }
 
 
+/**
+ * Get mip level relative to base level for nearest mip filter
+ */
+static void
+mip_rel_level_nearest(struct sp_sampler_view *sp_sview,
+                      struct sp_sampler *sp_samp,
+                      const float lod[TGSI_QUAD_SIZE],
+                      float level[TGSI_QUAD_SIZE])
+{
+   int j;
+
+   clamp_lod(sp_sview, sp_samp, lod, level);
+   for (j = 0; j < TGSI_QUAD_SIZE; j++)
+      /* TODO: It should rather be:
+       * level[j] = ceil(level[j] + 0.5F) - 1.0F;
+       */
+      level[j] = (int)(level[j] + 0.5F);
+}
+
 /**
  * Compute nearest mipmap level from texcoords.
  * Then sample the texture level for four elements of a quad.
@@ -2049,6 +2105,22 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview,
 }
 
 
+/**
+ * Get mip level relative to base level for none mip filter
+ */
+static void
+mip_rel_level_none(struct sp_sampler_view *sp_sview,
+                   struct sp_sampler *sp_samp,
+                   const float lod[TGSI_QUAD_SIZE],
+                   float level[TGSI_QUAD_SIZE])
+{
+   int j;
+
+   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+      level[j] = 0;
+   }
+}
+
 static void
 mip_filter_none(struct sp_sampler_view *sp_sview,
                 struct sp_sampler *sp_samp,
@@ -2087,6 +2159,18 @@ mip_filter_none(struct sp_sampler_view *sp_sview,
 }
 
 
+/**
+ * Get mip level relative to base level for none mip filter
+ */
+static void
+mip_rel_level_none_no_filter_select(struct sp_sampler_view *sp_sview,
+                                    struct sp_sampler *sp_samp,
+                                    const float lod[TGSI_QUAD_SIZE],
+                                    float level[TGSI_QUAD_SIZE])
+{
+   mip_rel_level_none(sp_sview, sp_samp, lod, level);
+}
+
 static void
 mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
                                  struct sp_sampler *sp_samp,
@@ -2339,6 +2423,18 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
 }
 
 
+/**
+ * Get mip level relative to base level for linear mip filter
+ */
+static void
+mip_rel_level_linear_aniso(struct sp_sampler_view *sp_sview,
+                           struct sp_sampler *sp_samp,
+                           const float lod[TGSI_QUAD_SIZE],
+                           float level[TGSI_QUAD_SIZE])
+{
+   mip_rel_level_linear(sp_sview, sp_samp, lod, level);
+}
+
 /**
  * Sample 2D texture using an anisotropic filter.
  */
@@ -2450,6 +2546,17 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
    }
 }
 
+/**
+ * Get mip level relative to base level for linear mip filter
+ */
+static void
+mip_rel_level_linear_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
+                                          struct sp_sampler *sp_samp,
+                                          const float lod[TGSI_QUAD_SIZE],
+                                          float level[TGSI_QUAD_SIZE])
+{
+   mip_rel_level_linear(sp_sview, sp_samp, lod, level);
+}
 
 /**
  * Specialized version of mip_filter_linear with hard-wired calls to
@@ -2516,26 +2623,32 @@ mip_filter_linear_2d_linear_repeat_POT(
 }
 
 static const struct sp_filter_funcs funcs_linear = {
+   mip_rel_level_linear,
    mip_filter_linear
 };
 
 static const struct sp_filter_funcs funcs_nearest = {
+   mip_rel_level_nearest,
    mip_filter_nearest
 };
 
 static const struct sp_filter_funcs funcs_none = {
+   mip_rel_level_none,
    mip_filter_none
 };
 
 static const struct sp_filter_funcs funcs_none_no_filter_select = {
+   mip_rel_level_none_no_filter_select,
    mip_filter_none_no_filter_select
 };
 
 static const struct sp_filter_funcs funcs_linear_aniso = {
+   mip_rel_level_linear_aniso,
    mip_filter_linear_aniso
 };
 
 static const struct sp_filter_funcs funcs_linear_2d_linear_repeat_POT = {
+   mip_rel_level_linear_2d_linear_repeat_POT,
    mip_filter_linear_2d_linear_repeat_POT
 };
 
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index 2e3976d8034..72b4a1ac3e8 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -86,6 +86,12 @@ typedef void (*mip_filter_func)(struct sp_sampler_view *sp_sview,
                                 const struct filter_args *args,
                                 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
 
+
+typedef void (*mip_level_func)(struct sp_sampler_view *sp_sview,
+                               struct sp_sampler *sp_samp,
+                               const float lod[TGSI_QUAD_SIZE],
+                               float level[TGSI_QUAD_SIZE]);
+
 typedef void (*fetch_func)(struct sp_sampler_view *sp_sview,
                            const int i[TGSI_QUAD_SIZE],
                            const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
@@ -116,6 +122,7 @@ struct sp_sampler_view
 };
 
 struct sp_filter_funcs {
+   mip_level_func relative_level;
    mip_filter_func filter;
 };
 
-- 
cgit v1.2.3


From 263d4a74062b16529a4819e870fa12b926e339ec Mon Sep 17 00:00:00 2001
From: Krzesimir Nowak <krzesimir@kinvolk.io>
Date: Thu, 10 Sep 2015 14:15:58 +0200
Subject: tgsi: Add code for handling lodq opcode

This introduces a new vfunc in tgsi_sampler just for this opcode. I
decided against extending get_samples vfunc to return the mipmap level
and LOD - the function's prototype is already too scary and doing the
sampling for textureQueryLod would be a waste of time.

v2:
  - split too-long lines

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 46 ++++++++++++++++++++++++++++++++++
 src/gallium/auxiliary/tgsi/tgsi_exec.h | 10 ++++++++
 2 files changed, 56 insertions(+)

(limited to 'src')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 9544623e90c..a3a79a06620 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2132,6 +2132,46 @@ exec_tex(struct tgsi_exec_machine *mach,
    }
 }
 
+static void
+exec_lodq(struct tgsi_exec_machine *mach,
+          const struct tgsi_full_instruction *inst)
+{
+   uint unit;
+   int dim;
+   int i;
+   union tgsi_exec_channel coords[4];
+   const union tgsi_exec_channel *args[Elements(coords)];
+   union tgsi_exec_channel r[2];
+
+   unit = fetch_sampler_unit(mach, inst, 1);
+   dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, NULL);
+   assert(dim <= Elements(coords));
+   /* fetch coordinates */
+   for (i = 0; i < dim; i++) {
+      FETCH(&coords[i], 0, TGSI_CHAN_X + i);
+      args[i] = &coords[i];
+   }
+   for (i = dim; i < Elements(coords); i++) {
+      args[i] = &ZeroVec;
+   }
+   mach->Sampler->query_lod(mach->Sampler, unit, unit,
+                            args[0]->f,
+                            args[1]->f,
+                            args[2]->f,
+                            args[3]->f,
+                            tgsi_sampler_lod_none,
+                            r[0].f,
+                            r[1].f);
+
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+      store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
+                 TGSI_EXEC_DATA_FLOAT);
+   }
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+      store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
+                 TGSI_EXEC_DATA_FLOAT);
+   }
+}
 
 static void
 exec_txd(struct tgsi_exec_machine *mach,
@@ -4378,6 +4418,12 @@ exec_instruction(
       exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
       break;
 
+   case TGSI_OPCODE_LODQ:
+      /* src[0] = texcoord */
+      /* src[1] = sampler unit */
+      exec_lodq(mach, inst);
+      break;
+
    case TGSI_OPCODE_UP2H:
       assert (0);
       break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 5d56aab2216..a07d727cdee 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -138,6 +138,16 @@ struct tgsi_sampler
                      const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
                      const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
                      float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+   void (*query_lod)(struct tgsi_sampler *tgsi_sampler,
+                     const unsigned sview_index,
+                     const unsigned sampler_index,
+                     const float s[TGSI_QUAD_SIZE],
+                     const float t[TGSI_QUAD_SIZE],
+                     const float p[TGSI_QUAD_SIZE],
+                     const float c0[TGSI_QUAD_SIZE],
+                     const enum tgsi_sampler_control control,
+                     float mipmap[TGSI_QUAD_SIZE],
+                     float lod[TGSI_QUAD_SIZE]);
 };
 
 #define TGSI_EXEC_NUM_TEMPS       4096
-- 
cgit v1.2.3


From 60905f2b19e13b03b5ae981e36ac434c6a4529ae Mon Sep 17 00:00:00 2001
From: Krzesimir Nowak <krzesimir@kinvolk.io>
Date: Thu, 10 Sep 2015 14:15:59 +0200
Subject: softpipe: Implement and enable textureQueryLod

Passes the shader piglit tests and introduces no regressions.

This commit finally makes use of the refactoring in previous
commits.

v2:
  - adapted the code to changes in previous commits (renames,
    need_cube_convert stuff)
  - split too-long lines

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/softpipe/sp_screen.c     |  2 +-
 src/gallium/drivers/softpipe/sp_tex_sample.c | 55 +++++++++++++++++++++++++++-
 2 files changed, 55 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 0bfd9c3578c..7ca8a67e109 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -193,9 +193,9 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
       return 4;
    case PIPE_CAP_TEXTURE_GATHER_SM5:
+   case PIPE_CAP_TEXTURE_QUERY_LOD:
       return 1;
    case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
-   case PIPE_CAP_TEXTURE_QUERY_LOD:
    case PIPE_CAP_SAMPLE_SHADING:
    case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
       return 0;
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 4a4cddfd308..9f2ba01b66a 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -3603,6 +3603,59 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
    }
 }
 
+static void
+sp_tgsi_query_lod(struct tgsi_sampler *tgsi_sampler,
+                  const unsigned sview_index,
+                  const unsigned sampler_index,
+                  const float s[TGSI_QUAD_SIZE],
+                  const float t[TGSI_QUAD_SIZE],
+                  const float p[TGSI_QUAD_SIZE],
+                  const float c0[TGSI_QUAD_SIZE],
+                  const enum tgsi_sampler_control control,
+                  float mipmap[TGSI_QUAD_SIZE],
+                  float lod[TGSI_QUAD_SIZE])
+{
+   static const float lod_in[TGSI_QUAD_SIZE] = { 0.0, 0.0, 0.0, 0.0 };
+
+   struct sp_tgsi_sampler *sp_tgsi_samp =
+      (struct sp_tgsi_sampler *)tgsi_sampler;
+   struct sp_sampler_view *sp_sview;
+   struct sp_sampler *sp_samp;
+   const struct sp_filter_funcs *funcs;
+   int i;
+
+   assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
+   assert(sampler_index < PIPE_MAX_SAMPLERS);
+   assert(sp_tgsi_samp->sp_sampler[sampler_index]);
+
+   sp_sview = &sp_tgsi_samp->sp_sview[sview_index];
+   sp_samp = sp_tgsi_samp->sp_sampler[sampler_index];
+   /* always have a view here but texture is NULL if no sampler view was
+    * set. */
+   if (!sp_sview->base.texture) {
+      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+         mipmap[i] = 0.0f;
+         lod[i] = 0.0f;
+      }
+      return;
+   }
+
+   if (sp_sview->need_cube_convert) {
+      float cs[TGSI_QUAD_SIZE];
+      float ct[TGSI_QUAD_SIZE];
+      float cp[TGSI_QUAD_SIZE];
+
+      convert_cube(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp);
+      compute_lambda_lod_unclamped(sp_sview, sp_samp,
+                                   cs, ct, cp, lod_in, control, lod);
+   } else {
+      compute_lambda_lod_unclamped(sp_sview, sp_samp,
+                                   s, t, p, lod_in, control, lod);
+   }
+
+   get_filters(sp_sview, sp_samp, control, &funcs, NULL, NULL);
+   funcs->relative_level(sp_sview, sp_samp, lod, mipmap);
+}
 
 static void
 sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler,
@@ -3639,7 +3692,7 @@ sp_create_tgsi_sampler(void)
    samp->base.get_dims = sp_tgsi_get_dims;
    samp->base.get_samples = sp_tgsi_get_samples;
    samp->base.get_texel = sp_tgsi_get_texel;
+   samp->base.query_lod = sp_tgsi_query_lod;
 
    return samp;
 }
-
-- 
cgit v1.2.3


From bc75fe214d428f857155ae6737194c73ddfed55f Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 8 Sep 2015 09:39:40 -0600
Subject: winsys/svga: remove useless assertion

An unsigned int is always >= 0.  Found with Coverity.

Reviewed-by: Charmaine Lee <charmainel@vmware.com>
---
 src/gallium/winsys/svga/drm/vmw_context.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/winsys/svga/drm/vmw_context.c b/src/gallium/winsys/svga/drm/vmw_context.c
index 394795eae14..1675af4cbc8 100644
--- a/src/gallium/winsys/svga/drm/vmw_context.c
+++ b/src/gallium/winsys/svga/drm/vmw_context.c
@@ -591,7 +591,6 @@ vmw_swc_commit(struct svga_winsys_context *swc)
 {
    struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
 
-   assert(vswc->command.reserved >= 0);
    assert(vswc->command.used + vswc->command.reserved <= vswc->command.size);
    vswc->command.used += vswc->command.reserved;
    vswc->command.reserved = 0;
-- 
cgit v1.2.3


From ece33f96879775401361d14cb2faeb1a2f1ea806 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 8 Sep 2015 09:40:29 -0600
Subject: svga: remove useless MAX2() call

The sum of two unsigned ints is always >= 0.  Found with Coverity.

Reviewed-by: Charmaine Lee <charmainel@vmware.com>
---
 src/gallium/drivers/svga/svga_state_tss.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c
index a13980d0e13..5991da13868 100644
--- a/src/gallium/drivers/svga/svga_state_tss.c
+++ b/src/gallium/drivers/svga/svga_state_tss.c
@@ -90,7 +90,7 @@ emit_tex_binding_unit(struct svga_context *svga,
       }
       else {
          last_level = MIN2(sv->u.tex.last_level, sv->texture->last_level);
-         min_lod = MAX2(0, (s->view_min_lod + sv->u.tex.first_level));
+         min_lod = s->view_min_lod + sv->u.tex.first_level;
          min_lod = MIN2(min_lod, last_level);
          max_lod = MIN2(s->view_max_lod + sv->u.tex.first_level, last_level);
       }
-- 
cgit v1.2.3


From 289804515f7de20cb41a5e1b4c20864f0dab8d51 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 10 Sep 2015 08:47:16 -0600
Subject: svga: fix shader variant memory leak

Fixes a small leak in a seldom-hit corner case for VS/FS compilation.
Found with coverity.

Reviewed-by: Charmaine Lee <charmainel@vmware.com>
---
 src/gallium/drivers/svga/svga_state_fs.c | 3 +++
 src/gallium/drivers/svga/svga_state_vs.c | 3 +++
 2 files changed, 6 insertions(+)

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
index 849319d3cf3..5d39100f63a 100644
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -148,6 +148,9 @@ compile_fs(struct svga_context *svga,
                    " using dummy shader instead.\n",
                    (unsigned) (variant->nr_tokens
                                * sizeof(variant->tokens[0])));
+      /* Free the too-large variant */
+      svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
+      /* Use simple pass-through shader instead */
       variant = get_compiled_dummy_shader(svga, fs, key);
       if (!variant) {
          ret = PIPE_ERROR;
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index 45dcb5d0f76..de41519593c 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -139,6 +139,9 @@ compile_vs(struct svga_context *svga,
                    " using dummy shader instead.\n",
                    (unsigned) (variant->nr_tokens
                                * sizeof(variant->tokens[0])));
+      /* Free the too-large variant */
+      svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
+      /* Use simple pass-through shader instead */
       variant = get_compiled_dummy_vertex_shader(svga, vs, key);
       if (!variant) {
          ret = PIPE_ERROR;
-- 
cgit v1.2.3


From a9b143a648ff77ac76ff6f4580559f1a061e37f9 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 10 Sep 2015 08:55:01 -0600
Subject: svga: clean up the compile_vs/gs/fs() functions

Simplify structure and remove gotos.

Reviewed-by: Charmaine Lee <charmainel@vmware.com>
---
 src/gallium/drivers/svga/svga_state_fs.c | 29 ++++++++++-------------------
 src/gallium/drivers/svga/svga_state_gs.c | 19 +++++++------------
 src/gallium/drivers/svga/svga_state_vs.c | 28 ++++++++++------------------
 3 files changed, 27 insertions(+), 49 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
index 5d39100f63a..c244d5352d9 100644
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -136,13 +136,8 @@ compile_fs(struct svga_context *svga,
       debug_printf("Failed to compile fragment shader,"
                    " using dummy shader instead.\n");
       variant = get_compiled_dummy_shader(svga, fs, key);
-      if (!variant) {
-         ret = PIPE_ERROR;
-         goto fail;
-      }
    }
-
-   if (svga_shader_too_large(svga, variant)) {
+   else if (svga_shader_too_large(svga, variant)) {
       /* too big, use dummy shader */
       debug_printf("Shader too large (%u bytes),"
                    " using dummy shader instead.\n",
@@ -152,29 +147,25 @@ compile_fs(struct svga_context *svga,
       svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
       /* Use simple pass-through shader instead */
       variant = get_compiled_dummy_shader(svga, fs, key);
-      if (!variant) {
-         ret = PIPE_ERROR;
-         goto fail;
-      }
+   }
+
+   if (!variant) {
+      return PIPE_ERROR;
    }
 
    ret = svga_define_shader(svga, SVGA3D_SHADERTYPE_PS, variant);
-   if (ret != PIPE_OK)
-      goto fail;
+   if (ret != PIPE_OK) {
+      svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
+      return ret;
+   }
 
    *out_variant = variant;
 
-   /* insert variants at head of linked list */
+   /* insert variant at head of linked list */
    variant->next = fs->base.variants;
    fs->base.variants = variant;
 
    return PIPE_OK;
-
-fail:
-   if (variant) {
-      svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
-   }
-   return ret;
 }
 
 
diff --git a/src/gallium/drivers/svga/svga_state_gs.c b/src/gallium/drivers/svga/svga_state_gs.c
index 9f6885b95a2..7f75410fb57 100644
--- a/src/gallium/drivers/svga/svga_state_gs.c
+++ b/src/gallium/drivers/svga/svga_state_gs.c
@@ -80,34 +80,29 @@ compile_gs(struct svga_context *svga,
       /* some problem during translation, try the dummy shader */
       const struct tgsi_token *dummy = get_dummy_geometry_shader();
       if (!dummy) {
-         ret = PIPE_ERROR_OUT_OF_MEMORY;
-         goto fail;
+         return PIPE_ERROR_OUT_OF_MEMORY;
       }
       debug_printf("Failed to compile geometry shader, using dummy shader instead.\n");
       FREE((void *) gs->base.tokens);
       gs->base.tokens = dummy;
       variant = translate_geometry_program(svga, gs, key);
       if (variant == NULL) {
-         ret = PIPE_ERROR;
-         goto fail;
+         return PIPE_ERROR;
       }
    }
 
    ret = svga_define_shader(svga, SVGA3D_SHADERTYPE_GS, variant);
-   if (ret != PIPE_OK)
-      goto fail;
+   if (ret != PIPE_OK) {
+      svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_GS, variant);
+      return ret;
+   }
 
    *out_variant = variant;
 
    return PIPE_OK;
-
-fail:
-   if (variant) {
-      svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_GS, variant);
-   }
-   return ret;
 }
 
+
 static void
 make_gs_key(struct svga_context *svga, struct svga_compile_key *key)
 {
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index de41519593c..a846b779e70 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -127,13 +127,8 @@ compile_vs(struct svga_context *svga,
       debug_printf("Failed to compile vertex shader,"
                    " using dummy shader instead.\n");
       variant = get_compiled_dummy_vertex_shader(svga, vs, key);
-      if (!variant) {
-         ret = PIPE_ERROR;
-         goto fail;
-      }
    }
-
-   if (svga_shader_too_large(svga, variant)) {
+   else if (svga_shader_too_large(svga, variant)) {
       /* too big, use dummy shader */
       debug_printf("Shader too large (%u bytes),"
                    " using dummy shader instead.\n",
@@ -143,27 +138,24 @@ compile_vs(struct svga_context *svga,
       svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
       /* Use simple pass-through shader instead */
       variant = get_compiled_dummy_vertex_shader(svga, vs, key);
-      if (!variant) {
-         ret = PIPE_ERROR;
-         goto fail;
-      }
+   }
+
+   if (!variant) {
+      return PIPE_ERROR;
    }
 
    ret = svga_define_shader(svga, SVGA3D_SHADERTYPE_VS, variant);
-   if (ret != PIPE_OK)
-      goto fail;
+   if (ret != PIPE_OK) {
+      svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
+      return ret;
+   }
 
    *out_variant = variant;
 
    return PIPE_OK;
-
-fail:
-   if (variant) {
-      svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
-   }
-   return ret;
 }
 
+
 /* SVGA_NEW_PRESCALE, SVGA_NEW_RAST, SVGA_NEW_FS
  */
 static void
-- 
cgit v1.2.3


From aa1a5c0c9e0de71ecd15e7190c121d6dd98471a6 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 19 Aug 2015 19:24:45 -0700
Subject: i965: Remove horizontal bars from file header comments

Why was that ever a thing?

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Acked-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_clear.c            | 6 ++----
 src/mesa/drivers/dri/i965/brw_draw.c             | 6 ++----
 src/mesa/drivers/dri/i965/brw_draw.h             | 6 ++----
 src/mesa/drivers/dri/i965/brw_draw_upload.c      | 6 ++----
 src/mesa/drivers/dri/i965/intel_batchbuffer.c    | 6 ++----
 src/mesa/drivers/dri/i965/intel_blit.c           | 7 ++-----
 src/mesa/drivers/dri/i965/intel_blit.h           | 6 ++----
 src/mesa/drivers/dri/i965/intel_buffer_objects.c | 6 ++----
 src/mesa/drivers/dri/i965/intel_buffer_objects.h | 6 ++----
 src/mesa/drivers/dri/i965/intel_buffers.c        | 6 ++----
 src/mesa/drivers/dri/i965/intel_buffers.h        | 7 ++-----
 src/mesa/drivers/dri/i965/intel_extensions.c     | 6 ++----
 src/mesa/drivers/dri/i965/intel_fbo.c            | 7 ++-----
 src/mesa/drivers/dri/i965/intel_fbo.h            | 6 ++----
 src/mesa/drivers/dri/i965/intel_image.h          | 6 ++----
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c    | 6 ++----
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h    | 6 ++----
 src/mesa/drivers/dri/i965/intel_pixel.c          | 6 ++----
 src/mesa/drivers/dri/i965/intel_pixel.h          | 6 ++----
 src/mesa/drivers/dri/i965/intel_pixel_bitmap.c   | 6 ++----
 src/mesa/drivers/dri/i965/intel_pixel_copy.c     | 6 ++----
 src/mesa/drivers/dri/i965/intel_pixel_draw.c     | 6 ++----
 src/mesa/drivers/dri/i965/intel_pixel_read.c     | 6 ++----
 src/mesa/drivers/dri/i965/intel_reg.h            | 6 ++----
 src/mesa/drivers/dri/i965/intel_screen.c         | 6 ++----
 src/mesa/drivers/dri/i965/intel_screen.h         | 6 ++----
 src/mesa/drivers/dri/i965/intel_state.c          | 7 ++-----
 src/mesa/drivers/dri/i965/intel_tex.h            | 6 ++----
 src/mesa/drivers/dri/i965/intel_tex_copy.c       | 6 ++----
 src/mesa/drivers/dri/i965/intel_tex_obj.h        | 6 ++----
 src/mesa/drivers/dri/i965/intel_tex_subimage.c   | 7 ++-----
 31 files changed, 62 insertions(+), 129 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
index f981388ef1a..51f76f489c6 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * Copyright 2009, 2012 Intel Corporation.
  * All Rights Reserved.
@@ -23,8 +22,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/glheader.h"
 #include "main/mtypes.h"
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index e5de4202c01..c6f99bcc718 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include <sys/errno.h>
 
diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h
index f994726f5b6..3634b8254d1 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.h
+++ b/src/mesa/drivers/dri/i965/brw_draw.h
@@ -1,5 +1,4 @@
- /**************************************************************************
- *
+/*
  * Copyright 2005 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef BRW_DRAW_H
 #define BRW_DRAW_H
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 21d8f1e6994..176f747b07b 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/glheader.h"
 #include "main/bufferobj.h"
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 85f20a05729..dd3229ccdf6 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "intel_batchbuffer.h"
 #include "intel_buffer_objects.h"
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 5a1da128888..b7297cefc2d 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,9 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
+ */
 
 #include "main/mtypes.h"
 #include "main/blit.h"
diff --git a/src/mesa/drivers/dri/i965/intel_blit.h b/src/mesa/drivers/dri/i965/intel_blit.h
index c3d19a5a20e..b5c46546e94 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.h
+++ b/src/mesa/drivers/dri/i965/intel_blit.h
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef INTEL_BLIT_H
 #define INTEL_BLIT_H
diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
index ff05b5cd0e7..164399f8343 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 /**
  * @file intel_buffer_objects.c
diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.h b/src/mesa/drivers/dri/i965/intel_buffer_objects.h
index 5eaf9dce29f..cb6f353be11 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.h
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.h
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2005 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef INTEL_BUFFEROBJ_H
 #define INTEL_BUFFEROBJ_H
diff --git a/src/mesa/drivers/dri/i965/intel_buffers.c b/src/mesa/drivers/dri/i965/intel_buffers.c
index 6ad67f1a63e..60041129c9f 100644
--- a/src/mesa/drivers/dri/i965/intel_buffers.c
+++ b/src/mesa/drivers/dri/i965/intel_buffers.c
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "brw_context.h"
 #include "intel_buffers.h"
diff --git a/src/mesa/drivers/dri/i965/intel_buffers.h b/src/mesa/drivers/dri/i965/intel_buffers.h
index 9014029e3be..8594e98c239 100644
--- a/src/mesa/drivers/dri/i965/intel_buffers.h
+++ b/src/mesa/drivers/dri/i965/intel_buffers.h
@@ -1,6 +1,4 @@
-
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -23,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef INTEL_BUFFERS_H
 #define INTEL_BUFFERS_H
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index 3c764be07fb..51ef2bc446d 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/version.h"
 
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index 64d57e8bc89..026784a950c 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,9 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
+ */
 
 #include "main/enums.h"
 #include "main/imports.h"
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.h b/src/mesa/drivers/dri/i965/intel_fbo.h
index c7cc57073c1..9147587bab3 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.h
+++ b/src/mesa/drivers/dri/i965/intel_fbo.h
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef INTEL_FBO_H
 #define INTEL_FBO_H
diff --git a/src/mesa/drivers/dri/i965/intel_image.h b/src/mesa/drivers/dri/i965/intel_image.h
index a82cf3b3bd1..94f4f34531b 100644
--- a/src/mesa/drivers/dri/i965/intel_image.h
+++ b/src/mesa/drivers/dri/i965/intel_image.h
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef INTEL_IMAGE_H
 #define INTEL_IMAGE_H
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 1259664de2a..2053448b4bf 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include <GL/gl.h>
 #include <GL/internal/dri_interface.h>
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index c28162a1983..bab543c031c 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 /** @file intel_mipmap_tree.h
  *
diff --git a/src/mesa/drivers/dri/i965/intel_pixel.c b/src/mesa/drivers/dri/i965/intel_pixel.c
index 34543334cde..8aa50caa568 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel.c
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/accum.h"
 #include "main/enums.h"
diff --git a/src/mesa/drivers/dri/i965/intel_pixel.h b/src/mesa/drivers/dri/i965/intel_pixel.h
index bc184efd7f5..6f302fd19bd 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel.h
+++ b/src/mesa/drivers/dri/i965/intel_pixel.h
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef INTEL_PIXEL_H
 #define INTEL_PIXEL_H
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
index 224dc6572cc..f938bf4cdec 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/blend.h"
 #include "main/glheader.h"
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_copy.c b/src/mesa/drivers/dri/i965/intel_pixel_copy.c
index ce053edbe36..d7436f3fe9f 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_copy.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_copy.c
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/glheader.h"
 #include "main/image.h"
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_draw.c b/src/mesa/drivers/dri/i965/intel_pixel_draw.c
index 6c6bd8629ac..1afa6c26916 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_draw.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_draw.c
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/glheader.h"
 #include "main/enums.h"
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index eb366cd3e34..1bafe880356 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/glheader.h"
 #include "main/enums.h"
diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h
index b4283da9633..4eb93499658 100644
--- a/src/mesa/drivers/dri/i965/intel_reg.h
+++ b/src/mesa/drivers/dri/i965/intel_reg.h
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #define CMD_MI				(0x0 << 29)
 #define CMD_2D				(0x2 << 29)
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index 85863a0827e..6bb4c47077c 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include <errno.h>
 #include <time.h>
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h
index fd5143eecba..85614dd628b 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef _INTEL_INIT_H_
 #define _INTEL_INIT_H_
diff --git a/src/mesa/drivers/dri/i965/intel_state.c b/src/mesa/drivers/dri/i965/intel_state.c
index d7346709cf4..55dbc6dc2ca 100644
--- a/src/mesa/drivers/dri/i965/intel_state.c
+++ b/src/mesa/drivers/dri/i965/intel_state.c
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,9 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
+ */
 
 #include "main/glheader.h"
 #include "main/context.h"
diff --git a/src/mesa/drivers/dri/i965/intel_tex.h b/src/mesa/drivers/dri/i965/intel_tex.h
index 402a3891ecd..bc51f0738f0 100644
--- a/src/mesa/drivers/dri/i965/intel_tex.h
+++ b/src/mesa/drivers/dri/i965/intel_tex.h
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef INTELTEX_INC
 #define INTELTEX_INC
diff --git a/src/mesa/drivers/dri/i965/intel_tex_copy.c b/src/mesa/drivers/dri/i965/intel_tex_copy.c
index ecdd052fdf6..c894d88b7fc 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_copy.c
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/mtypes.h"
 #include "main/enums.h"
diff --git a/src/mesa/drivers/dri/i965/intel_tex_obj.h b/src/mesa/drivers/dri/i965/intel_tex_obj.h
index e078e0a7553..60f12076ef5 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_obj.h
+++ b/src/mesa/drivers/dri/i965/intel_tex_obj.h
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -22,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef _INTEL_TEX_OBJ_H
 #define _INTEL_TEX_OBJ_H
diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
index 44921e5242c..2c46858b4fd 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -1,6 +1,4 @@
-
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -23,8 +21,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/bufferobj.h"
 #include "main/image.h"
-- 
cgit v1.2.3


From 284dcad20a9a55278c1d3dd98f53e45650b22896 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 19 Aug 2015 16:36:35 -0700
Subject: i965: Fix typos in license

grep -lr 'sub license' | while read f; do \
    sed --in-place -e 's/sub license/sublicense/' $f ;\
    done

grep -lr 'NON-INFRINGEMENT' | while read f; do \
    sed --in-place -e 's/NON-INFRINGEMENT/NONINFRINGEMENT/' $f ;\
    done

As noted by Matt, both of these changes match the MIT license text found
at http://opensource.org/licenses/MIT.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Acked-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_clear.c            | 4 ++--
 src/mesa/drivers/dri/i965/brw_draw.c             | 4 ++--
 src/mesa/drivers/dri/i965/brw_draw.h             | 4 ++--
 src/mesa/drivers/dri/i965/brw_draw_upload.c      | 4 ++--
 src/mesa/drivers/dri/i965/intel_batchbuffer.c    | 4 ++--
 src/mesa/drivers/dri/i965/intel_blit.c           | 4 ++--
 src/mesa/drivers/dri/i965/intel_blit.h           | 4 ++--
 src/mesa/drivers/dri/i965/intel_buffer_objects.c | 4 ++--
 src/mesa/drivers/dri/i965/intel_buffer_objects.h | 4 ++--
 src/mesa/drivers/dri/i965/intel_buffers.c        | 4 ++--
 src/mesa/drivers/dri/i965/intel_buffers.h        | 4 ++--
 src/mesa/drivers/dri/i965/intel_extensions.c     | 4 ++--
 src/mesa/drivers/dri/i965/intel_fbo.c            | 4 ++--
 src/mesa/drivers/dri/i965/intel_fbo.h            | 4 ++--
 src/mesa/drivers/dri/i965/intel_image.h          | 4 ++--
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c    | 4 ++--
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h    | 4 ++--
 src/mesa/drivers/dri/i965/intel_pixel.c          | 4 ++--
 src/mesa/drivers/dri/i965/intel_pixel.h          | 4 ++--
 src/mesa/drivers/dri/i965/intel_pixel_bitmap.c   | 4 ++--
 src/mesa/drivers/dri/i965/intel_pixel_copy.c     | 4 ++--
 src/mesa/drivers/dri/i965/intel_pixel_draw.c     | 4 ++--
 src/mesa/drivers/dri/i965/intel_pixel_read.c     | 4 ++--
 src/mesa/drivers/dri/i965/intel_reg.h            | 4 ++--
 src/mesa/drivers/dri/i965/intel_screen.c         | 4 ++--
 src/mesa/drivers/dri/i965/intel_screen.h         | 4 ++--
 src/mesa/drivers/dri/i965/intel_state.c          | 4 ++--
 src/mesa/drivers/dri/i965/intel_tex.h            | 4 ++--
 src/mesa/drivers/dri/i965/intel_tex_copy.c       | 4 ++--
 src/mesa/drivers/dri/i965/intel_tex_obj.h        | 4 ++--
 src/mesa/drivers/dri/i965/intel_tex_subimage.c   | 4 ++--
 src/mesa/drivers/dri/i965/intel_tiled_memcpy.c   | 4 ++--
 src/mesa/drivers/dri/i965/intel_tiled_memcpy.h   | 4 ++--
 33 files changed, 66 insertions(+), 66 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
index 51f76f489c6..17a745d0373 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -7,7 +7,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -17,7 +17,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index c6f99bcc718..6a75e067915 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h
index 3634b8254d1..695973b5bec 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.h
+++ b/src/mesa/drivers/dri/i965/brw_draw.h
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 176f747b07b..a0ae0151306 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index dd3229ccdf6..0363bd3789a 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index b7297cefc2d..3a2a1818bc7 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_blit.h b/src/mesa/drivers/dri/i965/intel_blit.h
index b5c46546e94..f4ed919b000 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.h
+++ b/src/mesa/drivers/dri/i965/intel_blit.h
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
index 164399f8343..a51095f2d65 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.h b/src/mesa/drivers/dri/i965/intel_buffer_objects.h
index cb6f353be11..b523edc4f09 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.h
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.h
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_buffers.c b/src/mesa/drivers/dri/i965/intel_buffers.c
index 60041129c9f..c98e19382c3 100644
--- a/src/mesa/drivers/dri/i965/intel_buffers.c
+++ b/src/mesa/drivers/dri/i965/intel_buffers.c
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_buffers.h b/src/mesa/drivers/dri/i965/intel_buffers.h
index 8594e98c239..85f54b2c653 100644
--- a/src/mesa/drivers/dri/i965/intel_buffers.h
+++ b/src/mesa/drivers/dri/i965/intel_buffers.h
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index 51ef2bc446d..8842f50935a 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index 026784a950c..af44325988a 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.h b/src/mesa/drivers/dri/i965/intel_fbo.h
index 9147587bab3..5ba4c8f416c 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.h
+++ b/src/mesa/drivers/dri/i965/intel_fbo.h
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_image.h b/src/mesa/drivers/dri/i965/intel_image.h
index 94f4f34531b..9b3816efd13 100644
--- a/src/mesa/drivers/dri/i965/intel_image.h
+++ b/src/mesa/drivers/dri/i965/intel_image.h
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 2053448b4bf..eafd2a6be9a 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index bab543c031c..7b5f97b3368 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_pixel.c b/src/mesa/drivers/dri/i965/intel_pixel.c
index 8aa50caa568..30d3a521ec8 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel.c
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_pixel.h b/src/mesa/drivers/dri/i965/intel_pixel.h
index 6f302fd19bd..f5b931f5c1f 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel.h
+++ b/src/mesa/drivers/dri/i965/intel_pixel.h
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
index f938bf4cdec..3326ac4d779 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_copy.c b/src/mesa/drivers/dri/i965/intel_pixel_copy.c
index d7436f3fe9f..066b6a278b9 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_copy.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_copy.c
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_draw.c b/src/mesa/drivers/dri/i965/intel_pixel_draw.c
index 1afa6c26916..0c8a9180d47 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_draw.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_draw.c
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index 1bafe880356..9bcbbd118ca 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h
index 4eb93499658..58007d3047f 100644
--- a/src/mesa/drivers/dri/i965/intel_reg.h
+++ b/src/mesa/drivers/dri/i965/intel_reg.h
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index 6bb4c47077c..17838350cda 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h
index 85614dd628b..96bb995d66f 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_state.c b/src/mesa/drivers/dri/i965/intel_state.c
index 55dbc6dc2ca..498cab49ec4 100644
--- a/src/mesa/drivers/dri/i965/intel_state.c
+++ b/src/mesa/drivers/dri/i965/intel_state.c
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_tex.h b/src/mesa/drivers/dri/i965/intel_tex.h
index bc51f0738f0..dc83d0891ea 100644
--- a/src/mesa/drivers/dri/i965/intel_tex.h
+++ b/src/mesa/drivers/dri/i965/intel_tex.h
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_tex_copy.c b/src/mesa/drivers/dri/i965/intel_tex_copy.c
index c894d88b7fc..9c255ae82b6 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_copy.c
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_tex_obj.h b/src/mesa/drivers/dri/i965/intel_tex_obj.h
index 60f12076ef5..750e4c33529 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_obj.h
+++ b/src/mesa/drivers/dri/i965/intel_tex_obj.h
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
index 2c46858b4fd..970ded1e66b 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -6,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -16,7 +16,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
index dcf0462ed3c..2383401d14c 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -8,7 +8,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -18,7 +18,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h
index 9dc1088d25c..01543bf298d 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h
@@ -8,7 +8,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -18,7 +18,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-- 
cgit v1.2.3


From 6efae687b7ef46eb1f738f1bc2acbde1805e364b Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 27 Aug 2015 23:01:29 -0400
Subject: mesa: add infra for ARB_shader_texture_image_samples

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/main/extensions.c | 1 +
 src/mesa/main/mtypes.h     | 1 +
 2 files changed, 2 insertions(+)

(limited to 'src')

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index a57d5baeafd..1f7d5420fff 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -159,6 +159,7 @@ static const struct extension extension_table[] = {
    { "GL_ARB_shader_stencil_export",               o(ARB_shader_stencil_export),               GL,             2009 },
    { "GL_ARB_shader_storage_buffer_object",        o(ARB_shader_storage_buffer_object),        GL,             2012 },
    { "GL_ARB_shader_subroutine",                   o(ARB_shader_subroutine),                   GLC,            2010 },
+   { "GL_ARB_shader_texture_image_samples",        o(ARB_shader_texture_image_samples),        GL,             2014 },
    { "GL_ARB_shader_texture_lod",                  o(ARB_shader_texture_lod),                  GL,             2009 },
    { "GL_ARB_shading_language_100",                o(dummy_true),                              GLL,            2003 },
    { "GL_ARB_shading_language_packing",            o(ARB_shading_language_packing),            GL,             2011 },
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 85a9f5dc5f1..fac45aae81c 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3658,6 +3658,7 @@ struct gl_extensions
    GLboolean ARB_shader_stencil_export;
    GLboolean ARB_shader_storage_buffer_object;
    GLboolean ARB_shader_subroutine;
+   GLboolean ARB_shader_texture_image_samples;
    GLboolean ARB_shader_texture_lod;
    GLboolean ARB_shading_language_packing;
    GLboolean ARB_shading_language_420pack;
-- 
cgit v1.2.3


From f9052914e979228d28502a7f7953f98ff4731f99 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 27 Aug 2015 23:03:46 -0400
Subject: glsl: add ir_texture_samples texture opcode

Will be used for textureSamples()

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/ir.cpp                            | 5 +++--
 src/glsl/ir.h                              | 3 ++-
 src/glsl/ir_clone.cpp                      | 1 +
 src/glsl/ir_equals.cpp                     | 1 +
 src/glsl/ir_hv_accept.cpp                  | 1 +
 src/glsl/ir_print_visitor.cpp              | 6 ++++--
 src/glsl/ir_reader.cpp                     | 6 +++++-
 src/glsl/ir_rvalue_visitor.cpp             | 1 +
 src/glsl/opt_tree_grafting.cpp             | 1 +
 src/mesa/program/ir_to_mesa.cpp            | 2 ++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 ++
 11 files changed, 23 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 724861b1e9f..fb58c3b4ef6 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1398,7 +1398,7 @@ ir_dereference::is_lvalue() const
 }
 
 
-static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels" };
+static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples" };
 
 const char *ir_texture::opcode_string()
 {
@@ -1427,7 +1427,8 @@ ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type)
    this->sampler = sampler;
    this->type = type;
 
-   if (this->op == ir_txs || this->op == ir_query_levels) {
+   if (this->op == ir_txs || this->op == ir_query_levels ||
+       this->op == ir_texture_samples) {
       assert(type->base_type == GLSL_TYPE_INT);
    } else if (this->op == ir_lod) {
       assert(type->vector_elements == 2);
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 7cdea017af8..bb7fa0e0af4 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1914,7 +1914,8 @@ enum ir_texture_opcode {
    ir_txs,		/**< Texture size */
    ir_lod,		/**< Texture lod query */
    ir_tg4,		/**< Texture gather */
-   ir_query_levels      /**< Texture levels query */
+   ir_query_levels,     /**< Texture levels query */
+   ir_texture_samples,  /**< Texture samples query */
 };
 
 
diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp
index a8fac183a8d..a2cd672d5d6 100644
--- a/src/glsl/ir_clone.cpp
+++ b/src/glsl/ir_clone.cpp
@@ -222,6 +222,7 @@ ir_texture::clone(void *mem_ctx, struct hash_table *ht) const
    case ir_tex:
    case ir_lod:
    case ir_query_levels:
+   case ir_texture_samples:
       break;
    case ir_txb:
       new_tex->lod_info.bias = this->lod_info.bias->clone(mem_ctx, ht);
diff --git a/src/glsl/ir_equals.cpp b/src/glsl/ir_equals.cpp
index cc1964eefee..5f0785e0ece 100644
--- a/src/glsl/ir_equals.cpp
+++ b/src/glsl/ir_equals.cpp
@@ -151,6 +151,7 @@ ir_texture::equals(const ir_instruction *ir, enum ir_node_type ignore) const
    case ir_tex:
    case ir_lod:
    case ir_query_levels:
+   case ir_texture_samples:
       break;
    case ir_txb:
       if (!lod_info.bias->equals(other->lod_info.bias, ignore))
diff --git a/src/glsl/ir_hv_accept.cpp b/src/glsl/ir_hv_accept.cpp
index d3662cf5063..6495cc4581d 100644
--- a/src/glsl/ir_hv_accept.cpp
+++ b/src/glsl/ir_hv_accept.cpp
@@ -194,6 +194,7 @@ ir_texture::accept(ir_hierarchical_visitor *v)
    case ir_tex:
    case ir_lod:
    case ir_query_levels:
+   case ir_texture_samples:
       break;
    case ir_txb:
       s = this->lod_info.bias->accept(v);
diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp
index b6832692419..b9196900ea6 100644
--- a/src/glsl/ir_print_visitor.cpp
+++ b/src/glsl/ir_print_visitor.cpp
@@ -274,7 +274,8 @@ void ir_print_visitor::visit(ir_texture *ir)
    ir->sampler->accept(this);
    fprintf(f, " ");
 
-   if (ir->op != ir_txs && ir->op != ir_query_levels) {
+   if (ir->op != ir_txs && ir->op != ir_query_levels &&
+       ir->op != ir_texture_samples) {
       ir->coordinate->accept(this);
 
       fprintf(f, " ");
@@ -290,7 +291,7 @@ void ir_print_visitor::visit(ir_texture *ir)
 
    if (ir->op != ir_txf && ir->op != ir_txf_ms &&
        ir->op != ir_txs && ir->op != ir_tg4 &&
-       ir->op != ir_query_levels) {
+       ir->op != ir_query_levels && ir->op != ir_texture_samples) {
       if (ir->projector)
 	 ir->projector->accept(this);
       else
@@ -310,6 +311,7 @@ void ir_print_visitor::visit(ir_texture *ir)
    case ir_tex:
    case ir_lod:
    case ir_query_levels:
+   case ir_texture_samples:
       break;
    case ir_txb:
       ir->lod_info.bias->accept(this);
diff --git a/src/glsl/ir_reader.cpp b/src/glsl/ir_reader.cpp
index 979653304cc..07720e28749 100644
--- a/src/glsl/ir_reader.cpp
+++ b/src/glsl/ir_reader.cpp
@@ -960,6 +960,8 @@ ir_reader::read_texture(s_expression *expr)
       { "tg4", s_type, s_sampler, s_coord, s_offset, s_component };
    s_pattern query_levels_pattern[] =
       { "query_levels", s_type, s_sampler };
+   s_pattern texture_samples_pattern[] =
+      { "texture_samples", s_type, s_sampler }; /* tag as in tex_opcode_strs[] */
    s_pattern other_pattern[] =
       { tag, s_type, s_sampler, s_coord, s_offset, s_proj, s_shadow, s_lod };
 
@@ -977,6 +979,8 @@ ir_reader::read_texture(s_expression *expr)
       op = ir_tg4;
    } else if (MATCH(expr, query_levels_pattern)) {
       op = ir_query_levels;
+   } else if (MATCH(expr, texture_samples_pattern)) {
+      op = ir_texture_samples;
    } else if (MATCH(expr, other_pattern)) {
       op = ir_texture::get_opcode(tag->value());
       if (op == (ir_texture_opcode) -1)
@@ -1029,7 +1033,7 @@ ir_reader::read_texture(s_expression *expr)
 
    if (op != ir_txf && op != ir_txf_ms &&
        op != ir_txs && op != ir_lod && op != ir_tg4 &&
-       op != ir_query_levels) {
+       op != ir_query_levels && op != ir_texture_samples) {
       s_int *proj_as_int = SX_AS_INT(s_proj);
       if (proj_as_int && proj_as_int->value() == 1) {
 	 tex->projector = NULL;
diff --git a/src/glsl/ir_rvalue_visitor.cpp b/src/glsl/ir_rvalue_visitor.cpp
index 2eee3da7b22..a6966f546bc 100644
--- a/src/glsl/ir_rvalue_visitor.cpp
+++ b/src/glsl/ir_rvalue_visitor.cpp
@@ -58,6 +58,7 @@ ir_rvalue_base_visitor::rvalue_visit(ir_texture *ir)
    case ir_tex:
    case ir_lod:
    case ir_query_levels:
+   case ir_texture_samples:
       break;
    case ir_txb:
       handle_rvalue(&ir->lod_info.bias);
diff --git a/src/glsl/opt_tree_grafting.cpp b/src/glsl/opt_tree_grafting.cpp
index 7f2ee6cee34..a7a219c55ca 100644
--- a/src/glsl/opt_tree_grafting.cpp
+++ b/src/glsl/opt_tree_grafting.cpp
@@ -274,6 +274,7 @@ ir_tree_grafting_visitor::visit_enter(ir_texture *ir)
    case ir_tex:
    case ir_lod:
    case ir_query_levels:
+   case ir_texture_samples:
       break;
    case ir_txb:
       if (do_graft(&ir->lod_info.bias))
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 0defed83207..afb400f714c 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1919,6 +1919,8 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
    case ir_query_levels:
       assert(!"Unexpected ir_query_levels opcode");
       break;
+   case ir_texture_samples:
+      unreachable("Unexpected ir_texture_samples opcode");
    }
 
    const glsl_type *sampler_type = ir->sampler->type;
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 6c9f9477a17..625c4e9c8a6 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3228,6 +3228,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
    case ir_lod:
       opcode = TGSI_OPCODE_LODQ;
       break;
+   case ir_texture_samples:
+      unreachable("unexpected texture op");
    }
 
    if (ir->projector) {
-- 
cgit v1.2.3


From 1807a08e4f35b014f2a80d1e88dd74a9f096d7a5 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 27 Aug 2015 23:05:03 -0400
Subject: nir: add nir_texop_texture_samples and convert from glsl

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/glsl_to_nir.cpp | 5 +++++
 src/glsl/nir/nir.h           | 4 +++-
 src/glsl/nir/nir_print.c     | 3 +++
 3 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index ad7d7dddeaf..bf78b4253d9 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -1718,6 +1718,11 @@ nir_visitor::visit(ir_texture *ir)
       num_srcs = 0;
       break;
 
+   case ir_texture_samples:
+      op = nir_texop_texture_samples;
+      num_srcs = 0;
+      break;
+
    default:
       unreachable("not reached");
    }
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 2ba7731c1ff..a93ff11f01b 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -944,7 +944,8 @@ typedef enum {
    nir_texop_txs,                /**< Texture size */
    nir_texop_lod,                /**< Texture lod query */
    nir_texop_tg4,                /**< Texture gather */
-   nir_texop_query_levels       /**< Texture levels query */
+   nir_texop_query_levels,       /**< Texture levels query */
+   nir_texop_texture_samples,    /**< Texture samples query */
 } nir_texop;
 
 typedef struct {
@@ -1016,6 +1017,7 @@ nir_tex_instr_dest_size(nir_tex_instr *instr)
    case nir_texop_lod:
       return 2;
 
+   case nir_texop_texture_samples:
    case nir_texop_query_levels:
       return 1;
 
diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index f591c4b5f8d..69cadbae2b6 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -417,6 +417,9 @@ print_tex_instr(nir_tex_instr *instr, print_var_state *state, FILE *fp)
    case nir_texop_query_levels:
       fprintf(fp, "query_levels ");
       break;
+   case nir_texop_texture_samples:
+      fprintf(fp, "texture_samples ");
+      break;
 
    default:
       unreachable("Invalid texture operation");
-- 
cgit v1.2.3


From fb18ee9ba6950e61f6d4ac28a0b5aea85c457d79 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 27 Aug 2015 23:06:29 -0400
Subject: glsl: add ARB_shader_texture_image_samples infrastructure

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/glcpp/glcpp-parse.y    | 3 +++
 src/glsl/glsl_parser_extras.cpp | 1 +
 src/glsl/glsl_parser_extras.h   | 2 ++
 3 files changed, 6 insertions(+)

(limited to 'src')

diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index 2d631f08c29..1d7a3af8b74 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -2483,6 +2483,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
               if (extensions->ARB_shader_image_size)
                  add_builtin_define(parser, "GL_ARB_shader_image_size", 1);
 
+              if (extensions->ARB_shader_texture_image_samples)
+                 add_builtin_define(parser, "GL_ARB_shader_texture_image_samples", 1);
+
               if (extensions->ARB_derivative_control)
                  add_builtin_define(parser, "GL_ARB_derivative_control", 1);
 
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 939a03cb0d7..fb76614133d 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -604,6 +604,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
    EXT(ARB_shader_stencil_export,        true,  false,     ARB_shader_stencil_export),
    EXT(ARB_shader_storage_buffer_object, true,  false,     ARB_shader_storage_buffer_object),
    EXT(ARB_shader_subroutine,            true,  false,     ARB_shader_subroutine),
+   EXT(ARB_shader_texture_image_samples, true,  false,     ARB_shader_texture_image_samples),
    EXT(ARB_shader_texture_lod,           true,  false,     ARB_shader_texture_lod),
    EXT(ARB_shading_language_420pack,     true,  false,     ARB_shading_language_420pack),
    EXT(ARB_shading_language_packing,     true,  false,     ARB_shading_language_packing),
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index 295cd10ba14..d3b091d9dce 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -510,6 +510,8 @@ struct _mesa_glsl_parse_state {
    bool ARB_shader_storage_buffer_object_warn;
    bool ARB_shader_subroutine_enable;
    bool ARB_shader_subroutine_warn;
+   bool ARB_shader_texture_image_samples_enable;
+   bool ARB_shader_texture_image_samples_warn;
    bool ARB_shader_texture_lod_enable;
    bool ARB_shader_texture_lod_warn;
    bool ARB_shading_language_420pack_enable;
-- 
cgit v1.2.3


From 0c7fbcb84403ec318c77a51c98addd653529ae48 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 27 Aug 2015 23:06:57 -0400
Subject: glsl: add support for the textureSamples function

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/builtin_functions.cpp | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'src')

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 3b4a9df808c..cdc016e3971 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -427,6 +427,13 @@ shader_image_size(const _mesa_glsl_parse_state *state)
            state->ARB_shader_image_size_enable;
 }
 
+static bool
+shader_samples(const _mesa_glsl_parse_state *state)
+{
+   return state->is_version(450, 0) ||
+          state->ARB_shader_texture_image_samples_enable;
+}
+
 static bool
 gs_streams(const _mesa_glsl_parse_state *state)
 {
@@ -666,6 +673,7 @@ private:
    B1(all);
    B1(not);
    BA2(textureSize);
+   B1(textureSamples);
 
 /** Flags to _texture() */
 #define TEX_PROJECT 1
@@ -1407,6 +1415,16 @@ builtin_builder::create_builtins()
                 _textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::usampler2DMSArray_type),
                 NULL);
 
+   add_function("textureSamples",
+                _textureSamples(glsl_type::sampler2DMS_type),
+                _textureSamples(glsl_type::isampler2DMS_type),
+                _textureSamples(glsl_type::usampler2DMS_type),
+
+                _textureSamples(glsl_type::sampler2DMSArray_type),
+                _textureSamples(glsl_type::isampler2DMSArray_type),
+                _textureSamples(glsl_type::usampler2DMSArray_type),
+                NULL);
+
    add_function("texture",
                 _texture(ir_tex, v130, glsl_type::vec4_type,  glsl_type::sampler1D_type,  glsl_type::float_type),
                 _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type),
@@ -4169,6 +4187,19 @@ builtin_builder::_textureSize(builtin_available_predicate avail,
    return sig;
 }
 
+ir_function_signature *
+builtin_builder::_textureSamples(const glsl_type *sampler_type)
+{
+   ir_variable *s = in_var(sampler_type, "sampler");
+   MAKE_SIG(glsl_type::int_type, shader_samples, 1, s);
+
+   ir_texture *tex = new(mem_ctx) ir_texture(ir_texture_samples);
+   tex->set_sampler(new(mem_ctx) ir_dereference_variable(s), glsl_type::int_type);
+   body.emit(ret(tex));
+
+   return sig;
+}
+
 ir_function_signature *
 builtin_builder::_texture(ir_texture_opcode opcode,
                           builtin_available_predicate avail,
-- 
cgit v1.2.3


From 0b91bcea98c0fe201bba89abe1ca3aee4d04c56c Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Tue, 11 Aug 2015 20:37:32 -0400
Subject: i965: add support for textureSamples function

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
[v2: kayden-supplied code in fs_nir replacing need for logical opcode]
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_defines.h          |  2 ++
 src/mesa/drivers/dri/i965/brw_disasm.c           |  1 +
 src/mesa/drivers/dri/i965/brw_fs.cpp             |  1 +
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp   |  4 ++++
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp         | 10 ++++++++++
 src/mesa/drivers/dri/i965/brw_shader.cpp         |  2 ++
 src/mesa/drivers/dri/i965/brw_vec4.cpp           |  1 +
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp |  4 ++++
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp       |  1 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   | 10 +++++++++-
 10 files changed, 35 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index a8594afa486..d18f7ca4f73 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -979,6 +979,7 @@ enum opcode {
    SHADER_OPCODE_TG4_LOGICAL,
    SHADER_OPCODE_TG4_OFFSET,
    SHADER_OPCODE_TG4_OFFSET_LOGICAL,
+   SHADER_OPCODE_SAMPLEINFO,
 
    /**
     * Combines multiple sources of size 1 into a larger virtual GRF.
@@ -1511,6 +1512,7 @@ enum brw_message_target {
 #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4      8
 #define GEN5_SAMPLER_MESSAGE_LOD                 9
 #define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO      10
+#define GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO   11
 #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C    16
 #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO   17
 #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C 18
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index 7401e325638..db23a187a93 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -617,6 +617,7 @@ static const char *const gen5_sampler_msg_type[] = {
    [GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4]      = "gather4",
    [GEN5_SAMPLER_MESSAGE_LOD]                 = "lod",
    [GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO]      = "resinfo",
+   [GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO]   = "sampleinfo",
    [GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C]    = "gather4_c",
    [GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO]   = "gather4_po",
    [GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C] = "gather4_po_c",
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index d240371767b..17cbdf46582 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -877,6 +877,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
    case SHADER_OPCODE_TXL:
    case SHADER_OPCODE_TXS:
    case SHADER_OPCODE_LOD:
+   case SHADER_OPCODE_SAMPLEINFO:
       return 1;
    case FS_OPCODE_FB_WRITE:
       return 2;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index c86ca043b63..90805e45ad7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -646,6 +646,9 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
             msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
          }
          break;
+      case SHADER_OPCODE_SAMPLEINFO:
+         msg_type = GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO;
+         break;
       default:
 	 unreachable("not reached");
       }
@@ -1920,6 +1923,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
       case SHADER_OPCODE_LOD:
       case SHADER_OPCODE_TG4:
       case SHADER_OPCODE_TG4_OFFSET:
+      case SHADER_OPCODE_SAMPLEINFO:
 	 generate_tex(inst, dst, src[0], src[1]);
 	 break;
       case FS_OPCODE_DDX_COARSE:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index a6c6a2fa8db..0902e1c396e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1842,6 +1842,16 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
    case nir_texop_txf_ms: op = ir_txf_ms; break;
    case nir_texop_txl: op = ir_txl; break;
    case nir_texop_txs: op = ir_txs; break;
+   case nir_texop_texture_samples: {
+      fs_reg dst = retype(get_nir_dest(instr->dest), BRW_REGISTER_TYPE_D);
+      fs_inst *inst = bld.emit(SHADER_OPCODE_SAMPLEINFO, dst,
+                               bld.vgrf(BRW_REGISTER_TYPE_D, 1),
+                               sampler_reg);
+      inst->mlen = 1;
+      inst->header_size = 1;
+      inst->base_mrf = -1;
+      return;
+   }
    default:
       unreachable("unknown texture opcode");
    }
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index de1a7fe81a8..d353591c356 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -618,6 +618,8 @@ brw_instruction_name(enum opcode op)
       return "tg4_offset";
    case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
       return "tg4_offset_logical";
+   case SHADER_OPCODE_SAMPLEINFO:
+      return "sampleinfo";
 
    case SHADER_OPCODE_SHADER_TIME_ADD:
       return "shader_time_add";
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 85dc37238d1..893ff356afa 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -331,6 +331,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
    case SHADER_OPCODE_TXS:
    case SHADER_OPCODE_TG4:
    case SHADER_OPCODE_TG4_OFFSET:
+   case SHADER_OPCODE_SAMPLEINFO:
       return inst->header_size;
    default:
       unreachable("not reached");
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 92050b94d33..195033358fb 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -286,6 +286,9 @@ vec4_generator::generate_tex(vec4_instruction *inst,
             msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
          }
          break;
+      case SHADER_OPCODE_SAMPLEINFO:
+         msg_type = GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO;
+         break;
       default:
 	 unreachable("should not get here: invalid vec4 texture opcode");
       }
@@ -1374,6 +1377,7 @@ vec4_generator::generate_code(const cfg_t *cfg)
       case SHADER_OPCODE_TXS:
       case SHADER_OPCODE_TG4:
       case SHADER_OPCODE_TG4_OFFSET:
+      case SHADER_OPCODE_SAMPLEINFO:
          generate_tex(inst, dst, src[0], src[1]);
          break;
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 751ec73f709..c21fd02bf68 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1337,6 +1337,7 @@ ir_texture_opcode_for_nir_texop(nir_texop texop)
    switch (texop) {
    case nir_texop_lod: op = ir_lod; break;
    case nir_texop_query_levels: op = ir_query_levels; break;
+   case nir_texop_texture_samples: op = ir_texture_samples; break;
    case nir_texop_tex: op = ir_tex; break;
    case nir_texop_tg4: op = ir_tg4; break;
    case nir_texop_txb: op = ir_txb; break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index f6e59ce4196..04657704405 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2550,6 +2550,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
    case ir_tg4: opcode = offset_value.file != BAD_FILE
                          ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
    case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
+   case ir_texture_samples: opcode = SHADER_OPCODE_SAMPLEINFO; break;
    case ir_txb:
       unreachable("TXB is not valid for vertex shaders.");
    case ir_lod:
@@ -2569,13 +2570,15 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
     * - Texel offsets
     * - Gather channel selection
     * - Sampler indices too large to fit in a 4-bit value.
+    * - Sampleinfo message - takes no parameters, but mlen = 0 is illegal
     */
    inst->header_size =
       (devinfo->gen < 5 || devinfo->gen >= 9 ||
        inst->offset != 0 || op == ir_tg4 ||
+       op == ir_texture_samples ||
        is_high_sampler(sampler_reg)) ? 1 : 0;
    inst->base_mrf = 2;
-   inst->mlen = inst->header_size + 1; /* always at least one */
+   inst->mlen = inst->header_size;
    inst->dst.writemask = WRITEMASK_XYZW;
    inst->shadow_compare = shadow_comparitor.file != BAD_FILE;
 
@@ -2587,6 +2590,9 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
    if (op == ir_txs || op == ir_query_levels) {
       int writemask = devinfo->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
       emit(MOV(dst_reg(MRF, param_base, lod.type, writemask), lod));
+      inst->mlen++;
+   } else if (op == ir_texture_samples) {
+      inst->dst.writemask = WRITEMASK_X;
    } else {
       /* Load the coordinate */
       /* FINISHME: gl_clamp_mask and saturate */
@@ -2595,6 +2601,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
 
       emit(MOV(dst_reg(MRF, param_base, coordinate.type, coord_mask),
                coordinate));
+      inst->mlen++;
 
       if (zero_mask != 0) {
          emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask),
@@ -2817,6 +2824,7 @@ vec4_visitor::visit(ir_texture *ir)
    case ir_txb:
    case ir_lod:
    case ir_tg4:
+   case ir_texture_samples:
       break;
    }
 
-- 
cgit v1.2.3


From 37c5c862810ddf8f3973ec738aa0a2a33af3bcb2 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 27 Aug 2015 23:22:18 -0400
Subject: glsl: add support for the imageSamples function

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/builtin_functions.cpp | 43 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index cdc016e3971..06a29bcfb10 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -539,7 +539,8 @@ private:
       IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE = (1 << 3),
       IMAGE_FUNCTION_READ_ONLY = (1 << 4),
       IMAGE_FUNCTION_WRITE_ONLY = (1 << 5),
-      IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6)
+      IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6),
+      IMAGE_FUNCTION_MS_ONLY = (1 << 7),
    };
 
    /**
@@ -750,6 +751,9 @@ private:
    ir_function_signature *_image_size_prototype(const glsl_type *image_type,
                                                 unsigned num_arguments,
                                                 unsigned flags);
+   ir_function_signature *_image_samples_prototype(const glsl_type *image_type,
+                                                   unsigned num_arguments,
+                                                   unsigned flags);
    ir_function_signature *_image(image_prototype_ctr prototype,
                                  const glsl_type *image_type,
                                  const char *intrinsic_name,
@@ -2685,8 +2689,10 @@ builtin_builder::add_image_function(const char *name,
    ir_function *f = new(mem_ctx) ir_function(name);
 
    for (unsigned i = 0; i < ARRAY_SIZE(types); ++i) {
-      if (types[i]->sampler_type != GLSL_TYPE_FLOAT ||
-          (flags & IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE))
+      if ((types[i]->sampler_type != GLSL_TYPE_FLOAT ||
+           (flags & IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE)) &&
+          (types[i]->sampler_dimensionality == GLSL_SAMPLER_DIM_MS ||
+           !(flags & IMAGE_FUNCTION_MS_ONLY)))
          f->add_signature(_image(prototype, types[i], intrinsic_name,
                                  num_arguments, flags));
    }
@@ -2754,6 +2760,12 @@ builtin_builder::add_image_functions(bool glsl)
                       "__intrinsic_image_size",
                       &builtin_builder::_image_size_prototype, 1,
                       flags | IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE);
+
+   add_image_function(glsl ? "imageSamples" : "__intrinsic_image_samples",
+                      "__intrinsic_image_samples",
+                      &builtin_builder::_image_samples_prototype, 1,
+                      flags | IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE |
+                      IMAGE_FUNCTION_MS_ONLY);
 }
 
 ir_variable *
@@ -4981,6 +4993,31 @@ builtin_builder::_image_size_prototype(const glsl_type *image_type,
    return sig;
 }
 
+ir_function_signature *
+builtin_builder::_image_samples_prototype(const glsl_type *image_type,
+                                          unsigned num_arguments,
+                                          unsigned flags)
+{
+   ir_variable *image = in_var(image_type, "image");
+   ir_function_signature *sig =
+      new_sig(glsl_type::int_type, shader_samples, 1, image);
+
+   /* Set the maximal set of qualifiers allowed for this image
+    * built-in.  Function calls with arguments having fewer
+    * qualifiers than present in the prototype are allowed by the
+    * spec, but not with more, i.e. this will make the compiler
+    * accept everything that needs to be accepted, and reject cases
+    * like loads from write-only or stores to read-only images.
+    */
+   image->data.image_read_only = true;
+   image->data.image_write_only = true;
+   image->data.image_coherent = true;
+   image->data.image_volatile = true;
+   image->data.image_restrict = true;
+
+   return sig;
+}
+
 ir_function_signature *
 builtin_builder::_image(image_prototype_ctr prototype,
                         const glsl_type *image_type,
-- 
cgit v1.2.3


From 56238305e5474d749881006622c5ee65dee4fb40 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 27 Aug 2015 23:27:50 -0400
Subject: nir: convert glsl imageSamples into a new intrinsic

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/glsl_to_nir.cpp  | 6 +++++-
 src/glsl/nir/nir_intrinsics.h | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index bf78b4253d9..944f7462911 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -644,6 +644,8 @@ nir_visitor::visit(ir_call *ir)
          op = nir_intrinsic_memory_barrier;
       } else if (strcmp(ir->callee_name(), "__intrinsic_image_size") == 0) {
          op = nir_intrinsic_image_size;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_image_samples") == 0) {
+         op = nir_intrinsic_image_samples;
       } else {
          unreachable("not reached");
       }
@@ -670,6 +672,7 @@ nir_visitor::visit(ir_call *ir)
       case nir_intrinsic_image_atomic_xor:
       case nir_intrinsic_image_atomic_exchange:
       case nir_intrinsic_image_atomic_comp_swap:
+      case nir_intrinsic_image_samples:
       case nir_intrinsic_image_size: {
          nir_ssa_undef_instr *instr_undef =
             nir_ssa_undef_instr_create(shader, 1);
@@ -693,7 +696,8 @@ nir_visitor::visit(ir_call *ir)
                               info->dest_components, NULL);
          }
 
-         if (op == nir_intrinsic_image_size)
+         if (op == nir_intrinsic_image_size ||
+             op == nir_intrinsic_image_samples)
             break;
 
          /* Set the address argument, extending the coordinate vector to four
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index ed309b602c2..07dd5cf1ec3 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -125,6 +125,8 @@ INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
 INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0)
 INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0,
           NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,
+          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
 #define SYSTEM_VALUE(name, components) \
    INTRINSIC(load_##name, 0, ARR(), true, components, 0, 0, \
-- 
cgit v1.2.3


From 55ebaa6d003b69c0a159a00d82a1e96f685062d6 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 27 Aug 2015 23:34:47 -0400
Subject: i965: add handling for imageSamples

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 0902e1c396e..1cc7b021928 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1436,6 +1436,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_image_samples:
+      /* The driver does not support multi-sampled images. */
+      bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), fs_reg(1));
+      break;
+
    case nir_intrinsic_load_front_face:
       bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
               *emit_frontfacing_interpolation());
-- 
cgit v1.2.3


From bfc5ace5bda6510d4dd3893a3b8c677a1c85e23e Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Tue, 11 Aug 2015 20:38:48 -0400
Subject: i965: enable ARB_shader_texture_image_samples

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 docs/GL3.txt                                 | 2 +-
 docs/relnotes/11.1.0.html                    | 2 +-
 src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 8ad1aac50a5..25350021720 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -194,7 +194,7 @@ GL 4.5, GLSL 4.50:
   GL_ARB_derivative_control                            DONE (i965, nv50, nvc0, r600, radeonsi)
   GL_ARB_direct_state_access                           DONE (all drivers)
   GL_ARB_get_texture_sub_image                         DONE (all drivers)
-  GL_ARB_shader_texture_image_samples                  not started
+  GL_ARB_shader_texture_image_samples                  DONE (i965)
   GL_ARB_texture_barrier                               DONE (nv50, nvc0, r600, radeonsi)
   GL_KHR_context_flush_control                         DONE (all - but needs GLX/EGL extension to be useful)
   GL_KHR_robust_buffer_access_behavior                 not started
diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 4b56f69660d..603b06f41c9 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -44,8 +44,8 @@ Note: some of the new features are only available with certain drivers.
 </p>
 
 <ul>
+<li>GL_ARB_shader_texture_image_samples on i965</li>
 <li>GL_ARB_texture_query_lod on softpipe</li>
-TBD.
 </ul>
 
 <h2>Bug fixes</h2>
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index 8842f50935a..e6d39e00788 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -325,6 +325,7 @@ intelInitExtensions(struct gl_context *ctx)
       ctx->Extensions.ARB_shader_atomic_counters = true;
       ctx->Extensions.ARB_shader_image_load_store = true;
       ctx->Extensions.ARB_shader_image_size = true;
+      ctx->Extensions.ARB_shader_texture_image_samples = true;
       ctx->Extensions.ARB_texture_compression_bptc = true;
       ctx->Extensions.ARB_texture_view = true;
 
-- 
cgit v1.2.3


From 2b9c35945a74c6e2b559bb52eb612231465e86a1 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Fri, 3 Oct 2014 18:58:39 -0700
Subject: i965: Support CS in update_stage_texture_surfaces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 8213f4ea2fb..3af4b995a94 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -848,10 +848,14 @@ brw_update_texture_surfaces(struct brw_context *brw)
    /* BRW_NEW_FRAGMENT_PROGRAM */
    struct gl_program *fs = (struct gl_program *) brw->fragment_program;
 
+   /* BRW_NEW_COMPUTE_PROGRAM */
+   struct gl_program *cs = (struct gl_program *) brw->compute_program;
+
    /* _NEW_TEXTURE */
    update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
    update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
    update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
+   update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);
 
    /* emit alternate set of surface state for gather. this
     * allows the surface format to be overriden for only the
@@ -863,6 +867,8 @@ brw_update_texture_surfaces(struct brw_context *brw)
          update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
       if (fs && fs->UsesGather)
          update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
+      if (cs && cs->UsesGather)
+         update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
    }
 
    brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
@@ -872,6 +878,7 @@ const struct brw_tracked_state brw_texture_surfaces = {
    .dirty = {
       .mesa = _NEW_TEXTURE,
       .brw = BRW_NEW_BATCH |
+             BRW_NEW_COMPUTE_PROGRAM |
              BRW_NEW_FRAGMENT_PROGRAM |
              BRW_NEW_FS_PROG_DATA |
              BRW_NEW_GEOMETRY_PROGRAM |
-- 
cgit v1.2.3


From 75d04e561b58d1f00e387c3402e2c299eb3dee87 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Fri, 3 Oct 2014 19:01:24 -0700
Subject: i965: Support compute shaders in is_scalar_shader_stage()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_shader.cpp | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index d353591c356..cf9aa232386 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -195,6 +195,7 @@ is_scalar_shader_stage(struct brw_context *brw, int stage)
 {
    switch (stage) {
    case MESA_SHADER_FRAGMENT:
+   case MESA_SHADER_COMPUTE:
       return true;
    case MESA_SHADER_VERTEX:
       return brw->intelScreen->compiler->scalar_vs;
-- 
cgit v1.2.3


From af48612b88cb51cd3b957e70490462c0c404f92c Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Fri, 3 Oct 2014 19:05:32 -0700
Subject: i965/fs: Set first_non_payload_grf in assign_curb_setup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

first_non_payload_grf may be updated in assign_urb_setup for FS or
assign_vs_urb_setup for VS.

We need to set this in assign_curb_setup for compute shaders, since CS
has no assign_cs_urb_setup counterpart to assign_urb_setup (FS) or
assign_vs_urb_setup (VS).

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 17cbdf46582..320f612682d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1394,6 +1394,9 @@ fs_visitor::assign_curb_setup()
 	 }
       }
    }
+
+   /* This may be updated in assign_urb_setup or assign_vs_urb_setup. */
+   this->first_non_payload_grf = payload.num_regs + prog_data->curb_read_length;
 }
 
 void
@@ -1508,8 +1511,7 @@ fs_visitor::assign_urb_setup()
    }
 
    /* Each attribute is 4 setup channels, each of which is half a reg. */
-   this->first_non_payload_grf =
-      urb_start + prog_data->num_varying_inputs * 2;
+   this->first_non_payload_grf += prog_data->num_varying_inputs * 2;
 }
 
 void
@@ -1524,8 +1526,7 @@ fs_visitor::assign_vs_urb_setup()
       count++;
 
    /* Each attribute is 4 regs. */
-   this->first_non_payload_grf =
-      payload.num_regs + prog_data->curb_read_length + count * 4;
+   this->first_non_payload_grf += count * 4;
 
    unsigned vue_entries =
       MAX2(count, vs_prog_data->base.vue_map.num_slots);
-- 
cgit v1.2.3


From 1180b794871e53a94811f7279f42504bca4d9434 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Fri, 3 Oct 2014 19:07:45 -0700
Subject: i965: Set up sampler state for compute shaders
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_cs.cpp | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp
index 8197e6a69dd..605a3fa0058 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -225,12 +225,18 @@ brw_codegen_cs_prog(struct brw_context *brw,
 static void
 brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key)
 {
+   struct gl_context *ctx = &brw->ctx;
    /* BRW_NEW_COMPUTE_PROGRAM */
    const struct brw_compute_program *cp =
       (struct brw_compute_program *) brw->compute_program;
+   const struct gl_program *prog = (struct gl_program *) cp;
 
    memset(key, 0, sizeof(*key));
 
+   /* _NEW_TEXTURE */
+   brw_populate_sampler_prog_key_data(ctx, prog, brw->cs.base.sampler_count,
+                                      &key->tex);
+
    /* The unique compute program ID */
    key->program_string_id = cp->id;
 }
@@ -248,9 +254,12 @@ brw_upload_cs_prog(struct brw_context *brw)
    if (!cp)
       return;
 
-   if (!brw_state_dirty(brw, 0, BRW_NEW_COMPUTE_PROGRAM))
+   if (!brw_state_dirty(brw, _NEW_TEXTURE, BRW_NEW_COMPUTE_PROGRAM))
       return;
 
+   brw->cs.base.sampler_count =
+      _mesa_fls(ctx->ComputeProgram._Current->Base.SamplersUsed);
+
    brw_cs_populate_key(brw, &key);
 
    if (!brw_search_cache(&brw->cache, BRW_CACHE_CS_PROG,
@@ -413,7 +422,8 @@ brw_upload_cs_state(struct brw_context *brw)
    if (brw->gen >= 8)
       desc[dw++] = 0; /* Kernel Start Pointer High */
    desc[dw++] = 0;
-   desc[dw++] = 0;
+   desc[dw++] = stage_state->sampler_offset |
+      ((stage_state->sampler_count + 3) / 4);
    desc[dw++] = stage_state->bind_bo_offset;
    desc[dw++] = SET_FIELD(push_constant_regs, MEDIA_CURBE_READ_LENGTH);
    const uint32_t media_threads =
-- 
cgit v1.2.3


From b01d0473917ba15de3aa146006bfef5836d10e93 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Wed, 11 Mar 2015 22:54:20 -0700
Subject: i965/cs: Emit texture surfaces to enable CS sampling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_context.h      | 2 +-
 src/mesa/drivers/dri/i965/brw_state_upload.c | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 41ba7696d40..0318a3fb229 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1517,7 +1517,7 @@ struct brw_context
 
    int num_atoms[BRW_NUM_PIPELINES];
    const struct brw_tracked_state render_atoms[60];
-   const struct brw_tracked_state compute_atoms[5];
+   const struct brw_tracked_state compute_atoms[6];
 
    /* If (INTEL_DEBUG & DEBUG_BATCH) */
    struct {
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 01c090014e4..14627d5231b 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -260,6 +260,7 @@ static const struct brw_tracked_state *gen7_compute_atoms[] =
    &brw_cs_image_surfaces,
    &gen7_cs_push_constants,
    &brw_cs_abo_surfaces,
+   &brw_texture_surfaces,
    &brw_cs_state,
 };
 
@@ -351,6 +352,7 @@ static const struct brw_tracked_state *gen8_compute_atoms[] =
    &brw_cs_image_surfaces,
    &gen7_cs_push_constants,
    &brw_cs_abo_surfaces,
+   &brw_texture_surfaces,
    &brw_cs_state,
 };
 
-- 
cgit v1.2.3


From 34cff76fc2da1ce9abad6e2b1856fec6a950d19c Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Wed, 5 Nov 2014 00:47:41 -0800
Subject: i965/cs: Enable barrier in MEDIA_INTERFACE_DESCRIPTOR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enable barrier in MEDIA_INTERFACE_DESCRIPTOR if the program uses the
barrier() GLSL function.

On Ivy Bridge and Haswell, this allows the piglit test
tests/spec/arb_compute_shader/execution/simple-barrier-atomics.shader_test
to pass. On gen8, this enables a similar test with a local group size
of 896 to pass.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_context.h  | 1 +
 src/mesa/drivers/dri/i965/brw_cs.cpp     | 4 +++-
 src/mesa/drivers/dri/i965/brw_defines.h  | 2 ++
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 ++
 4 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 0318a3fb229..b05b8bd69bf 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -498,6 +498,7 @@ struct brw_cs_prog_data {
    GLuint dispatch_grf_start_reg_16;
    unsigned local_size[3];
    unsigned simd_size;
+   bool uses_barrier;
 };
 
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp
index 605a3fa0058..980ef52fe17 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -431,7 +431,9 @@ brw_upload_cs_state(struct brw_context *brw)
       SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :
       SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT);
    assert(threads <= brw->max_cs_threads);
-   desc[dw++] = media_threads;
+   desc[dw++] =
+      SET_FIELD(cs_prog_data->uses_barrier, MEDIA_BARRIER_ENABLE) |
+      media_threads;
 
    BEGIN_BATCH(4);
    OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2));
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index d18f7ca4f73..8fc8cebf11e 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -2690,6 +2690,8 @@ enum brw_wm_barycentric_interp_mode {
 # define MEDIA_CURBE_READ_OFFSET_SHIFT          0
 # define MEDIA_CURBE_READ_OFFSET_MASK           INTEL_MASK(15, 0)
 /* GEN7 DW5, GEN8+ DW6 */
+# define MEDIA_BARRIER_ENABLE_SHIFT             21
+# define MEDIA_BARRIER_ENABLE_MASK              INTEL_MASK(21, 21)
 # define MEDIA_GPGPU_THREAD_COUNT_SHIFT         0
 # define MEDIA_GPGPU_THREAD_COUNT_MASK          INTEL_MASK(7, 0)
 # define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT    0
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 1cc7b021928..8c3c4aed707 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1705,6 +1705,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
    case nir_intrinsic_barrier:
       emit_barrier();
+      if (stage == MESA_SHADER_COMPUTE)
+         ((struct brw_cs_prog_data *) prog_data)->uses_barrier = true;
       break;
 
    default:
-- 
cgit v1.2.3


From b46cbc36072ae050f648f9baa974c4180d9bb8a3 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 4 Sep 2015 01:24:39 +0100
Subject: st/mesa: set the vbuffer to NULL if we are skipping it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If we skip a vbuffer, we need to make sure we NULL out its
contents; otherwise the driver will get confused when the
vbuffer is passed to it.

This was hit by:
GL41-CTS.gpu_shader_fp64.varyings

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/mesa/state_tracker/st_atom_array.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c
index 56b8019a36f..0847184d74d 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -567,8 +567,12 @@ setup_non_interleaved_attribs(struct st_context *st,
       unsigned src_format;
 
       array = get_client_array(vp, arrays, attr);
-      if (!array)
+      if (!array) {
+         vbuffer[attr].buffer = NULL;
+         vbuffer[attr].user_buffer = NULL;
+         vbuffer[attr].buffer_offset = 0;
          continue;
+      }
 
       stride = array->StrideB;
       bufobj = array->BufferObj;
-- 
cgit v1.2.3


From 38e412d548273382c1e56192d6d0a0fd8d08b3c0 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 9 Sep 2015 10:33:52 -0700
Subject: i965: Silence unused parameter warnings in intel_fbo.c

intel_fbo.c: In function 'intel_alloc_window_storage':
intel_fbo.c:415:48: warning: unused parameter 'ctx' [-Wunused-parameter]
 intel_alloc_window_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
                                                ^
intel_fbo.c: In function 'intel_nop_alloc_storage':
intel_fbo.c:428:74: warning: unused parameter 'rb' [-Wunused-parameter]
 intel_nop_alloc_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
                                                                          ^
intel_fbo.c:429:32: warning: unused parameter 'internalFormat' [-Wunused-parameter]
                         GLenum internalFormat, GLuint width, GLuint height)
                                ^
intel_fbo.c:429:55: warning: unused parameter 'width' [-Wunused-parameter]
                         GLenum internalFormat, GLuint width, GLuint height)
                                                       ^
intel_fbo.c:429:69: warning: unused parameter 'height' [-Wunused-parameter]
                         GLenum internalFormat, GLuint width, GLuint height)
                                                                     ^
intel_fbo.c: In function 'intel_blit_framebuffer_with_blitter':
intel_fbo.c:790:61: warning: unused parameter 'filter' [-Wunused-parameter]
                                     GLbitfield mask, GLenum filter)
                                                             ^

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
---
 src/mesa/drivers/dri/i965/intel_fbo.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index af44325988a..b2160552d33 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -412,6 +412,7 @@ static GLboolean
 intel_alloc_window_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
                            GLenum internalFormat, GLuint width, GLuint height)
 {
+   (void) ctx;
    assert(rb->Name == 0);
    rb->Width = width;
    rb->Height = height;
@@ -425,6 +426,10 @@ static GLboolean
 intel_nop_alloc_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
                         GLenum internalFormat, GLuint width, GLuint height)
 {
+   (void) rb;
+   (void) internalFormat;
+   (void) width;
+   (void) height;
    _mesa_problem(ctx, "intel_nop_alloc_storage should never be called.");
    return false;
 }
@@ -784,7 +789,7 @@ intel_blit_framebuffer_with_blitter(struct gl_context *ctx,
                                     GLint srcX1, GLint srcY1,
                                     GLint dstX0, GLint dstY0,
                                     GLint dstX1, GLint dstY1,
-                                    GLbitfield mask, GLenum filter)
+                                    GLbitfield mask)
 {
    struct brw_context *brw = brw_context(ctx);
 
@@ -904,7 +909,7 @@ intel_blit_framebuffer(struct gl_context *ctx,
    mask = intel_blit_framebuffer_with_blitter(ctx, readFb, drawFb,
                                               srcX0, srcY0, srcX1, srcY1,
                                               dstX0, dstY0, dstX1, dstY1,
-                                              mask, filter);
+                                              mask);
    if (mask == 0x0)
       return;
 
@@ -942,7 +947,7 @@ gen4_blit_framebuffer(struct gl_context *ctx,
    mask = intel_blit_framebuffer_with_blitter(ctx, readFb, drawFb,
                                               srcX0, srcY0, srcX1, srcY1,
                                               dstX0, dstY0, dstX1, dstY1,
-                                              mask, filter);
+                                              mask);
    if (mask == 0x0)
       return;
 
-- 
cgit v1.2.3


From 094877f9d23169b1d209fb0c97f9b6d4679842d9 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 2 Sep 2015 14:02:18 -0700
Subject: i965: Silence unused parameter warnings in intel_mipmap_tree.c

The target parameter of compute_msaa_layout appears to be unused since
83b83fb when support for CMS textures was added for Gen7.

The brw parameter of intel_get_non_msrt_mcs_alignment appears to be
unused since e92fbdc when the GEN check (along with the "can we fast
clear" decision) was moved to a different function.

intel_mipmap_tree.c: In function 'compute_msaa_layout':
intel_mipmap_tree.c:62:73: warning: unused parameter 'target' [-Wunused-parameter]
 compute_msaa_layout(struct brw_context *brw, mesa_format format, GLenum target,
                                                                         ^
intel_mipmap_tree.c: In function 'intel_get_non_msrt_mcs_alignment':
intel_mipmap_tree.c:143:54: warning: unused parameter 'brw' [-Wunused-parameter]
 intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
                                                      ^

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
---
 src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 8 ++++----
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c   | 9 ++++-----
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h   | 3 +--
 3 files changed, 9 insertions(+), 11 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index f5ecbb54989..eb201736c6e 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -204,7 +204,7 @@ brw_draw_rectlist(struct gl_context *ctx, struct rect *rect, int num_instances)
 }
 
 static void
-get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb,
+get_fast_clear_rect(struct gl_framebuffer *fb,
                     struct intel_renderbuffer *irb, struct rect *rect)
 {
    unsigned int x_align, y_align;
@@ -226,7 +226,7 @@ get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb,
        * alignment size returned by intel_get_non_msrt_mcs_alignment(), but
        * with X alignment multiplied by 16 and Y alignment multiplied by 32.
        */
-      intel_get_non_msrt_mcs_alignment(brw, irb->mt, &x_align, &y_align);
+      intel_get_non_msrt_mcs_alignment(irb->mt, &x_align, &y_align);
       x_align *= 16;
       y_align *= 32;
 
@@ -516,7 +516,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
          irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
          irb->need_downsample = true;
          fast_clear_buffers |= 1 << index;
-         get_fast_clear_rect(brw, fb, irb, &fast_clear_rect);
+         get_fast_clear_rect(fb, irb, &fast_clear_rect);
          break;
 
       case REP_CLEAR:
@@ -653,7 +653,7 @@ get_resolve_rect(struct brw_context *brw,
     * by 8 and 16 and 8 and 8 for SKL.
     */
 
-   intel_get_non_msrt_mcs_alignment(brw, mt, &x_align, &y_align);
+   intel_get_non_msrt_mcs_alignment(mt, &x_align, &y_align);
    if (brw->gen >= 9) {
       x_scaledown = x_align * 8;
       y_scaledown = y_align * 8;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index eafd2a6be9a..97e7e73ace9 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -57,7 +57,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
  * created, based on the chip generation and the surface type.
  */
 static enum intel_msaa_layout
-compute_msaa_layout(struct brw_context *brw, mesa_format format, GLenum target,
+compute_msaa_layout(struct brw_context *brw, mesa_format format,
                     bool disable_aux_buffers)
 {
    /* Prior to Gen7, all MSAA surfaces used IMS layout. */
@@ -138,8 +138,7 @@ compute_msaa_layout(struct brw_context *brw, mesa_format format, GLenum target,
  *   by half the block width, and Y coordinates by half the block height.
  */
 void
-intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
-                                 struct intel_mipmap_tree *mt,
+intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt,
                                  unsigned *width_px, unsigned *height)
 {
    switch (mt->tiling) {
@@ -320,7 +319,7 @@ intel_miptree_create_layout(struct brw_context *brw,
    if (num_samples > 1) {
       /* Adjust width/height/depth for MSAA */
       mt->msaa_layout = compute_msaa_layout(brw, format,
-                                            mt->target, mt->disable_aux_buffers);
+                                            mt->disable_aux_buffers);
       if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
          /* From the Ivybridge PRM, Volume 1, Part 1, page 108:
           * "If the surface is multisampled and it is a depth or stencil
@@ -1425,7 +1424,7 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
    const mesa_format format = MESA_FORMAT_R_UINT32;
    unsigned block_width_px;
    unsigned block_height;
-   intel_get_non_msrt_mcs_alignment(brw, mt, &block_width_px, &block_height);
+   intel_get_non_msrt_mcs_alignment(mt, &block_width_px, &block_height);
    unsigned width_divisor = block_width_px * 4;
    unsigned height_divisor = block_height * 8;
    unsigned mcs_width =
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 7b5f97b3368..0e92b95de5d 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -515,8 +515,7 @@ struct intel_mipmap_tree
 };
 
 void
-intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
-                                 struct intel_mipmap_tree *mt,
+intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt,
                                  unsigned *width_px, unsigned *height);
 bool
 intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling);
-- 
cgit v1.2.3


From 68b44dd5b23c0fca8c09357ffb5d6826f8b4b344 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 2 Sep 2015 14:33:59 -0700
Subject: i915, i965: Silence unused parameter warnings in intel_mipmap_tree.h

These only occurred in release builds, but they occurred in every file
that included intel_mipmap_tree.h.  Lots of spam. :(

intel_mipmap_tree.h: In function 'intel_miptree_check_level_layer':
intel_mipmap_tree.h:595:59: warning: unused parameter 'mt' [-Wunused-parameter]
 intel_miptree_check_level_layer(struct intel_mipmap_tree *mt,
                                                           ^
intel_mipmap_tree.h:596:42: warning: unused parameter 'level' [-Wunused-parameter]
                                 uint32_t level,
                                          ^
intel_mipmap_tree.h:597:42: warning: unused parameter 'layer' [-Wunused-parameter]
                                 uint32_t layer)
                                          ^

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
---
 src/mesa/drivers/dri/i915/intel_mipmap_tree.h | 4 ++++
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i915/intel_mipmap_tree.h b/src/mesa/drivers/dri/i915/intel_mipmap_tree.h
index 77b1f541a91..d711a29d50c 100644
--- a/src/mesa/drivers/dri/i915/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i915/intel_mipmap_tree.h
@@ -285,6 +285,10 @@ intel_miptree_check_level_layer(struct intel_mipmap_tree *mt,
                                 uint32_t level,
                                 uint32_t layer)
 {
+   (void) mt;
+   (void) level;
+   (void) layer;
+
    assert(level >= mt->first_level);
    assert(level <= mt->last_level);
    assert(layer < mt->level[level].depth);
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 0e92b95de5d..4907438dee5 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -594,6 +594,10 @@ intel_miptree_check_level_layer(struct intel_mipmap_tree *mt,
                                 uint32_t level,
                                 uint32_t layer)
 {
+   (void) mt;
+   (void) level;
+   (void) layer;
+
    assert(level >= mt->first_level);
    assert(level <= mt->last_level);
    assert(layer < mt->level[level].depth);
-- 
cgit v1.2.3


From 31f0967fb50101437d2568e9ab9640ffbcbf7ef9 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 2 Sep 2015 14:43:18 -0700
Subject: i965: Make intel_miptree_map_raw static

This hasn't been used outside intel_mipmap_tree.c since d5d4ba9 started
using meta instead of the blitter for PBO TexSubImage.  While we're
here, remove the unused brw parameter from intel_miptree_unmap_raw (the
function keeps its name; only its signature changes).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 30 +++++++++++++++------------
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  6 ------
 2 files changed, 17 insertions(+), 19 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 97e7e73ace9..1e7a6cc5c89 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -47,6 +47,11 @@
 
 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
 
+static void *intel_miptree_map_raw(struct brw_context *brw,
+                                   struct intel_mipmap_tree *mt);
+
+static void intel_miptree_unmap_raw(struct intel_mipmap_tree *mt);
+
 static bool
 intel_miptree_alloc_mcs(struct brw_context *brw,
                         struct intel_mipmap_tree *mt,
@@ -1398,7 +1403,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
     */
    void *data = intel_miptree_map_raw(brw, mt->mcs_mt);
    memset(data, 0xff, mt->mcs_mt->total_height * mt->mcs_mt->pitch);
-   intel_miptree_unmap_raw(brw, mt->mcs_mt);
+   intel_miptree_unmap_raw(mt->mcs_mt);
    mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
 
    return mt->mcs_mt;
@@ -2070,8 +2075,7 @@ intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt)
 }
 
 void
-intel_miptree_unmap_raw(struct brw_context *brw,
-                        struct intel_mipmap_tree *mt)
+intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
 {
    drm_intel_bo_unmap(mt->bo);
 }
@@ -2128,7 +2132,7 @@ intel_miptree_unmap_gtt(struct brw_context *brw,
 			unsigned int level,
 			unsigned int slice)
 {
-   intel_miptree_unmap_raw(brw, mt);
+   intel_miptree_unmap_raw(mt);
 }
 
 static void
@@ -2189,7 +2193,7 @@ intel_miptree_unmap_blit(struct brw_context *brw,
 {
    struct gl_context *ctx = &brw->ctx;
 
-   intel_miptree_unmap_raw(brw, map->mt);
+   intel_miptree_unmap_raw(map->mt);
 
    if (map->mode & GL_MAP_WRITE_BIT) {
       bool ok = intel_miptree_blit(brw,
@@ -2261,7 +2265,7 @@ intel_miptree_map_movntdqa(struct brw_context *brw,
       _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes);
    }
 
-   intel_miptree_unmap_raw(brw, mt);
+   intel_miptree_unmap_raw(mt);
 }
 
 static void
@@ -2310,7 +2314,7 @@ intel_miptree_map_s8(struct brw_context *brw,
 	 }
       }
 
-      intel_miptree_unmap_raw(brw, mt);
+      intel_miptree_unmap_raw(mt);
 
       DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __func__,
 	  map->x, map->y, map->w, map->h,
@@ -2346,7 +2350,7 @@ intel_miptree_unmap_s8(struct brw_context *brw,
 	 }
       }
 
-      intel_miptree_unmap_raw(brw, mt);
+      intel_miptree_unmap_raw(mt);
    }
 
    free(map->buffer);
@@ -2400,7 +2404,7 @@ intel_miptree_unmap_etc(struct brw_context *brw,
                                map->ptr, map->stride,
                                map->w, map->h, mt->etc_format);
 
-   intel_miptree_unmap_raw(brw, mt);
+   intel_miptree_unmap_raw(mt);
    free(map->buffer);
 }
 
@@ -2470,8 +2474,8 @@ intel_miptree_map_depthstencil(struct brw_context *brw,
 	 }
       }
 
-      intel_miptree_unmap_raw(brw, s_mt);
-      intel_miptree_unmap_raw(brw, z_mt);
+      intel_miptree_unmap_raw(s_mt);
+      intel_miptree_unmap_raw(z_mt);
 
       DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
 	  __func__,
@@ -2530,8 +2534,8 @@ intel_miptree_unmap_depthstencil(struct brw_context *brw,
 	 }
       }
 
-      intel_miptree_unmap_raw(brw, s_mt);
-      intel_miptree_unmap_raw(brw, z_mt);
+      intel_miptree_unmap_raw(s_mt);
+      intel_miptree_unmap_raw(z_mt);
 
       DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
 	  __func__,
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 4907438dee5..bcf6d00691a 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -772,12 +772,6 @@ brw_miptree_layout(struct brw_context *brw,
                    struct intel_mipmap_tree *mt,
                    uint32_t layout_flags);
 
-void *intel_miptree_map_raw(struct brw_context *brw,
-                            struct intel_mipmap_tree *mt);
-
-void intel_miptree_unmap_raw(struct brw_context *brw,
-                             struct intel_mipmap_tree *mt);
-
 void
 intel_miptree_map(struct brw_context *brw,
 		  struct intel_mipmap_tree *mt,
-- 
cgit v1.2.3


From 20915dd2e0539ec5eba958230bbda2215cb19c2c Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 2 Sep 2015 14:56:18 -0700
Subject: i915: Remove prototype for nonexistent brw_miptree_layout

Hasn't existed in the i915 source since the i915 and i965 drivers parted
ways.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
---
 src/mesa/drivers/dri/i915/intel_mipmap_tree.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i915/intel_mipmap_tree.h b/src/mesa/drivers/dri/i915/intel_mipmap_tree.h
index d711a29d50c..f6ed71733f2 100644
--- a/src/mesa/drivers/dri/i915/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i915/intel_mipmap_tree.h
@@ -344,8 +344,6 @@ intel_miptree_copy_teximage(struct intel_context *intel,
  */
 void i915_miptree_layout(struct intel_mipmap_tree *mt);
 void i945_miptree_layout(struct intel_mipmap_tree *mt);
-void brw_miptree_layout(struct intel_context *intel,
-			struct intel_mipmap_tree *mt);
 
 void *intel_miptree_map_raw(struct intel_context *intel,
                             struct intel_mipmap_tree *mt);
-- 
cgit v1.2.3


From 04122312662b60d8a2d472ff6492960b4d923587 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 2 Sep 2015 15:01:18 -0700
Subject: i915: Silence unused parameter warnings

intel_mipmap_tree.c: In function 'old_intel_miptree_unmap_raw':
intel_mipmap_tree.c:726:51: warning: unused parameter 'intel' [-Wunused-parameter]
 intel_miptree_unmap_raw(struct intel_context *intel,
                                                   ^

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
---
 src/mesa/drivers/dri/i915/intel_mipmap_tree.c  | 7 +++----
 src/mesa/drivers/dri/i915/intel_mipmap_tree.h  | 3 +--
 src/mesa/drivers/dri/i915/intel_tex_subimage.c | 2 +-
 3 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i915/intel_mipmap_tree.c b/src/mesa/drivers/dri/i915/intel_mipmap_tree.c
index 1aa06c18f15..0fcc2b16b11 100644
--- a/src/mesa/drivers/dri/i915/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i915/intel_mipmap_tree.c
@@ -723,8 +723,7 @@ intel_miptree_map_raw(struct intel_context *intel, struct intel_mipmap_tree *mt)
 }
 
 void
-intel_miptree_unmap_raw(struct intel_context *intel,
-                        struct intel_mipmap_tree *mt)
+intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
 {
    drm_intel_bo_unmap(mt->region->bo);
 }
@@ -778,7 +777,7 @@ intel_miptree_unmap_gtt(struct intel_context *intel,
 			unsigned int level,
 			unsigned int slice)
 {
-   intel_miptree_unmap_raw(intel, mt);
+   intel_miptree_unmap_raw(mt);
 }
 
 static void
@@ -833,7 +832,7 @@ intel_miptree_unmap_blit(struct intel_context *intel,
 {
    struct gl_context *ctx = &intel->ctx;
 
-   intel_miptree_unmap_raw(intel, map->mt);
+   intel_miptree_unmap_raw(map->mt);
 
    if (map->mode & GL_MAP_WRITE_BIT) {
       bool ok = intel_miptree_blit(intel,
diff --git a/src/mesa/drivers/dri/i915/intel_mipmap_tree.h b/src/mesa/drivers/dri/i915/intel_mipmap_tree.h
index f6ed71733f2..aab30ed1f55 100644
--- a/src/mesa/drivers/dri/i915/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i915/intel_mipmap_tree.h
@@ -348,8 +348,7 @@ void i945_miptree_layout(struct intel_mipmap_tree *mt);
 void *intel_miptree_map_raw(struct intel_context *intel,
                             struct intel_mipmap_tree *mt);
 
-void intel_miptree_unmap_raw(struct intel_context *intel,
-                             struct intel_mipmap_tree *mt);
+void intel_miptree_unmap_raw(struct intel_mipmap_tree *mt);
 
 void
 intel_miptree_map(struct intel_context *intel,
diff --git a/src/mesa/drivers/dri/i915/intel_tex_subimage.c b/src/mesa/drivers/dri/i915/intel_tex_subimage.c
index f11ef2ea329..4083d696b82 100644
--- a/src/mesa/drivers/dri/i915/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i915/intel_tex_subimage.c
@@ -102,7 +102,7 @@ intel_blit_texsubimage(struct gl_context * ctx,
       _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
    }
 
-   intel_miptree_unmap_raw(intel, temp_mt);
+   intel_miptree_unmap_raw(temp_mt);
 
    bool ret;
 
-- 
cgit v1.2.3


From 5c8aa21309cb8cabd61b73b6706b295d6237d53d Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 2 Sep 2015 15:04:59 -0700
Subject: i915, i965: Silence unused parameter warnings in
 intel_miptree_unmap_gtt

intel_mipmap_tree.c: In function 'intel_miptree_unmap_gtt':
intel_mipmap_tree.c:777:34: warning: unused parameter 'map' [-Wunused-parameter]
    struct intel_miptree_map *map,
                                  ^
intel_mipmap_tree.c:778:17: warning: unused parameter 'level' [-Wunused-parameter]
    unsigned int level,
                 ^
intel_mipmap_tree.c:779:17: warning: unused parameter 'slice' [-Wunused-parameter]
    unsigned int slice)
                 ^

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
---
 src/mesa/drivers/dri/i915/intel_mipmap_tree.c | 8 ++------
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 8 ++------
 2 files changed, 4 insertions(+), 12 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i915/intel_mipmap_tree.c b/src/mesa/drivers/dri/i915/intel_mipmap_tree.c
index 0fcc2b16b11..307ad4df891 100644
--- a/src/mesa/drivers/dri/i915/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i915/intel_mipmap_tree.c
@@ -771,11 +771,7 @@ intel_miptree_map_gtt(struct intel_context *intel,
 }
 
 static void
-intel_miptree_unmap_gtt(struct intel_context *intel,
-			struct intel_mipmap_tree *mt,
-			struct intel_miptree_map *map,
-			unsigned int level,
-			unsigned int slice)
+intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
 {
    intel_miptree_unmap_raw(mt);
 }
@@ -948,7 +944,7 @@ intel_miptree_unmap(struct intel_context *intel,
    if (map->mt) {
       intel_miptree_unmap_blit(intel, mt, map, level, slice);
    } else {
-      intel_miptree_unmap_gtt(intel, mt, map, level, slice);
+      intel_miptree_unmap_gtt(mt);
    }
 
    intel_miptree_release_map(mt, level, slice);
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 1e7a6cc5c89..2150708fc04 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2126,11 +2126,7 @@ intel_miptree_map_gtt(struct brw_context *brw,
 }
 
 static void
-intel_miptree_unmap_gtt(struct brw_context *brw,
-			struct intel_mipmap_tree *mt,
-			struct intel_miptree_map *map,
-			unsigned int level,
-			unsigned int slice)
+intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
 {
    intel_miptree_unmap_raw(mt);
 }
@@ -2736,7 +2732,7 @@ intel_miptree_unmap(struct brw_context *brw,
       intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
 #endif
    } else {
-      intel_miptree_unmap_gtt(brw, mt, map, level, slice);
+      intel_miptree_unmap_gtt(mt);
    }
 
    intel_miptree_release_map(mt, level, slice);
-- 
cgit v1.2.3


From 307d5e58496a73bace8c89dbdbba91be171b7c95 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 2 Sep 2015 15:11:34 -0700
Subject: i915: Silence unused parameter warning in intel_miptree_create_layout

The for_bo parameter of intel_miptree_create_layout appears to be unused
since 27eedca when Eric removed some Gen5 code (after the i915 and i965
drivers parted ways).

intel_mipmap_tree.c: In function 'old_intel_miptree_create_layout':
intel_mipmap_tree.c:77:35: warning: unused parameter 'for_bo' [-Wunused-parameter]
                             bool for_bo)
                                   ^

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
---
 src/mesa/drivers/dri/i915/intel_mipmap_tree.c | 14 +++-----------
 src/mesa/drivers/dri/i915/intel_mipmap_tree.h |  3 +--
 src/mesa/drivers/dri/i915/intel_tex_image.c   |  3 +--
 3 files changed, 5 insertions(+), 15 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i915/intel_mipmap_tree.c b/src/mesa/drivers/dri/i915/intel_mipmap_tree.c
index 307ad4df891..5cbf7634fcc 100644
--- a/src/mesa/drivers/dri/i915/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i915/intel_mipmap_tree.c
@@ -60,11 +60,6 @@ target_to_target(GLenum target)
    }
 }
 
-/**
- * @param for_bo Indicates that the caller is
- *        intel_miptree_create_for_bo(). If true, then do not create
- *        \c stencil_mt.
- */
 struct intel_mipmap_tree *
 intel_miptree_create_layout(struct intel_context *intel,
                             GLenum target,
@@ -73,8 +68,7 @@ intel_miptree_create_layout(struct intel_context *intel,
                             GLuint last_level,
                             GLuint width0,
                             GLuint height0,
-                            GLuint depth0,
-                            bool for_bo)
+                            GLuint depth0)
 {
    struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
    if (!mt)
@@ -181,8 +175,7 @@ intel_miptree_create(struct intel_context *intel,
 
    mt = intel_miptree_create_layout(intel, target, format,
 				      first_level, last_level, width0,
-				      height0, depth0,
-				      false);
+				      height0, depth0);
    /*
     * pitch == 0 || height == 0  indicates the null texture
     */
@@ -262,8 +255,7 @@ intel_miptree_create_for_bo(struct intel_context *intel,
 
    mt = intel_miptree_create_layout(intel, GL_TEXTURE_2D, format,
                                     0, 0,
-                                    width, height, 1,
-                                    true);
+                                    width, height, 1);
    if (!mt) {
       free(region);
       return mt;
diff --git a/src/mesa/drivers/dri/i915/intel_mipmap_tree.h b/src/mesa/drivers/dri/i915/intel_mipmap_tree.h
index aab30ed1f55..2520b3035b2 100644
--- a/src/mesa/drivers/dri/i915/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i915/intel_mipmap_tree.h
@@ -240,8 +240,7 @@ intel_miptree_create_layout(struct intel_context *intel,
                             GLuint last_level,
                             GLuint width0,
                             GLuint height0,
-                            GLuint depth0,
-                            bool for_bo);
+                            GLuint depth0);
 
 struct intel_mipmap_tree *
 intel_miptree_create_for_bo(struct intel_context *intel,
diff --git a/src/mesa/drivers/dri/i915/intel_tex_image.c b/src/mesa/drivers/dri/i915/intel_tex_image.c
index 5ab60d16173..63ef08b44a6 100644
--- a/src/mesa/drivers/dri/i915/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i915/intel_tex_image.c
@@ -241,8 +241,7 @@ intel_set_texture_image_region(struct gl_context *ctx,
 
    intel_image->mt = intel_miptree_create_layout(intel, target, image->TexFormat,
                                                  0, 0,
-                                                 width, height, 1,
-                                                 true);
+                                                 width, height, 1);
    if (intel_image->mt == NULL)
        return;
    intel_region_reference(&intel_image->mt->region, region);
-- 
cgit v1.2.3


From 86c0a2d57413f04fabd40fd09d87b0ff9d1cb092 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 2 Sep 2015 15:44:04 -0700
Subject: i915, i965: Silence unused parameter warnings in
 intel_batchbuffer_advance

These only occurred in release builds, but they occurred in every file
that included intel_batchbuffer.h.  Lots of spam. :(

intel_batchbuffer.h: In function 'intel_batchbuffer_advance':
intel_batchbuffer.h:153:47: warning: unused parameter 'brw' [-Wunused-parameter]
 intel_batchbuffer_advance(struct brw_context *brw)
                                               ^

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
---
 src/mesa/drivers/dri/i915/intel_batchbuffer.h | 2 ++
 src/mesa/drivers/dri/i965/intel_batchbuffer.h | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i915/intel_batchbuffer.h b/src/mesa/drivers/dri/i915/intel_batchbuffer.h
index feecc01394c..c4efa762bcb 100644
--- a/src/mesa/drivers/dri/i915/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i915/intel_batchbuffer.h
@@ -128,6 +128,8 @@ intel_batchbuffer_advance(struct intel_context *intel)
       abort();
    }
    batch->total = 0;
+#else
+   (void) intel;
 #endif
 }
 
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
index 84add927c9a..2b177d3a888 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -162,6 +162,8 @@ intel_batchbuffer_advance(struct brw_context *brw)
       abort();
    }
    batch->total = 0;
+#else
+   (void) brw;
 #endif
 }
 
-- 
cgit v1.2.3


From 767c33e88138afa64443417860b264a494eba33d Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 2 Sep 2015 16:06:58 -0700
Subject: meta: Always bind the texture

We may have been called from glGenerateTextureMipmap with CurrentUnit
still set to 0, so we don't know when we can skip binding the texture.
Assume that _mesa_BindTexture will be fast if we're rebinding the same
texture.

v2: Remove currentTexUnitSave because it is now unused.  Suggested by
both Neil and Anuj.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91847
Cc: "11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Neil Roberts <neil@linux.intel.com>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
---
 src/mesa/drivers/common/meta_generate_mipmap.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/common/meta_generate_mipmap.c b/src/mesa/drivers/common/meta_generate_mipmap.c
index 0655f052219..5dc40a2aa33 100644
--- a/src/mesa/drivers/common/meta_generate_mipmap.c
+++ b/src/mesa/drivers/common/meta_generate_mipmap.c
@@ -163,7 +163,6 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
    const GLuint maxLevel = texObj->MaxLevel;
    const GLint maxLevelSave = texObj->MaxLevel;
    const GLboolean genMipmapSave = texObj->GenerateMipmap;
-   const GLuint currentTexUnitSave = ctx->Texture.CurrentUnit;
    const GLboolean use_glsl_version = ctx->Extensions.ARB_vertex_shader &&
                                       ctx->Extensions.ARB_fragment_shader;
    GLenum faceTarget;
@@ -202,8 +201,12 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
    samplerSave = ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler ?
       ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler->Name : 0;
 
-   if (currentTexUnitSave != 0)
-      _mesa_BindTexture(target, texObj->Name);
+   /* We may have been called from glGenerateTextureMipmap with CurrentUnit
+    * still set to 0, so we don't know when we can skip binding the texture.
+    * Assume that _mesa_BindTexture will be fast if we're rebinding the same
+    * texture.
+    */
+   _mesa_BindTexture(target, texObj->Name);
 
    if (!mipmap->Sampler) {
       _mesa_GenSamplers(1, &mipmap->Sampler);
-- 
cgit v1.2.3


From 4603723722127e707a5c1fa28736ee932f326846 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 9 Sep 2015 10:27:04 -0700
Subject: meta: Use result of texture coordinate clamping operation

Previously the result of the complicated clamp() expression was just
dropped on the floor: clamp does not modify any of its parameters.
Looking at the surrounding code, I believe this is supposed to modify
the value of tex_coord.

This change (along with a change to avoid the use of
brw_blorp_framebuffer) does not affect any existing piglit tests.  I'm
not sure what this clamp is trying to accomplish, so I'm not sure how to
write a test to exercise this path.

I also noticed another bug in this code.  There is no way the array
texture case could possibly work.  This will generate code for the
TEXEL_FETCH macro like:

    #define TEXEL_FETCH(coord) texelFetch(texSampler, ivec3(coord), sample_map[int(2 * fract(coord.x))]);

Since the coord parameter of this macro is a vec2 at all invocations, no
expansion of this macro will even compile.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
Cc: Topi Pohjolainen <topi.pohjolainen@intel.com>
Cc: Jordan Justen <jordan.l.justen@intel.com>
---
 src/mesa/drivers/common/meta_blit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c
index 71d18de87db..a41fe42338f 100644
--- a/src/mesa/drivers/common/meta_blit.c
+++ b/src/mesa/drivers/common/meta_blit.c
@@ -187,8 +187,8 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
                                "   vec2 tex_coord = texCoords - s_0_offset;\n"
                                "\n"
                                "   tex_coord *= scale;\n"
-                               "   clamp(tex_coord.x, 0.0f, scale.x * src_width - 1.0f);\n"
-                               "   clamp(tex_coord.y, 0.0f, scale.y * src_height - 1.0f);\n"
+                               "   tex_coord.x = clamp(tex_coord.x, 0.0f, scale.x * src_width - 1.0f);\n"
+                               "   tex_coord.y = clamp(tex_coord.y, 0.0f, scale.y * src_height - 1.0f);\n"
                                "   interp = fract(tex_coord);\n"
                                "   tex_coord = ivec2(tex_coord) * scale_inv;\n"
                                "\n"
-- 
cgit v1.2.3


From 2fc0ce293ac58237f02cc5dd2eee4e35abea06b5 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Sat, 5 Sep 2015 00:22:57 -0700
Subject: glsl: Use hash tables in opt_constant_variable().
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cuts compile/link time of the fragment shader in bug #91857 by 31%
(31.79 -> 21.64).  It has over 8,000 variables so linked lists are
terrible.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91857
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
Tested-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/glsl/opt_constant_variable.cpp | 39 ++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

(limited to 'src')

diff --git a/src/glsl/opt_constant_variable.cpp b/src/glsl/opt_constant_variable.cpp
index 7aaaeedf98d..cdfbc340243 100644
--- a/src/glsl/opt_constant_variable.cpp
+++ b/src/glsl/opt_constant_variable.cpp
@@ -36,11 +36,11 @@
 #include "ir_visitor.h"
 #include "ir_optimization.h"
 #include "glsl_types.h"
+#include "util/hash_table.h"
 
 namespace {
 
 struct assignment_entry {
-   exec_node link;
    int assignment_count;
    ir_variable *var;
    ir_constant *constval;
@@ -54,31 +54,32 @@ public:
    virtual ir_visitor_status visit_enter(ir_assignment *);
    virtual ir_visitor_status visit_enter(ir_call *);
 
-   exec_list list;
+   struct hash_table *ht;
 };
 
 } /* unnamed namespace */
 
 static struct assignment_entry *
-get_assignment_entry(ir_variable *var, exec_list *list)
+get_assignment_entry(ir_variable *var, struct hash_table *ht)
 {
+   struct hash_entry *hte = _mesa_hash_table_search(ht, var);
    struct assignment_entry *entry;
 
-   foreach_list_typed(struct assignment_entry, entry, link, list) {
-      if (entry->var == var)
-	 return entry;
+   if (hte) {
+      entry = (struct assignment_entry *) hte->data;
+   } else {
+      entry = (struct assignment_entry *) calloc(1, sizeof(*entry));
+      entry->var = var;
+      _mesa_hash_table_insert(ht, var, entry);
    }
 
-   entry = (struct assignment_entry *)calloc(1, sizeof(*entry));
-   entry->var = var;
-   list->push_head(&entry->link);
    return entry;
 }
 
 ir_visitor_status
 ir_constant_variable_visitor::visit(ir_variable *ir)
 {
-   struct assignment_entry *entry = get_assignment_entry(ir, &this->list);
+   struct assignment_entry *entry = get_assignment_entry(ir, this->ht);
    entry->our_scope = true;
    return visit_continue;
 }
@@ -97,7 +98,7 @@ ir_constant_variable_visitor::visit_enter(ir_assignment *ir)
    ir_constant *constval;
    struct assignment_entry *entry;
 
-   entry = get_assignment_entry(ir->lhs->variable_referenced(), &this->list);
+   entry = get_assignment_entry(ir->lhs->variable_referenced(), this->ht);
    assert(entry);
    entry->assignment_count++;
 
@@ -150,7 +151,7 @@ ir_constant_variable_visitor::visit_enter(ir_call *ir)
 	 struct assignment_entry *entry;
 
 	 assert(var);
-	 entry = get_assignment_entry(var, &this->list);
+	 entry = get_assignment_entry(var, this->ht);
 	 entry->assignment_count++;
       }
    }
@@ -161,7 +162,7 @@ ir_constant_variable_visitor::visit_enter(ir_call *ir)
       struct assignment_entry *entry;
 
       assert(var);
-      entry = get_assignment_entry(var, &this->list);
+      entry = get_assignment_entry(var, this->ht);
       entry->assignment_count++;
    }
 
@@ -177,20 +178,22 @@ do_constant_variable(exec_list *instructions)
    bool progress = false;
    ir_constant_variable_visitor v;
 
+   v.ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                  _mesa_key_pointer_equal);
    v.run(instructions);
 
-   while (!v.list.is_empty()) {
-
-      struct assignment_entry *entry;
-      entry = exec_node_data(struct assignment_entry, v.list.head, link);
+   struct hash_entry *hte;
+   hash_table_foreach(v.ht, hte) {
+      struct assignment_entry *entry = (struct assignment_entry *) hte->data;
 
       if (entry->assignment_count == 1 && entry->constval && entry->our_scope) {
 	 entry->var->constant_value = entry->constval;
 	 progress = true;
       }
-      entry->link.remove();
+      hte->data = NULL;
       free(entry);
    }
+   _mesa_hash_table_destroy(v.ht, NULL);
 
    return progress;
 }
-- 
cgit v1.2.3


From e20f30eb5181cddf8286d2247cfaf7e0fac7e417 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Sat, 5 Sep 2015 00:51:33 -0700
Subject: i965: Use hash tables for brw_fs_vector_splitting().
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cuts compile/link time of the fragment shader in #91857 by 25%
(21.64 -> 16.28).

v2: Drop unnecessary _mesa_hash_table_destroy call, and use
    refs.ht->entries == 0 rather than ad-hoc checking (suggested by
    Timothy Arceri).

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91857
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
Tested-by: Tapani Pälli <tapani.palli@intel.com>
---
 .../drivers/dri/i965/brw_fs_vector_splitting.cpp   | 44 +++++++++++-----------
 1 file changed, 22 insertions(+), 22 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
index 96d4f375da2..9e92ae85e43 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
@@ -43,6 +43,7 @@
 #include "glsl/ir_visitor.h"
 #include "glsl/ir_rvalue_visitor.h"
 #include "glsl/glsl_types.h"
+#include "util/hash_table.h"
 
 static bool debug = false;
 
@@ -72,7 +73,8 @@ public:
    ir_vector_reference_visitor(void)
    {
       this->mem_ctx = ralloc_context(NULL);
-      this->variable_list.make_empty();
+      this->ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
    }
 
    ~ir_vector_reference_visitor(void)
@@ -89,7 +91,7 @@ public:
    variable_entry *get_variable_entry(ir_variable *var);
 
    /* List of variable_entry */
-   exec_list variable_list;
+   struct hash_table *ht;
 
    void *mem_ctx;
 };
@@ -119,13 +121,12 @@ ir_vector_reference_visitor::get_variable_entry(ir_variable *var)
       break;
    }
 
-   foreach_in_list(variable_entry, entry, &variable_list) {
-      if (entry->var == var)
-	 return entry;
-   }
+   struct hash_entry *hte = _mesa_hash_table_search(ht, var);
+   if (hte)
+      return (struct variable_entry *) hte->data;
 
    variable_entry *entry = new(mem_ctx) variable_entry(var);
-   this->variable_list.push_tail(entry);
+   _mesa_hash_table_insert(ht, var, entry);
    return entry;
 }
 
@@ -195,9 +196,9 @@ ir_vector_reference_visitor::visit_enter(ir_function_signature *ir)
 
 class ir_vector_splitting_visitor : public ir_rvalue_visitor {
 public:
-   ir_vector_splitting_visitor(exec_list *vars)
+   ir_vector_splitting_visitor(struct hash_table *vars)
    {
-      this->variable_list = vars;
+      this->ht = vars;
    }
 
    virtual ir_visitor_status visit_leave(ir_assignment *);
@@ -205,7 +206,7 @@ public:
    void handle_rvalue(ir_rvalue **rvalue);
    variable_entry *get_splitting_entry(ir_variable *var);
 
-   exec_list *variable_list;
+   struct hash_table *ht;
 };
 
 variable_entry *
@@ -216,13 +217,8 @@ ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var)
    if (!var->type->is_vector())
       return NULL;
 
-   foreach_in_list(variable_entry, entry, variable_list) {
-      if (entry->var == var) {
-	 return entry;
-      }
-   }
-
-   return NULL;
+   struct hash_entry *hte = _mesa_hash_table_search(ht, var);
+   return hte ? (struct variable_entry *) hte->data : NULL;
 }
 
 void
@@ -329,12 +325,15 @@ ir_vector_splitting_visitor::visit_leave(ir_assignment *ir)
 bool
 brw_do_vector_splitting(exec_list *instructions)
 {
+   struct hash_entry *hte;
+
    ir_vector_reference_visitor refs;
 
    visit_list_elements(&refs, instructions);
 
    /* Trim out variables we can't split. */
-   foreach_in_list_safe(variable_entry, entry, &refs.variable_list) {
+   hash_table_foreach(refs.ht, hte) {
+      struct variable_entry *entry = (struct variable_entry *) hte->data;
       if (debug) {
 	 fprintf(stderr, "vector %s@%p: whole_access %d\n",
                  entry->var->name, (void *) entry->var,
@@ -342,11 +341,11 @@ brw_do_vector_splitting(exec_list *instructions)
       }
 
       if (entry->whole_vector_access) {
-	 entry->remove();
+         _mesa_hash_table_remove(refs.ht, hte);
       }
    }
 
-   if (refs.variable_list.is_empty())
+   if (refs.ht->entries == 0)
       return false;
 
    void *mem_ctx = ralloc_context(NULL);
@@ -354,7 +353,8 @@ brw_do_vector_splitting(exec_list *instructions)
    /* Replace the decls of the vectors to be split with their split
     * components.
     */
-   foreach_in_list(variable_entry, entry, &refs.variable_list) {
+   hash_table_foreach(refs.ht, hte) {
+      struct variable_entry *entry = (struct variable_entry *) hte->data;
       const struct glsl_type *type;
       type = glsl_type::get_instance(entry->var->type->base_type, 1, 1);
 
@@ -378,7 +378,7 @@ brw_do_vector_splitting(exec_list *instructions)
       entry->var->remove();
    }
 
-   ir_vector_splitting_visitor split(&refs.variable_list);
+   ir_vector_splitting_visitor split(refs.ht);
    visit_list_elements(&split, instructions);
 
    ralloc_free(mem_ctx);
-- 
cgit v1.2.3


From 4654439fdd766f79a78fe0d812fd916f5815e7e6 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 11 Nov 2014 23:16:13 -0800
Subject: glsl: Use hash tables for opt_constant_propagation() kill sets.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cuts compile/link time of the fragment shader in #91857 by 19%
(16.28 -> 13.05).

I didn't bother with the acp sets because they're smaller, but it
might be worth doing as well.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91857
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
Tested-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/glsl/opt_constant_propagation.cpp | 46 +++++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 18 deletions(-)

(limited to 'src')

diff --git a/src/glsl/opt_constant_propagation.cpp b/src/glsl/opt_constant_propagation.cpp
index 5221417aca0..184aaa1c297 100644
--- a/src/glsl/opt_constant_propagation.cpp
+++ b/src/glsl/opt_constant_propagation.cpp
@@ -40,6 +40,7 @@
 #include "ir_basic_block.h"
 #include "ir_optimization.h"
 #include "glsl_types.h"
+#include "util/hash_table.h"
 
 namespace {
 
@@ -95,7 +96,8 @@ public:
       killed_all = false;
       mem_ctx = ralloc_context(0);
       this->acp = new(mem_ctx) exec_list;
-      this->kills = new(mem_ctx) exec_list;
+      this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                            _mesa_key_pointer_equal);
    }
    ~ir_constant_propagation_visitor()
    {
@@ -123,7 +125,7 @@ public:
     * List of kill_entry: The masks of variables whose values were
     * killed in this block.
     */
-   exec_list *kills;
+   hash_table *kills;
 
    bool progress;
 
@@ -263,11 +265,12 @@ ir_constant_propagation_visitor::visit_enter(ir_function_signature *ir)
     * main() at link time, so they're irrelevant to us.
     */
    exec_list *orig_acp = this->acp;
-   exec_list *orig_kills = this->kills;
+   hash_table *orig_kills = this->kills;
    bool orig_killed_all = this->killed_all;
 
    this->acp = new(mem_ctx) exec_list;
-   this->kills = new(mem_ctx) exec_list;
+   this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
    this->killed_all = false;
 
    visit_list_elements(this, &ir->body);
@@ -352,11 +355,12 @@ void
 ir_constant_propagation_visitor::handle_if_block(exec_list *instructions)
 {
    exec_list *orig_acp = this->acp;
-   exec_list *orig_kills = this->kills;
+   hash_table *orig_kills = this->kills;
    bool orig_killed_all = this->killed_all;
 
    this->acp = new(mem_ctx) exec_list;
-   this->kills = new(mem_ctx) exec_list;
+   this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
    this->killed_all = false;
 
    /* Populate the initial acp with a constant of the original */
@@ -370,12 +374,14 @@ ir_constant_propagation_visitor::handle_if_block(exec_list *instructions)
       orig_acp->make_empty();
    }
 
-   exec_list *new_kills = this->kills;
+   hash_table *new_kills = this->kills;
    this->kills = orig_kills;
    this->acp = orig_acp;
    this->killed_all = this->killed_all || orig_killed_all;
 
-   foreach_in_list(kill_entry, k, new_kills) {
+   hash_entry *htk;
+   hash_table_foreach(new_kills, htk) {
+      kill_entry *k = (kill_entry *) htk->data;
       kill(k->var, k->write_mask);
    }
 }
@@ -397,7 +403,7 @@ ir_visitor_status
 ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
 {
    exec_list *orig_acp = this->acp;
-   exec_list *orig_kills = this->kills;
+   hash_table *orig_kills = this->kills;
    bool orig_killed_all = this->killed_all;
 
    /* FINISHME: For now, the initial acp for loops is totally empty.
@@ -405,7 +411,8 @@ ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
     * cloned minus the killed entries after the first run through.
     */
    this->acp = new(mem_ctx) exec_list;
-   this->kills = new(mem_ctx) exec_list;
+   this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
    this->killed_all = false;
 
    visit_list_elements(this, &ir->body_instructions);
@@ -414,12 +421,14 @@ ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
       orig_acp->make_empty();
    }
 
-   exec_list *new_kills = this->kills;
+   hash_table *new_kills = this->kills;
    this->kills = orig_kills;
    this->acp = orig_acp;
    this->killed_all = this->killed_all || orig_killed_all;
 
-   foreach_in_list(kill_entry, k, new_kills) {
+   hash_entry *htk;
+   hash_table_foreach(new_kills, htk) {
+      kill_entry *k = (kill_entry *) htk->data;
       kill(k->var, k->write_mask);
    }
 
@@ -448,14 +457,15 @@ ir_constant_propagation_visitor::kill(ir_variable *var, unsigned write_mask)
    /* Add this writemask of the variable to the list of killed
     * variables in this block.
     */
-   foreach_in_list(kill_entry, entry, this->kills) {
-      if (entry->var == var) {
-	 entry->write_mask |= write_mask;
-	 return;
-      }
+   hash_entry *kill_hash_entry = _mesa_hash_table_search(this->kills, var);
+   if (kill_hash_entry) {
+      kill_entry *entry = (kill_entry *) kill_hash_entry->data;
+      entry->write_mask |= write_mask;
+      return;
    }
    /* Not already in the list.  Make new entry. */
-   this->kills->push_tail(new(this->mem_ctx) kill_entry(var, write_mask));
+   _mesa_hash_table_insert(this->kills, var,
+                           new(this->mem_ctx) kill_entry(var, write_mask));
 }
 
 /**
-- 
cgit v1.2.3


From cb2b118e4017b1cd90dcc02e688d859b2709465e Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 12 May 2015 01:53:24 -0700
Subject: nir/builder: Add nir_load_var() and nir_store_var() helpers.

These provide a convenient way to do simple variable loads and stores.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_builder.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h
index ba988d71dde..ffa31c90a45 100644
--- a/src/glsl/nir/nir_builder.h
+++ b/src/glsl/nir/nir_builder.h
@@ -234,4 +234,31 @@ nir_ssa_for_src(nir_builder *build, nir_src src, int num_components)
    return nir_imov_alu(build, alu, num_components);
 }
 
+static inline nir_ssa_def *
+nir_load_var(nir_builder *build, nir_variable *var)
+{
+   const unsigned num_components = glsl_get_vector_elements(var->type);
+
+   nir_intrinsic_instr *load =
+      nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_var);
+   load->num_components = num_components;
+   load->variables[0] = nir_deref_var_create(load, var);
+   nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
+   nir_builder_instr_insert(build, &load->instr);
+   return &load->dest.ssa;
+}
+
+static inline void
+nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value)
+{
+   const unsigned num_components = glsl_get_vector_elements(var->type);
+
+   nir_intrinsic_instr *store =
+      nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var);
+   store->num_components = num_components;
+   store->variables[0] = nir_deref_var_create(store, var);
+   store->src[0] = nir_src_for_ssa(value);
+   nir_builder_instr_insert(build, &store->instr);
+}
+
 #endif /* NIR_BUILDER_H */
-- 
cgit v1.2.3


From b811085b797c79396e59372085293a82fabcf069 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 18 Aug 2015 02:07:47 -0700
Subject: nir: Store some geometry shader data in nir_shader.

This makes it possible for NIR shaders to know the number of output
vertices and the number of invocations.  Drivers could also access
these directly without going through gl_program.

We should probably add InputType and OutputType here too, but currently
those are stored as GL_* enums, and I wanted to avoid using those in
NIR, as I suspect Vulkan/SPIR-V will use different enums.  (We should
probably make our own.)

We could add VerticesIn, but it's easily computable from the input
topology, so I'm not sure whether it's worth it.  It's also currently
not stored in gl_shader (only gl_shader_program), which would require
changes to the glsl_to_nir interface or require us to store it there.

This is a bit of duplication of data...ideally, we would factor these
substructs out of gl_program, gl_shader_program, and nir_shader, creating
a gl_geometry_info class...but it would need to go in a new place (in
src/glsl?) that isn't mtypes.h nor nir.h.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/glsl_to_nir.cpp | 3 +++
 src/glsl/nir/nir.c           | 3 +++
 src/glsl/nir/nir.h           | 8 ++++++++
 3 files changed, 14 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 944f7462911..c13f953d914 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -140,6 +140,9 @@ glsl_to_nir(struct gl_shader *sh, const nir_shader_compiler_options *options)
 
    nir_lower_outputs_to_temporaries(shader);
 
+   shader->gs.vertices_out = sh->Geom.VerticesOut;
+   shader->gs.invocations = sh->Geom.Invocations;
+
    return shader;
 }
 
diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index ab06ea2bc19..1dc7e12445f 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -54,6 +54,9 @@ nir_shader_create(void *mem_ctx,
 
    shader->stage = stage;
 
+   shader->gs.vertices_out = 0;
+   shader->gs.invocations = 0;
+
    return shader;
 }
 
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index a93ff11f01b..f9c829570c5 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1479,6 +1479,14 @@ typedef struct nir_shader {
 
    /** The shader stage, such as MESA_SHADER_VERTEX. */
    gl_shader_stage stage;
+
+   struct {
+      /** The maximum number of vertices the geometry shader might write. */
+      unsigned vertices_out;
+
+      /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
+      unsigned invocations;
+   } gs;
 } nir_shader;
 
 #define nir_foreach_overload(shader, overload)                        \
-- 
cgit v1.2.3


From 2c52c794d727e535c1baca671a7c1e5b38dffb00 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 10 Sep 2015 12:26:18 -0600
Subject: tgsi,softpipe: capitalize the tgsi_sampler_control enum values

We use capitalized enum values everywhere else.
This improves understanding a bit too.

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c       | 34 ++++++++++----------
 src/gallium/auxiliary/tgsi/tgsi_exec.h       | 15 +++++----
 src/gallium/drivers/softpipe/sp_tex_sample.c | 48 ++++++++++++++--------------
 3 files changed, 49 insertions(+), 48 deletions(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index a3a79a06620..f67c16200a9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2032,7 +2032,7 @@ exec_tex(struct tgsi_exec_machine *mach,
 {
    const union tgsi_exec_channel *args[5], *proj = NULL;
    union tgsi_exec_channel r[5];
-   enum tgsi_sampler_control control =  tgsi_sampler_lod_none;
+   enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
    uint chan;
    uint unit;
    int8_t offsets[3];
@@ -2078,11 +2078,11 @@ exec_tex(struct tgsi_exec_machine *mach,
          args[i] = &ZeroVec;
 
       if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
-         control = tgsi_sampler_lod_explicit;
+         control = TGSI_SAMPLER_LOD_EXPLICIT;
       else if (modifier == TEX_MODIFIER_LOD_BIAS)
-         control = tgsi_sampler_lod_bias;
+         control = TGSI_SAMPLER_LOD_BIAS;
       else if (modifier == TEX_MODIFIER_GATHER)
-         control = tgsi_sampler_gather;
+         control = TGSI_SAMPLER_GATHER;
    }
    else {
       for (i = dim; i < Elements(args); i++)
@@ -2159,7 +2159,7 @@ exec_lodq(struct tgsi_exec_machine *mach,
                             args[1]->f,
                             args[2]->f,
                             args[3]->f,
-                            tgsi_sampler_lod_none,
+                            TGSI_SAMPLER_LOD_NONE,
                             r[0].f,
                             r[1].f);
 
@@ -2195,7 +2195,7 @@ exec_txd(struct tgsi_exec_machine *mach,
 
       fetch_texel(mach->Sampler, unit, unit,
                   &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
       break;
 
@@ -2211,7 +2211,7 @@ exec_txd(struct tgsi_exec_machine *mach,
 
       fetch_texel(mach->Sampler, unit, unit,
                   &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
       break;
 
@@ -2225,7 +2225,7 @@ exec_txd(struct tgsi_exec_machine *mach,
 
       fetch_texel(mach->Sampler, unit, unit,
                   &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
       break;
 
@@ -2245,7 +2245,7 @@ exec_txd(struct tgsi_exec_machine *mach,
 
       fetch_texel(mach->Sampler, unit, unit,
                   &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                   &r[0], &r[1], &r[2], &r[3]);     /* outputs */
       break;
 
@@ -2265,7 +2265,7 @@ exec_txd(struct tgsi_exec_machine *mach,
 
       fetch_texel(mach->Sampler, unit, unit,
                   &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                   &r[0], &r[1], &r[2], &r[3]);     /* outputs */
       break;
 
@@ -2404,7 +2404,7 @@ exec_sample(struct tgsi_exec_machine *mach,
    const uint sampler_unit = inst->Src[2].Register.Index;
    union tgsi_exec_channel r[5], c1;
    const union tgsi_exec_channel *lod = &ZeroVec;
-   enum tgsi_sampler_control control = tgsi_sampler_lod_none;
+   enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
    uint chan;
    unsigned char swizzles[4];
    int8_t offsets[3];
@@ -2418,16 +2418,16 @@ exec_sample(struct tgsi_exec_machine *mach,
       if (modifier == TEX_MODIFIER_LOD_BIAS) {
          FETCH(&c1, 3, TGSI_CHAN_X);
          lod = &c1;
-         control = tgsi_sampler_lod_bias;
+         control = TGSI_SAMPLER_LOD_BIAS;
       }
       else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
          FETCH(&c1, 3, TGSI_CHAN_X);
          lod = &c1;
-         control = tgsi_sampler_lod_explicit;
+         control = TGSI_SAMPLER_LOD_EXPLICIT;
       }
       else {
          assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
-         control = tgsi_sampler_lod_zero;
+         control = TGSI_SAMPLER_LOD_ZERO;
       }
    }
 
@@ -2553,7 +2553,7 @@ exec_sample_d(struct tgsi_exec_machine *mach,
 
       fetch_texel(mach->Sampler, resource_unit, sampler_unit,
                   &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
       break;
 
@@ -2569,7 +2569,7 @@ exec_sample_d(struct tgsi_exec_machine *mach,
 
       fetch_texel(mach->Sampler, resource_unit, sampler_unit,
                   &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* inputs */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                   &r[0], &r[1], &r[2], &r[3]);     /* outputs */
       break;
 
@@ -2587,7 +2587,7 @@ exec_sample_d(struct tgsi_exec_machine *mach,
 
       fetch_texel(mach->Sampler, resource_unit, sampler_unit,
                   &r[0], &r[1], &r[2], &r[3], &ZeroVec,
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                   &r[0], &r[1], &r[2], &r[3]);
       break;
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index a07d727cdee..5fc276c20b7 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -88,13 +88,14 @@ struct tgsi_interp_coef
    float dady[TGSI_NUM_CHANNELS];
 };
 
-enum tgsi_sampler_control {
-   tgsi_sampler_lod_none,
-   tgsi_sampler_lod_bias,
-   tgsi_sampler_lod_explicit,
-   tgsi_sampler_lod_zero,
-   tgsi_sampler_derivs_explicit,
-   tgsi_sampler_gather,
+enum tgsi_sampler_control
+{
+   TGSI_SAMPLER_LOD_NONE,
+   TGSI_SAMPLER_LOD_BIAS,
+   TGSI_SAMPLER_LOD_EXPLICIT,
+   TGSI_SAMPLER_LOD_ZERO,
+   TGSI_SAMPLER_DERIVS_EXPLICIT,
+   TGSI_SAMPLER_GATHER,
 };
 
 /**
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 9f2ba01b66a..489cae7a371 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1831,19 +1831,19 @@ compute_lod(const struct pipe_sampler_state *sampler,
    uint i;
 
    switch (control) {
-   case tgsi_sampler_lod_none:
-   case tgsi_sampler_lod_zero:
+   case TGSI_SAMPLER_LOD_NONE:
+   case TGSI_SAMPLER_LOD_ZERO:
    /* XXX FIXME */
-   case tgsi_sampler_derivs_explicit:
+   case TGSI_SAMPLER_DERIVS_EXPLICIT:
       lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(biased_lambda, min_lod, max_lod);
       break;
-   case tgsi_sampler_lod_bias:
+   case TGSI_SAMPLER_LOD_BIAS:
       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
          lod[i] = biased_lambda + lod_in[i];
          lod[i] = CLAMP(lod[i], min_lod, max_lod);
       }
       break;
-   case tgsi_sampler_lod_explicit:
+   case TGSI_SAMPLER_LOD_EXPLICIT:
       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
          lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
       }
@@ -1876,25 +1876,25 @@ compute_lambda_lod_unclamped(struct sp_sampler_view *sp_sview,
    uint i;
 
    switch (control) {
-   case tgsi_sampler_lod_none:
+   case TGSI_SAMPLER_LOD_NONE:
       /* XXX FIXME */
-   case tgsi_sampler_derivs_explicit:
+   case TGSI_SAMPLER_DERIVS_EXPLICIT:
       lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
       lod[0] = lod[1] = lod[2] = lod[3] = lambda;
       break;
-   case tgsi_sampler_lod_bias:
+   case TGSI_SAMPLER_LOD_BIAS:
       lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
          lod[i] = lambda + lod_in[i];
       }
       break;
-   case tgsi_sampler_lod_explicit:
+   case TGSI_SAMPLER_LOD_EXPLICIT:
       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
          lod[i] = lod_in[i] + lod_bias;
       }
       break;
-   case tgsi_sampler_lod_zero:
-   case tgsi_sampler_gather:
+   case TGSI_SAMPLER_LOD_ZERO:
+   case TGSI_SAMPLER_GATHER:
       lod[0] = lod[1] = lod[2] = lod[3] = lod_bias;
       break;
    default:
@@ -1994,7 +1994,7 @@ mip_filter_linear(struct sp_sampler_view *sp_sview,
    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
 
    args.offset = filt_args->offset;
-   args.gather_only = filt_args->control == tgsi_sampler_gather;
+   args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
    args.gather_comp = get_gather_component(lod_in);
 
    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
@@ -2078,7 +2078,7 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview,
    struct img_filter_args args;
 
    args.offset = filt_args->offset;
-   args.gather_only = filt_args->control == tgsi_sampler_gather;
+   args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
    args.gather_comp = get_gather_component(lod_in);
 
    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
@@ -2140,7 +2140,7 @@ mip_filter_none(struct sp_sampler_view *sp_sview,
 
    args.level = sp_sview->base.u.tex.first_level;
    args.offset = filt_args->offset;
-   args.gather_only = filt_args->control == tgsi_sampler_gather;
+   args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
 
    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
 
@@ -2188,7 +2188,7 @@ mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
    struct img_filter_args args;
    args.level = sp_sview->base.u.tex.first_level;
    args.offset = filt_args->offset;
-   args.gather_only = filt_args->control == tgsi_sampler_gather;
+   args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
       args.s = s[j];
       args.t = t[j];
@@ -2465,10 +2465,10 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
    float dvdy = (t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]) * t_to_v;
    struct img_filter_args args;
 
-   if (filt_args->control == tgsi_sampler_lod_bias ||
-       filt_args->control == tgsi_sampler_lod_none ||
+   if (filt_args->control == TGSI_SAMPLER_LOD_BIAS ||
+       filt_args->control == TGSI_SAMPLER_LOD_NONE ||
        /* XXX FIXME */
-       filt_args->control == tgsi_sampler_derivs_explicit) {
+       filt_args->control == TGSI_SAMPLER_DERIVS_EXPLICIT) {
       /* note: instead of working with Px and Py, we will use the 
        * squared length instead, to avoid sqrt.
        */
@@ -2508,8 +2508,8 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
       compute_lod(&sp_samp->base, filt_args->control, lambda, lod_in, lod);
    }
    else {
-      assert(filt_args->control == tgsi_sampler_lod_explicit ||
-             filt_args->control == tgsi_sampler_lod_zero);
+      assert(filt_args->control == TGSI_SAMPLER_LOD_EXPLICIT ||
+             filt_args->control == TGSI_SAMPLER_LOD_ZERO);
       compute_lod(&sp_samp->base, filt_args->control, sp_samp->base.lod_bias, lod_in, lod);
    }
    
@@ -2592,7 +2592,7 @@ mip_filter_linear_2d_linear_repeat_POT(
       args.p = p[j];
       args.face_id = sp_sview->faces[j];
       args.offset = filt_args->offset;
-      args.gather_only = filt_args->control == tgsi_sampler_gather;
+      args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
       if ((unsigned)level0 >= psview->u.tex.last_level) {
          if (level0 < 0)
             args.level = psview->u.tex.first_level;
@@ -2672,7 +2672,7 @@ sample_compare(struct sp_sampler_view *sp_sview,
    float pc[4];
    const struct util_format_description *format_desc;
    unsigned chan_type;
-   bool is_gather = (control == tgsi_sampler_gather);
+   bool is_gather = (control == TGSI_SAMPLER_GATHER);
 
    /**
     * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
@@ -3056,7 +3056,7 @@ get_filters(struct sp_sampler_view *sp_sview,
             img_filter_func *mag)
 {
    assert(funcs);
-   if (control == tgsi_sampler_gather) {
+   if (control == TGSI_SAMPLER_GATHER) {
       *funcs = &funcs_nearest;
       if (min) {
          *min = get_img_filter(sp_sview, &sp_samp->base,
@@ -3106,7 +3106,7 @@ sample_mip(struct sp_sampler_view *sp_sview,
                      lod, filt_args->control, rgba);
    }
 
-   if (sp_sview->need_swizzle && filt_args->control != tgsi_sampler_gather) {
+   if (sp_sview->need_swizzle && filt_args->control != TGSI_SAMPLER_GATHER) {
       float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
       memcpy(rgba_temp, rgba, sizeof(rgba_temp));
       do_swizzling(&sp_sview->base, rgba_temp, rgba);
-- 
cgit v1.2.3


From d4e29af2344c06490913efc35430f93a966061bb Mon Sep 17 00:00:00 2001
From: Alejandro Piñeiro <apinheiro@igalia.com>
Date: Fri, 11 Sep 2015 12:21:13 +0200
Subject: i965/vec4: check writemask when bailing out at register coalesce

opt_register_coalesce stopped looking for previous instructions to
coalesce with as soon as another instruction wrote to the same
destination. This can be relaxed so that it only stops when another
instruction writes to the same channels of the same destination, which
is checked using the writemask.

Shader DB results (taking into account only vec4):

total instructions in shared programs: 1781593 -> 1734957 (-2.62%)
instructions in affected programs:     1238390 -> 1191754 (-3.77%)
helped:                                12782
HURT:                                  0
GAINED:                                0
LOST:                                  0

v2: removed some parentheses, fixed indentation, as suggested by
    Matt Turner
v3: added brackets, for consistency, as suggested by Eduardo Lima

Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 893ff356afa..c4da1a11be8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1090,11 +1090,13 @@ vec4_visitor::opt_register_coalesce()
 	 if (interfered)
 	    break;
 
-         /* If somebody else writes our destination here, we can't coalesce
-          * before that.
+         /* If somebody else writes the same channels of our destination here,
+          * we can't coalesce before that.
           */
-         if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written))
-	    break;
+         if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written) &&
+             (inst->dst.writemask & scan_inst->dst.writemask) != 0) {
+            break;
+         }
 
          /* Check for reads of the register we're trying to coalesce into.  We
           * can't go rewriting instructions above that to put some other value
-- 
cgit v1.2.3


From 794355e77167291a3acc8d42249c760c24f54f38 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Thu, 10 Sep 2015 13:56:08 -0700
Subject: nir/lower_outputs_to_temporaries: Reparent the output name

We copy the output, make the old output the temporary, and give the
temporary a new name.  The copy keeps the pointer to the old name.  This
works just fine up until the point where we lower things to SSA and delete
the old variable and, with it, the name.  Instead, we should re-parent the
name to the copy.

Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
---
 src/glsl/nir/nir_lower_outputs_to_temporaries.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/nir_lower_outputs_to_temporaries.c b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
index b730cad0020..4ea5fd4f66b 100644
--- a/src/glsl/nir/nir_lower_outputs_to_temporaries.c
+++ b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
@@ -91,6 +91,9 @@ nir_lower_outputs_to_temporaries(nir_shader *shader)
       /* The orignal is now the temporary */
       nir_variable *temp = var;
 
+      /* Reparent the name to the new variable */
+      ralloc_steal(output, output->name);
+
       /* Give the output a new name with @out-temp appended */
       temp->name = ralloc_asprintf(var, "%s@out-temp", output->name);
       temp->data.mode = nir_var_global;
-- 
cgit v1.2.3


From 8c8fc5f8336c8c79e5890265ae6c03271aa94075 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 9 Sep 2015 13:18:29 -0700
Subject: nir: Fix a bunch of ralloc parenting errors

As of a10d4937, we would really like things associated with an instruction
to be allocated out of that instruction and not out of the shader.  In
particular, you should be passing the instruction that will ultimately be
holding the source into nir_src_copy rather than an arbitrary memory
context.

We also change the prototypes of nir_dest_copy and nir_alu_src/dest_copy to
explicitly take an instruction so we catch this earlier in the future.

Cc: "11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
---
 src/glsl/nir/nir.c                      | 20 +++++++++++---------
 src/glsl/nir/nir.h                      | 13 +++++++------
 src/glsl/nir/nir_from_ssa.c             |  2 +-
 src/glsl/nir/nir_lower_alu_to_scalar.c  |  6 +++---
 src/glsl/nir/nir_lower_atomics.c        |  2 +-
 src/glsl/nir/nir_lower_io.c             |  2 +-
 src/glsl/nir/nir_lower_locals_to_regs.c |  7 +++----
 src/glsl/nir/nir_lower_vec_to_movs.c    |  4 ++--
 src/glsl/nir/nir_opt_peephole_ffma.c    |  3 +--
 src/glsl/nir/nir_opt_peephole_select.c  |  4 ++--
 10 files changed, 32 insertions(+), 31 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 1dc7e12445f..fd675322b12 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -153,7 +153,7 @@ void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
    }
 }
 
-void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx)
+void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
 {
    /* Copying an SSA definition makes no sense whatsoever. */
    assert(!src->is_ssa);
@@ -163,17 +163,18 @@ void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx)
    dest->reg.base_offset = src->reg.base_offset;
    dest->reg.reg = src->reg.reg;
    if (src->reg.indirect) {
-      dest->reg.indirect = ralloc(mem_ctx, nir_src);
-      nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
+      dest->reg.indirect = ralloc(instr, nir_src);
+      nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
    } else {
       dest->reg.indirect = NULL;
    }
 }
 
 void
-nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx)
+nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
+                 nir_alu_instr *instr)
 {
-   nir_src_copy(&dest->src, &src->src, mem_ctx);
+   nir_src_copy(&dest->src, &src->src, &instr->instr);
    dest->abs = src->abs;
    dest->negate = src->negate;
    for (unsigned i = 0; i < 4; i++)
@@ -181,9 +182,10 @@ nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx)
 }
 
 void
-nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, void *mem_ctx)
+nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
+                  nir_alu_instr *instr)
 {
-   nir_dest_copy(&dest->dest, &src->dest, mem_ctx);
+   nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
    dest->write_mask = src->write_mask;
    dest->saturate = src->saturate;
 }
@@ -1210,14 +1212,14 @@ nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx)
    nir_foreach_use_safe(def, use_src) {
       nir_instr *src_parent_instr = use_src->parent_instr;
       list_del(&use_src->use_link);
-      nir_src_copy(use_src, &new_src, mem_ctx);
+      nir_src_copy(use_src, &new_src, src_parent_instr);
       src_add_all_uses(use_src, src_parent_instr, NULL);
    }
 
    nir_foreach_if_use_safe(def, use_src) {
       nir_if *src_parent_if = use_src->parent_if;
       list_del(&use_src->use_link);
-      nir_src_copy(use_src, &new_src, mem_ctx);
+      nir_src_copy(use_src, &new_src, src_parent_if);
       src_add_all_uses(use_src, NULL, src_parent_if);
    }
 }
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index f9c829570c5..92945f9df7f 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -580,8 +580,8 @@ nir_dest_for_reg(nir_register *reg)
    return dest;
 }
 
-void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx);
-void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx);
+void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
+void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);
 
 typedef struct {
    nir_src src;
@@ -630,10 +630,6 @@ typedef struct {
    unsigned write_mask : 4; /* ignored if dest.is_ssa is true */
 } nir_alu_dest;
 
-void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx);
-void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
-                       void *mem_ctx);
-
 typedef enum {
    nir_type_invalid = 0, /* Not a valid type */
    nir_type_float,
@@ -702,6 +698,11 @@ typedef struct nir_alu_instr {
    nir_alu_src src[];
 } nir_alu_instr;
 
+void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
+                      nir_alu_instr *instr);
+void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
+                       nir_alu_instr *instr);
+
 /* is this source channel used? */
 static inline bool
 nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel)
diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c
index 94002f18cd7..d6569d8994a 100644
--- a/src/glsl/nir/nir_from_ssa.c
+++ b/src/glsl/nir/nir_from_ssa.c
@@ -556,7 +556,7 @@ emit_copy(nir_parallel_copy_instr *pcopy, nir_src src, nir_src dest_src,
       assert(src.reg.reg->num_components >= dest_src.reg.reg->num_components);
 
    nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
-   nir_src_copy(&mov->src[0].src, &src, mem_ctx);
+   nir_src_copy(&mov->src[0].src, &src, mov);
    mov->dest.dest = nir_dest_for_reg(dest_src.reg.reg);
    mov->dest.write_mask = (1 << dest_src.reg.reg->num_components) - 1;
 
diff --git a/src/glsl/nir/nir_lower_alu_to_scalar.c b/src/glsl/nir/nir_lower_alu_to_scalar.c
index efbe9e7175f..1607308abb1 100644
--- a/src/glsl/nir/nir_lower_alu_to_scalar.c
+++ b/src/glsl/nir/nir_lower_alu_to_scalar.c
@@ -46,11 +46,11 @@ lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
    for (unsigned i = 0; i < num_components; i++) {
       nir_alu_instr *chan = nir_alu_instr_create(mem_ctx, chan_op);
       nir_alu_ssa_dest_init(chan, 1);
-      nir_alu_src_copy(&chan->src[0], &instr->src[0], mem_ctx);
+      nir_alu_src_copy(&chan->src[0], &instr->src[0], chan);
       chan->src[0].swizzle[0] = chan->src[0].swizzle[i];
       if (nir_op_infos[chan_op].num_inputs > 1) {
          assert(nir_op_infos[chan_op].num_inputs == 2);
-         nir_alu_src_copy(&chan->src[1], &instr->src[1], mem_ctx);
+         nir_alu_src_copy(&chan->src[1], &instr->src[1], chan);
          chan->src[1].swizzle[0] = chan->src[1].swizzle[i];
       }
 
@@ -153,7 +153,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx)
          unsigned src_chan = (nir_op_infos[instr->op].input_sizes[i] == 1 ?
                               0 : chan);
 
-         nir_alu_src_copy(&lower->src[i], &instr->src[i], mem_ctx);
+         nir_alu_src_copy(&lower->src[i], &instr->src[i], lower);
          for (int j = 0; j < 4; j++)
             lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan];
       }
diff --git a/src/glsl/nir/nir_lower_atomics.c b/src/glsl/nir/nir_lower_atomics.c
index ce3615a3aa1..7ae8462882a 100644
--- a/src/glsl/nir/nir_lower_atomics.c
+++ b/src/glsl/nir/nir_lower_atomics.c
@@ -91,7 +91,7 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
          nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
          nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
          mul->dest.write_mask = 0x1;
-         nir_src_copy(&mul->src[0].src, &deref_array->indirect, mem_ctx);
+         nir_src_copy(&mul->src[0].src, &deref_array->indirect, mul);
          mul->src[1].src.is_ssa = true;
          mul->src[1].src.ssa = &atomic_counter_size->def;
          nir_instr_insert_before(&instr->instr, &mul->instr);
diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
index afb463040cc..3739fc83f91 100644
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -221,7 +221,7 @@ nir_lower_io_block(nir_block *block, void *void_state)
 
          store->const_index[0] = offset;
 
-         nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx);
+         nir_src_copy(&store->src[0], &intrin->src[0], store);
 
          if (has_indirect)
             store->src[1] = indirect;
diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c b/src/glsl/nir/nir_lower_locals_to_regs.c
index 28fdec50e04..b77d974f568 100644
--- a/src/glsl/nir/nir_lower_locals_to_regs.c
+++ b/src/glsl/nir/nir_lower_locals_to_regs.c
@@ -183,8 +183,7 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
             nir_alu_instr *add = nir_alu_instr_create(state->shader,
                                                       nir_op_iadd);
             add->src[0].src = *src.reg.indirect;
-            nir_src_copy(&add->src[1].src, &deref_array->indirect,
-                         state->shader);
+            nir_src_copy(&add->src[1].src, &deref_array->indirect, add);
             add->dest.write_mask = 1;
             nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
             nir_instr_insert_before(instr, &add->instr);
@@ -225,7 +224,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
                                      nir_src_for_ssa(&mov->dest.dest.ssa),
                                      state->shader);
          } else {
-            nir_dest_copy(&mov->dest.dest, &intrin->dest, state->shader);
+            nir_dest_copy(&mov->dest.dest, &intrin->dest, &mov->instr);
          }
          nir_instr_insert_before(&intrin->instr, &mov->instr);
 
@@ -241,7 +240,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
                                              &intrin->instr, state);
 
          nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov);
-         nir_src_copy(&mov->src[0].src, &intrin->src[0], state->shader);
+         nir_src_copy(&mov->src[0].src, &intrin->src[0], mov);
          mov->dest.write_mask = (1 << intrin->num_components) - 1;
          mov->dest.dest.is_ssa = false;
          mov->dest.dest.reg.reg = reg_src.reg.reg;
diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c b/src/glsl/nir/nir_lower_vec_to_movs.c
index e6d522f88ce..b7f096d14ff 100644
--- a/src/glsl/nir/nir_lower_vec_to_movs.c
+++ b/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -60,8 +60,8 @@ insert_mov(nir_alu_instr *vec, unsigned start_channel,
    assert(src_idx < nir_op_infos[vec->op].num_inputs);
 
    nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
-   nir_alu_src_copy(&mov->src[0], &vec->src[src_idx], mem_ctx);
-   nir_alu_dest_copy(&mov->dest, &vec->dest, mem_ctx);
+   nir_alu_src_copy(&mov->src[0], &vec->src[src_idx], mov);
+   nir_alu_dest_copy(&mov->dest, &vec->dest, mov);
 
    mov->dest.write_mask = (1u << start_channel);
    mov->src[0].swizzle[start_channel] = vec->src[src_idx].swizzle[0];
diff --git a/src/glsl/nir/nir_opt_peephole_ffma.c b/src/glsl/nir/nir_opt_peephole_ffma.c
index 97538e5e64a..a23123ea58c 100644
--- a/src/glsl/nir/nir_opt_peephole_ffma.c
+++ b/src/glsl/nir/nir_opt_peephole_ffma.c
@@ -216,8 +216,7 @@ nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
          for (unsigned j = 0; j < add->dest.dest.ssa.num_components; j++)
             ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]];
       }
-      nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src],
-                       state->mem_ctx);
+      nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src], ffma);
 
       assert(add->dest.dest.is_ssa);
 
diff --git a/src/glsl/nir/nir_opt_peephole_select.c b/src/glsl/nir/nir_opt_peephole_select.c
index 26ec4ed92d3..5b6037a6c46 100644
--- a/src/glsl/nir/nir_opt_peephole_select.c
+++ b/src/glsl/nir/nir_opt_peephole_select.c
@@ -196,7 +196,7 @@ nir_opt_peephole_select_block(nir_block *block, void *void_state)
 
       nir_phi_instr *phi = nir_instr_as_phi(instr);
       nir_alu_instr *sel = nir_alu_instr_create(state->mem_ctx, nir_op_bcsel);
-      nir_src_copy(&sel->src[0].src, &if_stmt->condition, state->mem_ctx);
+      nir_src_copy(&sel->src[0].src, &if_stmt->condition, sel);
       /* Splat the condition to all channels */
       memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);
 
@@ -206,7 +206,7 @@ nir_opt_peephole_select_block(nir_block *block, void *void_state)
          assert(src->src.is_ssa);
 
          unsigned idx = src->pred == then_block ? 1 : 2;
-         nir_src_copy(&sel->src[idx].src, &src->src, state->mem_ctx);
+         nir_src_copy(&sel->src[idx].src, &src->src, sel);
       }
 
       nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
-- 
cgit v1.2.3


From a4aa25be1e0a27b1a6a6b0bcf576beb9dfe1ea7a Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 9 Sep 2015 13:24:35 -0700
Subject: nir: Remove the mem_ctx parameter from ssa_def_rewrite_uses

Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
---
 src/glsl/nir/nir.c                            | 2 +-
 src/glsl/nir/nir.h                            | 2 +-
 src/glsl/nir/nir_control_flow.c               | 2 +-
 src/glsl/nir/nir_from_ssa.c                   | 5 ++---
 src/glsl/nir/nir_lower_alu_to_scalar.c        | 6 ++----
 src/glsl/nir/nir_lower_atomics.c              | 3 +--
 src/glsl/nir/nir_lower_idiv.c                 | 4 +---
 src/glsl/nir/nir_lower_io.c                   | 3 +--
 src/glsl/nir/nir_lower_load_const_to_scalar.c | 3 +--
 src/glsl/nir/nir_lower_locals_to_regs.c       | 3 +--
 src/glsl/nir/nir_lower_phis_to_scalar.c       | 3 +--
 src/glsl/nir/nir_lower_system_values.c        | 3 +--
 src/glsl/nir/nir_lower_vars_to_ssa.c          | 6 ++----
 src/glsl/nir/nir_opt_constant_folding.c       | 4 ++--
 src/glsl/nir/nir_opt_cse.c                    | 6 ++----
 src/glsl/nir/nir_opt_dead_cf.c                | 4 +---
 src/glsl/nir/nir_opt_peephole_ffma.c          | 3 +--
 src/glsl/nir/nir_opt_peephole_select.c        | 3 +--
 src/glsl/nir/nir_opt_remove_phis.c            | 5 +----
 src/glsl/nir/nir_search.c                     | 2 +-
 20 files changed, 25 insertions(+), 47 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index fd675322b12..13489f0d38e 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1205,7 +1205,7 @@ nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
 }
 
 void
-nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx)
+nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
 {
    assert(!new_src.is_ssa || def != new_src.ssa);
 
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 92945f9df7f..05cc42ca26d 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1740,7 +1740,7 @@ void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
                        unsigned num_components, const char *name);
 void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                       unsigned num_components, const char *name);
-void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx);
+void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src);
 
 /* visits basic blocks in source-code order */
 typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state);
diff --git a/src/glsl/nir/nir_control_flow.c b/src/glsl/nir/nir_control_flow.c
index 5c03375ac77..768dfd2aff3 100644
--- a/src/glsl/nir/nir_control_flow.c
+++ b/src/glsl/nir/nir_control_flow.c
@@ -654,7 +654,7 @@ replace_ssa_def_uses(nir_ssa_def *def, void *void_impl)
    nir_ssa_undef_instr *undef =
       nir_ssa_undef_instr_create(mem_ctx, def->num_components);
    nir_instr_insert_before_cf_list(&impl->body, &undef->instr);
-   nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(&undef->def), mem_ctx);
+   nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(&undef->def));
    return true;
 }
 
diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c
index d6569d8994a..084f43da87c 100644
--- a/src/glsl/nir/nir_from_ssa.c
+++ b/src/glsl/nir/nir_from_ssa.c
@@ -359,8 +359,7 @@ isolate_phi_nodes_block(nir_block *block, void *void_state)
       exec_list_push_tail(&block_pcopy->entries, &entry->node);
 
       nir_ssa_def_rewrite_uses(&phi->dest.ssa,
-                               nir_src_for_ssa(&entry->dest.ssa),
-                               state->mem_ctx);
+                               nir_src_for_ssa(&entry->dest.ssa));
 
       nir_instr_rewrite_src(&block_pcopy->instr, &entry->src,
                             nir_src_for_ssa(&phi->dest.ssa));
@@ -493,7 +492,7 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
       reg->num_array_elems = 0;
    }
 
-   nir_ssa_def_rewrite_uses(def, nir_src_for_reg(reg), state->mem_ctx);
+   nir_ssa_def_rewrite_uses(def, nir_src_for_reg(reg));
    assert(list_empty(&def->uses) && list_empty(&def->if_uses));
 
    if (def->parent_instr->type == nir_instr_type_ssa_undef) {
diff --git a/src/glsl/nir/nir_lower_alu_to_scalar.c b/src/glsl/nir/nir_lower_alu_to_scalar.c
index 1607308abb1..710bb37409f 100644
--- a/src/glsl/nir/nir_lower_alu_to_scalar.c
+++ b/src/glsl/nir/nir_lower_alu_to_scalar.c
@@ -70,8 +70,7 @@ lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
    }
 
    assert(instr->dest.write_mask == 1);
-   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(last),
-                            mem_ctx);
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(last));
    nir_instr_remove(&instr->instr);
 }
 
@@ -168,8 +167,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx)
    nir_instr_insert_before(&instr->instr, &vec_instr->instr);
 
    nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
-                            nir_src_for_ssa(&vec_instr->dest.dest.ssa),
-                            mem_ctx);
+                            nir_src_for_ssa(&vec_instr->dest.dest.ssa));
 
    nir_instr_remove(&instr->instr);
 }
diff --git a/src/glsl/nir/nir_lower_atomics.c b/src/glsl/nir/nir_lower_atomics.c
index 7ae8462882a..6f9ecc019ec 100644
--- a/src/glsl/nir/nir_lower_atomics.c
+++ b/src/glsl/nir/nir_lower_atomics.c
@@ -116,8 +116,7 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
       nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
                         instr->dest.ssa.num_components, NULL);
       nir_ssa_def_rewrite_uses(&instr->dest.ssa,
-                               nir_src_for_ssa(&new_instr->dest.ssa),
-                               mem_ctx);
+                               nir_src_for_ssa(&new_instr->dest.ssa));
    } else {
       nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx);
    }
diff --git a/src/glsl/nir/nir_lower_idiv.c b/src/glsl/nir/nir_lower_idiv.c
index 0e1653dd274..c961178c53a 100644
--- a/src/glsl/nir/nir_lower_idiv.c
+++ b/src/glsl/nir/nir_lower_idiv.c
@@ -116,9 +116,7 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu)
    }
 
    assert(alu->dest.dest.is_ssa);
-   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
-                            nir_src_for_ssa(q),
-                            ralloc_parent(alu));
+   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(q));
 }
 
 static bool
diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
index 3739fc83f91..9f79c5606ca 100644
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -186,8 +186,7 @@ nir_lower_io_block(nir_block *block, void *void_state)
             nir_ssa_dest_init(&load->instr, &load->dest,
                               intrin->num_components, NULL);
             nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                                     nir_src_for_ssa(&load->dest.ssa),
-                                     state->mem_ctx);
+                                     nir_src_for_ssa(&load->dest.ssa));
          } else {
             nir_dest_copy(&load->dest, &intrin->dest, state->mem_ctx);
          }
diff --git a/src/glsl/nir/nir_lower_load_const_to_scalar.c b/src/glsl/nir/nir_lower_load_const_to_scalar.c
index b83ef052ea9..704f8cebfd8 100644
--- a/src/glsl/nir/nir_lower_load_const_to_scalar.c
+++ b/src/glsl/nir/nir_lower_load_const_to_scalar.c
@@ -71,8 +71,7 @@ lower_load_const_instr_scalar(nir_load_const_instr *lower)
    }
 
    /* Replace the old load with a reference to our reconstructed vector. */
-   nir_ssa_def_rewrite_uses(&lower->def, nir_src_for_ssa(vec),
-                            ralloc_parent(b.impl));
+   nir_ssa_def_rewrite_uses(&lower->def, nir_src_for_ssa(vec));
    nir_instr_remove(&lower->instr);
 }
 
diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c b/src/glsl/nir/nir_lower_locals_to_regs.c
index b77d974f568..87d2498dd79 100644
--- a/src/glsl/nir/nir_lower_locals_to_regs.c
+++ b/src/glsl/nir/nir_lower_locals_to_regs.c
@@ -221,8 +221,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
             nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
                               intrin->num_components, NULL);
             nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                                     nir_src_for_ssa(&mov->dest.dest.ssa),
-                                     state->shader);
+                                     nir_src_for_ssa(&mov->dest.dest.ssa));
          } else {
             nir_dest_copy(&mov->dest.dest, &intrin->dest, &mov->instr);
          }
diff --git a/src/glsl/nir/nir_lower_phis_to_scalar.c b/src/glsl/nir/nir_lower_phis_to_scalar.c
index 739170d61fd..d72a71dfb6c 100644
--- a/src/glsl/nir/nir_lower_phis_to_scalar.c
+++ b/src/glsl/nir/nir_lower_phis_to_scalar.c
@@ -242,8 +242,7 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state)
       nir_instr_insert_after(&last_phi->instr, &vec->instr);
 
       nir_ssa_def_rewrite_uses(&phi->dest.ssa,
-                               nir_src_for_ssa(&vec->dest.dest.ssa),
-                               state->mem_ctx);
+                               nir_src_for_ssa(&vec->dest.dest.ssa));
 
       ralloc_steal(state->dead_ctx, phi);
       nir_instr_remove(&phi->instr);
diff --git a/src/glsl/nir/nir_lower_system_values.c b/src/glsl/nir/nir_lower_system_values.c
index a6eec653e33..440fb0b1b8e 100644
--- a/src/glsl/nir/nir_lower_system_values.c
+++ b/src/glsl/nir/nir_lower_system_values.c
@@ -80,8 +80,7 @@ convert_instr(nir_intrinsic_instr *instr)
       nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
                         instr->dest.ssa.num_components, NULL);
       nir_ssa_def_rewrite_uses(&instr->dest.ssa,
-                               nir_src_for_ssa(&new_instr->dest.ssa),
-                               mem_ctx);
+                               nir_src_for_ssa(&new_instr->dest.ssa));
    } else {
       nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx);
    }
diff --git a/src/glsl/nir/nir_lower_vars_to_ssa.c b/src/glsl/nir/nir_lower_vars_to_ssa.c
index 4ff21663e57..59715072c15 100644
--- a/src/glsl/nir/nir_lower_vars_to_ssa.c
+++ b/src/glsl/nir/nir_lower_vars_to_ssa.c
@@ -625,8 +625,7 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
                nir_instr_remove(&intrin->instr);
 
                nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                                        nir_src_for_ssa(&undef->def),
-                                        state->shader);
+                                        nir_src_for_ssa(&undef->def));
                continue;
             }
 
@@ -650,8 +649,7 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
             nir_instr_remove(&intrin->instr);
 
             nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                                     nir_src_for_ssa(&mov->dest.dest.ssa),
-                                     state->shader);
+                                     nir_src_for_ssa(&mov->dest.dest.ssa));
             break;
          }
 
diff --git a/src/glsl/nir/nir_opt_constant_folding.c b/src/glsl/nir/nir_opt_constant_folding.c
index 85c09fc4800..007b81cfd41 100644
--- a/src/glsl/nir/nir_opt_constant_folding.c
+++ b/src/glsl/nir/nir_opt_constant_folding.c
@@ -80,8 +80,8 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
 
    nir_instr_insert_before(&instr->instr, &new_instr->instr);
 
-   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(&new_instr->def),
-                            mem_ctx);
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
+                            nir_src_for_ssa(&new_instr->def));
 
    nir_instr_remove(&instr->instr);
    ralloc_free(instr);
diff --git a/src/glsl/nir/nir_opt_cse.c b/src/glsl/nir/nir_opt_cse.c
index 864795ce5ed..64c94afd480 100644
--- a/src/glsl/nir/nir_opt_cse.c
+++ b/src/glsl/nir/nir_opt_cse.c
@@ -272,8 +272,7 @@ nir_opt_cse_instr(nir_instr *instr, struct cse_state *state)
       if (nir_instrs_equal(instr, other)) {
          nir_ssa_def *other_def = nir_instr_get_dest_ssa_def(other);
          nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr),
-                                  nir_src_for_ssa(other_def),
-                                  state->mem_ctx);
+                                  nir_src_for_ssa(other_def));
          nir_instr_remove(instr);
          state->progress = true;
          return;
@@ -286,8 +285,7 @@ nir_opt_cse_instr(nir_instr *instr, struct cse_state *state)
          if (nir_instrs_equal(instr, other)) {
             nir_ssa_def *other_def = nir_instr_get_dest_ssa_def(other);
             nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr),
-                                     nir_src_for_ssa(other_def),
-                                     state->mem_ctx);
+                                     nir_src_for_ssa(other_def));
             nir_instr_remove(instr);
             state->progress = true;
             return;
diff --git a/src/glsl/nir/nir_opt_dead_cf.c b/src/glsl/nir/nir_opt_dead_cf.c
index 5c5510484af..317bbc5ba63 100644
--- a/src/glsl/nir/nir_opt_dead_cf.c
+++ b/src/glsl/nir/nir_opt_dead_cf.c
@@ -83,8 +83,6 @@ remove_after_cf_node(nir_cf_node *node)
 static void
 opt_constant_if(nir_if *if_stmt, bool condition)
 {
-   void *mem_ctx = ralloc_parent(if_stmt);
-
    /* First, we need to remove any phi nodes after the if by rewriting uses to
     * point to the correct source.
     */
@@ -109,7 +107,7 @@ opt_constant_if(nir_if *if_stmt, bool condition)
 
       assert(def);
       assert(phi->dest.is_ssa);
-      nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def), mem_ctx);
+      nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def));
       nir_instr_remove(instr);
    }
 
diff --git a/src/glsl/nir/nir_opt_peephole_ffma.c b/src/glsl/nir/nir_opt_peephole_ffma.c
index a23123ea58c..4f0f0dae04e 100644
--- a/src/glsl/nir/nir_opt_peephole_ffma.c
+++ b/src/glsl/nir/nir_opt_peephole_ffma.c
@@ -224,8 +224,7 @@ nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
                         add->dest.dest.ssa.num_components,
                         add->dest.dest.ssa.name);
       nir_ssa_def_rewrite_uses(&add->dest.dest.ssa,
-                               nir_src_for_ssa(&ffma->dest.dest.ssa),
-                               state->mem_ctx);
+                               nir_src_for_ssa(&ffma->dest.dest.ssa));
 
       nir_instr_insert_before(&add->instr, &ffma->instr);
       assert(list_empty(&add->dest.dest.ssa.uses));
diff --git a/src/glsl/nir/nir_opt_peephole_select.c b/src/glsl/nir/nir_opt_peephole_select.c
index 5b6037a6c46..90902b97ffc 100644
--- a/src/glsl/nir/nir_opt_peephole_select.c
+++ b/src/glsl/nir/nir_opt_peephole_select.c
@@ -214,8 +214,7 @@ nir_opt_peephole_select_block(nir_block *block, void *void_state)
       sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
 
       nir_ssa_def_rewrite_uses(&phi->dest.ssa,
-                               nir_src_for_ssa(&sel->dest.dest.ssa),
-                               state->mem_ctx);
+                               nir_src_for_ssa(&sel->dest.dest.ssa));
 
       nir_instr_insert_before(&phi->instr, &sel->instr);
       nir_instr_remove(&phi->instr);
diff --git a/src/glsl/nir/nir_opt_remove_phis.c b/src/glsl/nir/nir_opt_remove_phis.c
index 7896584b4e4..bf4a67e70ea 100644
--- a/src/glsl/nir/nir_opt_remove_phis.c
+++ b/src/glsl/nir/nir_opt_remove_phis.c
@@ -47,8 +47,6 @@ remove_phis_block(nir_block *block, void *state)
 {
    bool *progress = state;
 
-   void *mem_ctx = ralloc_parent(block);
-
    nir_foreach_instr_safe(block, instr) {
       if (instr->type != nir_instr_type_phi)
          break;
@@ -75,8 +73,7 @@ remove_phis_block(nir_block *block, void *state)
          continue;
 
       assert(phi->dest.is_ssa);
-      nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def),
-                               mem_ctx);
+      nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def));
       nir_instr_remove(instr);
 
       *progress = true;
diff --git a/src/glsl/nir/nir_search.c b/src/glsl/nir/nir_search.c
index 51e69b06d8c..bb154407914 100644
--- a/src/glsl/nir/nir_search.c
+++ b/src/glsl/nir/nir_search.c
@@ -367,7 +367,7 @@ nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
    nir_instr_insert_before(&instr->instr, &mov->instr);
 
    nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
-                            nir_src_for_ssa(&mov->dest.dest.ssa), mem_ctx);
+                            nir_src_for_ssa(&mov->dest.dest.ssa));
 
    /* We know this one has no more uses because we just rewrote them all,
     * so we can remove it.  The rest of the matched expression, however, we
-- 
cgit v1.2.3


From 106a3b2cc33c53ab16ffedc51248b04dd995dc17 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 9 Sep 2015 15:58:08 -0700
Subject: nir: Only unlink sources that are actually valid

Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
---
 src/glsl/nir/nir.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 13489f0d38e..4a88cd1837c 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -716,10 +716,17 @@ nir_instr_insert(nir_cursor cursor, nir_instr *instr)
       nir_handle_add_jump(instr->block);
 }
 
+static bool
+src_is_valid(const nir_src *src)
+{
+   return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
+}
+
 static bool
 remove_use_cb(nir_src *src, void *state)
 {
-   list_del(&src->use_link);
+   if (src_is_valid(src))
+      list_del(&src->use_link);
 
    return true;
 }
@@ -1102,12 +1109,6 @@ nir_srcs_equal(nir_src src1, nir_src src2)
    }
 }
 
-static bool
-src_is_valid(const nir_src *src)
-{
-   return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
-}
-
 static void
 src_remove_all_uses(nir_src *src)
 {
-- 
cgit v1.2.3


From cee29220e312f7c76a07343e501fa6a1c5f3d1aa Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 9 Sep 2015 15:58:25 -0700
Subject: nir: Add a function for rewriting instruction destinations

Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
---
 src/glsl/nir/nir.c | 24 ++++++++++++++++++++++++
 src/glsl/nir/nir.h |  2 ++
 2 files changed, 26 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 4a88cd1837c..aafcb939e3a 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1177,6 +1177,30 @@ nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
    src_add_all_uses(src, NULL, if_stmt);
 }
 
+void
+nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
+{
+   if (dest->is_ssa) {
+      /* We can only overwrite an SSA destination if it has no uses. */
+      assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses));
+   } else {
+      list_del(&dest->reg.def_link);
+      if (dest->reg.indirect)
+         src_remove_all_uses(dest->reg.indirect);
+   }
+
+   /* We can't re-write with an SSA def */
+   assert(!new_dest.is_ssa);
+
+   nir_dest_copy(dest, &new_dest, instr);
+
+   dest->reg.parent_instr = instr;
+   list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);
+
+   if (dest->reg.indirect)
+      src_add_all_uses(dest->reg.indirect, instr, NULL);
+}
+
 void
 nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                  unsigned num_components, const char *name)
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 05cc42ca26d..3f693b17fb1 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1735,6 +1735,8 @@ bool nir_srcs_equal(nir_src src1, nir_src src2);
 void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
 void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
 void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
+void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest,
+                            nir_dest new_dest);
 
 void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
                        unsigned num_components, const char *name);
-- 
cgit v1.2.3


From ca11c3c0a4726d4f51436d714ef81ef9a01348ea Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 9 Sep 2015 16:03:10 -0700
Subject: nir/from_ssa: Use instr_rewrite_dest

Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
---
 src/glsl/nir/nir_from_ssa.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c
index 084f43da87c..eaf883dbaa0 100644
--- a/src/glsl/nir/nir_from_ssa.c
+++ b/src/glsl/nir/nir_from_ssa.c
@@ -512,9 +512,7 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
     */
    nir_dest *dest = exec_node_data(nir_dest, def, ssa);
 
-   *dest = nir_dest_for_reg(reg);
-   dest->reg.parent_instr = state->instr;
-   list_addtail(&dest->reg.def_link, &reg->defs);
+   nir_instr_rewrite_dest(state->instr, dest, nir_dest_for_reg(reg));
 
    return true;
 }
-- 
cgit v1.2.3


From 4ca2896e8ea1908ea5a0d5bffe8a1b42145a6f72 Mon Sep 17 00:00:00 2001
From: Krzesimir Nowak <krzesimir@kinvolk.io>
Date: Fri, 11 Sep 2015 20:07:36 +0200
Subject: softpipe: Move the faces array from view to filter_args

With that, sp_sampler_view instances are not abused anymore as a local
storage, so we can later make them constant.

Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 36 +++++++++++++++++-----------
 src/gallium/drivers/softpipe/sp_tex_sample.h |  4 +---
 2 files changed, 23 insertions(+), 17 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 489cae7a371..d5a7ed6f5a5 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2003,7 +2003,7 @@ mip_filter_linear(struct sp_sampler_view *sp_sview,
       args.s = s[j];
       args.t = t[j];
       args.p = p[j];
-      args.face_id = sp_sview->faces[j];
+      args.face_id = filt_args->faces[j];
 
       if (lod[j] < 0.0) {
          args.level = psview->u.tex.first_level;
@@ -2087,7 +2087,7 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview,
       args.s = s[j];
       args.t = t[j];
       args.p = p[j];
-      args.face_id = sp_sview->faces[j];
+      args.face_id = filt_args->faces[j];
 
       if (lod[j] < 0.0) {
          args.level = psview->u.tex.first_level;
@@ -2148,7 +2148,7 @@ mip_filter_none(struct sp_sampler_view *sp_sview,
       args.s = s[j];
       args.t = t[j];
       args.p = p[j];
-      args.face_id = sp_sview->faces[j];
+      args.face_id = filt_args->faces[j];
       if (lod[j] < 0.0) {
          mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
       }
@@ -2193,7 +2193,7 @@ mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
       args.s = s[j];
       args.t = t[j];
       args.p = p[j];
-      args.face_id = sp_sview->faces[j];
+      args.face_id = filt_args->faces[j];
       mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
    }
 }
@@ -2239,6 +2239,7 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
                   const float s[TGSI_QUAD_SIZE],
                   const float t[TGSI_QUAD_SIZE],
                   const float p[TGSI_QUAD_SIZE],
+                  const float faces[TGSI_QUAD_SIZE],
                   unsigned level,
                   const float dudx, const float dvdx,
                   const float dudy, const float dvdy,
@@ -2319,7 +2320,7 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
       float num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
       buffer_next = 0;
       den = 0;
-      args.face_id = sp_sview->faces[j];
+      args.face_id = faces[j];
 
       U = u0 - tex_u;
       for (v = v0; v <= v1; ++v) {
@@ -2528,7 +2529,7 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
          args.t = t[j];
          args.p = p[j];
          args.level = psview->u.tex.last_level;
-         args.face_id = sp_sview->faces[j];
+         args.face_id = filt_args->faces[j];
          min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
       }
    }
@@ -2537,7 +2538,7 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
        * seem to be worth the extra running time.
        */
       img_filter_2d_ewa(sp_sview, sp_samp, min_filter, mag_filter,
-                        s, t, p, level0,
+                        s, t, p, filt_args->faces, level0,
                         dudx, dvdx, dudy, dvdy, rgba);
    }
 
@@ -2590,7 +2591,7 @@ mip_filter_linear_2d_linear_repeat_POT(
       args.s = s[j];
       args.t = t[j];
       args.p = p[j];
-      args.face_id = sp_sview->faces[j];
+      args.face_id = filt_args->faces[j];
       args.offset = filt_args->offset;
       args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
       if ((unsigned)level0 >= psview->u.tex.last_level) {
@@ -3129,7 +3130,8 @@ convert_cube(struct sp_sampler_view *sp_sview,
              const float c0[TGSI_QUAD_SIZE],
              float ssss[TGSI_QUAD_SIZE],
              float tttt[TGSI_QUAD_SIZE],
-             float pppp[TGSI_QUAD_SIZE])
+             float pppp[TGSI_QUAD_SIZE],
+             float faces[TGSI_QUAD_SIZE])
 {
    unsigned j;
 
@@ -3176,7 +3178,7 @@ convert_cube(struct sp_sampler_view *sp_sview,
             const float ima = -0.5F / fabsf(s[j]);
             ssss[j] = sign *  p[j] * ima + 0.5F;
             tttt[j] =         t[j] * ima + 0.5F;
-            sp_sview->faces[j] = face;
+            faces[j] = face;
          }
       }
       else if (ary >= arx && ary >= arz) {
@@ -3187,7 +3189,7 @@ convert_cube(struct sp_sampler_view *sp_sview,
             const float ima = -0.5F / fabsf(t[j]);
             ssss[j] =        -s[j] * ima + 0.5F;
             tttt[j] = sign * -p[j] * ima + 0.5F;
-            sp_sview->faces[j] = face;
+            faces[j] = face;
          }
       }
       else {
@@ -3198,7 +3200,7 @@ convert_cube(struct sp_sampler_view *sp_sview,
             const float ima = -0.5F / fabsf(p[j]);
             ssss[j] = sign * -s[j] * ima + 0.5F;
             tttt[j] =         t[j] * ima + 0.5F;
-            sp_sview->faces[j] = face;
+            faces[j] = face;
          }
       }
    }
@@ -3594,11 +3596,16 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
       float cs[TGSI_QUAD_SIZE];
       float ct[TGSI_QUAD_SIZE];
       float cp[TGSI_QUAD_SIZE];
+      float faces[TGSI_QUAD_SIZE];
 
-      convert_cube(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp);
+      convert_cube(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp, faces);
 
+      filt_args.faces = faces;
       sample_mip(sp_sview, sp_samp, cs, ct, cp, c0, lod, &filt_args, rgba);
    } else {
+      static const float zero_faces[TGSI_QUAD_SIZE] = {0.0f, 0.0f, 0.0f, 0.0f};
+
+      filt_args.faces = zero_faces;
       sample_mip(sp_sview, sp_samp, s, t, p, c0, lod, &filt_args, rgba);
    }
 }
@@ -3644,8 +3651,9 @@ sp_tgsi_query_lod(struct tgsi_sampler *tgsi_sampler,
       float cs[TGSI_QUAD_SIZE];
       float ct[TGSI_QUAD_SIZE];
       float cp[TGSI_QUAD_SIZE];
+      float unused_faces[TGSI_QUAD_SIZE];
 
-      convert_cube(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp);
+      convert_cube(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp, unused_faces);
       compute_lambda_lod_unclamped(sp_sview, sp_samp,
                                    cs, ct, cp, lod_in, control, lod);
    } else {
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index 72b4a1ac3e8..6743b7ead0b 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -72,6 +72,7 @@ typedef void (*img_filter_func)(struct sp_sampler_view *sp_sview,
 struct filter_args {
    enum tgsi_sampler_control control;
    const int8_t *offset;
+   const float *faces;
 };
 
 typedef void (*mip_filter_func)(struct sp_sampler_view *sp_sview,
@@ -112,9 +113,6 @@ struct sp_sampler_view
    boolean pot2d;
    boolean need_cube_convert;
 
-   /* this is just abusing the sampler_view object as local storage */
-   unsigned faces[TGSI_QUAD_SIZE];
-
    /* these are different per shader type */
    struct softpipe_tex_tile_cache *cache;
    compute_lambda_func compute_lambda;
-- 
cgit v1.2.3


From ea0fecd1a3f46439c602e04870b34e6f27ad5b2e Mon Sep 17 00:00:00 2001
From: Krzesimir Nowak <krzesimir@kinvolk.io>
Date: Fri, 11 Sep 2015 20:07:37 +0200
Subject: softpipe: Constify some sampler and view parameters

This is to prepare for making tgsi_sampler parameter in query_lod a
const too. These functions do not modify anything in either sampler or
view anymore.

Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 51 ++++++++++++++--------------
 src/gallium/drivers/softpipe/sp_tex_sample.h |  4 +--
 2 files changed, 28 insertions(+), 27 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index d5a7ed6f5a5..f5a12642e9b 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1861,8 +1861,8 @@ compute_lod(const struct pipe_sampler_state *sampler,
  * \param lod results per-fragment lod.
  */
 static inline void
-compute_lambda_lod_unclamped(struct sp_sampler_view *sp_sview,
-                             struct sp_sampler *sp_samp,
+compute_lambda_lod_unclamped(const struct sp_sampler_view *sp_sview,
+                             const struct sp_sampler *sp_samp,
                              const float s[TGSI_QUAD_SIZE],
                              const float t[TGSI_QUAD_SIZE],
                              const float p[TGSI_QUAD_SIZE],
@@ -1965,8 +1965,8 @@ clamp_lod(const struct sp_sampler_view *sp_sview,
  * Get mip level relative to base level for linear mip filter
  */
 static void
-mip_rel_level_linear(struct sp_sampler_view *sp_sview,
-                     struct sp_sampler *sp_samp,
+mip_rel_level_linear(const struct sp_sampler_view *sp_sview,
+                     const struct sp_sampler *sp_samp,
                      const float lod[TGSI_QUAD_SIZE],
                      float level[TGSI_QUAD_SIZE])
 {
@@ -2039,8 +2039,8 @@ mip_filter_linear(struct sp_sampler_view *sp_sview,
  * Get mip level relative to base level for nearest mip filter
  */
 static void
-mip_rel_level_nearest(struct sp_sampler_view *sp_sview,
-                      struct sp_sampler *sp_samp,
+mip_rel_level_nearest(const struct sp_sampler_view *sp_sview,
+                      const struct sp_sampler *sp_samp,
                       const float lod[TGSI_QUAD_SIZE],
                       float level[TGSI_QUAD_SIZE])
 {
@@ -2109,8 +2109,8 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview,
  * Get mip level relative to base level for none mip filter
  */
 static void
-mip_rel_level_none(struct sp_sampler_view *sp_sview,
-                   struct sp_sampler *sp_samp,
+mip_rel_level_none(const struct sp_sampler_view *sp_sview,
+                   const struct sp_sampler *sp_samp,
                    const float lod[TGSI_QUAD_SIZE],
                    float level[TGSI_QUAD_SIZE])
 {
@@ -2163,8 +2163,8 @@ mip_filter_none(struct sp_sampler_view *sp_sview,
  * Get mip level relative to base level for none mip filter
  */
 static void
-mip_rel_level_none_no_filter_select(struct sp_sampler_view *sp_sview,
-                                    struct sp_sampler *sp_samp,
+mip_rel_level_none_no_filter_select(const struct sp_sampler_view *sp_sview,
+                                    const struct sp_sampler *sp_samp,
                                     const float lod[TGSI_QUAD_SIZE],
                                     float level[TGSI_QUAD_SIZE])
 {
@@ -2428,8 +2428,8 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
  * Get mip level relative to base level for linear mip filter
  */
 static void
-mip_rel_level_linear_aniso(struct sp_sampler_view *sp_sview,
-                           struct sp_sampler *sp_samp,
+mip_rel_level_linear_aniso(const struct sp_sampler_view *sp_sview,
+                           const struct sp_sampler *sp_samp,
                            const float lod[TGSI_QUAD_SIZE],
                            float level[TGSI_QUAD_SIZE])
 {
@@ -2551,10 +2551,11 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
  * Get mip level relative to base level for linear mip filter
  */
 static void
-mip_rel_level_linear_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
-                                          struct sp_sampler *sp_samp,
-                                          const float lod[TGSI_QUAD_SIZE],
-                                          float level[TGSI_QUAD_SIZE])
+mip_rel_level_linear_2d_linear_repeat_POT(
+   const struct sp_sampler_view *sp_sview,
+   const struct sp_sampler *sp_samp,
+   const float lod[TGSI_QUAD_SIZE],
+   float level[TGSI_QUAD_SIZE])
 {
    mip_rel_level_linear(sp_sview, sp_samp, lod, level);
 }
@@ -3049,9 +3050,9 @@ get_img_filter(const struct sp_sampler_view *sp_sview,
  * or NULL.
  */
 static void
-get_filters(struct sp_sampler_view *sp_sview,
-            struct sp_sampler *sp_samp,
-            enum tgsi_sampler_control control,
+get_filters(const struct sp_sampler_view *sp_sview,
+            const struct sp_sampler *sp_samp,
+            const enum tgsi_sampler_control control,
             const struct sp_filter_funcs **funcs,
             img_filter_func *min,
             img_filter_func *mag)
@@ -3122,8 +3123,8 @@ sample_mip(struct sp_sampler_view *sp_sview,
  * faces[] array.
  */
 static void
-convert_cube(struct sp_sampler_view *sp_sview,
-             struct sp_sampler *sp_samp,
+convert_cube(const struct sp_sampler_view *sp_sview,
+             const struct sp_sampler *sp_samp,
              const float s[TGSI_QUAD_SIZE],
              const float t[TGSI_QUAD_SIZE],
              const float p[TGSI_QUAD_SIZE],
@@ -3624,10 +3625,10 @@ sp_tgsi_query_lod(struct tgsi_sampler *tgsi_sampler,
 {
    static const float lod_in[TGSI_QUAD_SIZE] = { 0.0, 0.0, 0.0, 0.0 };
 
-   struct sp_tgsi_sampler *sp_tgsi_samp =
-      (struct sp_tgsi_sampler *)tgsi_sampler;
-   struct sp_sampler_view *sp_sview;
-   struct sp_sampler *sp_samp;
+   const struct sp_tgsi_sampler *sp_tgsi_samp =
+      (const struct sp_tgsi_sampler *)tgsi_sampler;
+   const struct sp_sampler_view *sp_sview;
+   const struct sp_sampler *sp_samp;
    const struct sp_filter_funcs *funcs;
    int i;
 
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index 6743b7ead0b..e14e4ac9ce7 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -88,8 +88,8 @@ typedef void (*mip_filter_func)(struct sp_sampler_view *sp_sview,
                                 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
 
 
-typedef void (*mip_level_func)(struct sp_sampler_view *sp_sview,
-                               struct sp_sampler *sp_samp,
+typedef void (*mip_level_func)(const struct sp_sampler_view *sp_sview,
+                               const struct sp_sampler *sp_samp,
                                const float lod[TGSI_QUAD_SIZE],
                                float level[TGSI_QUAD_SIZE]);
 
-- 
cgit v1.2.3


From ba72e6cfb8248ae0d8b3cf5ebf9add7c49f45743 Mon Sep 17 00:00:00 2001
From: Krzesimir Nowak <krzesimir@kinvolk.io>
Date: Fri, 11 Sep 2015 20:07:38 +0200
Subject: tgsi, softpipe: Constify tgsi_sampler in query_lod vfunc

A followup from previous commit - since all functions called by
query_lod take pointers to const sp_sampler_view and const sp_sampler,
which are taken from tgsi_sampler subclass, we can take the tgsi_sampler as
const itself now.

Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/gallium/auxiliary/tgsi/tgsi_exec.h       | 2 +-
 src/gallium/drivers/softpipe/sp_tex_sample.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 5fc276c20b7..a371aa95e70 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -139,7 +139,7 @@ struct tgsi_sampler
                      const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
                      const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
                      float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
-   void (*query_lod)(struct tgsi_sampler *tgsi_sampler,
+   void (*query_lod)(const struct tgsi_sampler *tgsi_sampler,
                      const unsigned sview_index,
                      const unsigned sampler_index,
                      const float s[TGSI_QUAD_SIZE],
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index f5a12642e9b..ba292c4d712 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -3612,7 +3612,7 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
 }
 
 static void
-sp_tgsi_query_lod(struct tgsi_sampler *tgsi_sampler,
+sp_tgsi_query_lod(const struct tgsi_sampler *tgsi_sampler,
                   const unsigned sview_index,
                   const unsigned sampler_index,
                   const float s[TGSI_QUAD_SIZE],
-- 
cgit v1.2.3


From ea764baa61bec5b4ae15cf0d5928e3643061807d Mon Sep 17 00:00:00 2001
From: Krzesimir Nowak <krzesimir@kinvolk.io>
Date: Fri, 11 Sep 2015 20:07:39 +0200
Subject: softpipe: Constify sampler and view parameters in img filters

Those functions actually could always take them as constants.

Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 68 ++++++++++++++--------------
 src/gallium/drivers/softpipe/sp_tex_sample.h |  4 +-
 2 files changed, 36 insertions(+), 36 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index ba292c4d712..a2f18a4f3fa 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1017,8 +1017,8 @@ print_sample_4(const char *function, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZ
 /* Some image-filter fastpaths:
  */
 static inline void
-img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
-                                struct sp_sampler *sp_samp,
+img_filter_2d_linear_repeat_POT(const struct sp_sampler_view *sp_sview,
+                                const struct sp_sampler *sp_samp,
                                 const struct img_filter_args *args,
                                 float *rgba)
 {
@@ -1071,8 +1071,8 @@ img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
 
 
 static inline void
-img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview,
-                                 struct sp_sampler *sp_samp,
+img_filter_2d_nearest_repeat_POT(const struct sp_sampler_view *sp_sview,
+                                 const struct sp_sampler *sp_samp,
                                  const struct img_filter_args *args,
                                  float rgba[TGSI_QUAD_SIZE])
 {
@@ -1105,8 +1105,8 @@ img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview,
 
 
 static inline void
-img_filter_2d_nearest_clamp_POT(struct sp_sampler_view *sp_sview,
-                                struct sp_sampler *sp_samp,
+img_filter_2d_nearest_clamp_POT(const struct sp_sampler_view *sp_sview,
+                                const struct sp_sampler *sp_samp,
                                 const struct img_filter_args *args,
                                 float rgba[TGSI_QUAD_SIZE])
 {
@@ -1147,8 +1147,8 @@ img_filter_2d_nearest_clamp_POT(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_1d_nearest(struct sp_sampler_view *sp_sview,
-                      struct sp_sampler *sp_samp,
+img_filter_1d_nearest(const struct sp_sampler_view *sp_sview,
+                      const struct sp_sampler *sp_samp,
                       const struct img_filter_args *args,
                       float rgba[TGSI_QUAD_SIZE])
 {
@@ -1179,8 +1179,8 @@ img_filter_1d_nearest(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_1d_array_nearest(struct sp_sampler_view *sp_sview,
-                            struct sp_sampler *sp_samp,
+img_filter_1d_array_nearest(const struct sp_sampler_view *sp_sview,
+                            const struct sp_sampler *sp_samp,
                             const struct img_filter_args *args,
                             float *rgba)
 {
@@ -1213,8 +1213,8 @@ img_filter_1d_array_nearest(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_2d_nearest(struct sp_sampler_view *sp_sview,
-                      struct sp_sampler *sp_samp,
+img_filter_2d_nearest(const struct sp_sampler_view *sp_sview,
+                      const struct sp_sampler *sp_samp,
                       const struct img_filter_args *args,
                       float *rgba)
 {
@@ -1248,8 +1248,8 @@ img_filter_2d_nearest(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview,
-                            struct sp_sampler *sp_samp,
+img_filter_2d_array_nearest(const struct sp_sampler_view *sp_sview,
+                            const struct sp_sampler *sp_samp,
                             const struct img_filter_args *args,
                             float *rgba)
 {
@@ -1285,8 +1285,8 @@ img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_cube_nearest(struct sp_sampler_view *sp_sview,
-                        struct sp_sampler *sp_samp,
+img_filter_cube_nearest(const struct sp_sampler_view *sp_sview,
+                        const struct sp_sampler *sp_samp,
                         const struct img_filter_args *args,
                         float *rgba)
 {
@@ -1330,8 +1330,8 @@ img_filter_cube_nearest(struct sp_sampler_view *sp_sview,
 }
 
 static void
-img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview,
-                              struct sp_sampler *sp_samp,
+img_filter_cube_array_nearest(const struct sp_sampler_view *sp_sview,
+                              const struct sp_sampler *sp_samp,
                               const struct img_filter_args *args,
                               float *rgba)
 {
@@ -1367,8 +1367,8 @@ img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview,
 }
 
 static void
-img_filter_3d_nearest(struct sp_sampler_view *sp_sview,
-                      struct sp_sampler *sp_samp,
+img_filter_3d_nearest(const struct sp_sampler_view *sp_sview,
+                      const struct sp_sampler *sp_samp,
                       const struct img_filter_args *args,
                       float *rgba)
 {
@@ -1401,8 +1401,8 @@ img_filter_3d_nearest(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_1d_linear(struct sp_sampler_view *sp_sview,
-                     struct sp_sampler *sp_samp,
+img_filter_1d_linear(const struct sp_sampler_view *sp_sview,
+                     const struct sp_sampler *sp_samp,
                      const struct img_filter_args *args,
                      float *rgba)
 {
@@ -1433,8 +1433,8 @@ img_filter_1d_linear(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_1d_array_linear(struct sp_sampler_view *sp_sview,
-                           struct sp_sampler *sp_samp,
+img_filter_1d_array_linear(const struct sp_sampler_view *sp_sview,
+                           const struct sp_sampler *sp_samp,
                            const struct img_filter_args *args,
                            float *rgba)
 {
@@ -1533,8 +1533,8 @@ get_gather_value(const struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_2d_linear(struct sp_sampler_view *sp_sview,
-                     struct sp_sampler *sp_samp,
+img_filter_2d_linear(const struct sp_sampler_view *sp_sview,
+                     const struct sp_sampler *sp_samp,
                      const struct img_filter_args *args,
                      float *rgba)
 {
@@ -1579,8 +1579,8 @@ img_filter_2d_linear(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_2d_array_linear(struct sp_sampler_view *sp_sview,
-                           struct sp_sampler *sp_samp,
+img_filter_2d_array_linear(const struct sp_sampler_view *sp_sview,
+                           const struct sp_sampler *sp_samp,
                            const struct img_filter_args *args,
                            float *rgba)
 {
@@ -1627,8 +1627,8 @@ img_filter_2d_array_linear(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_cube_linear(struct sp_sampler_view *sp_sview,
-                       struct sp_sampler *sp_samp,
+img_filter_cube_linear(const struct sp_sampler_view *sp_sview,
+                       const struct sp_sampler *sp_samp,
                        const struct img_filter_args *args,
                        float *rgba)
 {
@@ -1695,8 +1695,8 @@ img_filter_cube_linear(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_cube_array_linear(struct sp_sampler_view *sp_sview,
-                             struct sp_sampler *sp_samp,
+img_filter_cube_array_linear(const struct sp_sampler_view *sp_sview,
+                             const struct sp_sampler *sp_samp,
                              const struct img_filter_args *args,
                              float *rgba)
 {
@@ -1764,8 +1764,8 @@ img_filter_cube_array_linear(struct sp_sampler_view *sp_sview,
 }
 
 static void
-img_filter_3d_linear(struct sp_sampler_view *sp_sview,
-                     struct sp_sampler *sp_samp,
+img_filter_3d_linear(const struct sp_sampler_view *sp_sview,
+                     const struct sp_sampler *sp_samp,
                      const struct img_filter_args *args,
                      float *rgba)
 {
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index e14e4ac9ce7..e8a0051fe73 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -64,8 +64,8 @@ struct img_filter_args {
    int gather_comp;
 };
 
-typedef void (*img_filter_func)(struct sp_sampler_view *sp_sview,
-                                struct sp_sampler *sp_samp,
+typedef void (*img_filter_func)(const struct sp_sampler_view *sp_sview,
+                                const struct sp_sampler *sp_samp,
                                 const struct img_filter_args *args,
                                 float *rgba);
 
-- 
cgit v1.2.3


From ac23116de56e163a3815626277f9c3691bb56831 Mon Sep 17 00:00:00 2001
From: Krzesimir Nowak <krzesimir@kinvolk.io>
Date: Fri, 11 Sep 2015 20:07:40 +0200
Subject: softpipe: Constify sampler and view parameters in mip filters

These functions never needed to modify the sampler or the sampler view,
so they can take both as pointers to const.

Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 32 ++++++++++++++--------------
 src/gallium/drivers/softpipe/sp_tex_sample.h |  4 ++--
 2 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index a2f18a4f3fa..c91288e2f52 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1908,8 +1908,8 @@ compute_lambda_lod_unclamped(const struct sp_sampler_view *sp_sview,
  * \param lod results per-fragment lod.
  */
 static inline void
-compute_lambda_lod(struct sp_sampler_view *sp_sview,
-                   struct sp_sampler *sp_samp,
+compute_lambda_lod(const struct sp_sampler_view *sp_sview,
+                   const struct sp_sampler *sp_samp,
                    const float s[TGSI_QUAD_SIZE],
                    const float t[TGSI_QUAD_SIZE],
                    const float p[TGSI_QUAD_SIZE],
@@ -1974,8 +1974,8 @@ mip_rel_level_linear(const struct sp_sampler_view *sp_sview,
 }
 
 static void
-mip_filter_linear(struct sp_sampler_view *sp_sview,
-                  struct sp_sampler *sp_samp,
+mip_filter_linear(const struct sp_sampler_view *sp_sview,
+                  const struct sp_sampler *sp_samp,
                   img_filter_func min_filter,
                   img_filter_func mag_filter,
                   const float s[TGSI_QUAD_SIZE],
@@ -2060,8 +2060,8 @@ mip_rel_level_nearest(const struct sp_sampler_view *sp_sview,
  * \param c0  the LOD bias factors, or absolute LODs (depending on control)
  */
 static void
-mip_filter_nearest(struct sp_sampler_view *sp_sview,
-                   struct sp_sampler *sp_samp,
+mip_filter_nearest(const struct sp_sampler_view *sp_sview,
+                   const struct sp_sampler *sp_samp,
                    img_filter_func min_filter,
                    img_filter_func mag_filter,
                    const float s[TGSI_QUAD_SIZE],
@@ -2122,8 +2122,8 @@ mip_rel_level_none(const struct sp_sampler_view *sp_sview,
 }
 
 static void
-mip_filter_none(struct sp_sampler_view *sp_sview,
-                struct sp_sampler *sp_samp,
+mip_filter_none(const struct sp_sampler_view *sp_sview,
+                const struct sp_sampler *sp_samp,
                 img_filter_func min_filter,
                 img_filter_func mag_filter,
                 const float s[TGSI_QUAD_SIZE],
@@ -2172,8 +2172,8 @@ mip_rel_level_none_no_filter_select(const struct sp_sampler_view *sp_sview,
 }
 
 static void
-mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
-                                 struct sp_sampler *sp_samp,
+mip_filter_none_no_filter_select(const struct sp_sampler_view *sp_sview,
+                                 const struct sp_sampler *sp_samp,
                                  img_filter_func min_filter,
                                  img_filter_func mag_filter,
                                  const float s[TGSI_QUAD_SIZE],
@@ -2232,8 +2232,8 @@ create_filter_table(void)
  * "Fundamentals of Texture Mapping and Image Warping" (1989)
  */
 static void
-img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
-                  struct sp_sampler *sp_samp,
+img_filter_2d_ewa(const struct sp_sampler_view *sp_sview,
+                  const struct sp_sampler *sp_samp,
                   img_filter_func min_filter,
                   img_filter_func mag_filter,
                   const float s[TGSI_QUAD_SIZE],
@@ -2440,8 +2440,8 @@ mip_rel_level_linear_aniso(const struct sp_sampler_view *sp_sview,
  * Sample 2D texture using an anisotropic filter.
  */
 static void
-mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
-                        struct sp_sampler *sp_samp,
+mip_filter_linear_aniso(const struct sp_sampler_view *sp_sview,
+                        const struct sp_sampler *sp_samp,
                         img_filter_func min_filter,
                         img_filter_func mag_filter,
                         const float s[TGSI_QUAD_SIZE],
@@ -2566,8 +2566,8 @@ mip_rel_level_linear_2d_linear_repeat_POT(
  */
 static void
 mip_filter_linear_2d_linear_repeat_POT(
-   struct sp_sampler_view *sp_sview,
-   struct sp_sampler *sp_samp,
+   const struct sp_sampler_view *sp_sview,
+   const struct sp_sampler *sp_samp,
    img_filter_func min_filter,
    img_filter_func mag_filter,
    const float s[TGSI_QUAD_SIZE],
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index e8a0051fe73..83ee3a33839 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -75,8 +75,8 @@ struct filter_args {
    const float *faces;
 };
 
-typedef void (*mip_filter_func)(struct sp_sampler_view *sp_sview,
-                                struct sp_sampler *sp_samp,
+typedef void (*mip_filter_func)(const struct sp_sampler_view *sp_sview,
+                                const struct sp_sampler *sp_samp,
                                 img_filter_func min_filter,
                                 img_filter_func mag_filter,
                                 const float s[TGSI_QUAD_SIZE],
-- 
cgit v1.2.3


From 231687c19b3c6ab4f5bead7469b7b27fdbda6d43 Mon Sep 17 00:00:00 2001
From: Krzesimir Nowak <krzesimir@kinvolk.io>
Date: Fri, 11 Sep 2015 20:07:41 +0200
Subject: softpipe: Constify sp_tgsi_sampler

Add a small inline helper that performs the cast, to make sure we never
cast from some completely unrelated type. This commit does not yet make
the tgsi_sampler parameters const in the vfuncs themselves; llvmpipe
would probably need to be looked at before making such a change.

Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 35 ++++++++++++++++++----------
 1 file changed, 23 insertions(+), 12 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index c91288e2f52..b0c4989b2bc 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2658,8 +2658,8 @@ static const struct sp_filter_funcs funcs_linear_2d_linear_repeat_POT = {
  * Do shadow/depth comparisons.
  */
 static void
-sample_compare(struct sp_sampler_view *sp_sview,
-               struct sp_sampler *sp_samp,
+sample_compare(const struct sp_sampler_view *sp_sview,
+               const struct sp_sampler *sp_samp,
                const float s[TGSI_QUAD_SIZE],
                const float t[TGSI_QUAD_SIZE],
                const float p[TGSI_QUAD_SIZE],
@@ -3083,8 +3083,8 @@ get_filters(const struct sp_sampler_view *sp_sview,
 }
 
 static void
-sample_mip(struct sp_sampler_view *sp_sview,
-           struct sp_sampler *sp_samp,
+sample_mip(const struct sp_sampler_view *sp_sview,
+           const struct sp_sampler *sp_samp,
            const float s[TGSI_QUAD_SIZE],
            const float t[TGSI_QUAD_SIZE],
            const float p[TGSI_QUAD_SIZE],
@@ -3209,7 +3209,8 @@ convert_cube(const struct sp_sampler_view *sp_sview,
 
 
 static void
-sp_get_dims(struct sp_sampler_view *sp_sview, int level,
+sp_get_dims(const struct sp_sampler_view *sp_sview,
+            int level,
             int dims[4])
 {
    const struct pipe_sampler_view *view = &sp_sview->base;
@@ -3267,7 +3268,7 @@ sp_get_dims(struct sp_sampler_view *sp_sview, int level,
  * coords to the texture image size.
  */
 static void
-sp_get_texels(struct sp_sampler_view *sp_sview,
+sp_get_texels(const struct sp_sampler_view *sp_sview,
               const int v_i[TGSI_QUAD_SIZE],
               const int v_j[TGSI_QUAD_SIZE],
               const int v_k[TGSI_QUAD_SIZE],
@@ -3537,12 +3538,20 @@ softpipe_create_sampler_view(struct pipe_context *pipe,
 }
 
 
+static inline const struct sp_tgsi_sampler *
+sp_tgsi_sampler_cast_c(const struct tgsi_sampler *sampler)
+{
+   return (const struct sp_tgsi_sampler *)sampler;
+}
+
+
 static void
 sp_tgsi_get_dims(struct tgsi_sampler *tgsi_sampler,
                  const unsigned sview_index,
                  int level, int dims[4])
 {
-   struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
+   const struct sp_tgsi_sampler *sp_samp =
+      sp_tgsi_sampler_cast_c(tgsi_sampler);
 
    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
    /* always have a view here but texture is NULL if no sampler view was set. */
@@ -3568,9 +3577,10 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
                     enum tgsi_sampler_control control,
                     float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
-   struct sp_tgsi_sampler *sp_tgsi_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
-   struct sp_sampler_view *sp_sview;
-   struct sp_sampler *sp_samp;
+   const struct sp_tgsi_sampler *sp_tgsi_samp =
+      sp_tgsi_sampler_cast_c(tgsi_sampler);
+   const struct sp_sampler_view *sp_sview;
+   const struct sp_sampler *sp_samp;
    struct filter_args filt_args;
 
    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
@@ -3626,7 +3636,7 @@ sp_tgsi_query_lod(const struct tgsi_sampler *tgsi_sampler,
    static const float lod_in[TGSI_QUAD_SIZE] = { 0.0, 0.0, 0.0, 0.0 };
 
    const struct sp_tgsi_sampler *sp_tgsi_samp =
-      (const struct sp_tgsi_sampler *)tgsi_sampler;
+      sp_tgsi_sampler_cast_c(tgsi_sampler);
    const struct sp_sampler_view *sp_sview;
    const struct sp_sampler *sp_samp;
    const struct sp_filter_funcs *funcs;
@@ -3674,7 +3684,8 @@ sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler,
                   const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
-   struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
+   const struct sp_tgsi_sampler *sp_samp =
+      sp_tgsi_sampler_cast_c(tgsi_sampler);
 
    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
    /* always have a view here but texture is NULL if no sampler view was set. */
-- 
cgit v1.2.3


From 2135aba8d99d5b0b5f73d97d4aac6a25d69de57c Mon Sep 17 00:00:00 2001
From: Krzesimir Nowak <krzesimir@kinvolk.io>
Date: Fri, 11 Sep 2015 20:07:42 +0200
Subject: softpipe: Constify variables

This commit makes many local variables const, mostly by moving their
computation into the variable definition. Some of them are also moved
into narrower scopes (as in img_filter_2d_ewa).

Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 441 ++++++++++++---------------
 1 file changed, 199 insertions(+), 242 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index b0c4989b2bc..dfe38af5354 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -135,7 +135,7 @@ wrap_nearest_repeat(float s, unsigned size, int offset, int *icoord)
 {
    /* s limited to [0,1) */
    /* i limited to [0,size-1] */
-   int i = util_ifloor(s * size);
+   const int i = util_ifloor(s * size);
    *icoord = repeat(i + offset, size);
 }
 
@@ -280,7 +280,7 @@ static void
 wrap_linear_repeat(float s, unsigned size, int offset,
                    int *icoord0, int *icoord1, float *w)
 {
-   float u = s * size - 0.5F;
+   const float u = s * size - 0.5F;
    *icoord0 = repeat(util_ifloor(u) + offset, size);
    *icoord1 = repeat(*icoord0 + 1, size);
    *w = frac(u);
@@ -291,9 +291,8 @@ static void
 wrap_linear_clamp(float s, unsigned size, int offset,
                   int *icoord0, int *icoord1, float *w)
 {
-   float u = CLAMP(s * size + offset, 0.0F, (float)size);
+   const float u = CLAMP(s * size + offset, 0.0F, (float)size) - 0.5f;
 
-   u = u - 0.5f;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
    *w = frac(u);
@@ -304,8 +303,7 @@ static void
 wrap_linear_clamp_to_edge(float s, unsigned size, int offset,
                           int *icoord0, int *icoord1, float *w)
 {
-   float u = CLAMP(s * size + offset, 0.0F, (float)size);
-   u = u - 0.5f;
+   const float u = CLAMP(s * size + offset, 0.0F, (float)size) - 0.5f;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
    if (*icoord0 < 0)
@@ -322,8 +320,7 @@ wrap_linear_clamp_to_border(float s, unsigned size, int offset,
 {
    const float min = -0.5F;
    const float max = (float)size + 0.5F;
-   float u = CLAMP(s * size + offset, min, max);
-   u = u - 0.5f;
+   const float u = CLAMP(s * size + offset, min, max) - 0.5f;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
    *w = frac(u);
@@ -391,12 +388,8 @@ wrap_linear_mirror_clamp_to_border(float s, unsigned size, int offset,
 {
    const float min = -0.5F;
    const float max = size + 0.5F;
-   float u = fabsf(s * size + offset);
-   if (u <= min)
-      u = min;
-   else if (u >= max)
-      u = max;
-   u -= 0.5F;
+   const float t = fabsf(s * size + offset);
+   const float u = CLAMP(t, min, max) - 0.5F;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
    *w = frac(u);
@@ -409,7 +402,7 @@ wrap_linear_mirror_clamp_to_border(float s, unsigned size, int offset,
 static void
 wrap_nearest_unorm_clamp(float s, unsigned size, int offset, int *icoord)
 {
-   int i = util_ifloor(s);
+   const int i = util_ifloor(s);
    *icoord = CLAMP(i + offset, 0, (int) size-1);
 }
 
@@ -442,7 +435,7 @@ wrap_linear_unorm_clamp(float s, unsigned size, int offset,
                         int *icoord0, int *icoord1, float *w)
 {
    /* Not exactly what the spec says, but it matches NVIDIA output */
-   float u = CLAMP(s + offset - 0.5F, 0.0f, (float) size - 1.0f);
+   const float u = CLAMP(s + offset - 0.5F, 0.0f, (float) size - 1.0f);
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
    *w = frac(u);
@@ -456,8 +449,7 @@ static void
 wrap_linear_unorm_clamp_to_border(float s, unsigned size, int offset,
                                   int *icoord0, int *icoord1, float *w)
 {
-   float u = CLAMP(s + offset, -0.5F, (float) size + 0.5F);
-   u -= 0.5F;
+   const float u = CLAMP(s + offset, -0.5F, (float) size + 0.5F) - 0.5F;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
    if (*icoord1 > (int) size - 1)
@@ -473,8 +465,7 @@ static void
 wrap_linear_unorm_clamp_to_edge(float s, unsigned size, int offset,
                                 int *icoord0, int *icoord1, float *w)
 {
-   float u = CLAMP(s + offset, +0.5F, (float) size - 0.5F);
-   u -= 0.5F;
+   const float u = CLAMP(s + offset, +0.5F, (float) size - 0.5F) - 0.5F;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
    if (*icoord1 > (int) size - 1)
@@ -489,7 +480,7 @@ wrap_linear_unorm_clamp_to_edge(float s, unsigned size, int offset,
 static inline int
 coord_to_layer(float coord, unsigned first_layer, unsigned last_layer)
 {
-   int c = util_ifloor(coord + 0.5F);
+   const int c = util_ifloor(coord + 0.5F);
    return CLAMP(c, (int)first_layer, (int)last_layer);
 }
 
@@ -505,9 +496,9 @@ compute_lambda_1d(const struct sp_sampler_view *sview,
                   const float p[TGSI_QUAD_SIZE])
 {
    const struct pipe_resource *texture = sview->base.texture;
-   float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
-   float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
-   float rho = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
+   const float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
+   const float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
+   const float rho = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
 
    return util_fast_log2(rho);
 }
@@ -520,13 +511,13 @@ compute_lambda_2d(const struct sp_sampler_view *sview,
                   const float p[TGSI_QUAD_SIZE])
 {
    const struct pipe_resource *texture = sview->base.texture;
-   float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
-   float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
-   float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
-   float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
-   float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
-   float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
-   float rho  = MAX2(maxx, maxy);
+   const float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
+   const float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
+   const float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
+   const float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
+   const float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
+   const float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
+   const float rho  = MAX2(maxx, maxy);
 
    return util_fast_log2(rho);
 }
@@ -539,19 +530,16 @@ compute_lambda_3d(const struct sp_sampler_view *sview,
                   const float p[TGSI_QUAD_SIZE])
 {
    const struct pipe_resource *texture = sview->base.texture;
-   float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
-   float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
-   float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
-   float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
-   float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
-   float dpdy = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
-   float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
-   float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
-   float maxz = MAX2(dpdx, dpdy) * u_minify(texture->depth0, sview->base.u.tex.first_level);
-   float rho;
-
-   rho = MAX2(maxx, maxy);
-   rho = MAX2(rho, maxz);
+   const float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
+   const float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
+   const float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
+   const float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
+   const float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
+   const float dpdy = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
+   const float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
+   const float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
+   const float maxz = MAX2(dpdx, dpdy) * u_minify(texture->depth0, sview->base.u.tex.first_level);
+   const float rho = MAX3(maxx, maxy, maxz);
 
    return util_fast_log2(rho);
 }
@@ -609,7 +597,7 @@ get_texel_2d(const struct sp_sampler_view *sp_sview,
              union tex_tile_address addr, int x, int y)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   unsigned level = addr.bits.level;
+   const unsigned level = addr.bits.level;
 
    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
        y < 0 || y >= (int) u_minify(texture->height0, level)) {
@@ -852,7 +840,7 @@ get_texel_3d(const struct sp_sampler_view *sp_sview,
              union tex_tile_address addr, int x, int y, int z)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   unsigned level = addr.bits.level;
+   const unsigned level = addr.bits.level;
 
    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
        y < 0 || y >= (int) u_minify(texture->height0, level) ||
@@ -872,7 +860,7 @@ get_texel_1d_array(const struct sp_sampler_view *sp_sview,
                    union tex_tile_address addr, int x, int y)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   unsigned level = addr.bits.level;
+   const unsigned level = addr.bits.level;
 
    if (x < 0 || x >= (int) u_minify(texture->width0, level)) {
       return sp_samp->base.border_color.f;
@@ -890,7 +878,7 @@ get_texel_2d_array(const struct sp_sampler_view *sp_sview,
                    union tex_tile_address addr, int x, int y, int layer)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   unsigned level = addr.bits.level;
+   const unsigned level = addr.bits.level;
 
    assert(layer < (int) texture->array_size);
    assert(layer >= 0);
@@ -911,7 +899,7 @@ get_texel_cube_seamless(const struct sp_sampler_view *sp_sview,
                         float *corner, int layer, unsigned face)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   unsigned level = addr.bits.level;
+   const unsigned level = addr.bits.level;
    int new_x, new_y, max_x;
 
    max_x = (int) u_minify(texture->width0, level);
@@ -966,7 +954,7 @@ get_texel_cube_array(const struct sp_sampler_view *sp_sview,
                      union tex_tile_address addr, int x, int y, int layer)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   unsigned level = addr.bits.level;
+   const unsigned level = addr.bits.level;
 
    assert(layer < (int) texture->array_size);
    assert(layer >= 0);
@@ -1022,24 +1010,24 @@ img_filter_2d_linear_repeat_POT(const struct sp_sampler_view *sp_sview,
                                 const struct img_filter_args *args,
                                 float *rgba)
 {
-   unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
-   unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
-   int xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
-   int ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
+   const unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
+   const unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
+   const int xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
+   const int ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
    union tex_tile_address addr;
    int c;
 
-   float u = (args->s * xpot - 0.5F) + args->offset[0];
-   float v = (args->t * ypot - 0.5F) + args->offset[1];
+   const float u = (args->s * xpot - 0.5F) + args->offset[0];
+   const float v = (args->t * ypot - 0.5F) + args->offset[1];
 
-   int uflr = util_ifloor(u);
-   int vflr = util_ifloor(v);
+   const int uflr = util_ifloor(u);
+   const int vflr = util_ifloor(v);
 
-   float xw = u - (float)uflr;
-   float yw = v - (float)vflr;
+   const float xw = u - (float)uflr;
+   const float yw = v - (float)vflr;
 
-   int x0 = uflr & (xpot - 1);
-   int y0 = vflr & (ypot - 1);
+   const int x0 = uflr & (xpot - 1);
+   const int y0 = vflr & (ypot - 1);
 
    const float *tx[4];
       
@@ -1052,8 +1040,8 @@ img_filter_2d_linear_repeat_POT(const struct sp_sampler_view *sp_sview,
       get_texel_quad_2d_no_border_single_tile(sp_sview, addr, x0, y0, tx);
    }
    else {
-      unsigned x1 = (x0 + 1) & (xpot - 1);
-      unsigned y1 = (y0 + 1) & (ypot - 1);
+      const unsigned x1 = (x0 + 1) & (xpot - 1);
+      const unsigned y1 = (y0 + 1) & (ypot - 1);
       get_texel_quad_2d_no_border(sp_sview, addr, x0, y0, x1, y1, tx);
    }
 
@@ -1076,20 +1064,20 @@ img_filter_2d_nearest_repeat_POT(const struct sp_sampler_view *sp_sview,
                                  const struct img_filter_args *args,
                                  float rgba[TGSI_QUAD_SIZE])
 {
-   unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
-   unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
+   const unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
+   const unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
    const float *out;
    union tex_tile_address addr;
    int c;
 
-   float u = args->s * xpot + args->offset[0];
-   float v = args->t * ypot + args->offset[1];
+   const float u = args->s * xpot + args->offset[0];
+   const float v = args->t * ypot + args->offset[1];
 
-   int uflr = util_ifloor(u);
-   int vflr = util_ifloor(v);
+   const int uflr = util_ifloor(u);
+   const int vflr = util_ifloor(v);
 
-   int x0 = uflr & (xpot - 1);
-   int y0 = vflr & (ypot - 1);
+   const int x0 = uflr & (xpot - 1);
+   const int y0 = vflr & (ypot - 1);
 
    addr.value = 0;
    addr.bits.level = args->level;
@@ -1110,13 +1098,13 @@ img_filter_2d_nearest_clamp_POT(const struct sp_sampler_view *sp_sview,
                                 const struct img_filter_args *args,
                                 float rgba[TGSI_QUAD_SIZE])
 {
-   unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
-   unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
+   const unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
+   const unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
    union tex_tile_address addr;
    int c;
 
-   float u = args->s * xpot + args->offset[0];
-   float v = args->t * ypot + args->offset[1];
+   const float u = args->s * xpot + args->offset[0];
+   const float v = args->t * ypot + args->offset[1];
 
    int x0, y0;
    const float *out;
@@ -1153,14 +1141,12 @@ img_filter_1d_nearest(const struct sp_sampler_view *sp_sview,
                       float rgba[TGSI_QUAD_SIZE])
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width;
+   const int width = u_minify(texture->width0, args->level);
    int x;
    union tex_tile_address addr;
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-
    assert(width > 0);
 
    addr.value = 0;
@@ -1185,22 +1171,20 @@ img_filter_1d_array_nearest(const struct sp_sampler_view *sp_sview,
                             float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width;
-   int x, layer;
+   const int width = u_minify(texture->width0, args->level);
+   const int layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
+                                    sp_sview->base.u.tex.last_layer);
+   int x;
    union tex_tile_address addr;
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-
    assert(width > 0);
 
    addr.value = 0;
    addr.bits.level = args->level;
 
    sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
-   layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
-                          sp_sview->base.u.tex.last_layer);
 
    out = get_texel_1d_array(sp_sview, sp_samp, addr, x, layer);
    for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1219,15 +1203,13 @@ img_filter_2d_nearest(const struct sp_sampler_view *sp_sview,
                       float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
    int x, y;
    union tex_tile_address addr;
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-
    assert(width > 0);
    assert(height > 0);
  
@@ -1254,15 +1236,15 @@ img_filter_2d_array_nearest(const struct sp_sampler_view *sp_sview,
                             float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height;
-   int x, y, layer;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
+   const int layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
+                                    sp_sview->base.u.tex.last_layer);
+   int x, y;
    union tex_tile_address addr;
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-
    assert(width > 0);
    assert(height > 0);
  
@@ -1271,8 +1253,6 @@ img_filter_2d_array_nearest(const struct sp_sampler_view *sp_sview,
 
    sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
    sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
-   layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
-                          sp_sview->base.u.tex.last_layer);
 
    out = get_texel_2d_array(sp_sview, sp_samp, addr, x, y, layer);
    for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1291,15 +1271,14 @@ img_filter_cube_nearest(const struct sp_sampler_view *sp_sview,
                         float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height;
-   int x, y, layerface;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
+   const int layerface = args->face_id + sp_sview->base.u.tex.first_layer;
+   int x, y;
    union tex_tile_address addr;
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-
    assert(width > 0);
    assert(height > 0);
  
@@ -1319,7 +1298,6 @@ img_filter_cube_nearest(const struct sp_sampler_view *sp_sview,
       sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
    }
 
-   layerface = args->face_id + sp_sview->base.u.tex.first_layer;
    out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
    for (c = 0; c < TGSI_QUAD_SIZE; c++)
       rgba[TGSI_NUM_CHANNELS*c] = out[c];
@@ -1336,15 +1314,17 @@ img_filter_cube_array_nearest(const struct sp_sampler_view *sp_sview,
                               float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height;
-   int x, y, layerface;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
+   const int layerface =
+      coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
+                     sp_sview->base.u.tex.first_layer,
+                     sp_sview->base.u.tex.last_layer - 5) + args->face_id;
+   int x, y;
    union tex_tile_address addr;
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-
    assert(width > 0);
    assert(height > 0);
  
@@ -1353,9 +1333,6 @@ img_filter_cube_array_nearest(const struct sp_sampler_view *sp_sview,
 
    sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
    sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
-   layerface = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
-                              sp_sview->base.u.tex.first_layer,
-                              sp_sview->base.u.tex.last_layer - 5) + args->face_id;
 
    out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
    for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1373,16 +1350,14 @@ img_filter_3d_nearest(const struct sp_sampler_view *sp_sview,
                       float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height, depth;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
+   const int depth = u_minify(texture->depth0, args->level);
    int x, y, z;
    union tex_tile_address addr;
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-   depth = u_minify(texture->depth0, args->level);
-
    assert(width > 0);
    assert(height > 0);
    assert(depth > 0);
@@ -1407,15 +1382,13 @@ img_filter_1d_linear(const struct sp_sampler_view *sp_sview,
                      float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width;
+   const int width = u_minify(texture->width0, args->level);
    int x0, x1;
    float xw; /* weights */
    union tex_tile_address addr;
    const float *tx0, *tx1;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-
    assert(width > 0);
 
    addr.value = 0;
@@ -1439,23 +1412,21 @@ img_filter_1d_array_linear(const struct sp_sampler_view *sp_sview,
                            float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width;
-   int x0, x1, layer;
+   const int width = u_minify(texture->width0, args->level);
+   const int layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
+                                    sp_sview->base.u.tex.last_layer);
+   int x0, x1;
    float xw; /* weights */
    union tex_tile_address addr;
    const float *tx0, *tx1;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-
    assert(width > 0);
 
    addr.value = 0;
    addr.bits.level = args->level;
 
    sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
-   layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
-                          sp_sview->base.u.tex.last_layer);
 
    tx0 = get_texel_1d_array(sp_sview, sp_samp, addr, x0, layer);
    tx1 = get_texel_1d_array(sp_sview, sp_samp, addr, x1, layer);
@@ -1539,16 +1510,14 @@ img_filter_2d_linear(const struct sp_sampler_view *sp_sview,
                      float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
    int x0, y0, x1, y1;
    float xw, yw; /* weights */
    union tex_tile_address addr;
    const float *tx[4];
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-
    assert(width > 0);
    assert(height > 0);
 
@@ -1585,16 +1554,16 @@ img_filter_2d_array_linear(const struct sp_sampler_view *sp_sview,
                            float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height;
-   int x0, y0, x1, y1, layer;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
+   const int layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
+                                    sp_sview->base.u.tex.last_layer);
+   int x0, y0, x1, y1;
    float xw, yw; /* weights */
    union tex_tile_address addr;
    const float *tx[4];
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-
    assert(width > 0);
    assert(height > 0);
 
@@ -1603,8 +1572,6 @@ img_filter_2d_array_linear(const struct sp_sampler_view *sp_sview,
 
    sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
    sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
-   layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
-                          sp_sview->base.u.tex.last_layer);
 
    tx[0] = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y0, layer);
    tx[1] = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y0, layer);
@@ -1633,8 +1600,10 @@ img_filter_cube_linear(const struct sp_sampler_view *sp_sview,
                        float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height;
-   int x0, y0, x1, y1, layer;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
+   const int layer = sp_sview->base.u.tex.first_layer;
+   int x0, y0, x1, y1;
    float xw, yw; /* weights */
    union tex_tile_address addr;
    const float *tx[4];
@@ -1642,9 +1611,6 @@ img_filter_cube_linear(const struct sp_sampler_view *sp_sview,
          corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-
    assert(width > 0);
    assert(height > 0);
 
@@ -1665,8 +1631,6 @@ img_filter_cube_linear(const struct sp_sampler_view *sp_sview,
       sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
    }
 
-   layer = sp_sview->base.u.tex.first_layer;
-
    if (sp_samp->base.seamless_cube_map) {
       tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
       tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
@@ -1701,8 +1665,13 @@ img_filter_cube_array_linear(const struct sp_sampler_view *sp_sview,
                              float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height;
-   int x0, y0, x1, y1, layer;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
+   const int layer =
+      coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
+                     sp_sview->base.u.tex.first_layer,
+                     sp_sview->base.u.tex.last_layer - 5);
+   int x0, y0, x1, y1;
    float xw, yw; /* weights */
    union tex_tile_address addr;
    const float *tx[4];
@@ -1710,9 +1679,6 @@ img_filter_cube_array_linear(const struct sp_sampler_view *sp_sview,
          corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-
    assert(width > 0);
    assert(height > 0);
 
@@ -1733,10 +1699,6 @@ img_filter_cube_array_linear(const struct sp_sampler_view *sp_sview,
       sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
    }
 
-   layer = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
-                          sp_sview->base.u.tex.first_layer,
-                          sp_sview->base.u.tex.last_layer - 5);
-
    if (sp_samp->base.seamless_cube_map) {
       tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
       tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
@@ -1770,17 +1732,15 @@ img_filter_3d_linear(const struct sp_sampler_view *sp_sview,
                      float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height, depth;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
+   const int depth = u_minify(texture->depth0, args->level);
    int x0, x1, y0, y1, z0, z1;
    float xw, yw, zw; /* interpolation weights */
    union tex_tile_address addr;
    const float *tx00, *tx01, *tx02, *tx03, *tx10, *tx11, *tx12, *tx13;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-   depth = u_minify(texture->depth0, args->level);
-
    addr.value = 0;
    addr.bits.level = args->level;
 
@@ -1826,8 +1786,8 @@ compute_lod(const struct pipe_sampler_state *sampler,
             const float lod_in[TGSI_QUAD_SIZE],
             float lod[TGSI_QUAD_SIZE])
 {
-   float min_lod = sampler->min_lod;
-   float max_lod = sampler->max_lod;
+   const float min_lod = sampler->min_lod;
+   const float max_lod = sampler->max_lod;
    uint i;
 
    switch (control) {
@@ -1998,7 +1958,7 @@ mip_filter_linear(const struct sp_sampler_view *sp_sview,
    args.gather_comp = get_gather_component(lod_in);
 
    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-      int level0 = psview->u.tex.first_level + (int)lod[j];
+      const int level0 = psview->u.tex.first_level + (int)lod[j];
 
       args.s = s[j];
       args.t = t[j];
@@ -2093,7 +2053,7 @@ mip_filter_nearest(const struct sp_sampler_view *sp_sview,
          args.level = psview->u.tex.first_level;
          mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
       } else {
-         int level = psview->u.tex.first_level + (int)(lod[j] + 0.5F);
+         const int level = psview->u.tex.first_level + (int)(lod[j] + 0.5F);
          args.level = MIN2(level, (int)psview->u.tex.last_level);
          min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
       }
@@ -2202,7 +2162,7 @@ mip_filter_none_no_filter_select(const struct sp_sampler_view *sp_sview,
 /* For anisotropic filtering */
 #define WEIGHT_LUT_SIZE 1024
 
-static float *weightLut = NULL;
+static const float *weightLut = NULL;
 
 /**
  * Creates the look-up table used to speed-up EWA sampling
@@ -2212,14 +2172,15 @@ create_filter_table(void)
 {
    unsigned i;
    if (!weightLut) {
-      weightLut = (float *) MALLOC(WEIGHT_LUT_SIZE * sizeof(float));
+      float *lut = (float *) MALLOC(WEIGHT_LUT_SIZE * sizeof(float));
 
       for (i = 0; i < WEIGHT_LUT_SIZE; ++i) {
-         float alpha = 2;
-         float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
-         float weight = (float) exp(-alpha * r2);
-         weightLut[i] = weight;
+         const float alpha = 2;
+         const float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
+         const float weight = (float) exp(-alpha * r2);
+         lut[i] = weight;
       }
+      weightLut = lut;
    }
 }
 
@@ -2248,15 +2209,15 @@ img_filter_2d_ewa(const struct sp_sampler_view *sp_sview,
    const struct pipe_resource *texture = sp_sview->base.texture;
 
    // ??? Won't the image filters blow up if level is negative?
-   unsigned level0 = level > 0 ? level : 0;
-   float scaling = 1.0f / (1 << level0);
-   int width = u_minify(texture->width0, level0);
-   int height = u_minify(texture->height0, level0);
+   const unsigned level0 = level > 0 ? level : 0;
+   const float scaling = 1.0f / (1 << level0);
+   const int width = u_minify(texture->width0, level0);
+   const int height = u_minify(texture->height0, level0);
    struct img_filter_args args;
-   float ux = dudx * scaling;
-   float vx = dvdx * scaling;
-   float uy = dudy * scaling;
-   float vy = dvdy * scaling;
+   const float ux = dudx * scaling;
+   const float vx = dvdx * scaling;
+   const float uy = dudy * scaling;
+   const float vy = dvdy * scaling;
 
    /* compute ellipse coefficients to bound the region: 
     * A*x*x + B*x*y + C*y*y = F.
@@ -2270,29 +2231,15 @@ img_filter_2d_ewa(const struct sp_sampler_view *sp_sview,
    /* assert(F > 0.0); */
 
    /* Compute the ellipse's (u,v) bounding box in texture space */
-   float d = -B*B+4.0f*C*A;
-   float box_u = 2.0f / d * sqrtf(d*C*F); /* box_u -> half of bbox with   */
-   float box_v = 2.0f / d * sqrtf(A*d*F); /* box_v -> half of bbox height */
+   const float d = -B*B+4.0f*C*A;
+   const float box_u = 2.0f / d * sqrtf(d*C*F); /* box_u -> half of bbox with   */
+   const float box_v = 2.0f / d * sqrtf(A*d*F); /* box_v -> half of bbox height */
 
    float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
    float s_buffer[TGSI_QUAD_SIZE];
    float t_buffer[TGSI_QUAD_SIZE];
    float weight_buffer[TGSI_QUAD_SIZE];
-   unsigned buffer_next;
    int j;
-   float den; /* = 0.0F; */
-   float ddq;
-   float U; /* = u0 - tex_u; */
-   int v;
-
-   /* Scale ellipse formula to directly index the Filter Lookup Table.
-    * i.e. scale so that F = WEIGHT_LUT_SIZE-1
-    */
-   double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F;
-   A *= formScale;
-   B *= formScale;
-   C *= formScale;
-   /* F *= formScale; */ /* no need to scale F as we don't use it below here */
 
    /* For each quad, the du and dx values are the same and so the ellipse is
     * also the same. Note that texel/image access can only be performed using
@@ -2301,7 +2248,16 @@ img_filter_2d_ewa(const struct sp_sampler_view *sp_sview,
     * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is
     * full, then the pixel values are read from the image.
     */
-   ddq = 2 * A;
+   const float ddq = 2 * A;
+
+   /* Scale ellipse formula to directly index the Filter Lookup Table.
+    * i.e. scale so that F = WEIGHT_LUT_SIZE-1
+    */
+   const double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F;
+   A *= formScale;
+   B *= formScale;
+   C *= formScale;
+   /* F *= formScale; */ /* no need to scale F as we don't use it below here */
 
    args.level = level;
    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
@@ -2309,22 +2265,23 @@ img_filter_2d_ewa(const struct sp_sampler_view *sp_sview,
        * and incrementally update the value of Ax^2+Bxy*Cy^2; when this
        * value, q, is less than F, we're inside the ellipse
        */
-      float tex_u = -0.5F + s[j] * texture->width0 * scaling;
-      float tex_v = -0.5F + t[j] * texture->height0 * scaling;
+      const float tex_u = -0.5F + s[j] * texture->width0 * scaling;
+      const float tex_v = -0.5F + t[j] * texture->height0 * scaling;
 
-      int u0 = (int) floorf(tex_u - box_u);
-      int u1 = (int) ceilf(tex_u + box_u);
-      int v0 = (int) floorf(tex_v - box_v);
-      int v1 = (int) ceilf(tex_v + box_v);
+      const int u0 = (int) floorf(tex_u - box_u);
+      const int u1 = (int) ceilf(tex_u + box_u);
+      const int v0 = (int) floorf(tex_v - box_v);
+      const int v1 = (int) ceilf(tex_v + box_v);
+      const float U = u0 - tex_u;
 
       float num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
-      buffer_next = 0;
-      den = 0;
+      unsigned buffer_next = 0;
+      float den = 0;
+      int v;
       args.face_id = faces[j];
 
-      U = u0 - tex_u;
       for (v = v0; v <= v1; ++v) {
-         float V = v - tex_v;
+         const float V = v - tex_v;
          float dq = A * (2 * U + 1) + B * V;
          float q = (C * V + B * U) * V + A * U * U;
 
@@ -2338,7 +2295,7 @@ img_filter_2d_ewa(const struct sp_sampler_view *sp_sview,
                 * should not happen, though
                 */
                const int qClamped = q >= 0.0F ? q : 0;
-               float weight = weightLut[qClamped];
+               const float weight = weightLut[qClamped];
 
                weight_buffer[buffer_next] = weight;
                s_buffer[buffer_next] = u / ((float) width);
@@ -2458,12 +2415,12 @@ mip_filter_linear_aniso(const struct sp_sampler_view *sp_sview,
    float lambda;
    float lod[TGSI_QUAD_SIZE];
 
-   float s_to_u = u_minify(texture->width0, psview->u.tex.first_level);
-   float t_to_v = u_minify(texture->height0, psview->u.tex.first_level);
-   float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
-   float dudy = (s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]) * s_to_u;
-   float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
-   float dvdy = (t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]) * t_to_v;
+   const float s_to_u = u_minify(texture->width0, psview->u.tex.first_level);
+   const float t_to_v = u_minify(texture->height0, psview->u.tex.first_level);
+   const float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
+   const float dudy = (s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]) * s_to_u;
+   const float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
+   const float dvdy = (t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]) * t_to_v;
    struct img_filter_args args;
 
    if (filt_args->control == TGSI_SAMPLER_LOD_BIAS ||
@@ -2473,8 +2430,8 @@ mip_filter_linear_aniso(const struct sp_sampler_view *sp_sview,
       /* note: instead of working with Px and Py, we will use the 
        * squared length instead, to avoid sqrt.
        */
-      float Px2 = dudx * dudx + dvdx * dvdx;
-      float Py2 = dudy * dudy + dvdy * dvdy;
+      const float Px2 = dudx * dudx + dvdx * dvdx;
+      const float Py2 = dudy * dudy + dvdy * dvdy;
 
       float Pmax2;
       float Pmin2;
@@ -2585,7 +2542,7 @@ mip_filter_linear_2d_linear_repeat_POT(
    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
 
    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-      int level0 = psview->u.tex.first_level + (int)lod[j];
+      const int level0 = psview->u.tex.first_level + (int)lod[j];
       struct img_filter_args args;
       /* Catches both negative and large values of level0:
        */
@@ -2605,7 +2562,7 @@ mip_filter_linear_2d_linear_repeat_POT(
 
       }
       else {
-         float levelBlend = frac(lod[j]);
+         const float levelBlend = frac(lod[j]);
          float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
          int c;
 
@@ -2672,9 +2629,14 @@ sample_compare(const struct sp_sampler_view *sp_sview,
    int j, v;
    int k[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
    float pc[4];
-   const struct util_format_description *format_desc;
-   unsigned chan_type;
-   bool is_gather = (control == TGSI_SAMPLER_GATHER);
+   const struct util_format_description *format_desc =
+      util_format_description(sp_sview->base.format);
+   /* not entirely sure we couldn't end up with non-valid swizzle here */
+   const unsigned chan_type =
+      format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
+      format_desc->channel[format_desc->swizzle[0]].type :
+      UTIL_FORMAT_TYPE_FLOAT;
+   const bool is_gather = (control == TGSI_SAMPLER_GATHER);
 
    /**
     * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
@@ -2701,11 +2663,6 @@ sample_compare(const struct sp_sampler_view *sp_sview,
       pc[3] = p[3];
    }
 
-   format_desc = util_format_description(sp_sview->base.format);
-   /* not entirely sure we couldn't end up with non-valid swizzle here */
-   chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
-                  format_desc->channel[format_desc->swizzle[0]].type :
-                  UTIL_FORMAT_TYPE_FLOAT;
    if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
       /*
        * clamping is a result of conversion to texture format, hence
@@ -3280,24 +3237,24 @@ sp_get_texels(const struct sp_sampler_view *sp_sview,
    const struct pipe_resource *texture = sp_sview->base.texture;
    int j, c;
    const float *tx;
-   int width, height, depth;
-
-   addr.value = 0;
    /* TODO write a better test for LOD */
-   addr.bits.level = sp_sview->base.target == PIPE_BUFFER ? 0 :
-                        CLAMP(lod[0] + sp_sview->base.u.tex.first_level, 
-                              sp_sview->base.u.tex.first_level,
-                              sp_sview->base.u.tex.last_level);
+   const unsigned level =
+      sp_sview->base.target == PIPE_BUFFER ? 0 :
+      CLAMP(lod[0] + sp_sview->base.u.tex.first_level,
+            sp_sview->base.u.tex.first_level,
+            sp_sview->base.u.tex.last_level);
+   const int width = u_minify(texture->width0, level);
+   const int height = u_minify(texture->height0, level);
+   const int depth = u_minify(texture->depth0, level);
 
-   width = u_minify(texture->width0, addr.bits.level);
-   height = u_minify(texture->height0, addr.bits.level);
-   depth = u_minify(texture->depth0, addr.bits.level);
+   addr.value = 0;
+   addr.bits.level = level;
 
    switch (sp_sview->base.target) {
    case PIPE_BUFFER:
    case PIPE_TEXTURE_1D:
       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-         int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
+         const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
          tx = get_texel_2d_no_border(sp_sview, addr, x, 0);
          for (c = 0; c < 4; c++) {
             rgba[c][j] = tx[c];
@@ -3306,9 +3263,9 @@ sp_get_texels(const struct sp_sampler_view *sp_sview,
       break;
    case PIPE_TEXTURE_1D_ARRAY:
       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-         int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
-         int y = CLAMP(v_j[j], sp_sview->base.u.tex.first_layer,
-                       sp_sview->base.u.tex.last_layer);
+         const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
+         const int y = CLAMP(v_j[j], sp_sview->base.u.tex.first_layer,
+                             sp_sview->base.u.tex.last_layer);
          tx = get_texel_2d_no_border(sp_sview, addr, x, y);
          for (c = 0; c < 4; c++) {
             rgba[c][j] = tx[c];
@@ -3318,8 +3275,8 @@ sp_get_texels(const struct sp_sampler_view *sp_sview,
    case PIPE_TEXTURE_2D:
    case PIPE_TEXTURE_RECT:
       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-         int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
-         int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
+         const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
+         const int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
          tx = get_texel_2d_no_border(sp_sview, addr, x, y);
          for (c = 0; c < 4; c++) {
             rgba[c][j] = tx[c];
@@ -3328,10 +3285,10 @@ sp_get_texels(const struct sp_sampler_view *sp_sview,
       break;
    case PIPE_TEXTURE_2D_ARRAY:
       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-         int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
-         int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
-         int layer = CLAMP(v_k[j], sp_sview->base.u.tex.first_layer,
-                           sp_sview->base.u.tex.last_layer);
+         const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
+         const int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
+         const int layer = CLAMP(v_k[j], sp_sview->base.u.tex.first_layer,
+                                 sp_sview->base.u.tex.last_layer);
          tx = get_texel_3d_no_border(sp_sview, addr, x, y, layer);
          for (c = 0; c < 4; c++) {
             rgba[c][j] = tx[c];
@@ -3480,7 +3437,7 @@ softpipe_create_sampler_view(struct pipe_context *pipe,
                              const struct pipe_sampler_view *templ)
 {
    struct sp_sampler_view *sview = CALLOC_STRUCT(sp_sampler_view);
-   struct softpipe_resource *spr = (struct softpipe_resource *)resource;
+   const struct softpipe_resource *spr = (struct softpipe_resource *)resource;
 
    if (sview) {
       struct pipe_sampler_view *view = &sview->base;
-- 
cgit v1.2.3


From 0337a9b2af6cb72eb2ca3ee2e17d5e06ea7aeacd Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 11 Sep 2015 03:11:43 +0100
Subject: r600: define some values for the fetch constant offsets.

This just puts these in one place and #defines them.

Reviewed-by: Glenn Kennard <glenn.kennard@gmail.com>
Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r600/evergreen_state.c | 30 +++++++++++++++++-------------
 src/gallium/drivers/r600/evergreend.h      |  8 ++++++++
 src/gallium/drivers/r600/r600_state.c      | 25 +++++++++++--------------
 src/gallium/drivers/r600/r600d.h           | 11 +++++++++++
 4 files changed, 47 insertions(+), 27 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 0c54a3fe953..52f4dc81d9f 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1888,12 +1888,12 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 
 static void evergreen_fs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
 {
-	evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state, 992, 0);
+	evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state, EG_FETCH_CONSTANTS_OFFSET_FS, 0);
 }
 
 static void evergreen_cs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
 {
-	evergreen_emit_vertex_buffers(rctx, &rctx->cs_vertex_buffer_state, 816,
+	evergreen_emit_vertex_buffers(rctx, &rctx->cs_vertex_buffer_state, EG_FETCH_CONSTANTS_OFFSET_CS,
 				      RADEON_CP_PACKET3_COMPUTE_MODE);
 }
 
@@ -1963,7 +1963,8 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 
 static void evergreen_emit_vs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
 {
-	evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX], 176,
+	evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX],
+					EG_FETCH_CONSTANTS_OFFSET_VS,
 					R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
 					R_028980_ALU_CONST_CACHE_VS_0,
 					0 /* PKT3 flags */);
@@ -1971,7 +1972,8 @@ static void evergreen_emit_vs_constant_buffers(struct r600_context *rctx, struct
 
 static void evergreen_emit_gs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
 {
-	evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY], 336,
+	evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY],
+					EG_FETCH_CONSTANTS_OFFSET_GS,
 					R_0281C0_ALU_CONST_BUFFER_SIZE_GS_0,
 					R_0289C0_ALU_CONST_CACHE_GS_0,
 					0 /* PKT3 flags */);
@@ -1979,15 +1981,17 @@ static void evergreen_emit_gs_constant_buffers(struct r600_context *rctx, struct
 
 static void evergreen_emit_ps_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
 {
-	evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT], 0,
-				       R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
-				       R_028940_ALU_CONST_CACHE_PS_0,
-				       0 /* PKT3 flags */);
+	evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT],
+					EG_FETCH_CONSTANTS_OFFSET_PS,
+					R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
+					R_028940_ALU_CONST_CACHE_PS_0,
+					0 /* PKT3 flags */);
 }
 
 static void evergreen_emit_cs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
 {
-	evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_COMPUTE], 816,
+	evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_COMPUTE],
+					EG_FETCH_CONSTANTS_OFFSET_CS,
 					R_028FC0_ALU_CONST_BUFFER_SIZE_LS_0,
 					R_028F40_ALU_CONST_CACHE_LS_0,
 					RADEON_CP_PACKET3_COMPUTE_MODE);
@@ -2031,25 +2035,25 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
 static void evergreen_emit_vs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
 {
 	evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views,
-	                             176 + R600_MAX_CONST_BUFFERS, 0);
+	                             EG_FETCH_CONSTANTS_OFFSET_VS + R600_MAX_CONST_BUFFERS, 0);
 }
 
 static void evergreen_emit_gs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
 {
 	evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views,
-	                             336 + R600_MAX_CONST_BUFFERS, 0);
+	                             EG_FETCH_CONSTANTS_OFFSET_GS + R600_MAX_CONST_BUFFERS, 0);
 }
 
 static void evergreen_emit_ps_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
 {
 	evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views,
-	                             R600_MAX_CONST_BUFFERS, 0);
+	                             EG_FETCH_CONSTANTS_OFFSET_PS + R600_MAX_CONST_BUFFERS, 0);
 }
 
 static void evergreen_emit_cs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
 {
 	evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views,
-	                             816 + 2, RADEON_CP_PACKET3_COMPUTE_MODE);
+	                             EG_FETCH_CONSTANTS_OFFSET_CS + 2, RADEON_CP_PACKET3_COMPUTE_MODE);
 }
 
 static void evergreen_emit_sampler_states(struct r600_context *rctx,
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index ad6ad434b78..937ffcbddb9 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -2496,4 +2496,12 @@
 #define    DMA_PACKET_CONSTANT_FILL             0xd
 #define    DMA_PACKET_NOP                       0xf
 
+#define EG_FETCH_CONSTANTS_OFFSET_PS 0
+#define EG_FETCH_CONSTANTS_OFFSET_VS 176
+#define EG_FETCH_CONSTANTS_OFFSET_GS 336
+#define EG_FETCH_CONSTANTS_OFFSET_HS 496
+#define EG_FETCH_CONSTANTS_OFFSET_LS 656
+#define EG_FETCH_CONSTANTS_OFFSET_CS 816
+#define EG_FETCH_CONSTANTS_OFFSET_FS 992
+
 #endif
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 3464c382dc6..7084c5f359b 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1705,9 +1705,9 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom
 
 		offset = vb->buffer_offset;
 
-		/* fetch resources start at index 320 */
+		/* fetch resources start at index 320 (OFFSET_FS) */
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
-		radeon_emit(cs, (320 + buffer_index) * 7);
+		radeon_emit(cs, (R600_FETCH_CONSTANTS_OFFSET_FS + buffer_index) * 7);
 		radeon_emit(cs, offset); /* RESOURCEi_WORD0 */
 		radeon_emit(cs, rbuffer->b.b.width0 - offset - 1); /* RESOURCEi_WORD1 */
 		radeon_emit(cs, /* RESOURCEi_WORD2 */
@@ -1778,21 +1778,24 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
 
 static void r600_emit_vs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
 {
-	r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX], 160,
+	r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX],
+				   R600_FETCH_CONSTANTS_OFFSET_VS,
 				   R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
 				   R_028980_ALU_CONST_CACHE_VS_0);
 }
 
 static void r600_emit_gs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
 {
-	r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY], 336,
+	r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY],
+				   R600_FETCH_CONSTANTS_OFFSET_GS,
 				   R_0281C0_ALU_CONST_BUFFER_SIZE_GS_0,
 				   R_0289C0_ALU_CONST_CACHE_GS_0);
 }
 
 static void r600_emit_ps_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
 {
-	r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT], 0,
+	r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT],
+				   R600_FETCH_CONSTANTS_OFFSET_PS,
 				   R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
 				   R_028940_ALU_CONST_CACHE_PS_0);
 }
@@ -1829,26 +1832,20 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
 	state->dirty_mask = 0;
 }
 
-/* Resource IDs:
- *   PS: 0   .. +160
- *   VS: 160 .. +160
- *   FS: 320 .. +16
- *   GS: 336 .. +160
- */
 
 static void r600_emit_vs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
 {
-	r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views, 160 + R600_MAX_CONST_BUFFERS);
+	r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views, R600_FETCH_CONSTANTS_OFFSET_VS + R600_MAX_CONST_BUFFERS);
 }
 
 static void r600_emit_gs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
 {
-	r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views, 336 + R600_MAX_CONST_BUFFERS);
+	r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views, R600_FETCH_CONSTANTS_OFFSET_GS + R600_MAX_CONST_BUFFERS);
 }
 
 static void r600_emit_ps_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
 {
-	r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views, R600_MAX_CONST_BUFFERS);
+	r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views, R600_FETCH_CONSTANTS_OFFSET_PS + R600_MAX_CONST_BUFFERS);
 }
 
 static void r600_emit_sampler_states(struct r600_context *rctx,
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 3c08ba5906a..6bba88cb574 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -3783,4 +3783,15 @@
 #define DMA_PACKET_CONSTANT_FILL	0xd /* 7xx only */
 #define DMA_PACKET_NOP			0xf
 
+
+/* Resource IDs:
+ *   PS: 0   .. +160
+ *   VS: 160 .. +160
+ *   FS: 320 .. +16
+ *   GS: 336 .. +160
+ */
+#define R600_FETCH_CONSTANTS_OFFSET_PS 0
+#define R600_FETCH_CONSTANTS_OFFSET_VS 160
+#define R600_FETCH_CONSTANTS_OFFSET_FS 320
+#define R600_FETCH_CONSTANTS_OFFSET_GS 336
 #endif
-- 
cgit v1.2.3


From f9caabe8f1bff86d19b53d9ecba5c72b238d9e23 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 11 Sep 2015 04:43:53 +0100
Subject: r600g: lower number of driver const buffers

I'm going to want a driver constant buffer for tessellation to coordinate
LDS storage, so before tackling that I decided to merge the
clip/samplepos and texture info buffers into one. That frees up
a spare driver buffer slot that I can steal.

This creates a single constant buffer between the two, with
clip/samplepos taking up a reserved 128 bytes at the start.

Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
Reviewed-by: Glenn Kennard <glenn.kennard@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r600/r600_pipe.h         |  28 +++--
 src/gallium/drivers/r600/r600_shader.c       |  21 ++--
 src/gallium/drivers/r600/r600_state_common.c | 152 ++++++++++++++++++---------
 3 files changed, 131 insertions(+), 70 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 25df831339c..d0774de8573 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -63,13 +63,15 @@
 #define R600_TRACE_CS_DWORDS		7
 
 #define R600_MAX_USER_CONST_BUFFERS 13
-#define R600_MAX_DRIVER_CONST_BUFFERS 3
+#define R600_MAX_DRIVER_CONST_BUFFERS 2
 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
 
 /* start driver buffers after user buffers */
-#define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
-#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
-#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
+#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
+#define R600_UCP_SIZE (4*4*8)
+#define R600_BUFFER_INFO_OFFSET (R600_UCP_SIZE)
+
+#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
 /* Currently R600_MAX_CONST_BUFFERS just fits on the hw, which has a limit
  * of 16 const buffers.
  * UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they can use the same id.
@@ -77,8 +79,6 @@
  * In order to support d3d 11 mandated minimum of 15 user const buffers
  * we'd have to squash all use cases into one driver buffer.
  */
-#define R600_SAMPLE_POSITIONS_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
-
 #define R600_MAX_CONST_BUFFER_SIZE (4096 * sizeof(float[4]))
 
 #ifdef PIPE_ARCH_BIG_ENDIAN
@@ -356,11 +356,15 @@ struct r600_textures_info {
 	struct r600_samplerview_state	views;
 	struct r600_sampler_states	states;
 	bool				is_array_sampler[NUM_TEX_UNITS];
+};
 
-	/* cube array txq workaround */
-	uint32_t			*txq_constants;
-	/* buffer related workarounds */
-	uint32_t			*buffer_constants;
+struct r600_shader_driver_constants_info {
+	/* currently 128 bytes for UCP/samplepos + sampler buffer constants */
+	uint32_t			*constants;
+	uint32_t			alloc_size;
+	bool				vs_ucp_dirty;
+	bool				texture_const_dirty;
+	bool				ps_sample_pos_dirty;
 };
 
 struct r600_constbuf_state
@@ -472,6 +476,9 @@ struct r600_context {
 	struct r600_gs_rings_state	gs_rings;
 	struct r600_constbuf_state	constbuf_state[PIPE_SHADER_TYPES];
 	struct r600_textures_info	samplers[PIPE_SHADER_TYPES];
+
+	struct r600_shader_driver_constants_info driver_consts[PIPE_SHADER_TYPES];
+
 	/** Vertex buffers for fetch shaders */
 	struct r600_vertexbuf_state	vertex_buffer_state;
 	/** Vertex buffers for compute shaders */
@@ -498,6 +505,7 @@ struct r600_context {
 
 	void				*sb_context;
 	struct r600_isa		*isa;
+	float sample_positions[4 * 16];
 };
 
 static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index f2c9e169f74..93b1bf7d5b4 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -60,6 +60,7 @@ issued in the w slot as well.
 The compiler must issue the source argument to slots z, y, and x
 */
 
+#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
 static int r600_shader_from_tgsi(struct r600_context *rctx,
 				 struct r600_pipe_shader *pipeshader,
 				 union r600_shader_key key);
@@ -947,7 +948,7 @@ static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_
 
 	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
 	vtx.op = FETCH_OP_VFETCH;
-	vtx.buffer_id = R600_SAMPLE_POSITIONS_CONST_BUFFER;
+	vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
 	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
 	if (sample_id == NULL) {
 		vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w;
@@ -2307,7 +2308,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 				alu.src[0].chan = j;
 
 				alu.src[1].sel = 512 + i;
-				alu.src[1].kc_bank = R600_UCP_CONST_BUFFER;
+				alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
 				alu.src[1].chan = j;
 
 				alu.dst.sel = clipdist_temp[oreg];
@@ -5499,7 +5500,8 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l
 		alu.src[0].sel = vtx.dst_gpr;
 		alu.src[0].chan = i;
 
-		alu.src[1].sel = 512 + (id * 2);
+		alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL;
+		alu.src[1].sel += (id * 2);
 		alu.src[1].chan = i % 4;
 		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
 
@@ -5521,7 +5523,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l
 		alu.src[0].sel = vtx.dst_gpr;
 		alu.src[0].chan = 3;
 
-		alu.src[1].sel = 512 + (id * 2) + 1;
+		alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1;
 		alu.src[1].chan = 0;
 		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
 
@@ -5542,14 +5544,14 @@ static int r600_do_buffer_txq(struct r600_shader_ctx *ctx)
 
 	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.op = ALU_OP1_MOV;
-
+	alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
 	if (ctx->bc->chip_class >= EVERGREEN) {
 		/* channel 0 or 2 of each word */
-		alu.src[0].sel = 512 + (id / 2);
+		alu.src[0].sel += (id / 2);
 		alu.src[0].chan = (id % 2) * 2;
 	} else {
 		/* r600 we have them at channel 2 of the second dword */
-		alu.src[0].sel = 512 + (id * 2) + 1;
+		alu.src[0].sel += (id * 2) + 1;
 		alu.src[0].chan = 1;
 	}
 	alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
@@ -6207,13 +6209,14 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.op = ALU_OP1_MOV;
 
+		alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
 		if (ctx->bc->chip_class >= EVERGREEN) {
 			/* channel 1 or 3 of each word */
-			alu.src[0].sel = 512 + (id / 2);
+			alu.src[0].sel += (id / 2);
 			alu.src[0].chan = ((id % 2) * 2) + 1;
 		} else {
 			/* r600 we have them at channel 2 of the second dword */
-			alu.src[0].sel = 512 + (id * 2) + 1;
+			alu.src[0].sel += (id * 2) + 1;
 			alu.src[0].chan = 2;
 		}
 		alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index ae1341187cb..21c89dc0b61 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -240,17 +240,10 @@ static void r600_set_clip_state(struct pipe_context *ctx,
 				const struct pipe_clip_state *state)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
-	struct pipe_constant_buffer cb;
 
 	rctx->clip_state.state = *state;
 	r600_mark_atom_dirty(rctx, &rctx->clip_state.atom);
-
-	cb.buffer = NULL;
-	cb.user_buffer = state->ucp;
-	cb.buffer_offset = 0;
-	cb.buffer_size = 4*4*8;
-	ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, R600_UCP_CONST_BUFFER, &cb);
-	pipe_resource_reference(&cb.buffer, NULL);
+	rctx->driver_consts[PIPE_SHADER_VERTEX].vs_ucp_dirty = true;
 }
 
 static void r600_set_stencil_ref(struct pipe_context *ctx,
@@ -1053,6 +1046,74 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask
 	r600_mark_atom_dirty(rctx, &rctx->sample_mask.atom);
 }
 
+static void r600_update_driver_const_buffers(struct r600_context *rctx)
+{
+	int sh, size;;
+	void *ptr;
+	struct pipe_constant_buffer cb;
+	for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
+		struct r600_shader_driver_constants_info *info = &rctx->driver_consts[sh];
+		if (!info->vs_ucp_dirty &&
+		    !info->texture_const_dirty &&
+		    !info->ps_sample_pos_dirty)
+			continue;
+
+		ptr = info->constants;
+		size = info->alloc_size;
+		if (info->vs_ucp_dirty) {
+			assert(sh == PIPE_SHADER_VERTEX);
+			if (!size) {
+				ptr = rctx->clip_state.state.ucp;
+				size = R600_UCP_SIZE;
+			} else {
+				memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE);
+			}
+			info->vs_ucp_dirty = false;
+		}
+
+		if (info->ps_sample_pos_dirty) {
+			assert(sh == PIPE_SHADER_FRAGMENT);
+			if (!size) {
+				ptr = rctx->sample_positions;
+				size = R600_UCP_SIZE;
+			} else {
+				memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE);
+			}
+			info->ps_sample_pos_dirty = false;
+		}
+
+		if (info->texture_const_dirty) {
+			assert (ptr);
+			assert (size);
+			if (sh == PIPE_SHADER_VERTEX)
+				memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE);
+			if (sh == PIPE_SHADER_FRAGMENT)
+				memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE);
+		}
+		info->texture_const_dirty = false;
+
+		cb.buffer = NULL;
+		cb.user_buffer = ptr;
+		cb.buffer_offset = 0;
+		cb.buffer_size = size;
+		rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, &cb);
+		pipe_resource_reference(&cb.buffer, NULL);
+	}
+}
+
+static void *r600_alloc_buf_consts(struct r600_context *rctx, int shader_type,
+				   int array_size, uint32_t *base_offset)
+{
+	struct r600_shader_driver_constants_info *info = &rctx->driver_consts[shader_type];
+	if (array_size + R600_UCP_SIZE > info->alloc_size) {
+		info->constants = realloc(info->constants, array_size + R600_UCP_SIZE);
+		info->alloc_size = array_size + R600_UCP_SIZE;
+	}
+	memset(info->constants + (R600_UCP_SIZE / 4), 0, array_size);
+	info->texture_const_dirty = true;
+	*base_offset = R600_UCP_SIZE;
+	return info->constants;
+}
 /*
  * On r600/700 hw we don't have vertex fetch swizzle, though TBO
  * doesn't require full swizzles it does need masking and setting alpha
@@ -1067,9 +1128,9 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty
 	struct r600_textures_info *samplers = &rctx->samplers[shader_type];
 	int bits;
 	uint32_t array_size;
-	struct pipe_constant_buffer cb;
 	int i, j;
-
+	uint32_t *constants;
+	uint32_t base_offset;
 	if (!samplers->views.dirty_buffer_constants)
 		return;
 
@@ -1077,38 +1138,33 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty
 
 	bits = util_last_bit(samplers->views.enabled_mask);
 	array_size = bits * 8 * sizeof(uint32_t) * 4;
-	samplers->buffer_constants = realloc(samplers->buffer_constants, array_size);
-	memset(samplers->buffer_constants, 0, array_size);
+
+	constants = r600_alloc_buf_consts(rctx, shader_type, array_size, &base_offset);
+
 	for (i = 0; i < bits; i++) {
 		if (samplers->views.enabled_mask & (1 << i)) {
-			int offset = i * 8;
+			int offset = (base_offset / 4) + i * 8;
 			const struct util_format_description *desc;
 			desc = util_format_description(samplers->views.views[i]->base.format);
 
 			for (j = 0; j < 4; j++)
 				if (j < desc->nr_channels)
-					samplers->buffer_constants[offset+j] = 0xffffffff;
+					constants[offset+j] = 0xffffffff;
 				else
-					samplers->buffer_constants[offset+j] = 0x0;
+					constants[offset+j] = 0x0;
 			if (desc->nr_channels < 4) {
 				if (desc->channel[0].pure_integer)
-					samplers->buffer_constants[offset+4] = 1;
+					constants[offset+4] = 1;
 				else
-					samplers->buffer_constants[offset+4] = fui(1.0);
+					constants[offset+4] = fui(1.0);
 			} else
-				samplers->buffer_constants[offset + 4] = 0;
+				constants[offset + 4] = 0;
 
-			samplers->buffer_constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
-			samplers->buffer_constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6;
+			constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+			constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6;
 		}
 	}
 
-	cb.buffer = NULL;
-	cb.user_buffer = samplers->buffer_constants;
-	cb.buffer_offset = 0;
-	cb.buffer_size = array_size;
-	rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb);
-	pipe_resource_reference(&cb.buffer, NULL);
 }
 
 /* On evergreen we store two values
@@ -1120,9 +1176,9 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type
 	struct r600_textures_info *samplers = &rctx->samplers[shader_type];
 	int bits;
 	uint32_t array_size;
-	struct pipe_constant_buffer cb;
 	int i;
-
+	uint32_t *constants;
+	uint32_t base_offset;
 	if (!samplers->views.dirty_buffer_constants)
 		return;
 
@@ -1130,45 +1186,37 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type
 
 	bits = util_last_bit(samplers->views.enabled_mask);
 	array_size = bits * 2 * sizeof(uint32_t) * 4;
-	samplers->buffer_constants = realloc(samplers->buffer_constants, array_size);
-	memset(samplers->buffer_constants, 0, array_size);
+
+	constants = r600_alloc_buf_consts(rctx, shader_type, array_size,
+					  &base_offset);
+
 	for (i = 0; i < bits; i++) {
 		if (samplers->views.enabled_mask & (1 << i)) {
-			uint32_t offset = i * 2;
-			samplers->buffer_constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
-			samplers->buffer_constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6;
+			uint32_t offset = (base_offset / 4) + i * 2;
+			constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+			constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6;
 		}
 	}
-
-	cb.buffer = NULL;
-	cb.user_buffer = samplers->buffer_constants;
-	cb.buffer_offset = 0;
-	cb.buffer_size = array_size;
-	rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb);
-	pipe_resource_reference(&cb.buffer, NULL);
 }
 
 /* set sample xy locations as array of fragment shader constants */
 void r600_set_sample_locations_constant_buffer(struct r600_context *rctx)
 {
-	struct pipe_constant_buffer constbuf = {0};
-	float values[4*16] = {0.0f};
 	int i;
 	struct pipe_context *ctx = &rctx->b.b;
 
-	assert(rctx->framebuffer.nr_samples <= Elements(values)/4);
+	assert(rctx->framebuffer.nr_samples < R600_UCP_SIZE);
+	assert(rctx->framebuffer.nr_samples <= Elements(rctx->sample_positions)/4);
+
+	memset(rctx->sample_positions, 0, 4 * 4 * 16);
 	for (i = 0; i < rctx->framebuffer.nr_samples; i++) {
-		ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &values[4*i]);
+		ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &rctx->sample_positions[4*i]);
 		/* Also fill in center-zeroed positions used for interpolateAtSample */
-		values[4*i + 2] = values[4*i + 0] - 0.5f;
-		values[4*i + 3] = values[4*i + 1] - 0.5f;
+		rctx->sample_positions[4*i + 2] = rctx->sample_positions[4*i + 0] - 0.5f;
+		rctx->sample_positions[4*i + 3] = rctx->sample_positions[4*i + 1] - 0.5f;
 	}
 
-	constbuf.user_buffer = values;
-	constbuf.buffer_size = rctx->framebuffer.nr_samples * 4 * 4;
-	ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT,
-		R600_SAMPLE_POSITIONS_CONST_BUFFER, &constbuf);
-	pipe_resource_reference(&constbuf.buffer, NULL);
+	rctx->driver_consts[PIPE_SHADER_FRAGMENT].ps_sample_pos_dirty = true;
 }
 
 static void update_shader_atom(struct pipe_context *ctx,
@@ -1387,6 +1435,8 @@ static bool r600_update_derived_state(struct r600_context *rctx)
 		}
 	}
 
+	r600_update_driver_const_buffers(rctx);
+
 	if (rctx->b.chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) {
 		if (!r600_adjust_gprs(rctx)) {
 			/* discard rendering */
-- 
cgit v1.2.3


From d2ca9afd5da2a0228bbf7a9af5a2bb94424441c9 Mon Sep 17 00:00:00 2001
From: Glenn Kennard <glenn.kennard@gmail.com>
Date: Fri, 11 Sep 2015 12:42:22 +0200
Subject: r600g: Support I2D/U2D/D2I/D2U

Only for Cypress/Cayman/Aruba; older chips have only partial fp64 support.
The conversions use float intermediate values, so they are only accurate
for the int24 range, which matches what the blob does.

Signed-off-by: Glenn Kennard <glenn.kennard@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r600/r600_shader.c | 106 ++++++++++++++++++++++++++++++---
 1 file changed, 98 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 93b1bf7d5b4..f83ea62863e 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -3059,6 +3059,96 @@ static int tgsi_dfracexp(struct r600_shader_ctx *ctx)
 	return 0;
 }
 
+
+static int egcm_int_to_double(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	struct r600_bytecode_alu alu;
+	int i, r;
+	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+
+	assert(inst->Instruction.Opcode == TGSI_OPCODE_I2D ||
+		inst->Instruction.Opcode == TGSI_OPCODE_U2D);
+
+	for (i = 0; i <= (lasti+1)/2; i++) {
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ctx->inst_info->op;
+
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+		alu.dst.sel = ctx->temp_reg;
+		alu.dst.chan = i;
+		alu.dst.write = 1;
+		alu.last = 1;
+
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+
+	for (i = 0; i <= lasti; i++) {
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP1_FLT32_TO_FLT64;
+
+		alu.src[0].chan = i/2;
+		if (i%2 == 0)
+			alu.src[0].sel = ctx->temp_reg;
+		else {
+			alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+			alu.src[0].value = 0x0;
+		}
+		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+		alu.last = i == lasti;
+
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+static int egcm_double_to_int(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	struct r600_bytecode_alu alu;
+	int i, r;
+	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+
+	assert(inst->Instruction.Opcode == TGSI_OPCODE_D2I ||
+		inst->Instruction.Opcode == TGSI_OPCODE_D2U);
+
+	for (i = 0; i <= lasti; i++) {
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP1_FLT64_TO_FLT32;
+
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], fp64_switch(i));
+		alu.dst.chan = i;
+		alu.dst.sel = ctx->temp_reg;
+		alu.dst.write = i%2 == 0;
+		alu.last = i == lasti;
+
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+
+	for (i = 0; i <= (lasti+1)/2; i++) {
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ctx->inst_info->op;
+
+		alu.src[0].chan = i*2;
+		alu.src[0].sel = ctx->temp_reg;
+		tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
+		alu.last = 1;
+
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
 static int cayman_emit_double_instr(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -8153,10 +8243,10 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
 	[TGSI_OPCODE_DFRAC]	= { ALU_OP1_FRACT_64, tgsi_op2_64},
 	[TGSI_OPCODE_DLDEXP]	= { ALU_OP2_LDEXP_64, tgsi_op2_64},
 	[TGSI_OPCODE_DFRACEXP]	= { ALU_OP1_FREXP_64, tgsi_dfracexp},
-	[TGSI_OPCODE_D2I]	= { ALU_OP0_NOP, tgsi_unsupported},
-	[TGSI_OPCODE_I2D]	= { ALU_OP0_NOP, tgsi_unsupported},
-	[TGSI_OPCODE_D2U]	= { ALU_OP0_NOP, tgsi_unsupported},
-	[TGSI_OPCODE_U2D]	= { ALU_OP0_NOP, tgsi_unsupported},
+	[TGSI_OPCODE_D2I]	= { ALU_OP1_FLT_TO_INT, egcm_double_to_int},
+	[TGSI_OPCODE_I2D]	= { ALU_OP1_INT_TO_FLT, egcm_int_to_double},
+	[TGSI_OPCODE_D2U]	= { ALU_OP1_FLT_TO_UINT, egcm_double_to_int},
+	[TGSI_OPCODE_U2D]	= { ALU_OP1_UINT_TO_FLT, egcm_int_to_double},
 	[TGSI_OPCODE_DRSQ]	= { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr},
 	[TGSI_OPCODE_LAST]	= { ALU_OP0_NOP, tgsi_unsupported},
 };
@@ -8375,10 +8465,10 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
 	[TGSI_OPCODE_DFRAC]	= { ALU_OP1_FRACT_64, tgsi_op2_64},
 	[TGSI_OPCODE_DLDEXP]	= { ALU_OP2_LDEXP_64, tgsi_op2_64},
 	[TGSI_OPCODE_DFRACEXP]	= { ALU_OP1_FREXP_64, tgsi_dfracexp},
-	[TGSI_OPCODE_D2I]	= { ALU_OP0_NOP, tgsi_unsupported},
-	[TGSI_OPCODE_I2D]	= { ALU_OP0_NOP, tgsi_unsupported},
-	[TGSI_OPCODE_D2U]	= { ALU_OP0_NOP, tgsi_unsupported},
-	[TGSI_OPCODE_U2D]	= { ALU_OP0_NOP, tgsi_unsupported},
+	[TGSI_OPCODE_D2I]	= { ALU_OP1_FLT_TO_INT, egcm_double_to_int},
+	[TGSI_OPCODE_I2D]	= { ALU_OP1_INT_TO_FLT, egcm_int_to_double},
+	[TGSI_OPCODE_D2U]	= { ALU_OP1_FLT_TO_UINT, egcm_double_to_int},
+	[TGSI_OPCODE_U2D]	= { ALU_OP1_UINT_TO_FLT, egcm_int_to_double},
 	[TGSI_OPCODE_DRSQ]	= { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr},
 	[TGSI_OPCODE_LAST]	= { ALU_OP0_NOP, tgsi_unsupported},
 };
-- 
cgit v1.2.3


From ce34048b575240347f8426c541971d00c810d3c9 Mon Sep 17 00:00:00 2001
From: Glenn Kennard <glenn.kennard@gmail.com>
Date: Fri, 11 Sep 2015 12:42:23 +0200
Subject: r600: Enable fp64 on chips with native support

Enable it on Cypress/Cayman/Aruba only. Earlier r6xx/r7xx chips support
just a subset of the needed fp64 ops, and don't do GL4 anyway.

Signed-off-by: Glenn Kennard <glenn.kennard@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 docs/GL3.txt                         | 4 ++--
 docs/relnotes/11.1.0.html            | 1 +
 src/gallium/drivers/r600/r600_pipe.c | 3 +++
 3 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 31c7619d3fd..97f642bd135 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -109,7 +109,7 @@ GL 4.0, GLSL 4.00 --- all DONE: nvc0, radeonsi
   - Enhanced per-sample shading                        DONE (r600)
   - Interpolation functions                            DONE (r600)
   - New overload resolution rules                      DONE
-  GL_ARB_gpu_shader_fp64                               DONE (llvmpipe, softpipe)
+  GL_ARB_gpu_shader_fp64                               DONE (r600, llvmpipe, softpipe)
   GL_ARB_sample_shading                                DONE (i965, nv50, r600)
   GL_ARB_shader_subroutine                             DONE (i965, nv50, r600, llvmpipe, softpipe)
   GL_ARB_tessellation_shader                           DONE ()
@@ -127,7 +127,7 @@ GL 4.1, GLSL 4.10 --- all DONE: nvc0, radeonsi
   GL_ARB_get_program_binary                            DONE (0 binary formats)
   GL_ARB_separate_shader_objects                       DONE (all drivers)
   GL_ARB_shader_precision                              DONE (all drivers that support GLSL 4.10)
-  GL_ARB_vertex_attrib_64bit                           DONE (llvmpipe, softpipe)
+  GL_ARB_vertex_attrib_64bit                           DONE (r600, llvmpipe, softpipe)
   GL_ARB_viewport_array                                DONE (i965, nv50, r600, llvmpipe)
 
 
diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 603b06f41c9..528905f772b 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -46,6 +46,7 @@ Note: some of the new features are only available with certain drivers.
 <ul>
 <li>GL_ARB_shader_texture_image_samples on i965</li>
 <li>GL_ARB_texture_query_lod on softpipe</li>
+<li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
 </ul>
 
 <h2>Bug fixes</h2>
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index fd9c16c6a96..a18ec49215c 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -500,6 +500,9 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
 			return PIPE_SHADER_IR_TGSI;
 		}
 	case PIPE_SHADER_CAP_DOUBLES:
+		if (rscreen->b.family == CHIP_CYPRESS ||
+			rscreen->b.family == CHIP_CAYMAN || rscreen->b.family == CHIP_ARUBA)
+			return 1;
 		return 0;
 	case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
 	case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
-- 
cgit v1.2.3


From dd7290cf59206c49f1a322d53baa9957b13d2949 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Thu, 10 Sep 2015 16:19:22 -0700
Subject: i965/emit: Add assertions for accumulator restrictions

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_eu_emit.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 637fd074ff1..0432efa7175 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -235,6 +235,15 @@ validate_reg(const struct brw_device_info *devinfo,
        reg.file == BRW_ARF_NULL)
       return;
 
+   /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
+    *
+    *    "Swizzling is not allowed when an accumulator is used as an implicit
+    *    source or an explicit source in an instruction."
+    */
+   if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+       reg.nr == BRW_ARF_ACCUMULATOR)
+      assert(reg.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
+
    assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
    hstride = hstride_for_reg[reg.hstride];
 
@@ -443,6 +452,14 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
    if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
 
+   /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
+    *
+    *    "Accumulator registers may be accessed explicitly as src0
+    *    operands only."
+    */
+   assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
+          reg.nr != BRW_ARF_ACCUMULATOR);
+
    gen7_convert_mrf_to_grf(p, &reg);
    assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
 
-- 
cgit v1.2.3


From 1037e0a84f61f4b1815093bcfd548d4b58ca106f Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Thu, 10 Sep 2015 16:19:42 -0700
Subject: i965/vec4: Don't reswizzle hardware registers

Cc: "11.0 10.6" <mesa-stable@lists.freedesktop.org>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91719
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index c4da1a11be8..9d863c273e9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -954,6 +954,14 @@ vec4_instruction::can_reswizzle(int dst_writemask,
    if (mlen > 0)
       return false;
 
+   /* We can't use swizzles on the accumulator and that's really the only
+    * HW_REG we would care to reswizzle so just disallow them all.
+    */
+   for (int i = 0; i < 3; i++) {
+      if (src[i].file == HW_REG)
+         return false;
+   }
+
    return true;
 }
 
-- 
cgit v1.2.3


From d6fbcf6ee28c273b37bf293aea5faf77253029a3 Mon Sep 17 00:00:00 2001
From: Kai Wasserbäch <kai@dev.carbon-project.org>
Date: Sat, 12 Sep 2015 10:39:50 +0200
Subject: r600: Fix llvm build since const buffer changes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In commit f9caabe8f1bff86d19b53d9ecba5c72b238d9e23, one place in
r600_llvm.c was missed when replacing R600_UCP_CONST_BUFFER with
R600_BUFFER_INFO_CONST_BUFFER.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91985
Signed-off-by: Kai Wasserbäch <kai@dev.carbon-project.org>
Signed-off-by: Dave Airlie <airlied@gmail.com>
---
 src/gallium/drivers/r600/r600_llvm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index 3362fd00dba..372cd419c73 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -22,7 +22,7 @@
 #if defined R600_USE_LLVM || defined HAVE_OPENCL
 
 #define CONSTANT_BUFFER_0_ADDR_SPACE 8
-#define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + R600_UCP_CONST_BUFFER)
+#define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER)
 #define LLVM_R600_BUFFER_INFO_CONST_BUFFER \
 	(CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER)
 
-- 
cgit v1.2.3


From 86358e949eaa13c075338901024d0e1009fa7e99 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Wed, 9 Sep 2015 18:28:55 -0400
Subject: tgsi/scan: add support to figure out max nesting depth

The maximum nesting depth of loops/ifs is sometimes a useful thing for
compilers (or, for example, tgsi_to_nir) to know, and it is pretty trivial
for scan to figure this out for us.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/tgsi/tgsi_scan.c | 16 ++++++++++++++++
 src/gallium/auxiliary/tgsi/tgsi_scan.h |  5 +++++
 2 files changed, 21 insertions(+)

(limited to 'src')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 9810b5468d9..66306d7d5d2 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -56,6 +56,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
 {
    uint procType, i;
    struct tgsi_parse_context parse;
+   unsigned current_depth = 0;
 
    memset(info, 0, sizeof(*info));
    for (i = 0; i < TGSI_FILE_COUNT; i++)
@@ -100,6 +101,21 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
             assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
             info->opcode_count[fullinst->Instruction.Opcode]++;
 
+            switch (fullinst->Instruction.Opcode) {
+            case TGSI_OPCODE_IF:
+            case TGSI_OPCODE_UIF:
+            case TGSI_OPCODE_BGNLOOP:
+               current_depth++;
+               info->max_depth = MAX2(info->max_depth, current_depth);
+               break;
+            case TGSI_OPCODE_ENDIF:
+            case TGSI_OPCODE_ENDLOOP:
+               current_depth--;
+               break;
+            default:
+               break;
+            }
+
             if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D &&
                 fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG)
                info->uses_doubles = true;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index c5900bc05d4..42539ee9f45 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -113,6 +113,11 @@ struct tgsi_shader_info
    unsigned indirect_files_written;
 
    unsigned properties[TGSI_PROPERTY_COUNT]; /* index with TGSI_PROPERTY_ */
+
+   /**
+    * Max nesting limit of loops/if's
+    */
+   unsigned max_depth;
 };
 
 extern void
-- 
cgit v1.2.3


From b88aeff4f51e8a505a8c5e5905d80ae3b75ed3ef Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 10 Sep 2015 16:06:05 -0400
Subject: nir: add nir_channel() to get at single components of vec's

Rather than make yet another copy of channel(), let's move it into nir.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_builder.h                  |  6 ++++++
 src/glsl/nir/nir_lower_tex_projector.c      | 24 +++++++++---------------
 src/glsl/nir/nir_normalize_cubemap_coords.c | 20 +++++++-------------
 3 files changed, 22 insertions(+), 28 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h
index ffa31c90a45..cf50f699eae 100644
--- a/src/glsl/nir/nir_builder.h
+++ b/src/glsl/nir/nir_builder.h
@@ -216,6 +216,12 @@ nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4],
                      nir_imov_alu(build, alu_src, num_components);
 }
 
+static inline nir_ssa_def *
+nir_channel(nir_builder *b, nir_ssa_def *def, int c)
+{
+   return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false);
+}
+
 /**
  * Turns a nir_src into a nir_ssa_def * so it can be passed to
  * nir_build_alu()-based builder calls.
diff --git a/src/glsl/nir/nir_lower_tex_projector.c b/src/glsl/nir/nir_lower_tex_projector.c
index 6530021c8b7..9afa42f23a9 100644
--- a/src/glsl/nir/nir_lower_tex_projector.c
+++ b/src/glsl/nir/nir_lower_tex_projector.c
@@ -30,12 +30,6 @@
 #include "nir.h"
 #include "nir_builder.h"
 
-static nir_ssa_def *
-channel(nir_builder *b, nir_ssa_def *def, int c)
-{
-   return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false);
-}
-
 static bool
 nir_lower_tex_projector_block(nir_block *block, void *void_state)
 {
@@ -79,21 +73,21 @@ nir_lower_tex_projector_block(nir_block *block, void *void_state)
             switch (tex->coord_components) {
             case 4:
                projected = nir_vec4(b,
-                                    channel(b, projected, 0),
-                                    channel(b, projected, 1),
-                                    channel(b, projected, 2),
-                                    channel(b, unprojected, 3));
+                                    nir_channel(b, projected, 0),
+                                    nir_channel(b, projected, 1),
+                                    nir_channel(b, projected, 2),
+                                    nir_channel(b, unprojected, 3));
                break;
             case 3:
                projected = nir_vec3(b,
-                                    channel(b, projected, 0),
-                                    channel(b, projected, 1),
-                                    channel(b, unprojected, 2));
+                                    nir_channel(b, projected, 0),
+                                    nir_channel(b, projected, 1),
+                                    nir_channel(b, unprojected, 2));
                break;
             case 2:
                projected = nir_vec2(b,
-                                    channel(b, projected, 0),
-                                    channel(b, unprojected, 1));
+                                    nir_channel(b, projected, 0),
+                                    nir_channel(b, unprojected, 1));
                break;
             default:
                unreachable("bad texture coord count for array");
diff --git a/src/glsl/nir/nir_normalize_cubemap_coords.c b/src/glsl/nir/nir_normalize_cubemap_coords.c
index 75b647f96cb..ca68bd7a94c 100644
--- a/src/glsl/nir/nir_normalize_cubemap_coords.c
+++ b/src/glsl/nir/nir_normalize_cubemap_coords.c
@@ -33,12 +33,6 @@
  * or 1.0.  This is based on the old GLSL IR based pass by Eric.
  */
 
-static nir_ssa_def *
-channel(nir_builder *b, nir_ssa_def *def, int c)
-{
-   return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false);
-}
-
 static bool
 normalize_cubemap_coords_block(nir_block *block, void *void_state)
 {
@@ -63,9 +57,9 @@ normalize_cubemap_coords_block(nir_block *block, void *void_state)
          assert(orig_coord->num_components >= 3);
 
          nir_ssa_def *abs = nir_fabs(b, orig_coord);
-         nir_ssa_def *norm = nir_fmax(b, channel(b, abs, 0),
-                                         nir_fmax(b, channel(b, abs, 1),
-                                                     channel(b, abs, 2)));
+         nir_ssa_def *norm = nir_fmax(b, nir_channel(b, abs, 0),
+                                         nir_fmax(b, nir_channel(b, abs, 1),
+                                                     nir_channel(b, abs, 2)));
 
          nir_ssa_def *normalized = nir_fmul(b, orig_coord, nir_frcp(b, norm));
 
@@ -74,10 +68,10 @@ normalize_cubemap_coords_block(nir_block *block, void *void_state)
           */
          if (tex->coord_components == 4) {
             normalized = nir_vec4(b,
-                                  channel(b, normalized, 0),
-                                  channel(b, normalized, 1),
-                                  channel(b, normalized, 2),
-                                  channel(b, orig_coord, 3));
+                                  nir_channel(b, normalized, 0),
+                                  nir_channel(b, normalized, 1),
+                                  nir_channel(b, normalized, 2),
+                                  nir_channel(b, orig_coord, 3));
          }
 
          nir_instr_rewrite_src(&tex->instr,
-- 
cgit v1.2.3


From bf45a7d28e7934463ee465b613a9101259520e13 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Sun, 13 Sep 2015 11:21:28 -0400
Subject: freedreno/ir3: fix compile break after a4aa25be

The following commit dropped the unused mem_ctx arg:

   commit a4aa25be1e0a27b1a6a6b0bcf576beb9dfe1ea7a
   Author:     Jason Ekstrand <jason.ekstrand@intel.com>
   AuthorDate: Wed Sep 9 13:24:35 2015 -0700
   Commit:     Jason Ekstrand <jason.ekstrand@intel.com>
   CommitDate: Fri Sep 11 09:21:20 2015 -0700

       nir: Remove the mem_ctx parameter from ssa_def_rewrite_uses

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c
index d57eb2ba713..4ec0e2bd2ac 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c
@@ -294,8 +294,7 @@ lower_if_else_block(nir_block *block, void *void_state)
 		sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
 
 		nir_ssa_def_rewrite_uses(&phi->dest.ssa,
-				nir_src_for_ssa(&sel->dest.dest.ssa),
-				state->mem_ctx);
+				nir_src_for_ssa(&sel->dest.dest.ssa));
 
 		nir_instr_insert_before(&phi->instr, &sel->instr);
 		nir_instr_remove(&phi->instr);
-- 
cgit v1.2.3


From 59519c2283e91ce4b8c2028673d6d8dc4ee5138f Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Sun, 13 Sep 2015 11:22:51 -0400
Subject: freedreno/ir3: fix compile warn after 1807a08e

Add the new enum value to the switch so the compiler doesn't complain.
The enum value was introduced by:

   commit 1807a08e4f35b014f2a80d1e88dd74a9f096d7a5
   Author:     Ilia Mirkin <imirkin@alum.mit.edu>
   AuthorDate: Thu Aug 27 23:05:03 2015 -0400
   Commit:     Ilia Mirkin <imirkin@alum.mit.edu>
   CommitDate: Thu Sep 10 17:38:33 2015 -0400

       nir: add nir_texop_texture_samples and convert from glsl

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 071901a3cc7..5a069fb5378 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1623,6 +1623,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
 	case nir_texop_lod:
 	case nir_texop_tg4:
 	case nir_texop_query_levels:
+	case nir_texop_texture_samples:
 		compile_error(ctx, "Unhandled NIR tex type: %d\n", tex->op);
 		return;
 	}
-- 
cgit v1.2.3


From 08ceb5e076328bf6ccceed3a8e5de205dcaf63b0 Mon Sep 17 00:00:00 2001
From: Krzesimir Nowak <qdlacz@gmail.com>
Date: Sat, 12 Sep 2015 08:17:00 -0600
Subject: softpipe: Change faces type to uint

This is to avoid needless float<->int conversions, since all
face-related computations are made on integers. Spotted by Emil
Velikov.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 10 +++++-----
 src/gallium/drivers/softpipe/sp_tex_sample.h |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index dfe38af5354..8a0935062b6 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2200,7 +2200,7 @@ img_filter_2d_ewa(const struct sp_sampler_view *sp_sview,
                   const float s[TGSI_QUAD_SIZE],
                   const float t[TGSI_QUAD_SIZE],
                   const float p[TGSI_QUAD_SIZE],
-                  const float faces[TGSI_QUAD_SIZE],
+                  const uint faces[TGSI_QUAD_SIZE],
                   unsigned level,
                   const float dudx, const float dvdx,
                   const float dudy, const float dvdy,
@@ -3089,7 +3089,7 @@ convert_cube(const struct sp_sampler_view *sp_sview,
              float ssss[TGSI_QUAD_SIZE],
              float tttt[TGSI_QUAD_SIZE],
              float pppp[TGSI_QUAD_SIZE],
-             float faces[TGSI_QUAD_SIZE])
+             uint faces[TGSI_QUAD_SIZE])
 {
    unsigned j;
 
@@ -3564,14 +3564,14 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
       float cs[TGSI_QUAD_SIZE];
       float ct[TGSI_QUAD_SIZE];
       float cp[TGSI_QUAD_SIZE];
-      float faces[TGSI_QUAD_SIZE];
+      uint faces[TGSI_QUAD_SIZE];
 
       convert_cube(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp, faces);
 
       filt_args.faces = faces;
       sample_mip(sp_sview, sp_samp, cs, ct, cp, c0, lod, &filt_args, rgba);
    } else {
-      static const float zero_faces[TGSI_QUAD_SIZE] = {0.0f, 0.0f, 0.0f, 0.0f};
+      static const uint zero_faces[TGSI_QUAD_SIZE] = {0, 0, 0, 0};
 
       filt_args.faces = zero_faces;
       sample_mip(sp_sview, sp_samp, s, t, p, c0, lod, &filt_args, rgba);
@@ -3619,7 +3619,7 @@ sp_tgsi_query_lod(const struct tgsi_sampler *tgsi_sampler,
       float cs[TGSI_QUAD_SIZE];
       float ct[TGSI_QUAD_SIZE];
       float cp[TGSI_QUAD_SIZE];
-      float unused_faces[TGSI_QUAD_SIZE];
+      uint unused_faces[TGSI_QUAD_SIZE];
 
       convert_cube(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp, unused_faces);
       compute_lambda_lod_unclamped(sp_sview, sp_samp,
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index 83ee3a33839..d591487d1b4 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -72,7 +72,7 @@ typedef void (*img_filter_func)(const struct sp_sampler_view *sp_sview,
 struct filter_args {
    enum tgsi_sampler_control control;
    const int8_t *offset;
-   const float *faces;
+   const uint *faces;
 };
 
 typedef void (*mip_filter_func)(const struct sp_sampler_view *sp_sview,
-- 
cgit v1.2.3


From bf8d6e501c58b3d6c06056b663ec99b9c7eeb9cf Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Fri, 10 Oct 2014 08:28:24 -0700
Subject: glsl/cs: Add gl_LocalInvocationID variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/builtin_variables.cpp | 4 ++++
 src/glsl/shader_enums.h        | 9 +++++++++
 2 files changed, 13 insertions(+)

(limited to 'src')

diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index cf1be037525..f185d939444 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -402,6 +402,7 @@ private:
    const glsl_type * const vec2_t;
    const glsl_type * const vec3_t;
    const glsl_type * const vec4_t;
+   const glsl_type * const uvec3_t;
    const glsl_type * const mat3_t;
    const glsl_type * const mat4_t;
 
@@ -417,6 +418,7 @@ builtin_variable_generator::builtin_variable_generator(
      bool_t(glsl_type::bool_type), int_t(glsl_type::int_type),
      float_t(glsl_type::float_type), vec2_t(glsl_type::vec2_type),
      vec3_t(glsl_type::vec3_type), vec4_t(glsl_type::vec4_type),
+     uvec3_t(glsl_type::uvec3_type),
      mat3_t(glsl_type::mat3_type), mat4_t(glsl_type::mat4_type)
 {
 }
@@ -1051,6 +1053,8 @@ builtin_variable_generator::generate_fs_special_vars()
 void
 builtin_variable_generator::generate_cs_special_vars()
 {
+   add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_ID, uvec3_t,
+                    "gl_LocalInvocationID");
    /* TODO: finish this. */
 }
 
diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h
index 9bb163f3bb0..3b80a1dd4ce 100644
--- a/src/glsl/shader_enums.h
+++ b/src/glsl/shader_enums.h
@@ -238,6 +238,8 @@ typedef enum
 #define SYSTEM_BIT_SAMPLE_ID ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_ID)
 #define SYSTEM_BIT_SAMPLE_POS ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_POS)
 #define SYSTEM_BIT_SAMPLE_MASK_IN ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_MASK_IN)
+#define SYSTEM_BIT_LOCAL_INVOCATION_ID ((uint64_t)1 << SYSTEM_VALUE_LOCAL_INVOCATION_ID)
+
 /**
  * If the gl_register_file is PROGRAM_SYSTEM_VALUE, the register index will be
  * one of these values.  If a NIR variable's mode is nir_var_system_value, it
@@ -363,6 +365,13 @@ typedef enum
    SYSTEM_VALUE_TESS_LEVEL_INNER, /**< TES input */
    /*@}*/
 
+   /**
+    * \name Compute shader system values
+    */
+   /*@{*/
+   SYSTEM_VALUE_LOCAL_INVOCATION_ID,
+   /*@}*/
+
    SYSTEM_VALUE_MAX             /**< Number of values */
 } gl_system_value;
 
-- 
cgit v1.2.3


From 62e011d593008e61ddcba3c32b5710505f4974e6 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Fri, 13 Mar 2015 11:32:43 -0700
Subject: nir: Add gl_LocalInvocationID variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/nir/nir.c                     | 2 ++
 src/glsl/nir/nir_intrinsics.h          | 1 +
 src/glsl/nir/nir_lower_system_values.c | 3 +++
 3 files changed, 6 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index aafcb939e3a..1774e1c13f4 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1457,6 +1457,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
       return SYSTEM_VALUE_SAMPLE_POS;
    case nir_intrinsic_load_sample_mask_in:
       return SYSTEM_VALUE_SAMPLE_MASK_IN;
+   case nir_intrinsic_load_local_invocation_id:
+      return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
    /* FINISHME: Add tessellation intrinsics.
       return SYSTEM_VALUE_TESS_COORD;
       return SYSTEM_VALUE_VERTICES_IN;
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index 07dd5cf1ec3..83966129462 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -141,6 +141,7 @@ SYSTEM_VALUE(sample_id, 1)
 SYSTEM_VALUE(sample_pos, 2)
 SYSTEM_VALUE(sample_mask_in, 1)
 SYSTEM_VALUE(invocation_id, 1)
+SYSTEM_VALUE(local_invocation_id, 3)
 
 /*
  * The format of the indices depends on the type of the load.  For uniforms,
diff --git a/src/glsl/nir/nir_lower_system_values.c b/src/glsl/nir/nir_lower_system_values.c
index 440fb0b1b8e..91228e367a4 100644
--- a/src/glsl/nir/nir_lower_system_values.c
+++ b/src/glsl/nir/nir_lower_system_values.c
@@ -70,6 +70,9 @@ convert_instr(nir_intrinsic_instr *instr)
    case SYSTEM_VALUE_INVOCATION_ID:
       op = nir_intrinsic_load_invocation_id;
       break;
+   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
+      op = nir_intrinsic_load_local_invocation_id;
+      break;
    default:
       unreachable("not reached");
    }
-- 
cgit v1.2.3


From c7161a3c3559f0450a90bb1228c74e8fdc9c939b Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Fri, 21 Nov 2014 18:47:49 -0800
Subject: i965/cs: Reserve local invocation id in payload regs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/mesa/drivers/dri/i965/brw_cs.cpp | 29 +++++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_cs.h   |  5 +++++
 src/mesa/drivers/dri/i965/brw_fs.cpp | 10 ++++++++++
 src/mesa/drivers/dri/i965/brw_fs.h   |  1 +
 4 files changed, 45 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp
index 980ef52fe17..757c77d459a 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -457,6 +457,35 @@ const struct brw_tracked_state brw_cs_state = {
 };
 
 
+/**
+ * We are building the local ID push constant data using the simplest possible
+ * method. We simply push the local IDs directly as they should appear in the
+ * registers for the uvec3 gl_LocalInvocationID variable.
+ *
+ * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
+ * registers worth of push constant space.
+ *
+ * FINISHME: There are a few easy optimizations to consider.
+ *
+ * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
+ *    no need for using push constant space for that dimension.
+ *
+ * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
+ *    easily use 16-bit words rather than 32-bit dwords in the push constant
+ *    data.
+ *
+ * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
+ *    conveying the data, and thereby reduce push constant usage.
+ *
+ */
+unsigned
+brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog,
+                                    unsigned dispatch_width)
+{
+   return 3 * dispatch_width;
+}
+
+
 /**
  * Creates a region containing the push constants for the CS on gen7+.
  *
diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h
index 8404aa3e824..08310df77c1 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.h
+++ b/src/mesa/drivers/dri/i965/brw_cs.h
@@ -42,6 +42,11 @@ void
 brw_upload_cs_prog(struct brw_context *brw);
 
 #ifdef __cplusplus
+
+unsigned
+brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog,
+                                    unsigned dispatch_width);
+
 }
 #endif
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 320f612682d..10417c87484 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -42,6 +42,7 @@
 #include "brw_eu.h"
 #include "brw_wm.h"
 #include "brw_fs.h"
+#include "brw_cs.h"
 #include "brw_cfg.h"
 #include "brw_dead_control_flow.h"
 #include "main/uniforms.h"
@@ -4731,6 +4732,15 @@ fs_visitor::setup_cs_payload()
    assert(devinfo->gen >= 7);
 
    payload.num_regs = 1;
+
+   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+      const unsigned local_id_dwords =
+         brw_cs_prog_local_id_payload_dwords(prog, dispatch_width);
+      assert((local_id_dwords & 0x7) == 0);
+      const unsigned local_id_regs = local_id_dwords / 8;
+      payload.local_invocation_id_reg = payload.num_regs;
+      payload.num_regs += local_id_regs;
+   }
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index dd0526a1550..c584cc70cb8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -364,6 +364,7 @@ public:
       uint8_t sample_pos_reg;
       uint8_t sample_mask_in_reg;
       uint8_t barycentric_coord_reg[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
+      uint8_t local_invocation_id_reg;
 
       /** The number of thread payload registers the hardware will supply. */
       uint8_t num_regs;
-- 
cgit v1.2.3


From b94b57f7c540e55d25e1cfd7392fc030022a4933 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Fri, 10 Oct 2014 08:33:23 -0700
Subject: i965/cs: Initialize gl_LocalInvocationID in push constant data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_cs.cpp | 56 +++++++++++++++++++++++++++++++++---
 1 file changed, 52 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp
index 757c77d459a..d273f99c5ca 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -324,6 +324,7 @@ brw_upload_cs_state(struct brw_context *brw)
    uint32_t offset;
    uint32_t *desc = (uint32_t*) brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                                                 8 * 4, 64, &offset);
+   struct gl_program *prog = (struct gl_program *) brw->compute_program;
    struct brw_stage_state *stage_state = &brw->cs.base;
    struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
    struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
@@ -340,8 +341,15 @@ brw_upload_cs_state(struct brw_context *brw)
                                             prog_data->binding_table.size_bytes,
                                             32, &stage_state->bind_bo_offset);
 
+   unsigned local_id_dwords = 0;
+
+   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+      local_id_dwords =
+         brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size);
+   }
+
    unsigned push_constant_data_size =
-      prog_data->nr_params * sizeof(gl_constant_value);
+      (prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value);
    unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
    unsigned push_constant_regs = reg_aligned_constant_size / 32;
    unsigned threads = get_cs_thread_count(cs_prog_data);
@@ -465,6 +473,9 @@ const struct brw_tracked_state brw_cs_state = {
  * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
  * registers worth of push constant space.
  *
+ * Note: Any updates to brw_cs_prog_local_id_payload_dwords or
+ * fill_local_id_payload need to coordinated.
+ *
  * FINISHME: There are a few easy optimizations to consider.
  *
  * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
@@ -486,6 +497,31 @@ brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog,
 }
 
 
+static void
+fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
+                      void *buffer, unsigned *x, unsigned *y, unsigned *z)
+{
+   uint32_t *param = (uint32_t *)buffer;
+   for (unsigned i = 0; i < cs_prog_data->simd_size; i++) {
+      param[0 * cs_prog_data->simd_size + i] = *x;
+      param[1 * cs_prog_data->simd_size + i] = *y;
+      param[2 * cs_prog_data->simd_size + i] = *z;
+
+      (*x)++;
+      if (*x == cs_prog_data->local_size[0]) {
+         *x = 0;
+         (*y)++;
+         if (*y == cs_prog_data->local_size[1]) {
+            *y = 0;
+            (*z)++;
+            if (*z == cs_prog_data->local_size[2])
+               *z = 0;
+         }
+      }
+   }
+}
+
+
 /**
  * Creates a region containing the push constants for the CS on gen7+.
  *
@@ -506,6 +542,12 @@ brw_upload_cs_push_constants(struct brw_context *brw,
    struct gl_context *ctx = &brw->ctx;
    const struct brw_stage_prog_data *prog_data =
       (brw_stage_prog_data*) cs_prog_data;
+   unsigned local_id_dwords = 0;
+
+   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+      local_id_dwords =
+         brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size);
+   }
 
    /* Updates the ParamaterValues[i] pointers for all parameters of the
     * basic type of PROGRAM_STATE_VAR.
@@ -513,14 +555,14 @@ brw_upload_cs_push_constants(struct brw_context *brw,
    /* XXX: Should this happen somewhere before to get our state flag set? */
    _mesa_load_state_parameters(ctx, prog->Parameters);
 
-   if (prog_data->nr_params == 0) {
+   if (prog_data->nr_params == 0 && local_id_dwords == 0) {
       stage_state->push_const_size = 0;
    } else {
       gl_constant_value *param;
       unsigned i, t;
 
       const unsigned push_constant_data_size =
-         prog_data->nr_params * sizeof(gl_constant_value);
+         (local_id_dwords + prog_data->nr_params) * sizeof(gl_constant_value);
       const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
       const unsigned param_aligned_count =
          reg_aligned_constant_size / sizeof(*param);
@@ -536,9 +578,15 @@ brw_upload_cs_push_constants(struct brw_context *brw,
       STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
 
       /* _NEW_PROGRAM_CONSTANTS */
+      unsigned x = 0, y = 0, z = 0;
       for (t = 0; t < threads; t++) {
+         gl_constant_value *next_param = &param[t * param_aligned_count];
+         if (local_id_dwords > 0) {
+            fill_local_id_payload(cs_prog_data, (void*)next_param, &x, &y, &z);
+            next_param += local_id_dwords;
+         }
          for (i = 0; i < prog_data->nr_params; i++) {
-            param[t * param_aligned_count + i] = *prog_data->param[i];
+            next_param[i] = *prog_data->param[i];
          }
       }
 
-- 
cgit v1.2.3


From 43624361df1d5647665db793a38e229d89dfb851 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Fri, 21 Nov 2014 19:14:41 -0800
Subject: i965/cs: Initialize gl_LocalInvocationID from payload
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_cs.cpp | 25 +++++++++++++++++++++++--
 src/mesa/drivers/dri/i965/brw_fs.h   |  1 +
 2 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp
index d273f99c5ca..9ee5ae52798 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -473,8 +473,9 @@ const struct brw_tracked_state brw_cs_state = {
  * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
  * registers worth of push constant space.
  *
- * Note: Any updates to brw_cs_prog_local_id_payload_dwords or
- * fill_local_id_payload need to coordinated.
+ * Note: Any updates to brw_cs_prog_local_id_payload_dwords,
+ * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need
+ * to coordinated.
  *
  * FINISHME: There are a few easy optimizations to consider.
  *
@@ -522,6 +523,26 @@ fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
 }
 
 
+fs_reg *
+fs_visitor::emit_cs_local_invocation_id_setup()
+{
+   assert(stage == MESA_SHADER_COMPUTE);
+
+   fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));
+
+   struct brw_reg src =
+      brw_vec8_grf(payload.local_invocation_id_reg, 0);
+   src = retype(src, BRW_REGISTER_TYPE_UD);
+   bld.MOV(*reg, src);
+   src.nr += dispatch_width / 8;
+   bld.MOV(offset(*reg, bld, 1), src);
+   src.nr += dispatch_width / 8;
+   bld.MOV(offset(*reg, bld, 2), src);
+
+   return reg;
+}
+
+
 /**
  * Creates a region containing the push constants for the CS on gen7+.
  *
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index c584cc70cb8..6bfc29002a3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -275,6 +275,7 @@ public:
    void emit_fb_writes();
    void emit_urb_writes();
    void emit_cs_terminate();
+   fs_reg *emit_cs_local_invocation_id_setup();
 
    void emit_barrier();
 
-- 
cgit v1.2.3


From 49f999b9cb6ecb32cb27d10b47d234a176ae4c77 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Fri, 13 Mar 2015 11:34:48 -0700
Subject: i965/nir: Support gl_LocalInvocationID variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 8c3c4aed707..286ec283584 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -337,6 +337,13 @@ emit_system_values_block(nir_block *block, void *void_visitor)
                                  BRW_REGISTER_TYPE_D));
          break;
 
+      case nir_intrinsic_load_local_invocation_id:
+         assert(v->stage == MESA_SHADER_COMPUTE);
+         reg = &v->nir_system_values[SYSTEM_VALUE_LOCAL_INVOCATION_ID];
+         if (reg->file == BAD_FILE)
+            *reg = *v->emit_cs_local_invocation_id_setup();
+         break;
+
       default:
          break;
       }
@@ -1709,6 +1716,16 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
          ((struct brw_cs_prog_data *) prog_data)->uses_barrier = true;
       break;
 
+   case nir_intrinsic_load_local_invocation_id: {
+      gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+      fs_reg val = nir_system_values[sv];
+      assert(val.file != BAD_FILE);
+      dest.type = val.type;
+      for (unsigned i = 0; i < 3; i++)
+         bld.MOV(offset(dest, bld, i), offset(val, bld, i));
+      break;
+   }
+
    default:
       unreachable("unknown intrinsic");
    }
-- 
cgit v1.2.3


From f5bb5a1bf180b80a4fee12d60e97ac4c6f80876c Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Fri, 10 Oct 2014 08:28:24 -0700
Subject: glsl/cs: Add gl_WorkGroupID variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/builtin_variables.cpp | 1 +
 src/glsl/shader_enums.h        | 1 +
 2 files changed, 2 insertions(+)

(limited to 'src')

diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index f185d939444..fbcec353a4a 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -1055,6 +1055,7 @@ builtin_variable_generator::generate_cs_special_vars()
 {
    add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_ID, uvec3_t,
                     "gl_LocalInvocationID");
+   add_system_value(SYSTEM_VALUE_WORK_GROUP_ID, uvec3_t, "gl_WorkGroupID");
    /* TODO: finish this. */
 }
 
diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h
index 3b80a1dd4ce..7c598b64595 100644
--- a/src/glsl/shader_enums.h
+++ b/src/glsl/shader_enums.h
@@ -370,6 +370,7 @@ typedef enum
     */
    /*@{*/
    SYSTEM_VALUE_LOCAL_INVOCATION_ID,
+   SYSTEM_VALUE_WORK_GROUP_ID,
    /*@}*/
 
    SYSTEM_VALUE_MAX             /**< Number of values */
-- 
cgit v1.2.3


From 4f178f0d8b7b4c410a1ac5532a26dfa51d3a72f1 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Fri, 13 Mar 2015 11:37:03 -0700
Subject: nir: Add gl_WorkGroupID system variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/nir/nir.c                     | 2 ++
 src/glsl/nir/nir_intrinsics.h          | 1 +
 src/glsl/nir/nir_lower_system_values.c | 3 +++
 3 files changed, 6 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 1774e1c13f4..96cf94c11d5 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1459,6 +1459,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
       return SYSTEM_VALUE_SAMPLE_MASK_IN;
    case nir_intrinsic_load_local_invocation_id:
       return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
+   case nir_intrinsic_load_work_group_id:
+      return SYSTEM_VALUE_WORK_GROUP_ID;
    /* FINISHME: Add tessellation intrinsics.
       return SYSTEM_VALUE_TESS_COORD;
       return SYSTEM_VALUE_VERTICES_IN;
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index 83966129462..b981df1a71b 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -142,6 +142,7 @@ SYSTEM_VALUE(sample_pos, 2)
 SYSTEM_VALUE(sample_mask_in, 1)
 SYSTEM_VALUE(invocation_id, 1)
 SYSTEM_VALUE(local_invocation_id, 3)
+SYSTEM_VALUE(work_group_id, 3)
 
 /*
  * The format of the indices depends on the type of the load.  For uniforms,
diff --git a/src/glsl/nir/nir_lower_system_values.c b/src/glsl/nir/nir_lower_system_values.c
index 91228e367a4..a656b27a72a 100644
--- a/src/glsl/nir/nir_lower_system_values.c
+++ b/src/glsl/nir/nir_lower_system_values.c
@@ -73,6 +73,9 @@ convert_instr(nir_intrinsic_instr *instr)
    case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
       op = nir_intrinsic_load_local_invocation_id;
       break;
+   case SYSTEM_VALUE_WORK_GROUP_ID:
+      op = nir_intrinsic_load_work_group_id;
+      break;
    default:
       unreachable("not reached");
    }
-- 
cgit v1.2.3


From 4e454cb7c6dd189b433acd98c9595e464e0432b4 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Fri, 10 Oct 2014 08:28:24 -0700
Subject: i965/cs: Initialize gl_WorkGroupID variable from payload
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_cs.cpp | 19 +++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs.h   |  1 +
 2 files changed, 20 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp
index 9ee5ae52798..04f3e588436 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -643,3 +643,22 @@ const struct brw_tracked_state gen7_cs_push_constants = {
    },
    /* .emit = */ gen7_upload_cs_push_constants,
 };
+
+
+fs_reg *
+fs_visitor::emit_cs_work_group_id_setup()
+{
+   assert(stage == MESA_SHADER_COMPUTE);
+
+   fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));
+
+   struct brw_reg r0_1(retype(brw_vec1_grf(0, 1), BRW_REGISTER_TYPE_UD));
+   struct brw_reg r0_6(retype(brw_vec1_grf(0, 6), BRW_REGISTER_TYPE_UD));
+   struct brw_reg r0_7(retype(brw_vec1_grf(0, 7), BRW_REGISTER_TYPE_UD));
+
+   bld.MOV(*reg, r0_1);
+   bld.MOV(offset(*reg, bld, 1), r0_6);
+   bld.MOV(offset(*reg, bld, 2), r0_7);
+
+   return reg;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 6bfc29002a3..5880f690ef1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -276,6 +276,7 @@ public:
    void emit_urb_writes();
    void emit_cs_terminate();
    fs_reg *emit_cs_local_invocation_id_setup();
+   fs_reg *emit_cs_work_group_id_setup();
 
    void emit_barrier();
 
-- 
cgit v1.2.3


From c5743a5d7fa62a339222ceb96d568a525d77fe0c Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Fri, 13 Mar 2015 11:39:53 -0700
Subject: i965/nir: Support gl_WorkGroupID variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 286ec283584..e4ddadc79a8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -344,6 +344,13 @@ emit_system_values_block(nir_block *block, void *void_visitor)
             *reg = *v->emit_cs_local_invocation_id_setup();
          break;
 
+      case nir_intrinsic_load_work_group_id:
+         assert(v->stage == MESA_SHADER_COMPUTE);
+         reg = &v->nir_system_values[SYSTEM_VALUE_WORK_GROUP_ID];
+         if (reg->file == BAD_FILE)
+            *reg = *v->emit_cs_work_group_id_setup();
+         break;
+
       default:
          break;
       }
@@ -1716,7 +1723,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
          ((struct brw_cs_prog_data *) prog_data)->uses_barrier = true;
       break;
 
-   case nir_intrinsic_load_local_invocation_id: {
+   case nir_intrinsic_load_local_invocation_id:
+   case nir_intrinsic_load_work_group_id: {
       gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
       fs_reg val = nir_system_values[sv];
       assert(val.file != BAD_FILE);
-- 
cgit v1.2.3


From 34e187ec38cee78fbc0e1d1a09a99160fbdf7a9f Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Mon, 17 Aug 2015 12:30:25 -0700
Subject: glsl/cs: Don't strip gl_GlobalInvocationID and dependencies
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We lower gl_GlobalInvocationID based on the extension spec formula:

    gl_GlobalInvocationID =
        gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID

https://www.opengl.org/registry/specs/ARB/compute_shader.txt

We need to set this variable in main(), even if gl_GlobalInvocationID
is not referenced by the shader. (It may be used by a linked shader.)
Therefore, we can't eliminate these as dead variables.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/glsl/opt_dead_builtin_variables.cpp | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'src')

diff --git a/src/glsl/opt_dead_builtin_variables.cpp b/src/glsl/opt_dead_builtin_variables.cpp
index 0d4e3a8f00a..90b753e012c 100644
--- a/src/glsl/opt_dead_builtin_variables.cpp
+++ b/src/glsl/opt_dead_builtin_variables.cpp
@@ -62,6 +62,16 @@ optimize_dead_builtin_variables(exec_list *instructions,
        * information, so removing these variables from the user shader will
        * cause problems later.
        *
+       * For compute shaders, gl_GlobalInvocationID has some dependencies, so
+       * we avoid removing these dependencies.
+       *
+       * We also avoid removing gl_GlobalInvocationID at this stage because it
+       * might be used by a linked shader. In this case it still needs to be
+       * initialized by the main function.
+       *
+       *    gl_GlobalInvocationID =
+       *       gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
+       *
        * Matrix uniforms with "Transpose" are not eliminated because there's
        * an optimization pass that can turn references to the regular matrix
        * into references to the transpose matrix.  Eliminating the transpose
@@ -73,6 +83,10 @@ optimize_dead_builtin_variables(exec_list *instructions,
        */
       if (strcmp(var->name, "gl_ModelViewProjectionMatrix") == 0
           || strcmp(var->name, "gl_Vertex") == 0
+          || strcmp(var->name, "gl_WorkGroupID") == 0
+          || strcmp(var->name, "gl_WorkGroupSize") == 0
+          || strcmp(var->name, "gl_LocalInvocationID") == 0
+          || strcmp(var->name, "gl_GlobalInvocationID") == 0
           || strstr(var->name, "Transpose") != NULL)
          continue;
 
-- 
cgit v1.2.3


From c4d049f64603d934aed2a75cac86d4a3d4adadb7 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Mon, 17 Aug 2015 12:22:34 -0700
Subject: glsl: Move link_get_main_function_signature to a common location
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Also rename to _mesa_get_main_function_signature.

We will call it near the end of compilation to insert some code into
main for initializing some compute shader global variables.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
---
 src/glsl/builtin_functions.cpp | 28 ++++++++++++++++++++++++++++
 src/glsl/ir.h                  |  3 +++
 src/glsl/linker.cpp            | 31 ++-----------------------------
 src/glsl/linker.h              |  3 ---
 src/glsl/lower_vertex_id.cpp   |  2 +-
 5 files changed, 34 insertions(+), 33 deletions(-)

(limited to 'src')

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 06a29bcfb10..7e77c9321b3 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -5121,4 +5121,32 @@ _mesa_glsl_get_builtin_function_shader()
    return builtins.shader;
 }
 
+
+/**
+ * Get the function signature for main from a shader
+ */
+ir_function_signature *
+_mesa_get_main_function_signature(gl_shader *sh)
+{
+   ir_function *const f = sh->symbols->get_function("main");
+   if (f != NULL) {
+      exec_list void_parameters;
+
+      /* Look for the 'void main()' signature and ensure that it's defined.
+       * This keeps the linker from accidentally picking a shader that just
+       * contains a prototype for main.
+       *
+       * We don't have to check for multiple definitions of main (in multiple
+       * shaders) because that would have already been caught above.
+       */
+      ir_function_signature *sig =
+         f->matching_signature(NULL, &void_parameters, false);
+      if ((sig != NULL) && sig->is_defined) {
+         return sig;
+      }
+   }
+
+   return NULL;
+}
+
 /** @} */
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index bb7fa0e0af4..fce72a2d3c4 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -2529,6 +2529,9 @@ _mesa_glsl_find_builtin_function_by_name(const char *name);
 extern gl_shader *
 _mesa_glsl_get_builtin_function_shader(void);
 
+extern ir_function_signature *
+_mesa_get_main_function_signature(gl_shader *sh);
+
 extern void
 _mesa_glsl_release_functions(void);
 
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 94f847e1e98..fd69dbc2c73 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1341,33 +1341,6 @@ move_non_declarations(exec_list *instructions, exec_node *last,
    return last;
 }
 
-/**
- * Get the function signature for main from a shader
- */
-ir_function_signature *
-link_get_main_function_signature(gl_shader *sh)
-{
-   ir_function *const f = sh->symbols->get_function("main");
-   if (f != NULL) {
-      exec_list void_parameters;
-
-      /* Look for the 'void main()' signature and ensure that it's defined.
-       * This keeps the linker from accidentally pick a shader that just
-       * contains a prototype for main.
-       *
-       * We don't have to check for multiple definitions of main (in multiple
-       * shaders) because that would have already been caught above.
-       */
-      ir_function_signature *sig =
-         f->matching_signature(NULL, &void_parameters, false);
-      if ((sig != NULL) && sig->is_defined) {
-	 return sig;
-      }
-   }
-
-   return NULL;
-}
-
 
 /**
  * This class is only used in link_intrastage_shaders() below but declaring
@@ -2040,7 +2013,7 @@ link_intrastage_shaders(void *mem_ctx,
     */
    gl_shader *main = NULL;
    for (unsigned i = 0; i < num_shaders; i++) {
-      if (link_get_main_function_signature(shader_list[i]) != NULL) {
+      if (_mesa_get_main_function_signature(shader_list[i]) != NULL) {
 	 main = shader_list[i];
 	 break;
       }
@@ -2072,7 +2045,7 @@ link_intrastage_shaders(void *mem_ctx,
     * copy of the original shader that contained the main function).
     */
    ir_function_signature *const main_sig =
-      link_get_main_function_signature(linked);
+      _mesa_get_main_function_signature(linked);
 
    /* Move any instructions other than variable declarations or function
     * declarations into main.
diff --git a/src/glsl/linker.h b/src/glsl/linker.h
index ce3dc323297..0999878c65a 100644
--- a/src/glsl/linker.h
+++ b/src/glsl/linker.h
@@ -26,9 +26,6 @@
 #ifndef GLSL_LINKER_H
 #define GLSL_LINKER_H
 
-ir_function_signature *
-link_get_main_function_signature(gl_shader *sh);
-
 extern bool
 link_function_calls(gl_shader_program *prog, gl_shader *main,
 		    gl_shader **shader_list, unsigned num_shaders);
diff --git a/src/glsl/lower_vertex_id.cpp b/src/glsl/lower_vertex_id.cpp
index fc90bc8e66f..3da7a2f1b3b 100644
--- a/src/glsl/lower_vertex_id.cpp
+++ b/src/glsl/lower_vertex_id.cpp
@@ -130,7 +130,7 @@ lower_vertex_id(gl_shader *shader)
       return false;
 
    ir_function_signature *const main_sig =
-      link_get_main_function_signature(shader);
+      _mesa_get_main_function_signature(shader);
    if (main_sig == NULL) {
       assert(main_sig != NULL);
       return false;
-- 
cgit v1.2.3


From 2b6cc0395be4c3eb1c60c0d7a413e368b5ae3dbf Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Mon, 17 Aug 2015 14:35:44 -0700
Subject: glsl/cs: Initialize gl_GlobalInvocationID in main()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We initialize gl_GlobalInvocationID based on the extension spec
formula:

    gl_GlobalInvocationID =
        gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID

https://www.opengl.org/registry/specs/ARB/compute_shader.txt

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Cc: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/glsl/builtin_variables.cpp  | 67 +++++++++++++++++++++++++++++++++++++++++
 src/glsl/glsl_parser_extras.cpp |  2 ++
 src/glsl/ir.h                   |  3 ++
 3 files changed, 72 insertions(+)

(limited to 'src')

diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index fbcec353a4a..34d4006f86f 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -22,6 +22,8 @@
  */
 
 #include "ir.h"
+#include "ir_builder.h"
+#include "linker.h"
 #include "glsl_parser_extras.h"
 #include "glsl_symbol_table.h"
 #include "main/core.h"
@@ -29,6 +31,8 @@
 #include "program/prog_statevars.h"
 #include "program/prog_instruction.h"
 
+using namespace ir_builder;
+
 static const struct gl_builtin_uniform_element gl_NumSamples_elements[] = {
    {NULL, {STATE_NUM_SAMPLES, 0, 0}, SWIZZLE_XXXX}
 };
@@ -1056,6 +1060,7 @@ builtin_variable_generator::generate_cs_special_vars()
    add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_ID, uvec3_t,
                     "gl_LocalInvocationID");
    add_system_value(SYSTEM_VALUE_WORK_GROUP_ID, uvec3_t, "gl_WorkGroupID");
+   add_variable("gl_GlobalInvocationID", uvec3_t, ir_var_auto, 0);
    /* TODO: finish this. */
 }
 
@@ -1207,3 +1212,65 @@ _mesa_glsl_initialize_variables(exec_list *instructions,
       break;
    }
 }
+
+
+/**
+ * Initialize compute shader variables with values that are derived from other
+ * compute shader variables.
+ */
+static void
+initialize_cs_derived_variables(gl_shader *shader,
+                                ir_function_signature *const main_sig)
+{
+   assert(shader->Stage == MESA_SHADER_COMPUTE);
+
+   ir_variable *gl_GlobalInvocationID =
+      shader->symbols->get_variable("gl_GlobalInvocationID");
+   assert(gl_GlobalInvocationID);
+   ir_variable *gl_WorkGroupID =
+      shader->symbols->get_variable("gl_WorkGroupID");
+   assert(gl_WorkGroupID);
+   ir_variable *gl_WorkGroupSize =
+      shader->symbols->get_variable("gl_WorkGroupSize");
+   if (gl_WorkGroupSize == NULL) {
+      void *const mem_ctx = ralloc_parent(shader->ir);
+      gl_WorkGroupSize = new(mem_ctx) ir_variable(glsl_type::uvec3_type,
+                                                  "gl_WorkGroupSize",
+                                                  ir_var_auto);
+      gl_WorkGroupSize->data.how_declared = ir_var_declared_implicitly;
+      gl_WorkGroupSize->data.read_only = true;
+      shader->ir->push_head(gl_WorkGroupSize);
+   }
+   ir_variable *gl_LocalInvocationID =
+      shader->symbols->get_variable("gl_LocalInvocationID");
+   assert(gl_LocalInvocationID);
+
+   /* gl_GlobalInvocationID =
+    *    gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
+    */
+   ir_instruction *inst =
+      assign(gl_GlobalInvocationID,
+             add(mul(gl_WorkGroupID, gl_WorkGroupSize),
+                 gl_LocalInvocationID));
+   main_sig->body.push_head(inst);
+}
+
+
+/**
+ * Initialize builtin variables with values based on other builtin variables.
+ * These are initialized in the main function.
+ */
+void
+_mesa_glsl_initialize_derived_variables(gl_shader *shader)
+{
+   /* We only need to set CS variables currently. */
+   if (shader->Stage != MESA_SHADER_COMPUTE)
+      return;
+
+   ir_function_signature *const main_sig =
+      _mesa_get_main_function_signature(shader);
+   if (main_sig == NULL)
+      return;
+
+   initialize_cs_derived_variables(shader, main_sig);
+}
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index fb76614133d..dae5261daca 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -1694,6 +1694,8 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
       }
    }
 
+   _mesa_glsl_initialize_derived_variables(shader);
+
    delete state->symbols;
    ralloc_free(state);
 }
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index fce72a2d3c4..f9ddf7442b0 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -2513,6 +2513,9 @@ extern void
 _mesa_glsl_initialize_variables(exec_list *instructions,
 				struct _mesa_glsl_parse_state *state);
 
+extern void
+_mesa_glsl_initialize_derived_variables(gl_shader *shader);
+
 extern void
 _mesa_glsl_initialize_functions(_mesa_glsl_parse_state *state);
 
-- 
cgit v1.2.3


From 6823e12d5aa4646fc8ef0e32455104ba47f80a38 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Mon, 17 Aug 2015 15:49:44 -0700
Subject: glsl/cs: Exclude gl_LocalInvocationIndex from builtin variable
 stripping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We lower gl_LocalInvocationIndex based on the extension spec formula:

    gl_LocalInvocationIndex =
        gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
        gl_LocalInvocationID.y * gl_WorkGroupSize.x +
        gl_LocalInvocationID.x;

https://www.opengl.org/registry/specs/ARB/compute_shader.txt

We need to set this variable in main(), even if gl_LocalInvocationIndex
is not referenced by the shader. (It may be used by a linked shader.)
Therefore, we can't eliminate it as a dead variable.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/glsl/opt_dead_builtin_variables.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src')

diff --git a/src/glsl/opt_dead_builtin_variables.cpp b/src/glsl/opt_dead_builtin_variables.cpp
index 90b753e012c..03e578982b9 100644
--- a/src/glsl/opt_dead_builtin_variables.cpp
+++ b/src/glsl/opt_dead_builtin_variables.cpp
@@ -72,6 +72,13 @@ optimize_dead_builtin_variables(exec_list *instructions,
        *    gl_GlobalInvocationID =
        *       gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
        *
+       * Similarly, we initialize gl_LocalInvocationIndex in the main function:
+       *
+       *    gl_LocalInvocationIndex =
+       *       gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
+       *       gl_LocalInvocationID.y * gl_WorkGroupSize.x +
+       *       gl_LocalInvocationID.x;
+       *
        * Matrix uniforms with "Transpose" are not eliminated because there's
        * an optimization pass that can turn references to the regular matrix
        * into references to the transpose matrix.  Eliminating the transpose
@@ -87,6 +94,7 @@ optimize_dead_builtin_variables(exec_list *instructions,
           || strcmp(var->name, "gl_WorkGroupSize") == 0
           || strcmp(var->name, "gl_LocalInvocationID") == 0
           || strcmp(var->name, "gl_GlobalInvocationID") == 0
+          || strcmp(var->name, "gl_LocalInvocationIndex") == 0
           || strstr(var->name, "Transpose") != NULL)
          continue;
 
-- 
cgit v1.2.3


From c4cf824658da283cf350fd6833d50dff9de4a1ad Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Mon, 17 Aug 2015 16:32:42 -0700
Subject: glsl/cs: Initialize gl_LocalInvocationIndex in main()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We initialize gl_LocalInvocationIndex based on the extension spec
formula:

    gl_LocalInvocationIndex =
        gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
        gl_LocalInvocationID.y * gl_WorkGroupSize.x +
        gl_LocalInvocationID.x;

https://www.opengl.org/registry/specs/ARB/compute_shader.txt

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/glsl/builtin_variables.cpp | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'src')

diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index 34d4006f86f..b5e2908f951 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -402,6 +402,7 @@ private:
 
    const glsl_type * const bool_t;
    const glsl_type * const int_t;
+   const glsl_type * const uint_t;
    const glsl_type * const float_t;
    const glsl_type * const vec2_t;
    const glsl_type * const vec3_t;
@@ -420,6 +421,7 @@ builtin_variable_generator::builtin_variable_generator(
    : instructions(instructions), state(state), symtab(state->symbols),
      compatibility(!state->is_version(140, 100)),
      bool_t(glsl_type::bool_type), int_t(glsl_type::int_type),
+     uint_t(glsl_type::uint_type),
      float_t(glsl_type::float_type), vec2_t(glsl_type::vec2_type),
      vec3_t(glsl_type::vec3_type), vec4_t(glsl_type::vec4_type),
      uvec3_t(glsl_type::uvec3_type),
@@ -1061,6 +1063,7 @@ builtin_variable_generator::generate_cs_special_vars()
                     "gl_LocalInvocationID");
    add_system_value(SYSTEM_VALUE_WORK_GROUP_ID, uvec3_t, "gl_WorkGroupID");
    add_variable("gl_GlobalInvocationID", uvec3_t, ir_var_auto, 0);
+   add_variable("gl_LocalInvocationIndex", uint_t, ir_var_auto, 0);
    /* TODO: finish this. */
 }
 
@@ -1253,6 +1256,25 @@ initialize_cs_derived_variables(gl_shader *shader,
              add(mul(gl_WorkGroupID, gl_WorkGroupSize),
                  gl_LocalInvocationID));
    main_sig->body.push_head(inst);
+
+   /* gl_LocalInvocationIndex =
+    *    gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
+    *    gl_LocalInvocationID.y * gl_WorkGroupSize.x +
+    *    gl_LocalInvocationID.x;
+    */
+   ir_expression *index_z =
+      mul(mul(swizzle_z(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize)),
+          swizzle_y(gl_WorkGroupSize));
+   ir_expression *index_y =
+      mul(swizzle_y(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize));
+   ir_expression *index_y_plus_z = add(index_y, index_z);
+   operand index_x(swizzle_x(gl_LocalInvocationID));
+   ir_expression *index_x_plus_y_plus_z = add(index_y_plus_z, index_x);
+   ir_variable *gl_LocalInvocationIndex =
+      shader->symbols->get_variable("gl_LocalInvocationIndex");
+   assert(gl_LocalInvocationIndex);
+   inst = assign(gl_LocalInvocationIndex, index_x_plus_y_plus_z);
+   main_sig->body.push_head(inst);
 }
 
 
-- 
cgit v1.2.3


From d173c5e77d239618e9609788e3d23ac9180a58a5 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 10 Sep 2015 21:37:23 -0400
Subject: tgsi: add a TXQS opcode to retrieve the number of texture samples

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
---
 src/gallium/auxiliary/tgsi/tgsi_info.c     |  3 ++-
 src/gallium/docs/source/tgsi.rst           | 12 +++++++++++-
 src/gallium/include/pipe/p_shader_tokens.h |  1 +
 3 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index fb29ea0d53d..3b40c3de97d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -141,7 +141,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
    { 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
    { 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
    { 1, 1, 1, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
-   { 0, 0, 0, 0, 0, 0, NONE, "", 104 },     /* removed */
+   { 1, 1, 1, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS },
    { 0, 0, 0, 0, 0, 0, NONE, "", 105 },     /* removed */
    { 0, 0, 0, 0, 0, 0, NONE, "", 106 },     /* removed */
    { 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
@@ -331,6 +331,7 @@ tgsi_opcode_infer_type( uint opcode )
    case TGSI_OPCODE_SAD: /* XXX some src args may be signed for SAD ? */
    case TGSI_OPCODE_TXQ:
    case TGSI_OPCODE_TXQ_LZ:
+   case TGSI_OPCODE_TXQS:
    case TGSI_OPCODE_F2U:
    case TGSI_OPCODE_UDIV:
    case TGSI_OPCODE_UMAD:
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 314c9ca8fa2..314fe1bb74f 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -960,7 +960,6 @@ XXX doesn't look like most of the opcodes really belong here.
   For components which don't return a resource dimension, their value
   is undefined.
 
-
 .. math::
 
   lod = src0.x
@@ -973,6 +972,17 @@ XXX doesn't look like most of the opcodes really belong here.
 
   dst.w = texture\_levels(unit)
 
+
+.. opcode:: TXQS - Texture Samples Query
+
+  This retrieves the number of samples in the texture, and stores it
+  into the x component. The other components are undefined.
+
+.. math::
+
+  dst.x = texture\_samples(unit)
+
+
 .. opcode:: TG4 - Texture Gather
 
   As per ARB_texture_gather, gathers the four texels to be used in a bi-linear
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 6e07b2c5c7c..b36e0a35b8d 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -402,6 +402,7 @@ struct tgsi_property_data {
 #define TGSI_OPCODE_ENDLOOP             101
 #define TGSI_OPCODE_ENDSUB              102
 #define TGSI_OPCODE_TXQ_LZ              103 /* TXQ for mipmap level 0 */
+#define TGSI_OPCODE_TXQS                104
                                 /* gap */
 #define TGSI_OPCODE_NOP                 107
 
-- 
cgit v1.2.3


From f46a53ffa50f186df347a181ca336fc3e9f96eaf Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 11 Sep 2015 17:29:49 -0400
Subject: gallium: add PIPE_CAP_TGSI_TXQS to let st know if TXQS is supported

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Glenn Kennard <glenn.kennard@gmail.com>
---
 src/gallium/docs/source/screen.rst               | 1 +
 src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
 src/gallium/drivers/i915/i915_screen.c           | 1 +
 src/gallium/drivers/ilo/ilo_screen.c             | 1 +
 src/gallium/drivers/llvmpipe/lp_screen.c         | 1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c   | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
 src/gallium/drivers/r300/r300_screen.c           | 1 +
 src/gallium/drivers/r600/r600_pipe.c             | 1 +
 src/gallium/drivers/radeonsi/si_pipe.c           | 1 +
 src/gallium/drivers/softpipe/sp_screen.c         | 1 +
 src/gallium/drivers/svga/svga_screen.c           | 1 +
 src/gallium/drivers/vc4/vc4_screen.c             | 1 +
 src/gallium/include/pipe/p_defines.h             | 1 +
 15 files changed, 15 insertions(+)

(limited to 'src')

diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index 2c0da016d08..e7800472f44 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -267,6 +267,7 @@ The integer capabilities:
 * ``PIPE_CAP_DEPTH_BOUNDS_TEST``: Whether bounds_test, bounds_min, and
   bounds_max states of pipe_depth_stencil_alpha_state behave according
   to the GL_EXT_depth_bounds_test specification.
+* ``PIPE_CAP_TGSI_TXQS``: Whether the ``TXQS`` opcode is supported.
 
 
 .. _pipe_capf:
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 17dd47c71ab..8000279ae80 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -235,6 +235,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
 	case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
 	case PIPE_CAP_DEPTH_BOUNDS_TEST:
+	case PIPE_CAP_TGSI_TXQS:
 		return 0;
 
 	case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index 19a94a8e019..51c64edda22 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -247,6 +247,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
       return 0;
 
    case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c
index ab4d1377c9f..9e37e24014a 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -469,6 +469,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 14eeab03387..697e3d97c19 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -296,6 +296,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
       return 0;
    }
    /* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index efa3a59f450..806d4e6b04f 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -169,6 +169,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+   case PIPE_CAP_TGSI_TXQS:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 30e6e042fbf..6b7f25085fe 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -214,6 +214,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+   case PIPE_CAP_TGSI_TXQS:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index ab19b26f156..220c2aab9de 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -200,6 +200,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_VERTEXID_NOBASE:
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+   case PIPE_CAP_TGSI_TXQS:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 4ca0b268bde..e669ba2edde 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -195,6 +195,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
         case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
         case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
         case PIPE_CAP_DEPTH_BOUNDS_TEST:
+        case PIPE_CAP_TGSI_TXQS:
             return 0;
 
         /* SWTCL-only features. */
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index a18ec49215c..ca9bd76e9eb 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -341,6 +341,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_VERTEXID_NOBASE:
 	case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
 	case PIPE_CAP_DEPTH_BOUNDS_TEST:
+	case PIPE_CAP_TGSI_TXQS:
 		return 0;
 
 	/* Stream output. */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 9094427cef8..ae1ff7eef43 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -325,6 +325,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
 	case PIPE_CAP_SAMPLER_VIEW_TARGET:
 	case PIPE_CAP_VERTEXID_NOBASE:
+	case PIPE_CAP_TGSI_TXQS:
 		return 0;
 
 	case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 7ca8a67e109..d8606f3c07e 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -246,6 +246,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
       return 0;
    }
    /* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index f2ae40b4fad..44b6f4a4260 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -379,6 +379,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
       return 0;
    }
 
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index 2dee1d40e5f..c4b52e1e61d 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -180,6 +180,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
 	case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
 	case PIPE_CAP_DEPTH_BOUNDS_TEST:
+	case PIPE_CAP_TGSI_TXQS:
                 return 0;
 
                 /* Stream output. */
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 88e37e9f056..47fa82a1e20 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -630,6 +630,7 @@ enum pipe_cap
    PIPE_CAP_TEXTURE_FLOAT_LINEAR,
    PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR,
    PIPE_CAP_DEPTH_BOUNDS_TEST,
+   PIPE_CAP_TGSI_TXQS,
 };
 
 #define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)
-- 
cgit v1.2.3


From 4294db90b1804dd213b0b4b3ff4eb46a5c390c76 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 10 Sep 2015 22:07:27 -0400
Subject: nv50/ir: add support for TXQS tgsi opcode

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.h      |  4 ++--
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 22 +++++++++++++++++-----
 .../nouveau/codegen/nv50_ir_lowering_nv50.cpp      | 20 ++++++++++++++++++++
 src/gallium/drivers/nouveau/nv50/nv50_screen.c     |  2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c     |  2 +-
 5 files changed, 41 insertions(+), 9 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index ba1b0851927..f6e93081e76 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -824,8 +824,8 @@ private:
 
 enum TexQuery
 {
-   TXQ_DIMS,
-   TXQ_TYPE,
+   TXQ_DIMS, /* x, y, z, levels */
+   TXQ_TYPE, /* ?, ?, samples, ? */
    TXQ_SAMPLE_POSITION,
    TXQ_FILTER,
    TXQ_LOD,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index f153674e9ce..c8efaf5947a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -631,6 +631,7 @@ static nv50_ir::operation translateOpcode(uint opcode)
    NV50_IR_OPCODE_CASE(SAD, SAD);
    NV50_IR_OPCODE_CASE(TXF, TXF);
    NV50_IR_OPCODE_CASE(TXQ, TXQ);
+   NV50_IR_OPCODE_CASE(TXQS, TXQ);
    NV50_IR_OPCODE_CASE(TG4, TXG);
    NV50_IR_OPCODE_CASE(LODQ, TXLQ);
 
@@ -1324,7 +1325,7 @@ private:
    void setTexRS(TexInstruction *, unsigned int& s, int R, int S);
    void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
    void handleTXF(Value *dst0[4], int R, int L_M);
-   void handleTXQ(Value *dst0[4], enum TexQuery);
+   void handleTXQ(Value *dst0[4], enum TexQuery, int R);
    void handleLIT(Value *dst0[4]);
    void handleUserClipPlanes();
 
@@ -1795,7 +1796,7 @@ Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S)
 }
 
 void
-Converter::handleTXQ(Value *dst0[4], enum TexQuery query)
+Converter::handleTXQ(Value *dst0[4], enum TexQuery query, int R)
 {
    TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
    tex->tex.query = query;
@@ -1807,9 +1808,12 @@ Converter::handleTXQ(Value *dst0[4], enum TexQuery query)
       tex->tex.mask |= 1 << c;
       tex->setDef(d++, dst0[c]);
    }
-   tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
+   if (query == TXQ_DIMS)
+      tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
+   else
+      tex->setSrc((c = 0), zero);
 
-   setTexRS(tex, ++c, 1, -1);
+   setTexRS(tex, ++c, R, -1);
 
    bb->insertTail(tex);
 }
@@ -2764,7 +2768,15 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
       break;
    case TGSI_OPCODE_TXQ:
    case TGSI_OPCODE_SVIEWINFO:
-      handleTXQ(dst0, TXQ_DIMS);
+      handleTXQ(dst0, TXQ_DIMS, 1);
+      break;
+   case TGSI_OPCODE_TXQS:
+      // The TXQ_TYPE query returns samples in its 3rd arg, but we need it to
+      // be in .x
+      dst0[1] = dst0[2] = dst0[3] = NULL;
+      std::swap(dst0[0], dst0[2]);
+      handleTXQ(dst0, TXQ_TYPE, 0);
+      std::swap(dst0[0], dst0[2]);
       break;
    case TGSI_OPCODE_F2I:
    case TGSI_OPCODE_F2U:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index d87cdfff851..eec502be798 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -617,6 +617,7 @@ private:
    bool handleTXL(TexInstruction *); // hate
    bool handleTXD(TexInstruction *); // these 3
    bool handleTXLQ(TexInstruction *);
+   bool handleTXQ(TexInstruction *);
 
    bool handleCALL(Instruction *);
    bool handlePRECONT(Instruction *);
@@ -974,6 +975,23 @@ NV50LoweringPreSSA::handleTXLQ(TexInstruction *i)
    return true;
 }
 
+bool
+NV50LoweringPreSSA::handleTXQ(TexInstruction *i)
+{
+   Value *ms, *ms_x, *ms_y;
+   if (i->tex.query == TXQ_DIMS)
+      return true;
+   assert(i->tex.query == TXQ_TYPE);
+   assert(i->tex.mask == 4);
+
+   loadTexMsInfo(i->tex.r * 4 * 2, &ms, &ms_x, &ms_y);
+   bld.mkOp2(OP_SHL, TYPE_U32, i->getDef(0), bld.loadImm(NULL, 1), ms);
+   i->bb->remove(i);
+
+   return true;
+}
+
+
 bool
 NV50LoweringPreSSA::handleSET(Instruction *i)
 {
@@ -1333,6 +1351,8 @@ NV50LoweringPreSSA::visit(Instruction *i)
       return handleTXD(i->asTex());
    case OP_TXLQ:
       return handleTXLQ(i->asTex());
+   case OP_TXQ:
+      return handleTXQ(i->asTex());
    case OP_EX2:
       bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0));
       i->setSrc(0, i->getDef(0));
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 6b7f25085fe..9068ae1afaf 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -179,6 +179,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
       return 1;
    case PIPE_CAP_SEAMLESS_CUBE_MAP:
       return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -214,7 +215,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
-   case PIPE_CAP_TGSI_TXQS:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 220c2aab9de..ead43f86262 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -178,6 +178,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
       return 1;
    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
       return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -200,7 +201,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_VERTEXID_NOBASE:
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
-   case PIPE_CAP_TGSI_TXQS:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
-- 
cgit v1.2.3


From ec3fe42b3a7c8d8a33d66aa7e3a43330b9c3c926 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 10 Sep 2015 22:33:34 -0400
Subject: r600g: add support for TXQS tgsi opcode

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Glenn Kennard <glenn.kennard@gmail.com>
---
 src/gallium/drivers/r600/r600_pipe.c   |  2 +-
 src/gallium/drivers/r600/r600_shader.c | 16 ++++++++++++----
 2 files changed, 13 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index ca9bd76e9eb..36d7e68a8f2 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -273,6 +273,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
 	case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
 	case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+	case PIPE_CAP_TGSI_TXQS:
 		return 1;
 
 	case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
@@ -341,7 +342,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_VERTEXID_NOBASE:
 	case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
 	case PIPE_CAP_DEPTH_BOUNDS_TEST:
-	case PIPE_CAP_TGSI_TXQS:
 		return 0;
 
 	/* Stream output. */
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index f83ea62863e..1d905822cde 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -5674,6 +5674,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 	/* Texture fetch instructions can only use gprs as source.
 	 * Also they cannot negate the source or take the absolute value */
 	const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
+					      inst->Instruction.Opcode != TGSI_OPCODE_TXQS &&
                                               tgsi_tex_src_requires_loading(ctx, 0)) ||
 					     read_compressed_msaa || txf_add_offsets;
 
@@ -6419,6 +6420,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 		tex.dst_sel_z = 7;
 		tex.dst_sel_w = 7;
 	}
+	else if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) {
+		tex.dst_sel_x = 3;
+		tex.dst_sel_y = 7;
+		tex.dst_sel_z = 7;
+		tex.dst_sel_w = 7;
+	}
 	else {
 		tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
 		tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
@@ -6427,7 +6434,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 	}
 
 
-	if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) {
+	if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ||
+	    inst->Instruction.Opcode == TGSI_OPCODE_TXQS) {
 		tex.src_sel_x = 4;
 		tex.src_sel_y = 4;
 		tex.src_sel_z = 4;
@@ -7935,7 +7943,7 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]
 	[TGSI_OPCODE_ENDLOOP]	= { ALU_OP0_NOP, tgsi_endloop},
 	[TGSI_OPCODE_ENDSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_TXQ_LZ]	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
-	[104]			= { ALU_OP0_NOP, tgsi_unsupported},
+	[TGSI_OPCODE_TXQS]	= { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
 	[105]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[106]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_NOP]	= { ALU_OP0_NOP, tgsi_unsupported},
@@ -8134,7 +8142,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
 	[TGSI_OPCODE_ENDLOOP]	= { ALU_OP0_NOP, tgsi_endloop},
 	[TGSI_OPCODE_ENDSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_TXQ_LZ]	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
-	[104]			= { ALU_OP0_NOP, tgsi_unsupported},
+	[TGSI_OPCODE_TXQS]	= { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
 	[105]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[106]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_NOP]	= { ALU_OP0_NOP, tgsi_unsupported},
@@ -8356,7 +8364,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
 	[TGSI_OPCODE_ENDLOOP]	= { ALU_OP0_NOP, tgsi_endloop},
 	[TGSI_OPCODE_ENDSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_TXQ_LZ]	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
-	[104]			= { ALU_OP0_NOP, tgsi_unsupported},
+	[TGSI_OPCODE_TXQS]	= { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
 	[105]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[106]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_NOP]	= { ALU_OP0_NOP, tgsi_unsupported},
-- 
cgit v1.2.3


From 67d2d3ba433a43bbf03b80c1d7e140e1fc9de2f6 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 10 Sep 2015 21:44:45 -0400
Subject: st/mesa: emit TXQS, support ARB_shader_texture_image_samples

The image component of the ext is a no-op since there is no image support
in gallium (yet).

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 docs/GL3.txt                               | 2 +-
 docs/relnotes/11.1.0.html                  | 2 +-
 src/mesa/state_tracker/st_extensions.c     | 1 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 6 +++++-
 4 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 97f642bd135..e0a8a371c59 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -194,7 +194,7 @@ GL 4.5, GLSL 4.50:
   GL_ARB_derivative_control                            DONE (i965, nv50, nvc0, r600, radeonsi)
   GL_ARB_direct_state_access                           DONE (all drivers)
   GL_ARB_get_texture_sub_image                         DONE (all drivers)
-  GL_ARB_shader_texture_image_samples                  DONE (i965)
+  GL_ARB_shader_texture_image_samples                  DONE (i965, nv50, nvc0, r600)
   GL_ARB_texture_barrier                               DONE (nv50, nvc0, r600, radeonsi)
   GL_KHR_context_flush_control                         DONE (all - but needs GLX/EGL extension to be useful)
   GL_KHR_robust_buffer_access_behavior                 not started
diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 528905f772b..6abdf7ad855 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -44,7 +44,7 @@ Note: some of the new features are only available with certain drivers.
 </p>
 
 <ul>
-<li>GL_ARB_shader_texture_image_samples on i965</li>
+<li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600</li>
 <li>GL_ARB_texture_query_lod on softpipe</li>
 <li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
 </ul>
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 884761c6c30..e2902923cb7 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -449,6 +449,7 @@ void st_init_extensions(struct pipe_screen *screen,
       { o(ARB_point_sprite),                 PIPE_CAP_POINT_SPRITE                     },
       { o(ARB_seamless_cube_map),            PIPE_CAP_SEAMLESS_CUBE_MAP                },
       { o(ARB_shader_stencil_export),        PIPE_CAP_SHADER_STENCIL_EXPORT            },
+      { o(ARB_shader_texture_image_samples), PIPE_CAP_TGSI_TXQS                        },
       { o(ARB_shader_texture_lod),           PIPE_CAP_SM3                              },
       { o(ARB_shadow),                       PIPE_CAP_TEXTURE_SHADOW_MAP               },
       { o(ARB_texture_buffer_object),        PIPE_CAP_TEXTURE_BUFFER_OBJECTS           },
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 625c4e9c8a6..c3a8c119b1e 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3229,7 +3229,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
       opcode = TGSI_OPCODE_LODQ;
       break;
    case ir_texture_samples:
-      unreachable("unexpected texture op");
+      opcode = TGSI_OPCODE_TXQS;
+      break;
    }
 
    if (ir->projector) {
@@ -3339,6 +3340,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
          emit_asm(ir, TGSI_OPCODE_MOV, result_dst, levels_src);
       } else
          inst = emit_asm(ir, opcode, result_dst, lod_info);
+   } else if (opcode == TGSI_OPCODE_TXQS) {
+      inst = emit_asm(ir, opcode, result_dst);
    } else if (opcode == TGSI_OPCODE_TXF) {
       inst = emit_asm(ir, opcode, result_dst, coord);
    } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) {
@@ -5030,6 +5033,7 @@ compile_tgsi_instruction(struct st_translate *t,
    case TGSI_OPCODE_TXL:
    case TGSI_OPCODE_TXP:
    case TGSI_OPCODE_TXQ:
+   case TGSI_OPCODE_TXQS:
    case TGSI_OPCODE_TXF:
    case TGSI_OPCODE_TEX2:
    case TGSI_OPCODE_TXB2:
-- 
cgit v1.2.3


From 79f1a7ae28c37f77e08e550cd077959a2a1f8341 Mon Sep 17 00:00:00 2001
From: Antia Puentes <apuentes@igalia.com>
Date: Wed, 5 Aug 2015 15:57:33 +0200
Subject: i965/vec4: Fix saturation errors when coalescing registers

If the register types do not match and the instruction
that contains the final destination is saturated, register
coalescing generated non-equivalent code.

This did not happen when using IR because types usually
matched, but it is visible in nir-vec4.

For example,
   mov      vgrf7:D vgrf2:D
   mov.sat  m4:F vgrf7:F

is coalesced to:
   mov.sat  m4:D vgrf2:D

The patch prevents coalescing in such a scenario, unless the
instruction we want to coalesce into is a MOV (without type
conversion implied). In that case, the patch sets the register
types to the type of the final destination.

Shader-db results in HSW (only vec4 instructions shown):

total instructions in shared programs: 1754415 -> 1754416 (0.00%)
instructions in affected programs:     74 -> 75 (1.35%)
helped:                                0
HURT:                                  1
GAINED:                                0
LOST:                                  0

There is only one extra instruction in one of the shaders, which comes from
eliminating a saturation error by preventing register coalescing.

Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 9d863c273e9..181768bddea 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1065,6 +1065,17 @@ vec4_visitor::opt_register_coalesce()
                }
             }
 
+            /* This doesn't handle saturation on the instruction we
+             * want to coalesce away if the register types do not match.
+             * But if scan_inst is a non type-converting 'mov', we can fix
+             * the types later.
+             */
+            if (inst->saturate &&
+                inst->dst.type != scan_inst->dst.type &&
+                !(scan_inst->opcode == BRW_OPCODE_MOV &&
+                  scan_inst->dst.type == scan_inst->src[0].type))
+               break;
+
             /* If we can't handle the swizzle, bail. */
             if (!scan_inst->can_reswizzle(inst->dst.writemask,
                                           inst->src[0].swizzle,
@@ -1142,6 +1153,16 @@ vec4_visitor::opt_register_coalesce()
 	       scan_inst->dst.file = inst->dst.file;
 	       scan_inst->dst.reg = inst->dst.reg;
 	       scan_inst->dst.reg_offset = inst->dst.reg_offset;
+               if (inst->saturate &&
+                   inst->dst.type != scan_inst->dst.type) {
+                  /* If we have reached this point, scan_inst is a non
+                   * type-converting 'mov' and we can modify its register types
+                   * to match the ones in inst. Otherwise, we could have an
+                   * incorrect saturation result.
+                   */
+                  scan_inst->dst.type = inst->dst.type;
+                  scan_inst->src[0].type = inst->src[0].type;
+               }
 	       scan_inst->saturate |= inst->saturate;
 	    }
 	    scan_inst = (vec4_instruction *)scan_inst->next;
-- 
cgit v1.2.3


From b8d2263c83d29f4626ac0fe0316978aa6262aefb Mon Sep 17 00:00:00 2001
From: Antia Puentes <apuentes@igalia.com>
Date: Mon, 14 Sep 2015 09:50:59 +0200
Subject: i965/vec4_nir: Load constants as integers

Loads constants using integer as their register type, as is
done in the FS backend.

No shader-db changes in HSW.

Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91716
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index c21fd02bf68..175d92b6b31 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -450,7 +450,7 @@ void
 vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
 {
    dst_reg reg = dst_reg(GRF, alloc.allocate(1));
-   reg.type =  BRW_REGISTER_TYPE_F;
+   reg.type =  BRW_REGISTER_TYPE_D;
 
    unsigned remaining = brw_writemask_for_size(instr->def.num_components);
 
@@ -471,7 +471,7 @@ vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
       }
 
       reg.writemask = writemask;
-      emit(MOV(reg, src_reg(instr->value.f[i])));
+      emit(MOV(reg, src_reg(instr->value.i[i])));
 
       remaining &= ~writemask;
    }
-- 
cgit v1.2.3


From 64aee8fe9f28646ceb5b9b62db5766365d6da5c7 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 14 Sep 2015 11:21:07 -0400
Subject: vc4: Fix build from recent NIR cleanups.

---
 src/gallium/drivers/vc4/vc4_nir_lower_io.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index b632370cbb2..c401415fda7 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -47,8 +47,7 @@ replace_intrinsic_with_vec4(nir_builder *b, nir_intrinsic_instr *intr,
         /* Replace the old intrinsic with a reference to our reconstructed
          * vec4.
          */
-        nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec),
-                                 ralloc_parent(b->impl));
+        nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec));
         nir_instr_remove(&intr->instr);
 }
 
-- 
cgit v1.2.3


From 5877a594d54fdd2b3aa329f4d35b3491a7ee8a33 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 10 Sep 2015 23:58:17 -0400
Subject: nvc0/ir: start offset at texBindBase for txq, like regular texturing

Curiously, this has no actual effect. I think it's because the first 8
textures are bound in multiple slots for some reason. However, it seems
prudent to use these the same way as regular texturing, especially in
the case where there are more than 8 textures bound.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index b1f406585a9..b3fc73a2beb 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -962,11 +962,14 @@ NVC0LoweringPass::handleTXD(TexInstruction *txd)
 bool
 NVC0LoweringPass::handleTXQ(TexInstruction *txq)
 {
+   const int chipset = prog->getTarget()->getChipset();
+   if (chipset >= NVISA_GK104_CHIPSET && txq->tex.rIndirectSrc < 0)
+      txq->tex.r += prog->driver->io.texBindBase / 4;
+
    if (txq->tex.rIndirectSrc < 0)
       return true;
 
    Value *ticRel = txq->getIndirectR();
-   const int chipset = prog->getTarget()->getChipset();
 
    txq->setIndirectS(NULL);
    txq->tex.sIndirectSrc = -1;
-- 
cgit v1.2.3


From 64e25167ed284619dacab42fdada0bb0fea71321 Mon Sep 17 00:00:00 2001
From: Anuj Phogat <anuj.phogat@gmail.com>
Date: Fri, 24 Jul 2015 15:53:58 -0700
Subject: meta: Abort meta pbo path if TexSubImage needs signed unsigned
 conversion

See the similar fix for ReadPixels in mesa commit 0d20790. Jason suggested
we need that for TexSubImage as well.

Cc: <mesa-stable@lists.freedesktop.org>
Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/mesa/drivers/common/meta_tex_subimage.c | 43 +++++++++++++++++------------
 1 file changed, 25 insertions(+), 18 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/common/meta_tex_subimage.c b/src/mesa/drivers/common/meta_tex_subimage.c
index 33c22aa139d..181dde9d045 100644
--- a/src/mesa/drivers/common/meta_tex_subimage.c
+++ b/src/mesa/drivers/common/meta_tex_subimage.c
@@ -45,6 +45,24 @@
 #include "uniforms.h"
 #include "varray.h"
 
+static bool
+need_signed_unsigned_int_conversion(mesa_format mesaFormat,
+                                    GLenum format, GLenum type)
+{
+   const GLenum mesaFormatType = _mesa_get_format_datatype(mesaFormat);
+   const bool is_format_integer = _mesa_is_enum_format_integer(format);
+   return (mesaFormatType == GL_INT &&
+           is_format_integer &&
+           (type == GL_UNSIGNED_INT ||
+            type == GL_UNSIGNED_SHORT ||
+            type == GL_UNSIGNED_BYTE)) ||
+          (mesaFormatType == GL_UNSIGNED_INT &&
+           is_format_integer &&
+           (type == GL_INT ||
+            type == GL_SHORT ||
+            type == GL_BYTE));
+}
+
 static struct gl_texture_image *
 create_texture_for_pbo(struct gl_context *ctx,
                        bool create_pbo, GLenum pbo_target,
@@ -183,6 +201,13 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims,
    if (ctx->_ImageTransferState)
       return false;
 
+   /* This function relies on BlitFramebuffer to fill in the pixel data for
+    * glTex[Sub]Image*D. But BlitFramebuffer doesn't support signed to
+    * unsigned or unsigned to signed integer conversions.
+    */
+   if (need_signed_unsigned_int_conversion(tex_image->TexFormat, format, type))
+      return false;
+
    /* For arrays, use a tall (height * depth) 2D texture but taking into
     * account the inter-image padding specified with the image height packing
     * property.
@@ -266,24 +291,6 @@ fail:
    return success;
 }
 
-static bool
-need_signed_unsigned_int_conversion(mesa_format rbFormat,
-                                    GLenum format, GLenum type)
-{
-   const GLenum srcType = _mesa_get_format_datatype(rbFormat);
-   const bool is_dst_format_integer = _mesa_is_enum_format_integer(format);
-   return (srcType == GL_INT &&
-           is_dst_format_integer &&
-           (type == GL_UNSIGNED_INT ||
-            type == GL_UNSIGNED_SHORT ||
-            type == GL_UNSIGNED_BYTE)) ||
-          (srcType == GL_UNSIGNED_INT &&
-           is_dst_format_integer &&
-           (type == GL_INT ||
-            type == GL_SHORT ||
-            type == GL_BYTE));
-}
-
 bool
 _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
                               struct gl_texture_image *tex_image,
-- 
cgit v1.2.3


From dc70c86b9b485cb5006a55cc2efd1f154dbfd469 Mon Sep 17 00:00:00 2001
From: Kristian Høgsberg Kristensen <krh@bitplanet.net>
Date: Fri, 4 Sep 2015 16:35:34 -0700
Subject: i965: Move compute shader code around
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This moves the compute shader code around in order to make the way the
code is split up more consistent. There should be no functional changes.
Typically we have a few files per stage:

    brw_vs.c, brw_wm.c brw_gs.c:

        code to drive code generation and implement precompiling and
        cache search.

    genX_<stage>_state.c

        gen specific implementation of the state emission for the shader
        stage.

The brw_*_emit() functions are all in the same files as the visitor
classes they use (with the exception of VS, which may use either vec4 or
fs).

To make compute follow this convention, we move the brw_cs_emit()
function into brw_fs.cpp. We can then rename brw_cs.cpp to brw_cs.c and
do this in C like the other similar files.  Finally, move state setup
and atoms to gen7_cs_state.c.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Signed-off-by: Kristian Høgsberg Kristensen <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/Makefile.sources |   3 +-
 src/mesa/drivers/dri/i965/brw_cs.c         | 194 +++++++++
 src/mesa/drivers/dri/i965/brw_cs.cpp       | 664 -----------------------------
 src/mesa/drivers/dri/i965/brw_cs.h         |  10 +-
 src/mesa/drivers/dri/i965/brw_fs.cpp       | 143 +++++++
 src/mesa/drivers/dri/i965/gen7_cs_state.c  | 347 +++++++++++++++
 6 files changed, 695 insertions(+), 666 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_cs.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_cs.cpp
 create mode 100644 src/mesa/drivers/dri/i965/gen7_cs_state.c

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index dfdad75329d..b53802cf18d 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -21,7 +21,7 @@ i965_FILES = \
 	brw_conditional_render.c \
 	brw_context.c \
 	brw_context.h \
-	brw_cs.cpp \
+	brw_cs.c \
 	brw_cs.h \
 	brw_cubemap_normalize.cpp \
 	brw_curbe.c \
@@ -163,6 +163,7 @@ i965_FILES = \
 	gen6_wm_state.c \
 	gen7_blorp.cpp \
 	gen7_blorp.h \
+	gen7_cs_state.c \
 	gen7_disable.c \
 	gen7_gs_state.c \
 	gen7_misc_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c
new file mode 100644
index 00000000000..012c46698e7
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_cs.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2014 - 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/ralloc.h"
+#include "brw_context.h"
+#include "brw_cs.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+#include "brw_shader.h"
+#include "intel_mipmap_tree.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+
+bool
+brw_cs_prog_data_compare(const void *in_a, const void *in_b)
+{
+   const struct brw_cs_prog_data *a =
+      (const struct brw_cs_prog_data *)in_a;
+   const struct brw_cs_prog_data *b =
+      (const struct brw_cs_prog_data *)in_b;
+
+   /* Compare the base structure. */
+   if (!brw_stage_prog_data_compare(&a->base, &b->base))
+      return false;
+
+   /* Compare the rest of the structure. */
+   const unsigned offset = sizeof(struct brw_stage_prog_data);
+   if (memcmp(((char *) a) + offset, ((char *) b) + offset,
+              sizeof(struct brw_cs_prog_data) - offset))
+      return false;
+
+   return true;
+}
+
+static bool
+brw_codegen_cs_prog(struct brw_context *brw,
+                    struct gl_shader_program *prog,
+                    struct brw_compute_program *cp,
+                    struct brw_cs_prog_key *key)
+{
+   struct gl_context *ctx = &brw->ctx;
+   const GLuint *program;
+   void *mem_ctx = ralloc_context(NULL);
+   GLuint program_size;
+   struct brw_cs_prog_data prog_data;
+
+   struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE];
+   assert (cs);
+
+   memset(&prog_data, 0, sizeof(prog_data));
+
+   /* Allocate the references to the uniforms that will end up in the
+    * prog_data associated with the compiled program, and which will be freed
+    * by the state cache.
+    */
+   int param_count = cs->num_uniform_components +
+                     cs->NumImages * BRW_IMAGE_PARAM_SIZE;
+
+   /* The backend also sometimes adds params for texture size. */
+   param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
+   prog_data.base.param =
+      rzalloc_array(NULL, const gl_constant_value *, param_count);
+   prog_data.base.pull_param =
+      rzalloc_array(NULL, const gl_constant_value *, param_count);
+   prog_data.base.image_param =
+      rzalloc_array(NULL, struct brw_image_param, cs->NumImages);
+   prog_data.base.nr_params = param_count;
+   prog_data.base.nr_image_params = cs->NumImages;
+
+   program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
+                         &cp->program, prog, &program_size);
+   if (program == NULL) {
+      ralloc_free(mem_ctx);
+      return false;
+   }
+
+   if (prog_data.base.total_scratch) {
+      brw_get_scratch_bo(brw, &brw->cs.base.scratch_bo,
+                         prog_data.base.total_scratch * brw->max_cs_threads);
+   }
+
+   if (unlikely(INTEL_DEBUG & DEBUG_CS))
+      fprintf(stderr, "\n");
+
+   brw_upload_cache(&brw->cache, BRW_CACHE_CS_PROG,
+                    key, sizeof(*key),
+                    program, program_size,
+                    &prog_data, sizeof(prog_data),
+                    &brw->cs.base.prog_offset, &brw->cs.prog_data);
+   ralloc_free(mem_ctx);
+
+   return true;
+}
+
+
+static void
+brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key)
+{
+   struct gl_context *ctx = &brw->ctx;
+   /* BRW_NEW_COMPUTE_PROGRAM */
+   const struct brw_compute_program *cp =
+      (struct brw_compute_program *) brw->compute_program;
+   const struct gl_program *prog = (struct gl_program *) cp;
+
+   memset(key, 0, sizeof(*key));
+
+   /* _NEW_TEXTURE */
+   brw_populate_sampler_prog_key_data(ctx, prog, brw->cs.base.sampler_count,
+                                      &key->tex);
+
+   /* The unique compute program ID */
+   key->program_string_id = cp->id;
+}
+
+
+void
+brw_upload_cs_prog(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->ctx;
+   struct brw_cs_prog_key key;
+   struct brw_compute_program *cp = (struct brw_compute_program *)
+      brw->compute_program;
+
+   if (!cp)
+      return;
+
+   if (!brw_state_dirty(brw, _NEW_TEXTURE, BRW_NEW_COMPUTE_PROGRAM))
+      return;
+
+   brw->cs.base.sampler_count =
+      _mesa_fls(ctx->ComputeProgram._Current->Base.SamplersUsed);
+
+   brw_cs_populate_key(brw, &key);
+
+   if (!brw_search_cache(&brw->cache, BRW_CACHE_CS_PROG,
+                         &key, sizeof(key),
+                         &brw->cs.base.prog_offset, &brw->cs.prog_data)) {
+      bool success =
+         brw_codegen_cs_prog(brw,
+                             ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE],
+                             cp, &key);
+      (void) success;
+      assert(success);
+   }
+   brw->cs.base.prog_data = &brw->cs.prog_data->base;
+}
+
+
+bool
+brw_cs_precompile(struct gl_context *ctx,
+                  struct gl_shader_program *shader_prog,
+                  struct gl_program *prog)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_cs_prog_key key;
+
+   struct gl_compute_program *cp = (struct gl_compute_program *) prog;
+   struct brw_compute_program *bcp = brw_compute_program(cp);
+
+   memset(&key, 0, sizeof(key));
+   key.program_string_id = bcp->id;
+
+   brw_setup_tex_for_precompile(brw, &key.tex, prog);
+
+   uint32_t old_prog_offset = brw->cs.base.prog_offset;
+   struct brw_cs_prog_data *old_prog_data = brw->cs.prog_data;
+
+   bool success = brw_codegen_cs_prog(brw, shader_prog, bcp, &key);
+
+   brw->cs.base.prog_offset = old_prog_offset;
+   brw->cs.prog_data = old_prog_data;
+
+   return success;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp
deleted file mode 100644
index 04f3e588436..00000000000
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ /dev/null
@@ -1,664 +0,0 @@
-/*
- * Copyright (c) 2014 - 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-
-#include "util/ralloc.h"
-#include "brw_context.h"
-#include "brw_cs.h"
-#include "brw_fs.h"
-#include "brw_eu.h"
-#include "brw_wm.h"
-#include "intel_mipmap_tree.h"
-#include "brw_state.h"
-#include "intel_batchbuffer.h"
-
-extern "C"
-bool
-brw_cs_prog_data_compare(const void *in_a, const void *in_b)
-{
-   const struct brw_cs_prog_data *a =
-      (const struct brw_cs_prog_data *)in_a;
-   const struct brw_cs_prog_data *b =
-      (const struct brw_cs_prog_data *)in_b;
-
-   /* Compare the base structure. */
-   if (!brw_stage_prog_data_compare(&a->base, &b->base))
-      return false;
-
-   /* Compare the rest of the structure. */
-   const unsigned offset = sizeof(struct brw_stage_prog_data);
-   if (memcmp(((char *) a) + offset, ((char *) b) + offset,
-              sizeof(struct brw_cs_prog_data) - offset))
-      return false;
-
-   return true;
-}
-
-
-static const unsigned *
-brw_cs_emit(struct brw_context *brw,
-            void *mem_ctx,
-            const struct brw_cs_prog_key *key,
-            struct brw_cs_prog_data *prog_data,
-            struct gl_compute_program *cp,
-            struct gl_shader_program *prog,
-            unsigned *final_assembly_size)
-{
-   bool start_busy = false;
-   double start_time = 0;
-
-   if (unlikely(brw->perf_debug)) {
-      start_busy = (brw->batch.last_bo &&
-                    drm_intel_bo_busy(brw->batch.last_bo));
-      start_time = get_time();
-   }
-
-   struct brw_shader *shader =
-      (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
-
-   if (unlikely(INTEL_DEBUG & DEBUG_CS))
-      brw_dump_ir("compute", prog, &shader->base, &cp->Base);
-
-   prog_data->local_size[0] = cp->LocalSize[0];
-   prog_data->local_size[1] = cp->LocalSize[1];
-   prog_data->local_size[2] = cp->LocalSize[2];
-   unsigned local_workgroup_size =
-      cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];
-
-   cfg_t *cfg = NULL;
-   const char *fail_msg = NULL;
-
-   int st_index = -1;
-   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
-      st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS);
-
-   /* Now the main event: Visit the shader IR and generate our CS IR for it.
-    */
-   fs_visitor v8(brw->intelScreen->compiler, brw,
-                 mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
-                 &cp->Base, 8, st_index);
-   if (!v8.run_cs()) {
-      fail_msg = v8.fail_msg;
-   } else if (local_workgroup_size <= 8 * brw->max_cs_threads) {
-      cfg = v8.cfg;
-      prog_data->simd_size = 8;
-   }
-
-   fs_visitor v16(brw->intelScreen->compiler, brw,
-                  mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
-                  &cp->Base, 16, st_index);
-   if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
-       !fail_msg && !v8.simd16_unsupported &&
-       local_workgroup_size <= 16 * brw->max_cs_threads) {
-      /* Try a SIMD16 compile */
-      v16.import_uniforms(&v8);
-      if (!v16.run_cs()) {
-         perf_debug("SIMD16 shader failed to compile: %s", v16.fail_msg);
-         if (!cfg) {
-            fail_msg =
-               "Couldn't generate SIMD16 program and not "
-               "enough threads for SIMD8";
-         }
-      } else {
-         cfg = v16.cfg;
-         prog_data->simd_size = 16;
-      }
-   }
-
-   if (unlikely(cfg == NULL)) {
-      assert(fail_msg);
-      prog->LinkStatus = false;
-      ralloc_strcat(&prog->InfoLog, fail_msg);
-      _mesa_problem(NULL, "Failed to compile compute shader: %s\n",
-                    fail_msg);
-      return NULL;
-   }
-
-   fs_generator g(brw->intelScreen->compiler, brw,
-                  mem_ctx, (void*) key, &prog_data->base, &cp->Base,
-                  v8.promoted_constants, v8.runtime_check_aads_emit, "CS");
-   if (INTEL_DEBUG & DEBUG_CS) {
-      char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d",
-                                   prog->Label ? prog->Label : "unnamed",
-                                   prog->Name);
-      g.enable_debug(name);
-   }
-
-   g.generate_code(cfg, prog_data->simd_size);
-
-   if (unlikely(brw->perf_debug) && shader) {
-      if (shader->compiled_once) {
-         _mesa_problem(&brw->ctx, "CS programs shouldn't need recompiles");
-      }
-      shader->compiled_once = true;
-
-      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
-         perf_debug("CS compile took %.03f ms and stalled the GPU\n",
-                    (get_time() - start_time) * 1000);
-      }
-   }
-
-   return g.get_assembly(final_assembly_size);
-}
-
-static bool
-brw_codegen_cs_prog(struct brw_context *brw,
-                    struct gl_shader_program *prog,
-                    struct brw_compute_program *cp,
-                    struct brw_cs_prog_key *key)
-{
-   struct gl_context *ctx = &brw->ctx;
-   const GLuint *program;
-   void *mem_ctx = ralloc_context(NULL);
-   GLuint program_size;
-   struct brw_cs_prog_data prog_data;
-
-   struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE];
-   assert (cs);
-
-   memset(&prog_data, 0, sizeof(prog_data));
-
-   /* Allocate the references to the uniforms that will end up in the
-    * prog_data associated with the compiled program, and which will be freed
-    * by the state cache.
-    */
-   int param_count = cs->num_uniform_components +
-                     cs->NumImages * BRW_IMAGE_PARAM_SIZE;
-
-   /* The backend also sometimes adds params for texture size. */
-   param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
-   prog_data.base.param =
-      rzalloc_array(NULL, const gl_constant_value *, param_count);
-   prog_data.base.pull_param =
-      rzalloc_array(NULL, const gl_constant_value *, param_count);
-   prog_data.base.image_param =
-      rzalloc_array(NULL, struct brw_image_param, cs->NumImages);
-   prog_data.base.nr_params = param_count;
-   prog_data.base.nr_image_params = cs->NumImages;
-
-   program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
-                         &cp->program, prog, &program_size);
-   if (program == NULL) {
-      ralloc_free(mem_ctx);
-      return false;
-   }
-
-   if (prog_data.base.total_scratch) {
-      brw_get_scratch_bo(brw, &brw->cs.base.scratch_bo,
-                         prog_data.base.total_scratch * brw->max_cs_threads);
-   }
-
-   if (unlikely(INTEL_DEBUG & DEBUG_CS))
-      fprintf(stderr, "\n");
-
-   brw_upload_cache(&brw->cache, BRW_CACHE_CS_PROG,
-                    key, sizeof(*key),
-                    program, program_size,
-                    &prog_data, sizeof(prog_data),
-                    &brw->cs.base.prog_offset, &brw->cs.prog_data);
-   ralloc_free(mem_ctx);
-
-   return true;
-}
-
-
-static void
-brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key)
-{
-   struct gl_context *ctx = &brw->ctx;
-   /* BRW_NEW_COMPUTE_PROGRAM */
-   const struct brw_compute_program *cp =
-      (struct brw_compute_program *) brw->compute_program;
-   const struct gl_program *prog = (struct gl_program *) cp;
-
-   memset(key, 0, sizeof(*key));
-
-   /* _NEW_TEXTURE */
-   brw_populate_sampler_prog_key_data(ctx, prog, brw->cs.base.sampler_count,
-                                      &key->tex);
-
-   /* The unique compute program ID */
-   key->program_string_id = cp->id;
-}
-
-
-extern "C"
-void
-brw_upload_cs_prog(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-   struct brw_cs_prog_key key;
-   struct brw_compute_program *cp = (struct brw_compute_program *)
-      brw->compute_program;
-
-   if (!cp)
-      return;
-
-   if (!brw_state_dirty(brw, _NEW_TEXTURE, BRW_NEW_COMPUTE_PROGRAM))
-      return;
-
-   brw->cs.base.sampler_count =
-      _mesa_fls(ctx->ComputeProgram._Current->Base.SamplersUsed);
-
-   brw_cs_populate_key(brw, &key);
-
-   if (!brw_search_cache(&brw->cache, BRW_CACHE_CS_PROG,
-                         &key, sizeof(key),
-                         &brw->cs.base.prog_offset, &brw->cs.prog_data)) {
-      bool success =
-         brw_codegen_cs_prog(brw,
-                             ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE],
-                             cp, &key);
-      (void) success;
-      assert(success);
-   }
-   brw->cs.base.prog_data = &brw->cs.prog_data->base;
-}
-
-
-extern "C" bool
-brw_cs_precompile(struct gl_context *ctx,
-                  struct gl_shader_program *shader_prog,
-                  struct gl_program *prog)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_cs_prog_key key;
-
-   struct gl_compute_program *cp = (struct gl_compute_program *) prog;
-   struct brw_compute_program *bcp = brw_compute_program(cp);
-
-   memset(&key, 0, sizeof(key));
-   key.program_string_id = bcp->id;
-
-   brw_setup_tex_for_precompile(brw, &key.tex, prog);
-
-   uint32_t old_prog_offset = brw->cs.base.prog_offset;
-   struct brw_cs_prog_data *old_prog_data = brw->cs.prog_data;
-
-   bool success = brw_codegen_cs_prog(brw, shader_prog, bcp, &key);
-
-   brw->cs.base.prog_offset = old_prog_offset;
-   brw->cs.prog_data = old_prog_data;
-
-   return success;
-}
-
-
-static unsigned
-get_cs_thread_count(const struct brw_cs_prog_data *cs_prog_data)
-{
-   const unsigned simd_size = cs_prog_data->simd_size;
-   unsigned group_size = cs_prog_data->local_size[0] *
-      cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
-
-   return (group_size + simd_size - 1) / simd_size;
-}
-
-
-static void
-brw_upload_cs_state(struct brw_context *brw)
-{
-   if (!brw->cs.prog_data)
-      return;
-
-   uint32_t offset;
-   uint32_t *desc = (uint32_t*) brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
-                                                8 * 4, 64, &offset);
-   struct gl_program *prog = (struct gl_program *) brw->compute_program;
-   struct brw_stage_state *stage_state = &brw->cs.base;
-   struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
-   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
-
-   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
-      brw->vtbl.emit_buffer_surface_state(
-         brw, &stage_state->surf_offset[
-                 prog_data->binding_table.shader_time_start],
-         brw->shader_time.bo, 0, BRW_SURFACEFORMAT_RAW,
-         brw->shader_time.bo->size, 1, true);
-   }
-
-   uint32_t *bind = (uint32_t*) brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
-                                            prog_data->binding_table.size_bytes,
-                                            32, &stage_state->bind_bo_offset);
-
-   unsigned local_id_dwords = 0;
-
-   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
-      local_id_dwords =
-         brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size);
-   }
-
-   unsigned push_constant_data_size =
-      (prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value);
-   unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
-   unsigned push_constant_regs = reg_aligned_constant_size / 32;
-   unsigned threads = get_cs_thread_count(cs_prog_data);
-
-   uint32_t dwords = brw->gen < 8 ? 8 : 9;
-   BEGIN_BATCH(dwords);
-   OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2));
-
-   if (prog_data->total_scratch) {
-      if (brw->gen >= 8)
-         OUT_RELOC64(stage_state->scratch_bo,
-                     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                     ffs(prog_data->total_scratch) - 11);
-      else
-         OUT_RELOC(stage_state->scratch_bo,
-                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                   ffs(prog_data->total_scratch) - 11);
-   } else {
-      OUT_BATCH(0);
-      if (brw->gen >= 8)
-         OUT_BATCH(0);
-   }
-
-   const uint32_t vfe_num_urb_entries = brw->gen >= 8 ? 2 : 0;
-   const uint32_t vfe_gpgpu_mode =
-      brw->gen == 7 ? SET_FIELD(1, GEN7_MEDIA_VFE_STATE_GPGPU_MODE) : 0;
-   OUT_BATCH(SET_FIELD(brw->max_cs_threads - 1, MEDIA_VFE_STATE_MAX_THREADS) |
-             SET_FIELD(vfe_num_urb_entries, MEDIA_VFE_STATE_URB_ENTRIES) |
-             SET_FIELD(1, MEDIA_VFE_STATE_RESET_GTW_TIMER) |
-             SET_FIELD(1, MEDIA_VFE_STATE_BYPASS_GTW) |
-             vfe_gpgpu_mode);
-
-   OUT_BATCH(0);
-   const uint32_t vfe_urb_allocation = brw->gen >= 8 ? 2 : 0;
-
-   /* We are uploading duplicated copies of push constant uniforms for each
-    * thread. Although the local id data needs to vary per thread, it won't
-    * change for other uniform data. Unfortunately this duplication is
-    * required for gen7. As of Haswell, this duplication can be avoided, but
-    * this older mechanism with duplicated data continues to work.
-    *
-    * FINISHME: As of Haswell, we could make use of the
-    * INTERFACE_DESCRIPTOR_DATA "Cross-Thread Constant Data Read Length" field
-    * to only store one copy of uniform data.
-    *
-    * FINISHME: Broadwell adds a new alternative "Indirect Payload Storage"
-    * which is described in the GPGPU_WALKER command and in the Broadwell PRM
-    * Volume 7: 3D Media GPGPU, under Media GPGPU Pipeline => Mode of
-    * Operations => GPGPU Mode => Indirect Payload Storage.
-    *
-    * Note: The constant data is built in brw_upload_cs_push_constants below.
-    */
-   const uint32_t vfe_curbe_allocation = push_constant_regs * threads;
-   OUT_BATCH(SET_FIELD(vfe_urb_allocation, MEDIA_VFE_STATE_URB_ALLOC) |
-             SET_FIELD(vfe_curbe_allocation, MEDIA_VFE_STATE_CURBE_ALLOC));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-
-   if (reg_aligned_constant_size > 0) {
-      BEGIN_BATCH(4);
-      OUT_BATCH(MEDIA_CURBE_LOAD << 16 | (4 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(reg_aligned_constant_size * threads);
-      OUT_BATCH(stage_state->push_const_offset);
-      ADVANCE_BATCH();
-   }
-
-   /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
-   memcpy(bind, stage_state->surf_offset,
-          prog_data->binding_table.size_bytes);
-
-   memset(desc, 0, 8 * 4);
-
-   int dw = 0;
-   desc[dw++] = brw->cs.base.prog_offset;
-   if (brw->gen >= 8)
-      desc[dw++] = 0; /* Kernel Start Pointer High */
-   desc[dw++] = 0;
-   desc[dw++] = stage_state->sampler_offset |
-      ((stage_state->sampler_count + 3) / 4);
-   desc[dw++] = stage_state->bind_bo_offset;
-   desc[dw++] = SET_FIELD(push_constant_regs, MEDIA_CURBE_READ_LENGTH);
-   const uint32_t media_threads =
-      brw->gen >= 8 ?
-      SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :
-      SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT);
-   assert(threads <= brw->max_cs_threads);
-   desc[dw++] =
-      SET_FIELD(cs_prog_data->uses_barrier, MEDIA_BARRIER_ENABLE) |
-      media_threads;
-
-   BEGIN_BATCH(4);
-   OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2));
-   OUT_BATCH(0);
-   OUT_BATCH(8 * 4);
-   OUT_BATCH(offset);
-   ADVANCE_BATCH();
-}
-
-
-extern "C"
-const struct brw_tracked_state brw_cs_state = {
-   /* explicit initialisers aren't valid C++, comment
-    * them for documentation purposes */
-   /* .dirty = */{
-      /* .mesa = */ _NEW_PROGRAM_CONSTANTS,
-      /* .brw = */  BRW_NEW_CS_PROG_DATA |
-                    BRW_NEW_PUSH_CONSTANT_ALLOCATION,
-   },
-   /* .emit = */ brw_upload_cs_state
-};
-
-
-/**
- * We are building the local ID push constant data using the simplest possible
- * method. We simply push the local IDs directly as they should appear in the
- * registers for the uvec3 gl_LocalInvocationID variable.
- *
- * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
- * registers worth of push constant space.
- *
- * Note: Any updates to brw_cs_prog_local_id_payload_dwords,
- * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need
- * to coordinated.
- *
- * FINISHME: There are a few easy optimizations to consider.
- *
- * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
- *    no need for using push constant space for that dimension.
- *
- * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
- *    easily use 16-bit words rather than 32-bit dwords in the push constant
- *    data.
- *
- * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
- *    conveying the data, and thereby reduce push constant usage.
- *
- */
-unsigned
-brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog,
-                                    unsigned dispatch_width)
-{
-   return 3 * dispatch_width;
-}
-
-
-static void
-fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
-                      void *buffer, unsigned *x, unsigned *y, unsigned *z)
-{
-   uint32_t *param = (uint32_t *)buffer;
-   for (unsigned i = 0; i < cs_prog_data->simd_size; i++) {
-      param[0 * cs_prog_data->simd_size + i] = *x;
-      param[1 * cs_prog_data->simd_size + i] = *y;
-      param[2 * cs_prog_data->simd_size + i] = *z;
-
-      (*x)++;
-      if (*x == cs_prog_data->local_size[0]) {
-         *x = 0;
-         (*y)++;
-         if (*y == cs_prog_data->local_size[1]) {
-            *y = 0;
-            (*z)++;
-            if (*z == cs_prog_data->local_size[2])
-               *z = 0;
-         }
-      }
-   }
-}
-
-
-fs_reg *
-fs_visitor::emit_cs_local_invocation_id_setup()
-{
-   assert(stage == MESA_SHADER_COMPUTE);
-
-   fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));
-
-   struct brw_reg src =
-      brw_vec8_grf(payload.local_invocation_id_reg, 0);
-   src = retype(src, BRW_REGISTER_TYPE_UD);
-   bld.MOV(*reg, src);
-   src.nr += dispatch_width / 8;
-   bld.MOV(offset(*reg, bld, 1), src);
-   src.nr += dispatch_width / 8;
-   bld.MOV(offset(*reg, bld, 2), src);
-
-   return reg;
-}
-
-
-/**
- * Creates a region containing the push constants for the CS on gen7+.
- *
- * Push constants are constant values (such as GLSL uniforms) that are
- * pre-loaded into a shader stage's register space at thread spawn time.
- *
- * For other stages, see brw_curbe.c:brw_upload_constant_buffer for the
- * equivalent gen4/5 code and gen6_vs_state.c:gen6_upload_push_constants for
- * gen6+.
- */
-static void
-brw_upload_cs_push_constants(struct brw_context *brw,
-                             const struct gl_program *prog,
-                             const struct brw_cs_prog_data *cs_prog_data,
-                             struct brw_stage_state *stage_state,
-                             enum aub_state_struct_type type)
-{
-   struct gl_context *ctx = &brw->ctx;
-   const struct brw_stage_prog_data *prog_data =
-      (brw_stage_prog_data*) cs_prog_data;
-   unsigned local_id_dwords = 0;
-
-   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
-      local_id_dwords =
-         brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size);
-   }
-
-   /* Updates the ParamaterValues[i] pointers for all parameters of the
-    * basic type of PROGRAM_STATE_VAR.
-    */
-   /* XXX: Should this happen somewhere before to get our state flag set? */
-   _mesa_load_state_parameters(ctx, prog->Parameters);
-
-   if (prog_data->nr_params == 0 && local_id_dwords == 0) {
-      stage_state->push_const_size = 0;
-   } else {
-      gl_constant_value *param;
-      unsigned i, t;
-
-      const unsigned push_constant_data_size =
-         (local_id_dwords + prog_data->nr_params) * sizeof(gl_constant_value);
-      const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
-      const unsigned param_aligned_count =
-         reg_aligned_constant_size / sizeof(*param);
-
-      unsigned threads = get_cs_thread_count(cs_prog_data);
-
-      param = (gl_constant_value*)
-         brw_state_batch(brw, type,
-                         reg_aligned_constant_size * threads,
-                         32, &stage_state->push_const_offset);
-      assert(param);
-
-      STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
-
-      /* _NEW_PROGRAM_CONSTANTS */
-      unsigned x = 0, y = 0, z = 0;
-      for (t = 0; t < threads; t++) {
-         gl_constant_value *next_param = &param[t * param_aligned_count];
-         if (local_id_dwords > 0) {
-            fill_local_id_payload(cs_prog_data, (void*)next_param, &x, &y, &z);
-            next_param += local_id_dwords;
-         }
-         for (i = 0; i < prog_data->nr_params; i++) {
-            next_param[i] = *prog_data->param[i];
-         }
-      }
-
-      stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;
-   }
-}
-
-
-static void
-gen7_upload_cs_push_constants(struct brw_context *brw)
-{
-   struct brw_stage_state *stage_state = &brw->cs.base;
-
-   /* BRW_NEW_COMPUTE_PROGRAM */
-   const struct brw_compute_program *cp =
-      (struct brw_compute_program *) brw->compute_program;
-
-   if (cp) {
-      /* CACHE_NEW_CS_PROG */
-      struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
-
-      brw_upload_cs_push_constants(brw, &cp->program.Base, cs_prog_data,
-                                   stage_state, AUB_TRACE_WM_CONSTANTS);
-   }
-}
-
-
-const struct brw_tracked_state gen7_cs_push_constants = {
-   /* .dirty = */{
-      /* .mesa  = */ _NEW_PROGRAM_CONSTANTS,
-      /* .brw   = */ BRW_NEW_COMPUTE_PROGRAM |
-                     BRW_NEW_PUSH_CONSTANT_ALLOCATION,
-   },
-   /* .emit = */ gen7_upload_cs_push_constants,
-};
-
-
-fs_reg *
-fs_visitor::emit_cs_work_group_id_setup()
-{
-   assert(stage == MESA_SHADER_COMPUTE);
-
-   fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));
-
-   struct brw_reg r0_1(retype(brw_vec1_grf(0, 1), BRW_REGISTER_TYPE_UD));
-   struct brw_reg r0_6(retype(brw_vec1_grf(0, 6), BRW_REGISTER_TYPE_UD));
-   struct brw_reg r0_7(retype(brw_vec1_grf(0, 7), BRW_REGISTER_TYPE_UD));
-
-   bld.MOV(*reg, r0_1);
-   bld.MOV(offset(*reg, bld, 1), r0_6);
-   bld.MOV(offset(*reg, bld, 2), r0_7);
-
-   return reg;
-}
diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h
index 08310df77c1..746fb05166c 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.h
+++ b/src/mesa/drivers/dri/i965/brw_cs.h
@@ -41,12 +41,20 @@ bool brw_cs_prog_data_compare(const void *a, const void *b);
 void
 brw_upload_cs_prog(struct brw_context *brw);
 
-#ifdef __cplusplus
+const unsigned *
+brw_cs_emit(struct brw_context *brw,
+            void *mem_ctx,
+            const struct brw_cs_prog_key *key,
+            struct brw_cs_prog_data *prog_data,
+            struct gl_compute_program *cp,
+            struct gl_shader_program *prog,
+            unsigned *final_assembly_size);
 
 unsigned
 brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog,
                                     unsigned dispatch_width);
 
+#ifdef __cplusplus
 }
 #endif
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 10417c87484..1fc9175724d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -5297,3 +5297,146 @@ brw_fs_precompile(struct gl_context *ctx,
 
    return success;
 }
+
+fs_reg *
+fs_visitor::emit_cs_local_invocation_id_setup()
+{
+   assert(stage == MESA_SHADER_COMPUTE);
+
+   fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));
+
+   struct brw_reg src =
+      brw_vec8_grf(payload.local_invocation_id_reg, 0);
+   src = retype(src, BRW_REGISTER_TYPE_UD);
+   bld.MOV(*reg, src);
+   src.nr += dispatch_width / 8;
+   bld.MOV(offset(*reg, bld, 1), src);
+   src.nr += dispatch_width / 8;
+   bld.MOV(offset(*reg, bld, 2), src);
+
+   return reg;
+}
+
+fs_reg *
+fs_visitor::emit_cs_work_group_id_setup()
+{
+   assert(stage == MESA_SHADER_COMPUTE);
+
+   fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));
+
+   struct brw_reg r0_1(retype(brw_vec1_grf(0, 1), BRW_REGISTER_TYPE_UD));
+   struct brw_reg r0_6(retype(brw_vec1_grf(0, 6), BRW_REGISTER_TYPE_UD));
+   struct brw_reg r0_7(retype(brw_vec1_grf(0, 7), BRW_REGISTER_TYPE_UD));
+
+   bld.MOV(*reg, r0_1);
+   bld.MOV(offset(*reg, bld, 1), r0_6);
+   bld.MOV(offset(*reg, bld, 2), r0_7);
+
+   return reg;
+}
+
+const unsigned *
+brw_cs_emit(struct brw_context *brw,
+            void *mem_ctx,
+            const struct brw_cs_prog_key *key,
+            struct brw_cs_prog_data *prog_data,
+            struct gl_compute_program *cp,
+            struct gl_shader_program *prog,
+            unsigned *final_assembly_size)
+{
+   bool start_busy = false;
+   double start_time = 0;
+
+   if (unlikely(brw->perf_debug)) {
+      start_busy = (brw->batch.last_bo &&
+                    drm_intel_bo_busy(brw->batch.last_bo));
+      start_time = get_time();
+   }
+
+   struct brw_shader *shader =
+      (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
+
+   if (unlikely(INTEL_DEBUG & DEBUG_CS))
+      brw_dump_ir("compute", prog, &shader->base, &cp->Base);
+
+   prog_data->local_size[0] = cp->LocalSize[0];
+   prog_data->local_size[1] = cp->LocalSize[1];
+   prog_data->local_size[2] = cp->LocalSize[2];
+   unsigned local_workgroup_size =
+      cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];
+
+   cfg_t *cfg = NULL;
+   const char *fail_msg = NULL;
+
+   int st_index = -1;
+   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+      st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS);
+
+   /* Now the main event: Visit the shader IR and generate our CS IR for it.
+    */
+   fs_visitor v8(brw->intelScreen->compiler, brw,
+                 mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
+                 &cp->Base, 8, st_index);
+   if (!v8.run_cs()) {
+      fail_msg = v8.fail_msg;
+   } else if (local_workgroup_size <= 8 * brw->max_cs_threads) {
+      cfg = v8.cfg;
+      prog_data->simd_size = 8;
+   }
+
+   fs_visitor v16(brw->intelScreen->compiler, brw,
+                  mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
+                  &cp->Base, 16, st_index);
+   if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
+       !fail_msg && !v8.simd16_unsupported &&
+       local_workgroup_size <= 16 * brw->max_cs_threads) {
+      /* Try a SIMD16 compile */
+      v16.import_uniforms(&v8);
+      if (!v16.run_cs()) {
+         perf_debug("SIMD16 shader failed to compile: %s", v16.fail_msg);
+         if (!cfg) {
+            fail_msg =
+               "Couldn't generate SIMD16 program and not "
+               "enough threads for SIMD8";
+         }
+      } else {
+         cfg = v16.cfg;
+         prog_data->simd_size = 16;
+      }
+   }
+
+   if (unlikely(cfg == NULL)) {
+      assert(fail_msg);
+      prog->LinkStatus = false;
+      ralloc_strcat(&prog->InfoLog, fail_msg);
+      _mesa_problem(NULL, "Failed to compile compute shader: %s\n",
+                    fail_msg);
+      return NULL;
+   }
+
+   fs_generator g(brw->intelScreen->compiler, brw,
+                  mem_ctx, (void*) key, &prog_data->base, &cp->Base,
+                  v8.promoted_constants, v8.runtime_check_aads_emit, "CS");
+   if (INTEL_DEBUG & DEBUG_CS) {
+      char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d",
+                                   prog->Label ? prog->Label : "unnamed",
+                                   prog->Name);
+      g.enable_debug(name);
+   }
+
+   g.generate_code(cfg, prog_data->simd_size);
+
+   if (unlikely(brw->perf_debug) && shader) {
+      if (shader->compiled_once) {
+         _mesa_problem(&brw->ctx, "CS programs shouldn't need recompiles");
+      }
+      shader->compiled_once = true;
+
+      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+         perf_debug("CS compile took %.03f ms and stalled the GPU\n",
+                    (get_time() - start_time) * 1000);
+      }
+   }
+
+   return g.get_assembly(final_assembly_size);
+}
diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c
new file mode 100644
index 00000000000..d9561cfce61
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c
@@ -0,0 +1,347 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/ralloc.h"
+#include "brw_context.h"
+#include "brw_cs.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+#include "brw_shader.h"
+#include "intel_mipmap_tree.h"
+#include "intel_batchbuffer.h"
+#include "brw_state.h"
+
+static unsigned
+get_cs_thread_count(const struct brw_cs_prog_data *cs_prog_data)
+{
+   const unsigned simd_size = cs_prog_data->simd_size;
+   unsigned group_size = cs_prog_data->local_size[0] *
+      cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
+
+   return (group_size + simd_size - 1) / simd_size;
+}
+
+
+static void
+brw_upload_cs_state(struct brw_context *brw)
+{
+   if (!brw->cs.prog_data)
+      return;
+
+   uint32_t offset;
+   uint32_t *desc = (uint32_t*) brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
+                                                8 * 4, 64, &offset);
+   struct gl_program *prog = (struct gl_program *) brw->compute_program;
+   struct brw_stage_state *stage_state = &brw->cs.base;
+   struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
+   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
+
+   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
+      brw->vtbl.emit_buffer_surface_state(
+         brw, &stage_state->surf_offset[
+                 prog_data->binding_table.shader_time_start],
+         brw->shader_time.bo, 0, BRW_SURFACEFORMAT_RAW,
+         brw->shader_time.bo->size, 1, true);
+   }
+
+   uint32_t *bind = (uint32_t*) brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
+                                            prog_data->binding_table.size_bytes,
+                                            32, &stage_state->bind_bo_offset);
+
+   unsigned local_id_dwords = 0;
+
+   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+      local_id_dwords =
+         brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size);
+   }
+
+   unsigned push_constant_data_size =
+      (prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value);
+   unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
+   unsigned push_constant_regs = reg_aligned_constant_size / 32;
+   unsigned threads = get_cs_thread_count(cs_prog_data);
+
+   uint32_t dwords = brw->gen < 8 ? 8 : 9;
+   BEGIN_BATCH(dwords);
+   OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2));
+
+   if (prog_data->total_scratch) {
+      if (brw->gen >= 8)
+         OUT_RELOC64(stage_state->scratch_bo,
+                     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                     ffs(prog_data->total_scratch) - 11);
+      else
+         OUT_RELOC(stage_state->scratch_bo,
+                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                   ffs(prog_data->total_scratch) - 11);
+   } else {
+      OUT_BATCH(0);
+      if (brw->gen >= 8)
+         OUT_BATCH(0);
+   }
+
+   const uint32_t vfe_num_urb_entries = brw->gen >= 8 ? 2 : 0;
+   const uint32_t vfe_gpgpu_mode =
+      brw->gen == 7 ? SET_FIELD(1, GEN7_MEDIA_VFE_STATE_GPGPU_MODE) : 0;
+   OUT_BATCH(SET_FIELD(brw->max_cs_threads - 1, MEDIA_VFE_STATE_MAX_THREADS) |
+             SET_FIELD(vfe_num_urb_entries, MEDIA_VFE_STATE_URB_ENTRIES) |
+             SET_FIELD(1, MEDIA_VFE_STATE_RESET_GTW_TIMER) |
+             SET_FIELD(1, MEDIA_VFE_STATE_BYPASS_GTW) |
+             vfe_gpgpu_mode);
+
+   OUT_BATCH(0);
+   const uint32_t vfe_urb_allocation = brw->gen >= 8 ? 2 : 0;
+
+   /* We are uploading duplicated copies of push constant uniforms for each
+    * thread. Although the local id data needs to vary per thread, the other
+    * uniform data does not change. Unfortunately this duplication is
+    * required for gen7. As of Haswell, this duplication can be avoided, but
+    * this older mechanism with duplicated data continues to work.
+    *
+    * FINISHME: As of Haswell, we could make use of the
+    * INTERFACE_DESCRIPTOR_DATA "Cross-Thread Constant Data Read Length" field
+    * to only store one copy of uniform data.
+    *
+    * FINISHME: Broadwell adds a new alternative "Indirect Payload Storage"
+    * which is described in the GPGPU_WALKER command and in the Broadwell PRM
+    * Volume 7: 3D Media GPGPU, under Media GPGPU Pipeline => Mode of
+    * Operations => GPGPU Mode => Indirect Payload Storage.
+    *
+    * Note: The constant data is built in brw_upload_cs_push_constants below.
+    */
+   const uint32_t vfe_curbe_allocation = push_constant_regs * threads;
+   OUT_BATCH(SET_FIELD(vfe_urb_allocation, MEDIA_VFE_STATE_URB_ALLOC) |
+             SET_FIELD(vfe_curbe_allocation, MEDIA_VFE_STATE_CURBE_ALLOC));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   if (reg_aligned_constant_size > 0) {
+      BEGIN_BATCH(4);
+      OUT_BATCH(MEDIA_CURBE_LOAD << 16 | (4 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(reg_aligned_constant_size * threads);
+      OUT_BATCH(stage_state->push_const_offset);
+      ADVANCE_BATCH();
+   }
+
+   /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
+   memcpy(bind, stage_state->surf_offset,
+          prog_data->binding_table.size_bytes);
+
+   memset(desc, 0, 8 * 4);
+
+   int dw = 0;
+   desc[dw++] = brw->cs.base.prog_offset;
+   if (brw->gen >= 8)
+      desc[dw++] = 0; /* Kernel Start Pointer High */
+   desc[dw++] = 0;
+   desc[dw++] = stage_state->sampler_offset |
+      ((stage_state->sampler_count + 3) / 4);
+   desc[dw++] = stage_state->bind_bo_offset;
+   desc[dw++] = SET_FIELD(push_constant_regs, MEDIA_CURBE_READ_LENGTH);
+   const uint32_t media_threads =
+      brw->gen >= 8 ?
+      SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :
+      SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT);
+   assert(threads <= brw->max_cs_threads);
+   desc[dw++] =
+      SET_FIELD(cs_prog_data->uses_barrier, MEDIA_BARRIER_ENABLE) |
+      media_threads;
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(8 * 4);
+   OUT_BATCH(offset);
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state brw_cs_state = {
+   .dirty = {
+      .mesa = _NEW_PROGRAM_CONSTANTS,
+      .brw = BRW_NEW_CS_PROG_DATA |
+             BRW_NEW_PUSH_CONSTANT_ALLOCATION,
+   },
+   .emit = brw_upload_cs_state
+};
+
+
+/**
+ * We are building the local ID push constant data using the simplest possible
+ * method. We simply push the local IDs directly as they should appear in the
+ * registers for the uvec3 gl_LocalInvocationID variable.
+ *
+ * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
+ * registers worth of push constant space.
+ *
+ * Note: Any updates to brw_cs_prog_local_id_payload_dwords,
+ * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need
+ * to be coordinated.
+ *
+ * FINISHME: There are a few easy optimizations to consider.
+ *
+ * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
+ *    no need for using push constant space for that dimension.
+ *
+ * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
+ *    easily use 16-bit words rather than 32-bit dwords in the push constant
+ *    data.
+ *
+ * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
+ *    conveying the data, and thereby reduce push constant usage.
+ *
+ */
+unsigned
+brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog,
+                                    unsigned dispatch_width)
+{
+   return 3 * dispatch_width;
+}
+
+
+static void
+fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
+                      void *buffer, unsigned *x, unsigned *y, unsigned *z)
+{
+   uint32_t *param = (uint32_t *)buffer;
+   for (unsigned i = 0; i < cs_prog_data->simd_size; i++) {
+      param[0 * cs_prog_data->simd_size + i] = *x;
+      param[1 * cs_prog_data->simd_size + i] = *y;
+      param[2 * cs_prog_data->simd_size + i] = *z;
+
+      (*x)++;
+      if (*x == cs_prog_data->local_size[0]) {
+         *x = 0;
+         (*y)++;
+         if (*y == cs_prog_data->local_size[1]) {
+            *y = 0;
+            (*z)++;
+            if (*z == cs_prog_data->local_size[2])
+               *z = 0;
+         }
+      }
+   }
+}
+
+
+/**
+ * Creates a region containing the push constants for the CS on gen7+.
+ *
+ * Push constants are constant values (such as GLSL uniforms) that are
+ * pre-loaded into a shader stage's register space at thread spawn time.
+ *
+ * For other stages, see brw_curbe.c:brw_upload_constant_buffer for the
+ * equivalent gen4/5 code and gen6_vs_state.c:gen6_upload_push_constants for
+ * gen6+.
+ */
+static void
+brw_upload_cs_push_constants(struct brw_context *brw,
+                             const struct gl_program *prog,
+                             const struct brw_cs_prog_data *cs_prog_data,
+                             struct brw_stage_state *stage_state,
+                             enum aub_state_struct_type type)
+{
+   struct gl_context *ctx = &brw->ctx;
+   const struct brw_stage_prog_data *prog_data =
+      (struct brw_stage_prog_data*) cs_prog_data;
+   unsigned local_id_dwords = 0;
+
+   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+      local_id_dwords =
+         brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size);
+   }
+
+   /* Updates the ParameterValues[i] pointers for all parameters of the
+    * basic type of PROGRAM_STATE_VAR.
+    */
+   /* XXX: Should this happen somewhere before to get our state flag set? */
+   _mesa_load_state_parameters(ctx, prog->Parameters);
+
+   if (prog_data->nr_params == 0 && local_id_dwords == 0) {
+      stage_state->push_const_size = 0;
+   } else {
+      gl_constant_value *param;
+      unsigned i, t;
+
+      const unsigned push_constant_data_size =
+         (local_id_dwords + prog_data->nr_params) * sizeof(gl_constant_value);
+      const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
+      const unsigned param_aligned_count =
+         reg_aligned_constant_size / sizeof(*param);
+
+      unsigned threads = get_cs_thread_count(cs_prog_data);
+
+      param = (gl_constant_value*)
+         brw_state_batch(brw, type,
+                         reg_aligned_constant_size * threads,
+                         32, &stage_state->push_const_offset);
+      assert(param);
+
+      STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
+
+      /* _NEW_PROGRAM_CONSTANTS */
+      unsigned x = 0, y = 0, z = 0;
+      for (t = 0; t < threads; t++) {
+         gl_constant_value *next_param = &param[t * param_aligned_count];
+         if (local_id_dwords > 0) {
+            fill_local_id_payload(cs_prog_data, (void*)next_param, &x, &y, &z);
+            next_param += local_id_dwords;
+         }
+         for (i = 0; i < prog_data->nr_params; i++) {
+            next_param[i] = *prog_data->param[i];
+         }
+      }
+
+      stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;
+   }
+}
+
+
+static void
+gen7_upload_cs_push_constants(struct brw_context *brw)
+{
+   struct brw_stage_state *stage_state = &brw->cs.base;
+
+   /* BRW_NEW_COMPUTE_PROGRAM */
+   const struct brw_compute_program *cp =
+      (struct brw_compute_program *) brw->compute_program;
+
+   if (cp) {
+      /* CACHE_NEW_CS_PROG */
+      struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
+
+      brw_upload_cs_push_constants(brw, &cp->program.Base, cs_prog_data,
+                                   stage_state, AUB_TRACE_WM_CONSTANTS);
+   }
+}
+
+const struct brw_tracked_state gen7_cs_push_constants = {
+   .dirty = {
+      .mesa = _NEW_PROGRAM_CONSTANTS,
+      .brw = BRW_NEW_COMPUTE_PROGRAM |
+             BRW_NEW_PUSH_CONSTANT_ALLOCATION,
+   },
+   .emit = gen7_upload_cs_push_constants,
+};
-- 
cgit v1.2.3


From 84f2ed2cfdab45aa949aa6affe46cfe2944759c1 Mon Sep 17 00:00:00 2001
From: Kristian Høgsberg Kristensen <krh@bitplanet.net>
Date: Fri, 4 Sep 2015 16:55:03 -0700
Subject: i965: Move brw_fs_precompile() to brw_wm.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All other precompile functions live in the brw_<stage>.c files; make fs
follow the convention.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Signed-off-by: Kristian Høgsberg Kristensen <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 58 -----------------------------------
 src/mesa/drivers/dri/i965/brw_wm.c   | 59 ++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 58 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 1fc9175724d..d4900f14774 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -5240,64 +5240,6 @@ brw_wm_fs_emit(struct brw_context *brw,
    return g.get_assembly(final_assembly_size);
 }
 
-extern "C" bool
-brw_fs_precompile(struct gl_context *ctx,
-                  struct gl_shader_program *shader_prog,
-                  struct gl_program *prog)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_wm_prog_key key;
-
-   struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
-   struct brw_fragment_program *bfp = brw_fragment_program(fp);
-   bool program_uses_dfdy = fp->UsesDFdy;
-
-   memset(&key, 0, sizeof(key));
-
-   if (brw->gen < 6) {
-      if (fp->UsesKill)
-         key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
-
-      if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
-         key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
-
-      /* Just assume depth testing. */
-      key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
-      key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
-   }
-
-   if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead &
-                                         BRW_FS_VARYING_INPUT_MASK) > 16)
-      key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS;
-
-   brw_setup_tex_for_precompile(brw, &key.tex, &fp->Base);
-
-   if (fp->Base.InputsRead & VARYING_BIT_POS) {
-      key.drawable_height = ctx->DrawBuffer->Height;
-   }
-
-   key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten &
-         ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
-         BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)));
-
-   if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
-      key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer) ||
-                          key.nr_color_regions > 1;
-   }
-
-   key.program_string_id = bfp->id;
-
-   uint32_t old_prog_offset = brw->wm.base.prog_offset;
-   struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
-
-   bool success = brw_codegen_wm_prog(brw, shader_prog, bfp, &key);
-
-   brw->wm.base.prog_offset = old_prog_offset;
-   brw->wm.prog_data = old_prog_data;
-
-   return success;
-}
-
 fs_reg *
 fs_visitor::emit_cs_local_invocation_id_setup()
 {
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 6ee92848172..8dfa1427e4f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -26,6 +26,7 @@
 #include "brw_context.h"
 #include "brw_wm.h"
 #include "brw_state.h"
+#include "brw_shader.h"
 #include "main/enums.h"
 #include "main/formats.h"
 #include "main/fbobject.h"
@@ -642,3 +643,61 @@ brw_upload_wm_prog(struct brw_context *brw)
    }
    brw->wm.base.prog_data = &brw->wm.prog_data->base;
 }
+
+bool
+brw_fs_precompile(struct gl_context *ctx,
+                  struct gl_shader_program *shader_prog,
+                  struct gl_program *prog)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_wm_prog_key key;
+
+   struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
+   struct brw_fragment_program *bfp = brw_fragment_program(fp);
+   bool program_uses_dfdy = fp->UsesDFdy;
+
+   memset(&key, 0, sizeof(key));
+
+   if (brw->gen < 6) {
+      if (fp->UsesKill)
+         key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+      if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+         key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+
+      /* Just assume depth testing. */
+      key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+      key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+   }
+
+   if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead &
+                                         BRW_FS_VARYING_INPUT_MASK) > 16)
+      key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS;
+
+   brw_setup_tex_for_precompile(brw, &key.tex, &fp->Base);
+
+   if (fp->Base.InputsRead & VARYING_BIT_POS) {
+      key.drawable_height = ctx->DrawBuffer->Height;
+   }
+
+   key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten &
+         ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
+         BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)));
+
+   if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
+      key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer) ||
+                          key.nr_color_regions > 1;
+   }
+
+   key.program_string_id = bfp->id;
+
+   uint32_t old_prog_offset = brw->wm.base.prog_offset;
+   struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
+
+   bool success = brw_codegen_wm_prog(brw, shader_prog, bfp, &key);
+
+   brw->wm.base.prog_offset = old_prog_offset;
+   brw->wm.prog_data = old_prog_data;
+
+   return success;
+}
-- 
cgit v1.2.3


From a548c75e31b4146d55133cb8c57a82117c196584 Mon Sep 17 00:00:00 2001
From: Kristian Høgsberg Kristensen <krh@bitplanet.net>
Date: Fri, 4 Sep 2015 17:09:40 -0700
Subject: i965: Move perf_debug code to brw_codegen_*_prog()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We're trying to avoid a libdrm dependency in the core compiler, so let's
move the perf_debug code one level up from the brw_*_emit() helpers to
the brw_codegen_*_prog() helpers.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Signed-off-by: Kristian Høgsberg Kristensen <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_cs.c     | 31 ++++++++++++++++++++-----
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 41 ----------------------------------
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 19 ----------------
 src/mesa/drivers/dri/i965/brw_vs.c     | 29 +++++++++++++++++++-----
 src/mesa/drivers/dri/i965/brw_wm.c     | 31 ++++++++++++++++++++-----
 5 files changed, 75 insertions(+), 76 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c
index 012c46698e7..cb3fae66ec3 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.c
+++ b/src/mesa/drivers/dri/i965/brw_cs.c
@@ -63,8 +63,11 @@ brw_codegen_cs_prog(struct brw_context *brw,
    void *mem_ctx = ralloc_context(NULL);
    GLuint program_size;
    struct brw_cs_prog_data prog_data;
+   bool start_busy = false;
+   double start_time = 0;
 
-   struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE];
+   struct brw_shader *cs =
+      (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
    assert (cs);
 
    memset(&prog_data, 0, sizeof(prog_data));
@@ -73,8 +76,8 @@ brw_codegen_cs_prog(struct brw_context *brw,
     * prog_data associated with the compiled program, and which will be freed
     * by the state cache.
     */
-   int param_count = cs->num_uniform_components +
-                     cs->NumImages * BRW_IMAGE_PARAM_SIZE;
+   int param_count = cs->base.num_uniform_components +
+                     cs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
 
    /* The backend also sometimes adds params for texture size. */
    param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
@@ -83,9 +86,15 @@ brw_codegen_cs_prog(struct brw_context *brw,
    prog_data.base.pull_param =
       rzalloc_array(NULL, const gl_constant_value *, param_count);
    prog_data.base.image_param =
-      rzalloc_array(NULL, struct brw_image_param, cs->NumImages);
+      rzalloc_array(NULL, struct brw_image_param, cs->base.NumImages);
    prog_data.base.nr_params = param_count;
-   prog_data.base.nr_image_params = cs->NumImages;
+   prog_data.base.nr_image_params = cs->base.NumImages;
+
+   if (unlikely(brw->perf_debug)) {
+      start_busy = (brw->batch.last_bo &&
+                    drm_intel_bo_busy(brw->batch.last_bo));
+      start_time = get_time();
+   }
 
    program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
                          &cp->program, prog, &program_size);
@@ -94,6 +103,18 @@ brw_codegen_cs_prog(struct brw_context *brw,
       return false;
    }
 
+   if (unlikely(brw->perf_debug) && cs) {
+      if (cs->compiled_once) {
+         _mesa_problem(&brw->ctx, "CS programs shouldn't need recompiles");
+      }
+      cs->compiled_once = true;
+
+      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+         perf_debug("CS compile took %.03f ms and stalled the GPU\n",
+                    (get_time() - start_time) * 1000);
+      }
+   }
+
    if (prog_data.base.total_scratch) {
       brw_get_scratch_bo(brw, &brw->cs.base.scratch_bo,
                          prog_data.base.total_scratch * brw->max_cs_threads);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index d4900f14774..b9f1051eca4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -5140,15 +5140,6 @@ brw_wm_fs_emit(struct brw_context *brw,
                struct gl_shader_program *prog,
                unsigned *final_assembly_size)
 {
-   bool start_busy = false;
-   double start_time = 0;
-
-   if (unlikely(brw->perf_debug)) {
-      start_busy = (brw->batch.last_bo &&
-                    drm_intel_bo_busy(brw->batch.last_bo));
-      start_time = get_time();
-   }
-
    struct brw_shader *shader = NULL;
    if (prog)
       shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
@@ -5226,17 +5217,6 @@ brw_wm_fs_emit(struct brw_context *brw,
    if (simd16_cfg)
       prog_data->prog_offset_16 = g.generate_code(simd16_cfg, 16);
 
-   if (unlikely(brw->perf_debug) && shader) {
-      if (shader->compiled_once)
-         brw_wm_debug_recompile(brw, prog, key);
-      shader->compiled_once = true;
-
-      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
-         perf_debug("FS compile took %.03f ms and stalled the GPU\n",
-                    (get_time() - start_time) * 1000);
-      }
-   }
-
    return g.get_assembly(final_assembly_size);
 }
 
@@ -5286,15 +5266,6 @@ brw_cs_emit(struct brw_context *brw,
             struct gl_shader_program *prog,
             unsigned *final_assembly_size)
 {
-   bool start_busy = false;
-   double start_time = 0;
-
-   if (unlikely(brw->perf_debug)) {
-      start_busy = (brw->batch.last_bo &&
-                    drm_intel_bo_busy(brw->batch.last_bo));
-      start_time = get_time();
-   }
-
    struct brw_shader *shader =
       (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
 
@@ -5368,17 +5339,5 @@ brw_cs_emit(struct brw_context *brw,
 
    g.generate_code(cfg, prog_data->simd_size);
 
-   if (unlikely(brw->perf_debug) && shader) {
-      if (shader->compiled_once) {
-         _mesa_problem(&brw->ctx, "CS programs shouldn't need recompiles");
-      }
-      shader->compiled_once = true;
-
-      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
-         perf_debug("CS compile took %.03f ms and stalled the GPU\n",
-                    (get_time() - start_time) * 1000);
-      }
-   }
-
    return g.get_assembly(final_assembly_size);
 }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 181768bddea..ed49cd38987 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1930,16 +1930,8 @@ brw_vs_emit(struct brw_context *brw,
             struct gl_shader_program *prog,
             unsigned *final_assembly_size)
 {
-   bool start_busy = false;
-   double start_time = 0;
    const unsigned *assembly = NULL;
 
-   if (unlikely(brw->perf_debug)) {
-      start_busy = (brw->batch.last_bo &&
-                    drm_intel_bo_busy(brw->batch.last_bo));
-      start_time = get_time();
-   }
-
    struct brw_shader *shader = NULL;
    if (prog)
       shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
@@ -2029,17 +2021,6 @@ brw_vs_emit(struct brw_context *brw,
       assembly = g.generate_assembly(v.cfg, final_assembly_size);
    }
 
-   if (unlikely(brw->perf_debug) && shader) {
-      if (shader->compiled_once) {
-         brw_vs_debug_recompile(brw, prog, key);
-      }
-      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
-         perf_debug("VS compile took %.03f ms and stalled the GPU\n",
-                    (get_time() - start_time) * 1000);
-      }
-      shader->compiled_once = true;
-   }
-
    return assembly;
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 4e0d34f6c6f..465e78f4c74 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -98,10 +98,12 @@ brw_codegen_vs_prog(struct brw_context *brw,
    struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base;
    void *mem_ctx;
    int i;
-   struct gl_shader *vs = NULL;
+   struct brw_shader *vs = NULL;
+   bool start_busy = false;
+   double start_time = 0;
 
    if (prog)
-      vs = prog->_LinkedShaders[MESA_SHADER_VERTEX];
+      vs = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
 
    memset(&prog_data, 0, sizeof(prog_data));
 
@@ -121,9 +123,9 @@ brw_codegen_vs_prog(struct brw_context *brw,
        * case being a float value that gets blown up to a vec4, so be
        * conservative here.
        */
-      param_count = vs->num_uniform_components * 4 +
-                    vs->NumImages * BRW_IMAGE_PARAM_SIZE;
-      stage_prog_data->nr_image_params = vs->NumImages;
+      param_count = vs->base.num_uniform_components * 4 +
+                    vs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
+      stage_prog_data->nr_image_params = vs->base.NumImages;
    } else {
       param_count = vp->program.Base.Parameters->NumParameters * 4;
    }
@@ -185,6 +187,12 @@ brw_codegen_vs_prog(struct brw_context *brw,
 			       true);
    }
 
+   if (unlikely(brw->perf_debug)) {
+      start_busy = (brw->batch.last_bo &&
+                    drm_intel_bo_busy(brw->batch.last_bo));
+      start_time = get_time();
+   }
+
    /* Emit GEN4 code.
     */
    program = brw_vs_emit(brw, mem_ctx, key, &prog_data,
@@ -194,6 +202,17 @@ brw_codegen_vs_prog(struct brw_context *brw,
       return false;
    }
 
+   if (unlikely(brw->perf_debug) && vs) {
+      if (vs->compiled_once) {
+         brw_vs_debug_recompile(brw, prog, key);
+      }
+      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+         perf_debug("VS compile took %.03f ms and stalled the GPU\n",
+                    (get_time() - start_time) * 1000);
+      }
+      vs->compiled_once = true;
+   }
+
    /* Scratch space is used for register spilling */
    if (prog_data.base.base.total_scratch) {
       brw_get_scratch_bo(brw, &brw->vs.base.scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 8dfa1427e4f..35c0908324f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -165,11 +165,13 @@ brw_codegen_wm_prog(struct brw_context *brw,
    void *mem_ctx = ralloc_context(NULL);
    struct brw_wm_prog_data prog_data;
    const GLuint *program;
-   struct gl_shader *fs = NULL;
+   struct brw_shader *fs = NULL;
    GLuint program_size;
+   bool start_busy = false;
+   double start_time = 0;
 
    if (prog)
-      fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
+      fs = (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
 
    memset(&prog_data, 0, sizeof(prog_data));
    /* key->alpha_test_func means simulating alpha testing via discards,
@@ -180,7 +182,7 @@ brw_codegen_wm_prog(struct brw_context *brw,
       fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
    prog_data.computed_depth_mode = computed_depth_mode(&fp->program);
 
-   prog_data.early_fragment_tests = fs && fs->EarlyFragmentTests;
+   prog_data.early_fragment_tests = fs && fs->base.EarlyFragmentTests;
 
    /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
    if (!prog)
@@ -192,9 +194,9 @@ brw_codegen_wm_prog(struct brw_context *brw,
     */
    int param_count;
    if (fs) {
-      param_count = fs->num_uniform_components +
-                    fs->NumImages * BRW_IMAGE_PARAM_SIZE;
-      prog_data.base.nr_image_params = fs->NumImages;
+      param_count = fs->base.num_uniform_components +
+                    fs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
+      prog_data.base.nr_image_params = fs->base.NumImages;
    } else {
       param_count = fp->program.Base.Parameters->NumParameters * 4;
    }
@@ -214,6 +216,12 @@ brw_codegen_wm_prog(struct brw_context *brw,
                                            key->persample_shading,
                                            &fp->program);
 
+   if (unlikely(brw->perf_debug)) {
+      start_busy = (brw->batch.last_bo &&
+                    drm_intel_bo_busy(brw->batch.last_bo));
+      start_time = get_time();
+   }
+
    program = brw_wm_fs_emit(brw, mem_ctx, key, &prog_data,
                             &fp->program, prog, &program_size);
    if (program == NULL) {
@@ -221,6 +229,17 @@ brw_codegen_wm_prog(struct brw_context *brw,
       return false;
    }
 
+   if (unlikely(brw->perf_debug) && fs) {
+      if (fs->compiled_once)
+         brw_wm_debug_recompile(brw, prog, key);
+      fs->compiled_once = true;
+
+      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+         perf_debug("FS compile took %.03f ms and stalled the GPU\n",
+                    (get_time() - start_time) * 1000);
+      }
+   }
+
    if (prog_data.base.total_scratch) {
       brw_get_scratch_bo(brw, &brw->wm.base.scratch_bo,
 			 prog_data.base.total_scratch * brw->max_wm_threads);
-- 
cgit v1.2.3


From 4bf151e66279da00655cec02aadb52c9c6583213 Mon Sep 17 00:00:00 2001
From: Ray Strode <rstrode@redhat.com>
Date: Fri, 28 Aug 2015 14:50:21 -0400
Subject: gbm: convert gbm bo format to fourcc format on dma-buf import
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

At the moment if a gbm buffer is imported and the gbm buffer
has an old-style GBM_BO_FORMAT format, the import will crash,
since it's passed directly to DRI functions that expect
a fourcc format (as provided by the newer GBM_FORMAT
definitions).

This commit addresses the problem in two ways:

1) it prevents invalid formats from leading to a crash by
returning EINVAL if the image couldn't be created

2) it translates GBM_BO_FORMAT formats into the comparable
GBM_FORMAT formats.

Reference: https://bugzilla.gnome.org/show_bug.cgi?id=753531
CC: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/gbm/backends/dri/gbm_dri.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index ccc3cc6930f..57cdeacdccd 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -706,14 +706,30 @@ gbm_dri_bo_import(struct gbm_device *gbm,
    {
       struct gbm_import_fd_data *fd_data = buffer;
       int stride = fd_data->stride, offset = 0;
+      int dri_format;
+
+      switch (fd_data->format) {
+      case GBM_BO_FORMAT_XRGB8888:
+         dri_format = GBM_FORMAT_XRGB8888;
+         break;
+      case GBM_BO_FORMAT_ARGB8888:
+         dri_format = GBM_FORMAT_ARGB8888;
+         break;
+      default:
+         dri_format = fd_data->format;
+      }
 
       image = dri->image->createImageFromFds(dri->screen,
                                              fd_data->width,
                                              fd_data->height,
-                                             fd_data->format,
+                                             dri_format,
                                              &fd_data->fd, 1,
                                              &stride, &offset,
                                              NULL);
+      if (image == NULL) {
+         errno = EINVAL;
+         return NULL;
+      }
       gbm_format = fd_data->format;
       break;
    }
-- 
cgit v1.2.3


From e4f0d26c8c3c092a09fb65184ab080de4e38373e Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Wed, 15 Jul 2015 09:00:41 -0700
Subject: egl/dri2: Close file descriptor on error.

v2: [Emil Velikov]
Rework the error path to a common goto, close only if we own the fd.
v3: [Emil Velikov]
Always close the fd (we either opened the device or dup'd) (Boyan, Ian)

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Boyan Ding <boyan.j.ding@gmail.com>
---
 src/egl/drivers/dri2/platform_drm.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

(limited to 'src')

diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c
index eda50875e02..7e972804608 100644
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -623,26 +623,20 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
       dri2_dpy->own_device = 1;
       gbm = gbm_create_device(fd);
       if (gbm == NULL)
-         return EGL_FALSE;
+         goto cleanup;
    }
 
-   if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0) {
-      free(dri2_dpy);
-      return EGL_FALSE;
-   }
+   if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0)
+      goto cleanup;
 
    dri2_dpy->gbm_dri = gbm_dri_device(gbm);
-   if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI) {
-      free(dri2_dpy);
-      return EGL_FALSE;
-   }
+   if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI)
+      goto cleanup;
 
    if (fd < 0) {
       fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3);
-      if (fd < 0) {
-         free(dri2_dpy);
-         return EGL_FALSE;
-      }
+      if (fd < 0)
+         goto cleanup;
    }
 
    dri2_dpy->fd = fd;
@@ -727,4 +721,11 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
    dri2_dpy->vtbl = &dri2_drm_display_vtbl;
 
    return EGL_TRUE;
+
+cleanup:
+   if (fd >= 0)
+      close(fd);
+
+   free(dri2_dpy);
+   return EGL_FALSE;
 }
-- 
cgit v1.2.3


From bd5bcb5b8ca043a3908d7a70ab1193efc1db278d Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Mon, 7 Sep 2015 09:53:53 +0100
Subject: egl/dri2/drm: compact existing device mgmt

Move the fcntl(dupfd_cloexec) to the else branch where it belongs.
Otherwise it's not immediately obvious that the code is hit only when
an existing device is used.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Boyan Ding <boyan.j.ding@gmail.com>
---
 src/egl/drivers/dri2/platform_drm.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c
index 7e972804608..050c309dceb 100644
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -624,6 +624,10 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
       gbm = gbm_create_device(fd);
       if (gbm == NULL)
          goto cleanup;
+   } else {
+      fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3);
+      if (fd < 0)
+         goto cleanup;
    }
 
    if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0)
@@ -633,12 +637,6 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
    if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI)
       goto cleanup;
 
-   if (fd < 0) {
-      fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3);
-      if (fd < 0)
-         goto cleanup;
-   }
-
    dri2_dpy->fd = fd;
    dri2_dpy->device_name = loader_get_device_name_for_fd(dri2_dpy->fd);
    dri2_dpy->driver_name = strdup(dri2_dpy->gbm_dri->base.driver_name);
-- 
cgit v1.2.3


From a1ac742f709089eabad59b4da484799091203d91 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Thu, 10 Sep 2015 14:41:38 +0100
Subject: egl/dri2: don't leak the fd on dri2_terminate

The existing check was incorrect, as it did not consider the (unlikely)
case of fd == 0. In order to fix this we should first correctly
initialize it to -1, as the swrast implementations leave it set to zero
(props to calloc()).

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Boyan Ding <boyan.j.ding@gmail.com>
---
 src/egl/drivers/dri2/egl_dri2.c         | 2 +-
 src/egl/drivers/dri2/platform_wayland.c | 1 +
 src/egl/drivers/dri2/platform_x11.c     | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index eb56113a4ea..1740ee3dc47 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -786,7 +786,7 @@ dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)
 
    if (dri2_dpy->own_dri_screen)
       dri2_dpy->core->destroyScreen(dri2_dpy->dri_screen);
-   if (dri2_dpy->fd)
+   if (dri2_dpy->fd >= 0)
       close(dri2_dpy->fd);
    if (dri2_dpy->driver)
       dlclose(dri2_dpy->driver);
diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c
index dbc64ba2d8a..6cf5461d52c 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -1804,6 +1804,7 @@ dri2_initialize_wayland_swrast(_EGLDriver *drv, _EGLDisplay *disp)
    if (roundtrip(dri2_dpy) < 0 || dri2_dpy->formats == 0)
       goto cleanup_shm;
 
+   dri2_dpy->fd = -1;
    dri2_dpy->driver_name = strdup("swrast");
    if (!dri2_load_driver_swrast(disp))
       goto cleanup_shm;
diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c
index bf7d2bea4c1..7991fc2b67b 100644
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -1161,6 +1161,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp)
     * Every hardware driver_name is set using strdup. Doing the same in
     * here will allow is to simply free the memory at dri2_terminate().
     */
+   dri2_dpy->fd = -1;
    dri2_dpy->driver_name = strdup("swrast");
    if (!dri2_load_driver_swrast(disp))
       goto cleanup_conn;
-- 
cgit v1.2.3


From edfb7ed1099cc8b6bebc2f1cb0b7b71bdcdbedeb Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Mon, 14 Sep 2015 23:40:07 -0700
Subject: gallium/svga: Enable PIPE_FORMAT_L8_UNORM for vgpu10

It's extensively used by XA for a8- and planar yuv component surfaces.
This fixes broken XA yuv blits using vgpu10 contexts.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/svga/svga_format.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/svga/svga_format.c b/src/gallium/drivers/svga/svga_format.c
index 67f6e51fce6..28b8064bf70 100644
--- a/src/gallium/drivers/svga/svga_format.c
+++ b/src/gallium/drivers/svga/svga_format.c
@@ -56,7 +56,7 @@ static const struct vgpu10_format_entry format_conversion_table[] =
    { PIPE_FORMAT_B4G4R4A4_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_A4R4G4B4,             0 },
    { PIPE_FORMAT_B5G6R5_UNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_B5G6R5_UNORM,         0 },
    { PIPE_FORMAT_R10G10B10A2_UNORM,     SVGA3D_R10G10B10A2_UNORM,   SVGA3D_R10G10B10A2_UNORM,    0 },
-   { PIPE_FORMAT_L8_UNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L8_UNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_LUMINANCE8,           0 },
    { PIPE_FORMAT_A8_UNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_A8_UNORM,             0 },
    { PIPE_FORMAT_I8_UNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
    { PIPE_FORMAT_L8A8_UNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
-- 
cgit v1.2.3


From fe796a18319d791f3ed3865761452474b1c0477f Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Thu, 27 Aug 2015 16:05:22 -0700
Subject: mesa/extensions: restrict luminance alpha formats to
 API_OPENGL_COMPAT

According to the GL 3.1 spec, luminance alpha formats are deprecated.

Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
---
 src/mesa/main/extensions.c | 4 ++--
 src/mesa/main/teximage.c   | 6 ++++--
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 1f7d5420fff..b2c88c37366 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -263,7 +263,7 @@ static const struct extension extension_table[] = {
    { "GL_EXT_texture_compression_dxt1",            o(ANGLE_texture_compression_dxt),           GL | ES1 | ES2, 2004 },
    { "GL_ANGLE_texture_compression_dxt3",          o(ANGLE_texture_compression_dxt),           GL | ES1 | ES2, 2011 },
    { "GL_ANGLE_texture_compression_dxt5",          o(ANGLE_texture_compression_dxt),           GL | ES1 | ES2, 2011 },
-   { "GL_EXT_texture_compression_latc",            o(EXT_texture_compression_latc),            GL,             2006 },
+   { "GL_EXT_texture_compression_latc",            o(EXT_texture_compression_latc),            GLL,            2006 },
    { "GL_EXT_texture_compression_rgtc",            o(ARB_texture_compression_rgtc),            GL,             2004 },
    { "GL_EXT_texture_compression_s3tc",            o(EXT_texture_compression_s3tc),            GL,             2000 },
    { "GL_EXT_texture_cube_map",                    o(ARB_texture_cube_map),                    GLL,            2001 },
@@ -366,7 +366,7 @@ static const struct extension extension_table[] = {
    { "GL_ATI_draw_buffers",                        o(dummy_true),                              GLL,            2002 },
    { "GL_ATI_fragment_shader",                     o(ATI_fragment_shader),                     GLL,            2001 },
    { "GL_ATI_separate_stencil",                    o(ATI_separate_stencil),                    GLL,            2006 },
-   { "GL_ATI_texture_compression_3dc",             o(ATI_texture_compression_3dc),             GL,             2004 },
+   { "GL_ATI_texture_compression_3dc",             o(ATI_texture_compression_3dc),             GLL,            2004 },
    { "GL_ATI_texture_env_combine3",                o(ATI_texture_env_combine3),                GLL,            2002 },
    { "GL_ATI_texture_float",                       o(ARB_texture_float),                       GL,             2002 },
    { "GL_ATI_texture_mirror_once",                 o(ATI_texture_mirror_once),                 GL,             2006 },
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index bfb0858b9bb..ff844cd24eb 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -534,7 +534,8 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat )
       }
    }
 
-   if (ctx->Extensions.EXT_texture_compression_latc) {
+   if (ctx->API == API_OPENGL_COMPAT &&
+       ctx->Extensions.EXT_texture_compression_latc) {
       switch (internalFormat) {
       case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
       case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
@@ -547,7 +548,8 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat )
       }
    }
 
-   if (ctx->Extensions.ATI_texture_compression_3dc) {
+   if (ctx->API == API_OPENGL_COMPAT &&
+       ctx->Extensions.ATI_texture_compression_3dc) {
       switch (internalFormat) {
       case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
          return GL_LUMINANCE_ALPHA;
-- 
cgit v1.2.3


From 48961fa3ba37999a6f8fd812458b735e39604a95 Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Thu, 10 Sep 2015 10:48:46 -0700
Subject: mesa/extensions: restrict GL_OES_EGL_image to GLES

Driver vendors do this as well. The extension specification
lists GLES 1.1 or 2.0 as requirements.

Reviewed-by: Chad Versace <chad.versace@intel.com>
Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
---
 src/mesa/main/extensions.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index b2c88c37366..767c50e28c0 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -307,8 +307,7 @@ static const struct extension extension_table[] = {
    { "GL_OES_depth_texture_cube_map",              o(OES_depth_texture_cube_map),                         ES2, 2012 },
    { "GL_OES_draw_texture",                        o(OES_draw_texture),                             ES1,       2004 },
    { "GL_OES_EGL_sync",                            o(dummy_true),                                   ES1 | ES2, 2010 },
-   /*  FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
-   { "GL_OES_EGL_image",                           o(OES_EGL_image),                           GL | ES1 | ES2, 2006 },
+   { "GL_OES_EGL_image",                           o(OES_EGL_image),                                ES1 | ES2, 2006 },
    { "GL_OES_EGL_image_external",                  o(OES_EGL_image_external),                       ES1 | ES2, 2010 },
    { "GL_OES_element_index_uint",                  o(dummy_true),                                   ES1 | ES2, 2005 },
    { "GL_OES_fbo_render_mipmap",                   o(dummy_true),                                   ES1 | ES2, 2005 },
-- 
cgit v1.2.3


From 82007936491d5fb99cdc5ea18c076e8a896a4a1e Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Thu, 27 Aug 2015 16:05:22 -0700
Subject: mesa/teximage: restrict GL_ETC1_RGB8_OES support to GLES

According to the extensions table and our glext headers,
OES_compressed_ETC1_RGB8_texture is only supported in
GLES1 and GLES2. Since we may give users a GLES3 context
when a GLES2 context is requested, we allow this
extension for GLES3 as well.

Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
---
 src/mesa/main/teximage.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index ff844cd24eb..2a4d29daf2f 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -558,7 +558,8 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat )
       }
    }
 
-   if (ctx->Extensions.OES_compressed_ETC1_RGB8_texture) {
+   if (_mesa_is_gles(ctx) &&
+      ctx->Extensions.OES_compressed_ETC1_RGB8_texture) {
       switch (internalFormat) {
       case GL_ETC1_RGB8_OES:
          return GL_RGB;
-- 
cgit v1.2.3


From f55836f5671de5381a609c8d45601e2a92c803ce Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Mon, 14 Sep 2015 15:09:00 -0700
Subject: i965/fs: Set output_components for lowered clip distance outputs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 5cb794b5fd1..cea16e0b325 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -840,7 +840,9 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
    const fs_builder abld = bld.annotate("user clip distances");
 
    this->outputs[VARYING_SLOT_CLIP_DIST0] = vgrf(glsl_type::vec4_type);
+   this->output_components[VARYING_SLOT_CLIP_DIST0] = 4;
    this->outputs[VARYING_SLOT_CLIP_DIST1] = vgrf(glsl_type::vec4_type);
+   this->output_components[VARYING_SLOT_CLIP_DIST1] = 4;
 
    for (int i = 0; i < key->nr_userclip_plane_consts; i++) {
       fs_reg u = userplane[i];
-- 
cgit v1.2.3


From 51824000541d8df9f870cbe0823a3835c9403bff Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Mon, 14 Sep 2015 14:18:13 -0700
Subject: i965/fs: Only read output_components many components when writing an
 output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index cea16e0b325..8b61c867602 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -974,8 +974,10 @@ fs_visitor::emit_urb_writes()
                sources[length++] = reg;
             }
          } else {
-            for (int i = 0; i < 4; i++)
+            for (int i = 0; i < output_components[varying]; i++)
                sources[length++] = offset(this->outputs[varying], bld, i);
+            for (int i = output_components[varying]; i < 4; i++)
+               sources[length++] = fs_reg(0);
          }
          break;
       }
-- 
cgit v1.2.3


From 0c6df7a1cb593666636aed660e90be0c6aca4591 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Mon, 14 Sep 2015 15:36:24 -0700
Subject: i965/fs_surface_builder: Only apply predicate to components that
 exist

In certain conditions, we have to do bounds-checking in the shader for
image_load_store.  The way this works for image loads is that we do a
predicated load and then emit a series of selects, one per component,
that gives us 0 or the loaded value depending on whether or not you're
in bounds.  However, we were hard-coding 4 components which may not be
correct.  Instead, we should be using size which is the number of
components read.

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
---
 src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
index 727e8d1b82a..88f22fa9c7f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
@@ -905,7 +905,7 @@ namespace brw {
             tmp = emit_untyped_read(bld, image, laddr, 1, size, pred);
 
             /* An out of bounds surface access should give zero as result. */
-            for (unsigned c = 0; c < 4; ++c)
+            for (unsigned c = 0; c < size; ++c)
                set_predicate(pred, bld.SEL(offset(tmp, bld, c),
                                            offset(tmp, bld, c), fs_reg(0)));
          }
-- 
cgit v1.2.3


From cadd7dd384b33a779d46bd664f456bed4a21a5b7 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Thu, 2 Jul 2015 15:41:02 -0700
Subject: i965/fs: Add a very basic validation pass

Currently the validation pass only validates that regs_read and
regs_written are consistent with the sizes of VGRFs.  We can add more
checks as we find them useful.

Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/Makefile.sources    |  1 +
 src/mesa/drivers/dri/i965/brw_fs.cpp          | 10 +++++
 src/mesa/drivers/dri/i965/brw_fs.h            |  1 +
 src/mesa/drivers/dri/i965/brw_fs_validate.cpp | 57 +++++++++++++++++++++++++++
 4 files changed, 69 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/brw_fs_validate.cpp

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index b53802cf18d..2ef392a9f16 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -62,6 +62,7 @@ i965_FILES = \
 	brw_fs_sel_peephole.cpp \
 	brw_fs_surface_builder.cpp \
 	brw_fs_surface_builder.h \
+	brw_fs_validate.cpp \
 	brw_fs_vector_splitting.cpp \
 	brw_fs_visitor.cpp \
 	brw_gs.c \
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b9f1051eca4..63bee0aa5fd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -4781,6 +4781,9 @@ fs_visitor::calculate_register_pressure()
 void
 fs_visitor::optimize()
 {
+   /* Start by validating the shader we currently have. */
+   validate();
+
    /* bld is the common builder object pointing at the end of the program we
     * used to translate it into i965 IR.  For the optimization and lowering
     * passes coming next, any code added after the end of the program without
@@ -4797,7 +4800,10 @@ fs_visitor::optimize()
    assign_constant_locations();
    demote_pull_constants();
 
+   validate();
+
    split_virtual_grfs();
+   validate();
 
 #define OPT(pass, args...) ({                                           \
       pass_num++;                                                       \
@@ -4811,6 +4817,8 @@ fs_visitor::optimize()
          backend_shader::dump_instructions(filename);                   \
       }                                                                 \
                                                                         \
+      validate();                                                       \
+                                                                        \
       progress = progress || this_progress;                             \
       this_progress;                                                    \
    })
@@ -4872,6 +4880,8 @@ fs_visitor::optimize()
    OPT(lower_integer_multiplication);
 
    lower_uniform_pull_constant_loads();
+
+   validate();
 }
 
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 5880f690ef1..1886dd8e5ff 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -151,6 +151,7 @@ public:
    void invalidate_live_intervals();
    void calculate_live_intervals();
    void calculate_register_pressure();
+   void validate();
    bool opt_algebraic();
    bool opt_redundant_discard_jumps();
    bool opt_cse();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
new file mode 100644
index 00000000000..d0e04f3bf47
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file brw_fs_validate.cpp
+ *
+ * Implements a pass that validates various invariants of the IR.  The current
+ * pass only validates that GRF's uses are sane.  More can be added later.
+ */
+
+#include "brw_fs.h"
+#include "brw_cfg.h"
+
+#define fsv_assert(cond) \
+   if (!(cond)) { \
+      fprintf(stderr, "ASSERT: FS validation failed!\n"); \
+      dump_instruction(inst, stderr); \
+      fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, #cond); \
+      abort(); \
+   }
+
+void
+fs_visitor::validate()
+{
+   foreach_block_and_inst (block, fs_inst, inst, cfg) {
+      if (inst->dst.file == GRF) {
+         fsv_assert(inst->dst.reg_offset + inst->regs_written <=
+                    alloc.sizes[inst->dst.reg]);
+      }
+
+      for (unsigned i = 0; i < inst->sources; i++) {
+         if (inst->src[i].file == GRF) {
+            fsv_assert(inst->src[i].reg_offset + inst->regs_read(i) <=
+                       (int)alloc.sizes[inst->src[i].reg]);
+         }
+      }
+   }
+}
-- 
cgit v1.2.3


From b7eeced3c724bf5de05290551ced8621ce2c7c52 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 9 Sep 2015 12:58:58 -0700
Subject: nir/lower_vec_to_movs: Pass the shader around directly

Previously, we were passing the shader around; we were just calling it
"mem_ctx".  However, the nir_shader is (and must be for the purposes of
mark-and-sweep) the mem_ctx so we might as well pass it around explicitly.

Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
---
 src/glsl/nir/nir_lower_vec_to_movs.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c b/src/glsl/nir/nir_lower_vec_to_movs.c
index b7f096d14ff..25a6f7d3ad9 100644
--- a/src/glsl/nir/nir_lower_vec_to_movs.c
+++ b/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -54,12 +54,12 @@ src_matches_dest_reg(nir_dest *dest, nir_src *src)
  */
 static unsigned
 insert_mov(nir_alu_instr *vec, unsigned start_channel,
-            unsigned start_src_idx, void *mem_ctx)
+           unsigned start_src_idx, nir_shader *shader)
 {
    unsigned src_idx = start_src_idx;
    assert(src_idx < nir_op_infos[vec->op].num_inputs);
 
-   nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
+   nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov);
    nir_alu_src_copy(&mov->src[0], &vec->src[src_idx], mov);
    nir_alu_dest_copy(&mov->dest, &vec->dest, mov);
 
@@ -84,7 +84,7 @@ insert_mov(nir_alu_instr *vec, unsigned start_channel,
 }
 
 static bool
-lower_vec_to_movs_block(nir_block *block, void *mem_ctx)
+lower_vec_to_movs_block(nir_block *block, void *shader)
 {
    nir_foreach_instr_safe(block, instr) {
       if (instr->type != nir_instr_type_alu)
@@ -115,7 +115,7 @@ lower_vec_to_movs_block(nir_block *block, void *mem_ctx)
             continue;
 
          if (src_matches_dest_reg(&vec->dest.dest, &vec->src[src_idx].src)) {
-            finished_write_mask |= insert_mov(vec, i, src_idx, mem_ctx);
+            finished_write_mask |= insert_mov(vec, i, src_idx, shader);
             break;
          }
          src_idx++;
@@ -127,7 +127,7 @@ lower_vec_to_movs_block(nir_block *block, void *mem_ctx)
             continue;
 
          if (!(finished_write_mask & (1 << i)))
-            finished_write_mask |= insert_mov(vec, i, src_idx, mem_ctx);
+            finished_write_mask |= insert_mov(vec, i, src_idx, shader);
 
          src_idx++;
       }
@@ -142,7 +142,9 @@ lower_vec_to_movs_block(nir_block *block, void *mem_ctx)
 static void
 nir_lower_vec_to_movs_impl(nir_function_impl *impl)
 {
-   nir_foreach_block(impl, lower_vec_to_movs_block, ralloc_parent(impl));
+   nir_shader *shader = impl->overload->function->shader;
+
+   nir_foreach_block(impl, lower_vec_to_movs_block, shader);
 }
 
 void
-- 
cgit v1.2.3


From c3f8cde964f9850c86469a06d5eedf4e783cbf5c Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 9 Sep 2015 13:42:14 -0700
Subject: nir/lower_vec_to_movs: Handle partially SSA shaders

v2 (Jason Ekstrand):
 - Use nir_instr_rewrite_dest
 - Pass the impl directly into lower_vec_to_movs_block

Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
---
 src/glsl/nir/nir_lower_vec_to_movs.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c b/src/glsl/nir/nir_lower_vec_to_movs.c
index 25a6f7d3ad9..993a1082f96 100644
--- a/src/glsl/nir/nir_lower_vec_to_movs.c
+++ b/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -84,8 +84,11 @@ insert_mov(nir_alu_instr *vec, unsigned start_channel,
 }
 
 static bool
-lower_vec_to_movs_block(nir_block *block, void *shader)
+lower_vec_to_movs_block(nir_block *block, void *void_impl)
 {
+   nir_function_impl *impl = void_impl;
+   nir_shader *shader = impl->overload->function->shader;
+
    nir_foreach_instr_safe(block, instr) {
       if (instr->type != nir_instr_type_alu)
          continue;
@@ -101,8 +104,16 @@ lower_vec_to_movs_block(nir_block *block, void *shader)
          continue; /* The loop */
       }
 
-      /* Since we insert multiple MOVs, we have to be non-SSA. */
-      assert(!vec->dest.dest.is_ssa);
+      if (vec->dest.dest.is_ssa) {
+         /* Since we insert multiple MOVs, we need a register destination. */
+         nir_register *reg = nir_local_reg_create(impl);
+         reg->num_components = vec->dest.dest.ssa.num_components;
+
+         nir_ssa_def_rewrite_uses(&vec->dest.dest.ssa, nir_src_for_reg(reg));
+
+         nir_instr_rewrite_dest(&vec->instr, &vec->dest.dest,
+                                nir_dest_for_reg(reg));
+      }
 
       unsigned finished_write_mask = 0;
 
@@ -142,9 +153,7 @@ lower_vec_to_movs_block(nir_block *block, void *shader)
 static void
 nir_lower_vec_to_movs_impl(nir_function_impl *impl)
 {
-   nir_shader *shader = impl->overload->function->shader;
-
-   nir_foreach_block(impl, lower_vec_to_movs_block, shader);
+   nir_foreach_block(impl, lower_vec_to_movs_block, impl);
 }
 
 void
-- 
cgit v1.2.3


From c951bb83056724df02ba7e6fe2dfa720c0f45c1f Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 9 Sep 2015 13:55:39 -0700
Subject: i965/vec4_nir: Use partial SSA form rather than full non-SSA

We made this switch in the FS backend some time ago and it seems to make a
number of things a bit easier.  In particular, supporting SSA values takes
very little work in the backend and allows us to take advantage of the
majority of the SSA information even after we've gotten rid of Phi nodes.

Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
---
 src/mesa/drivers/dri/i965/brw_nir.c        |  2 +-
 src/mesa/drivers/dri/i965/brw_vec4.h       |  1 +
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 21 ++++++++++++++++++---
 3 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 8f3edc5cf01..f326b239d74 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -183,7 +183,7 @@ brw_create_nir(struct brw_context *brw,
       nir_print_shader(nir, stderr);
    }
 
-   nir_convert_from_ssa(nir, is_scalar);
+   nir_convert_from_ssa(nir, true);
    nir_validate_shader(nir);
 
    if (!is_scalar) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 01c6e8492c7..de74ec9bb68 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -423,6 +423,7 @@ public:
    virtual void nir_emit_alu(nir_alu_instr *instr);
    virtual void nir_emit_jump(nir_jump_instr *instr);
    virtual void nir_emit_texture(nir_tex_instr *instr);
+   virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
 
    dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
    dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 175d92b6b31..144f9e56eb7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -367,6 +367,10 @@ vec4_visitor::nir_emit_instr(nir_instr *instr)
       nir_emit_texture(nir_instr_as_tex(instr));
       break;
 
+   case nir_instr_type_ssa_undef:
+      nir_emit_undef(nir_instr_as_ssa_undef(instr));
+      break;
+
    default:
       fprintf(stderr, "VS instruction not yet implemented by NIR->vec4\n");
       break;
@@ -393,9 +397,14 @@ dst_reg_for_nir_reg(vec4_visitor *v, nir_register *nir_reg,
 dst_reg
 vec4_visitor::get_nir_dest(nir_dest dest)
 {
-   assert(!dest.is_ssa);
-   return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
-                              dest.reg.indirect);
+   if (dest.is_ssa) {
+      dst_reg dst = dst_reg(GRF, alloc.allocate(1));
+      nir_ssa_values[dest.ssa.index] = dst;
+      return dst;
+   } else {
+      return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
+                                 dest.reg.indirect);
+   }
 }
 
 dst_reg
@@ -1529,4 +1538,10 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
                 mcs, is_cube_array, sampler, sampler_reg);
 }
 
+void
+vec4_visitor::nir_emit_undef(nir_ssa_undef_instr *instr)
+{
+   nir_ssa_values[instr->def.index] = dst_reg(GRF, alloc.allocate(1));
+}
+
 }
-- 
cgit v1.2.3


From 2b2f1f16a04dfba4e02a7f331befb5bd94d4a1be Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 9 Sep 2015 14:47:28 -0700
Subject: nir/lower_vec_to_movs: Get rid of start_idx and swizzle compacting

Previously, we did this thing with keeping track of a separate start_idx
which was different from the iteration variable.  I think this was a relic
of the way that GLSL IR implements writemasks.  In NIR, if a given bit in
the writemask is unset then that channel is just "unused", not missing.  In
particular, a vec4 operation with a writemask of 0xd will use sources 0, 2,
and 3 and leave source 1 alone.  We can simplify things a good deal (and
make them correct) by removing this "compacting" step.

Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/glsl/nir/nir_lower_vec_to_movs.c | 33 +++++++++++++--------------------
 1 file changed, 13 insertions(+), 20 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c b/src/glsl/nir/nir_lower_vec_to_movs.c
index 993a1082f96..29dd0ca7235 100644
--- a/src/glsl/nir/nir_lower_vec_to_movs.c
+++ b/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -53,29 +53,25 @@ src_matches_dest_reg(nir_dest *dest, nir_src *src)
  * which ones have been processed.
  */
 static unsigned
-insert_mov(nir_alu_instr *vec, unsigned start_channel,
-           unsigned start_src_idx, nir_shader *shader)
+insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
 {
-   unsigned src_idx = start_src_idx;
-   assert(src_idx < nir_op_infos[vec->op].num_inputs);
+   assert(start_idx < nir_op_infos[vec->op].num_inputs);
 
    nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov);
-   nir_alu_src_copy(&mov->src[0], &vec->src[src_idx], mov);
+   nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov);
    nir_alu_dest_copy(&mov->dest, &vec->dest, mov);
 
-   mov->dest.write_mask = (1u << start_channel);
-   mov->src[0].swizzle[start_channel] = vec->src[src_idx].swizzle[0];
-   src_idx++;
+   mov->dest.write_mask = (1u << start_idx);
+   mov->src[0].swizzle[start_idx] = vec->src[start_idx].swizzle[0];
 
-   for (unsigned i = start_channel + 1; i < 4; i++) {
+   for (unsigned i = start_idx + 1; i < 4; i++) {
       if (!(vec->dest.write_mask & (1 << i)))
          continue;
 
-      if (nir_srcs_equal(vec->src[src_idx].src, vec->src[start_src_idx].src)) {
+      if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src)) {
          mov->dest.write_mask |= (1 << i);
-         mov->src[0].swizzle[i] = vec->src[src_idx].swizzle[0];
+         mov->src[0].swizzle[i] = vec->src[i].swizzle[0];
       }
-      src_idx++;
    }
 
    nir_instr_insert_before(&vec->instr, &mov->instr);
@@ -121,26 +117,23 @@ lower_vec_to_movs_block(nir_block *block, void *void_impl)
        * destination reg, in case other values we're populating in the dest
        * might overwrite them.
        */
-      for (unsigned i = 0, src_idx = 0; i < 4; i++) {
+      for (unsigned i = 0; i < 4; i++) {
          if (!(vec->dest.write_mask & (1 << i)))
             continue;
 
-         if (src_matches_dest_reg(&vec->dest.dest, &vec->src[src_idx].src)) {
-            finished_write_mask |= insert_mov(vec, i, src_idx, shader);
+         if (src_matches_dest_reg(&vec->dest.dest, &vec->src[i].src)) {
+            finished_write_mask |= insert_mov(vec, i, shader);
             break;
          }
-         src_idx++;
       }
 
       /* Now, emit MOVs for all the other src channels. */
-      for (unsigned i = 0, src_idx = 0; i < 4; i++) {
+      for (unsigned i = 0; i < 4; i++) {
          if (!(vec->dest.write_mask & (1 << i)))
             continue;
 
          if (!(finished_write_mask & (1 << i)))
-            finished_write_mask |= insert_mov(vec, i, src_idx, shader);
-
-         src_idx++;
+            finished_write_mask |= insert_mov(vec, i, shader);
       }
 
       nir_instr_remove(&vec->instr);
-- 
cgit v1.2.3


From 2458ea95c5676807a064f24ec720f12506975402 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 9 Sep 2015 14:40:06 -0700
Subject: nir/lower_vec_to_movs: Coalesce movs on-the-fly when possible

The old pass blindly inserted a bunch of moves into the shader with no
concern for whether or not it was really needed.  This adds code to try and
coalesce into the destination of the instruction providing the value.

Shader-db results for vec4 shaders on Haswell:

   total instructions in shared programs: 1754420 -> 1747753 (-0.38%)
   instructions in affected programs:     231230 -> 224563 (-2.88%)
   helped:                                1017
   HURT:                                  2

This approach is heavily based on a different patch by Eduardo Lima Mitev
<elima@igalia.com>.  Eduardo's patch did this in a separate pass as opposed
to integrating it into nir_lower_vec_to_movs.

Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
---
 src/glsl/nir/nir_lower_vec_to_movs.c | 85 ++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c b/src/glsl/nir/nir_lower_vec_to_movs.c
index 29dd0ca7235..9ff86ea7543 100644
--- a/src/glsl/nir/nir_lower_vec_to_movs.c
+++ b/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -79,6 +79,88 @@ insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
    return mov->dest.write_mask;
 }
 
+/* Attempts to coalesce the "move" from the given source of the vec to the
+ * destination of the instruction generating the value. If, for whatever
+ * reason, we cannot coalesce the mmove, it does nothing and returns 0.  We
+ * can then call insert_mov as normal.
+ */
+static unsigned
+try_coalesce(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
+{
+   assert(start_idx < nir_op_infos[vec->op].num_inputs);
+
+   /* We will only even try if the source is SSA */
+   if (!vec->src[start_idx].src.is_ssa)
+      return 0;
+
+   assert(vec->src[start_idx].src.ssa);
+
+   /* If we are going to do a reswizzle, then the vecN operation must be the
+    * only use of the source value.  We also can't have any source modifiers.
+    */
+   nir_foreach_use(vec->src[start_idx].src.ssa, src) {
+      if (src->parent_instr != &vec->instr)
+         return 0;
+
+      nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
+      if (alu_src->abs || alu_src->negate)
+         return 0;
+   }
+
+   if (!list_empty(&vec->src[start_idx].src.ssa->if_uses))
+      return 0;
+
+   if (vec->src[start_idx].src.ssa->parent_instr->type != nir_instr_type_alu)
+      return 0;
+
+   nir_alu_instr *src_alu =
+      nir_instr_as_alu(vec->src[start_idx].src.ssa->parent_instr);
+
+   /* We only care about being able to re-swizzle the instruction if it is
+    * something that we can reswizzle.  It must be per-component.
+    */
+   if (nir_op_infos[src_alu->op].output_size != 0)
+      return 0;
+
+   /* If we are going to reswizzle the instruction, we can't have any
+    * non-per-component sources either.
+    */
+   for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+      if (nir_op_infos[src_alu->op].input_sizes[j] != 0)
+         return 0;
+
+   /* Stash off all of the ALU instruction's swizzles. */
+   uint8_t swizzles[4][4];
+   for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+      for (unsigned i = 0; i < 4; i++)
+         swizzles[j][i] = src_alu->src[j].swizzle[i];
+
+   unsigned write_mask = 0;
+   for (unsigned i = start_idx; i < 4; i++) {
+      if (!(vec->dest.write_mask & (1 << i)))
+         continue;
+
+      if (!vec->src[i].src.is_ssa ||
+          vec->src[i].src.ssa != &src_alu->dest.dest.ssa)
+         continue;
+
+      /* At this point, the given vec source matches up with the ALU
+       * instruction so we can re-swizzle that component to match.
+       */
+      write_mask |= 1 << i;
+      for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+         src_alu->src[j].swizzle[i] = swizzles[j][vec->src[i].swizzle[0]];
+
+      /* Clear the no longer needed vec source */
+      nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, NIR_SRC_INIT);
+   }
+
+   nir_instr_rewrite_dest(&src_alu->instr, &src_alu->dest.dest, vec->dest.dest);
+   src_alu->dest.write_mask = write_mask;
+
+   return write_mask;
+}
+
 static bool
 lower_vec_to_movs_block(nir_block *block, void *void_impl)
 {
@@ -132,6 +214,9 @@ lower_vec_to_movs_block(nir_block *block, void *void_impl)
          if (!(vec->dest.write_mask & (1 << i)))
             continue;
 
+         if (!(finished_write_mask & (1 << i)))
+            finished_write_mask |= try_coalesce(vec, i, shader);
+
          if (!(finished_write_mask & (1 << i)))
             finished_write_mask |= insert_mov(vec, i, shader);
       }
-- 
cgit v1.2.3


From 47739c7df430664c3a998163a1e8a4a5e1901691 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Thu, 10 Sep 2015 10:51:46 -0700
Subject: nir: Add a fdot instruction that replicates the result to a vec4

Fortunately, nir_constant_expr already auto-splats if "dst" never shows up
in the constant expression field so we don't need to do anything there.

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
---
 src/glsl/nir/nir.h                | 6 ++++++
 src/glsl/nir/nir_opcodes.py       | 3 +++
 src/glsl/nir/nir_opt_algebraic.py | 3 +++
 3 files changed, 12 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 3f693b17fb1..4e4543ad5ec 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1434,6 +1434,12 @@ typedef struct nir_shader_compiler_options {
    /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
    bool lower_scmp;
 
+   /* Does the native fdot instruction replicate its result for four
+    * components?  If so, then opt_algebraic_late will turn all fdotN
+    * instructions into fdot_replicatedN instructions.
+    */
+   bool fdot_replicates;
+
    /**
     * Does the driver support real 32-bit integers?  (Otherwise, integers
     * are simulated by floats.)
diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
index df5b7e2d517..495d109375b 100644
--- a/src/glsl/nir/nir_opcodes.py
+++ b/src/glsl/nir/nir_opcodes.py
@@ -453,6 +453,9 @@ binop("fxor", tfloat, commutative,
 binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
              "{src}")
 
+binop_reduce("fdot_replicated", 4, tfloat, tfloat,
+             "{src0} * {src1}", "{src0} + {src1}", "{src}")
+
 binop("fmin", tfloat, "", "fminf(src0, src1)")
 binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
 binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : src1")
diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py
index 226e0a8d85c..acc3b04b118 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -240,6 +240,9 @@ late_optimizations = [
    (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
    (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
    (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+   (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
+   (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
+   (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
 ]
 
 print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
-- 
cgit v1.2.3


From a88ce0c1c4c1f77209b71d5a6858f952642f385a Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Thu, 10 Sep 2015 11:08:15 -0700
Subject: i965/vec4: Use the replicated fdot instruction in NIR

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
---
 src/mesa/drivers/dri/i965/brw_shader.cpp   | 8 ++++++++
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 6 +++---
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index cf9aa232386..eed73fbadc3 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -96,6 +96,14 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
     */
    nir_options->lower_ffma = true;
    nir_options->lower_sub = true;
+   /* In the vec4 backend, our dpN instruction replicates its result to all
+    * the components of a vec4.  We would like NIR to give us replicated fdot
+    * instructions because it can optimize better for us.
+    *
+    * For the FS backend, it should be lowered away by the scalarizing pass so
+    * we should never see fdot anyway.
+    */
+   nir_options->fdot_replicates = true;
 
    /* We want the GLSL compiler to emit code that uses condition codes */
    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 144f9e56eb7..482fce20cd9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1254,17 +1254,17 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       inst->predicate = BRW_PREDICATE_NORMAL;
       break;
 
-   case nir_op_fdot2:
+   case nir_op_fdot_replicated2:
       inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
       inst->saturate = instr->dest.saturate;
       break;
 
-   case nir_op_fdot3:
+   case nir_op_fdot_replicated3:
       inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]);
       inst->saturate = instr->dest.saturate;
       break;
 
-   case nir_op_fdot4:
+   case nir_op_fdot_replicated4:
       inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]);
       inst->saturate = instr->dest.saturate;
       break;
-- 
cgit v1.2.3


From 29348631fe7bf732a38856ea842cfc7aa2263468 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 9 Sep 2015 17:18:55 -0700
Subject: nir/lower_vec_to_movs: Coalesce into destinations of fdot
 instructions

Now that we have a replicating fdot instruction, we can actually coalesce
into the destinations of vec4 instructions.  We couldn't really do this
before because, if the destination had to end up in .z, we couldn't
reswizzle the instruction.  With a replicated destination, the result ends
up in all channels so we can just set the writemask and we're done.

Shader-db results for vec4 programs on Haswell:

   total instructions in shared programs: 1747753 -> 1746280 (-0.08%)
   instructions in affected programs:     143274 -> 141801 (-1.03%)
   helped:                                667
   HURT:                                  0

It turns out that dot-products matter...

Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
---
 src/glsl/nir/nir_lower_vec_to_movs.c | 49 ++++++++++++++++++++++++++----------
 1 file changed, 36 insertions(+), 13 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c b/src/glsl/nir/nir_lower_vec_to_movs.c
index 9ff86ea7543..2cb0457f9ba 100644
--- a/src/glsl/nir/nir_lower_vec_to_movs.c
+++ b/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -79,6 +79,14 @@ insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
    return mov->dest.write_mask;
 }
 
+static bool
+has_replicated_dest(nir_alu_instr *alu)
+{
+   return alu->op == nir_op_fdot_replicated2 ||
+          alu->op == nir_op_fdot_replicated3 ||
+          alu->op == nir_op_fdot_replicated4;
+}
+
 /* Attempts to coalesce the "move" from the given source of the vec to the
  * destination of the instruction generating the value. If, for whatever
  * reason, we cannot coalesce the mmove, it does nothing and returns 0.  We
@@ -116,19 +124,28 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
    nir_alu_instr *src_alu =
       nir_instr_as_alu(vec->src[start_idx].src.ssa->parent_instr);
 
-   /* We only care about being able to re-swizzle the instruction if it is
-    * something that we can reswizzle.  It must be per-component.
-    */
-   if (nir_op_infos[src_alu->op].output_size != 0)
-      return 0;
-
-   /* If we are going to reswizzle the instruction, we can't have any
-    * non-per-component sources either.
-    */
-   for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
-      if (nir_op_infos[src_alu->op].input_sizes[j] != 0)
+   if (has_replicated_dest(src_alu)) {
+      /* The fdot instruction is special: It replicates its result to all
+       * components.  This means that we can always rewrite its destination
+       * and we don't need to swizzle anything.
+       */
+   } else {
+      /* We only care about being able to re-swizzle the instruction if it is
+       * something that we can reswizzle.  It must be per-component.  The one
+       * exception to this is the fdot_replicatedN instructions, which
+       * implicitly splat their result out to all channels.
+       */
+      if (nir_op_infos[src_alu->op].output_size != 0)
          return 0;
 
+      /* If we are going to reswizzle the instruction, we can't have any
+       * non-per-component sources either.
+       */
+      for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+         if (nir_op_infos[src_alu->op].input_sizes[j] != 0)
+            return 0;
+   }
+
    /* Stash off all of the ALU instruction's swizzles. */
    uint8_t swizzles[4][4];
    for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
@@ -148,8 +165,14 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
        * instruction so we can re-swizzle that component to match.
        */
       write_mask |= 1 << i;
-      for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
-         src_alu->src[j].swizzle[i] = swizzles[j][vec->src[i].swizzle[0]];
+      if (has_replicated_dest(src_alu)) {
+         /* Since the destination is a single replicated value, we don't need
+          * to do any reswizzling
+          */
+      } else {
+         for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+            src_alu->src[j].swizzle[i] = swizzles[j][vec->src[i].swizzle[0]];
+      }
 
       /* Clear the no longer needed vec source */
       nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, NIR_SRC_INIT);
-- 
cgit v1.2.3


From 76977222af3dcf6c0915830c7b7af06505f8cd9a Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Mon, 14 Sep 2015 16:59:36 -0400
Subject: freedreno: update generated headers

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/a2xx/a2xx.xml.h     |  6 +++---
 src/gallium/drivers/freedreno/a3xx/a3xx.xml.h     | 23 +++++++++++++++++++----
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h     |  9 +++++----
 src/gallium/drivers/freedreno/adreno_common.xml.h | 10 +++++++---
 src/gallium/drivers/freedreno/adreno_pm4.xml.h    |  6 +++---
 5 files changed, 37 insertions(+), 17 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
index dd489568a77..2853787a340 100644
--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index a157dc33db9..4bbcb33614c 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
@@ -280,6 +280,8 @@ enum a3xx_rb_blend_opcode {
 enum a3xx_intp_mode {
 	SMOOTH = 0,
 	FLAT = 1,
+	ZERO = 2,
+	ONE = 3,
 };
 
 enum a3xx_repl_mode {
@@ -684,6 +686,12 @@ static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460
 #define A3XX_GRAS_CL_CLIP_CNTL_ZCOORD				0x00800000
 #define A3XX_GRAS_CL_CLIP_CNTL_WCOORD				0x01000000
 #define A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE			0x02000000
+#define A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__MASK	0x1c000000
+#define A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__SHIFT	26
+static inline uint32_t A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(uint32_t val)
+{
+	return ((val) << A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__SHIFT) & A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__MASK;
+}
 
 #define REG_A3XX_GRAS_CL_GB_CLIP_ADJ				0x00002044
 #define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK			0x000003ff
@@ -774,7 +782,7 @@ static inline uint32_t A3XX_GRAS_SU_POINT_SIZE(float val)
 #define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT		0
 static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(float val)
 {
-	return ((((int32_t)(val * 16384.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK;
+	return ((((int32_t)(val * 1048576.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK;
 }
 
 #define REG_A3XX_GRAS_SU_POLY_OFFSET_OFFSET			0x0000206d
@@ -895,6 +903,9 @@ static inline uint32_t A3XX_RB_MODE_CONTROL_MRT(uint32_t val)
 #define A3XX_RB_MODE_CONTROL_PACKER_TIMER_ENABLE		0x00010000
 
 #define REG_A3XX_RB_RENDER_CONTROL				0x000020c1
+#define A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE		0x00000001
+#define A3XX_RB_RENDER_CONTROL_YUV_IN_ENABLE			0x00000002
+#define A3XX_RB_RENDER_CONTROL_COV_VALUE_INPUT_ENABLE		0x00000004
 #define A3XX_RB_RENDER_CONTROL_FACENESS				0x00000008
 #define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK			0x00000ff0
 #define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT			4
@@ -908,6 +919,8 @@ static inline uint32_t A3XX_RB_RENDER_CONTROL_BIN_WIDTH(uint32_t val)
 #define A3XX_RB_RENDER_CONTROL_YCOORD				0x00008000
 #define A3XX_RB_RENDER_CONTROL_ZCOORD				0x00010000
 #define A3XX_RB_RENDER_CONTROL_WCOORD				0x00020000
+#define A3XX_RB_RENDER_CONTROL_I_CLAMP_ENABLE			0x00080000
+#define A3XX_RB_RENDER_CONTROL_COV_VALUE_OUTPUT_ENABLE		0x00100000
 #define A3XX_RB_RENDER_CONTROL_ALPHA_TEST			0x00400000
 #define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK		0x07000000
 #define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT		24
@@ -915,6 +928,8 @@ static inline uint32_t A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(enum adreno_compar
 {
 	return ((val) << A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK;
 }
+#define A3XX_RB_RENDER_CONTROL_ALPHA_TO_COVERAGE		0x40000000
+#define A3XX_RB_RENDER_CONTROL_ALPHA_TO_ONE			0x80000000
 
 #define REG_A3XX_RB_MSAA_CONTROL				0x000020c2
 #define A3XX_RB_MSAA_CONTROL_DISABLE				0x00000400
diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index 2e1d712a28c..819f5b14a17 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
@@ -249,7 +249,8 @@ enum a4xx_tex_clamp {
 	A4XX_TEX_REPEAT = 0,
 	A4XX_TEX_CLAMP_TO_EDGE = 1,
 	A4XX_TEX_MIRROR_REPEAT = 2,
-	A4XX_TEX_CLAMP_NONE = 3,
+	A4XX_TEX_CLAMP_TO_BORDER = 3,
+	A4XX_TEX_MIRROR_CLAMP = 4,
 };
 
 enum a4xx_tex_aniso {
diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h
index 29944b7ac08..906368c0efa 100644
--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
@@ -85,6 +85,10 @@ enum adreno_rb_blend_factor {
 	FACTOR_CONSTANT_ALPHA = 14,
 	FACTOR_ONE_MINUS_CONSTANT_ALPHA = 15,
 	FACTOR_SRC_ALPHA_SATURATE = 16,
+	FACTOR_SRC1_COLOR = 20,
+	FACTOR_ONE_MINUS_SRC1_COLOR = 21,
+	FACTOR_SRC1_ALPHA = 22,
+	FACTOR_ONE_MINUS_SRC1_ALPHA = 23,
 };
 
 enum adreno_rb_surface_endian {
diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
index 432dce3dfb3..490cf5beaf0 100644
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
-- 
cgit v1.2.3


From 9124a49d54af5d7bd8230af4ba3eebfb167a7655 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Tue, 15 Sep 2015 09:23:21 -0400
Subject: freedreno: helper for a3xx/a4xx border-colors

Both use the same layout for the buffer containing border-color values,
so rather than duplicating the logic in a4xx, split it out into a
helper.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/a3xx/fd3_context.h  | 16 ------
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c     | 53 +-----------------
 src/gallium/drivers/freedreno/freedreno_texture.c | 66 +++++++++++++++++++++++
 src/gallium/drivers/freedreno/freedreno_texture.h | 31 +++++++++++
 4 files changed, 99 insertions(+), 67 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.h b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
index 250bcf89596..b4c2ebe570c 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
@@ -73,22 +73,6 @@ struct fd3_context {
 	 */
 	struct fd_vertex_state blit_vbuf_state;
 
-
-	/*
-	 * Border color layout *appears* to be as arrays of 0x40 byte
-	 * elements, with frag shader elements starting at (16 x 0x40).
-	 * But at some point I should probably experiment more with
-	 * samplers in vertex shaders to be sure.  Unclear about why
-	 * there is this offset when there are separate VS and FS base
-	 * addr regs.
-	 *
-	 * The first 8 bytes of each entry are the requested border
-	 * color in fp16.  Unclear about the rest.. could be used for
-	 * other formats, or could simply be for aligning the pitch
-	 * to 32 pixels.
-	 */
-#define BORDERCOLOR_SIZE 0x40
-
 	struct u_upload_mgr *border_color_uploader;
 	struct pipe_resource *border_color_buf;
 
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 6f514ed05df..b81bc5a90a4 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -149,6 +149,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 			&fd3_ctx->border_color_buf,
 			&ptr);
 
+	fd_setup_border_colors(tex, ptr, tex_off[sb]);
+
 	if (tex->num_samplers > 0) {
 		/* output sampler state: */
 		OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * tex->num_samplers));
@@ -163,57 +165,6 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 			const struct fd3_sampler_stateobj *sampler = tex->samplers[i] ?
 					fd3_sampler_stateobj(tex->samplers[i]) :
 					&dummy_sampler;
-			uint16_t *bcolor = (uint16_t *)((uint8_t *)ptr +
-					(BORDERCOLOR_SIZE * tex_off[sb]) +
-					(BORDERCOLOR_SIZE * i));
-			uint32_t *bcolor32 = (uint32_t *)&bcolor[16];
-
-			/*
-			 * XXX HACK ALERT XXX
-			 *
-			 * The border colors need to be swizzled in a particular
-			 * format-dependent order. Even though samplers don't know about
-			 * formats, we can assume that with a GL state tracker, there's a
-			 * 1:1 correspondence between sampler and texture. Take advantage
-			 * of that knowledge.
-			 */
-			if (i < tex->num_textures && tex->textures[i]) {
-				const struct util_format_description *desc =
-					util_format_description(tex->textures[i]->format);
-				for (j = 0; j < 4; j++) {
-					if (desc->swizzle[j] >= 4)
-						continue;
-
-					const struct util_format_channel_description *chan =
-						&desc->channel[desc->swizzle[j]];
-					int size = chan->size;
-
-					/* The Z16 texture format we use seems to look in the
-					 * 32-bit border color slots
-					 */
-					if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
-						size = 32;
-
-					/* Formats like R11G11B10 or RGB9_E5 don't specify
-					 * per-channel sizes properly.
-					 */
-					if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER)
-						size = 16;
-
-					if (chan->pure_integer && size > 16)
-						bcolor32[desc->swizzle[j] + 4] =
-							sampler->base.border_color.i[j];
-					else if (size > 16)
-						bcolor32[desc->swizzle[j]] =
-							fui(sampler->base.border_color.f[j]);
-					else if (chan->pure_integer)
-						bcolor[desc->swizzle[j] + 8] =
-							sampler->base.border_color.i[j];
-					else
-						bcolor[desc->swizzle[j]] =
-							util_float_to_half(sampler->base.border_color.f[j]);
-				}
-			}
 
 			OUT_RING(ring, sampler->texsamp0);
 			OUT_RING(ring, sampler->texsamp1);
diff --git a/src/gallium/drivers/freedreno/freedreno_texture.c b/src/gallium/drivers/freedreno/freedreno_texture.c
index eaa6629f2b8..04e4643b4c9 100644
--- a/src/gallium/drivers/freedreno/freedreno_texture.c
+++ b/src/gallium/drivers/freedreno/freedreno_texture.c
@@ -162,3 +162,69 @@ fd_texture_init(struct pipe_context *pctx)
 
 	pctx->sampler_view_destroy = fd_sampler_view_destroy;
 }
+
+/* helper for setting up border-color buffer for a3xx/a4xx: */
+void
+fd_setup_border_colors(struct fd_texture_stateobj *tex, void *ptr,
+		unsigned offset)
+{
+	unsigned i, j;
+
+	for (i = 0; i < tex->num_samplers; i++) {
+		struct pipe_sampler_state *sampler = tex->samplers[i];
+		uint16_t *bcolor = (uint16_t *)((uint8_t *)ptr +
+				(BORDERCOLOR_SIZE * offset) +
+				(BORDERCOLOR_SIZE * i));
+		uint32_t *bcolor32 = (uint32_t *)&bcolor[16];
+
+		if (!sampler)
+			continue;
+
+		/*
+		 * XXX HACK ALERT XXX
+		 *
+		 * The border colors need to be swizzled in a particular
+		 * format-dependent order. Even though samplers don't know about
+		 * formats, we can assume that with a GL state tracker, there's a
+		 * 1:1 correspondence between sampler and texture. Take advantage
+		 * of that knowledge.
+		 */
+		if (i < tex->num_textures && tex->textures[i]) {
+			const struct util_format_description *desc =
+					util_format_description(tex->textures[i]->format);
+			for (j = 0; j < 4; j++) {
+				if (desc->swizzle[j] >= 4)
+					continue;
+
+				const struct util_format_channel_description *chan =
+						&desc->channel[desc->swizzle[j]];
+				int size = chan->size;
+
+				/* The Z16 texture format we use seems to look in the
+				 * 32-bit border color slots
+				 */
+				if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
+					size = 32;
+
+				/* Formats like R11G11B10 or RGB9_E5 don't specify
+				 * per-channel sizes properly.
+				 */
+				if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER)
+					size = 16;
+
+				if (chan->pure_integer && size > 16)
+					bcolor32[desc->swizzle[j] + 4] =
+							sampler->border_color.i[j];
+				else if (size > 16)
+					bcolor32[desc->swizzle[j]] =
+							fui(sampler->border_color.f[j]);
+				else if (chan->pure_integer)
+					bcolor[desc->swizzle[j] + 8] =
+							sampler->border_color.i[j];
+				else
+					bcolor[desc->swizzle[j]] =
+							util_float_to_half(sampler->border_color.f[j]);
+			}
+		}
+	}
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_texture.h b/src/gallium/drivers/freedreno/freedreno_texture.h
index 43571a9fa61..fa27d1c32af 100644
--- a/src/gallium/drivers/freedreno/freedreno_texture.h
+++ b/src/gallium/drivers/freedreno/freedreno_texture.h
@@ -41,4 +41,35 @@ void fd_set_sampler_views(struct pipe_context *pctx, unsigned shader,
 
 void fd_texture_init(struct pipe_context *pctx);
 
+struct fd_texture_stateobj;
+
+/* Both a3xx/a4xx share the same layout for the border-color buffer,
+ * which contains the pre-swizzled (based on texture format) border
+ * color value, with the following layout (per sampler):
+ *
+ *  offset | description
+ *  -------+-------------
+ *  0x00:  | fp16[0]   \
+ *         | fp16[1]   |___ swizzled fp16 channel values for "small float"
+ *         | fp16[2]   |    formats (<= 16 bits per component, !integer)
+ *         | fp16[3]   /
+ *  0x08:  | padding
+ *  0x10:  | int16[0]  \
+ *         | int16[1]  |___ swizzled int16 channels for "small integer"
+ *         | int16[2]  |    formats (<= 16 bits per component, integer)
+ *         | int16[3]  /
+ *  0x18:  | padding
+ *  0x20:  | fp32[0]   \
+ *         | fp32[1]   |___ swizzled fp32 channel values for "large float"
+ *         | fp32[2]   |    formats (> 16 bits per component, !integer)
+ *         | fp32[3]   /
+ *  0x30:  | int32[0]  \
+ *         | int32[1]  |___ swizzled int32 channel values for "large int"
+ *         | int32[2]  |    formats (> 16 bits per component, integer)
+ *         | int32[3]  /
+ */
+#define BORDERCOLOR_SIZE 0x40
+void fd_setup_border_colors(struct fd_texture_stateobj *tex, void *ptr,
+		unsigned offset);
+
 #endif /* FREEDRENO_TEXTURE_H_ */
-- 
cgit v1.2.3


From f8222724f539b9e16affc0f4ddd95cfda27293a5 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Tue, 15 Sep 2015 17:25:25 -0400
Subject: freedreno/a4xx: wire up texture clamp lowering

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/a4xx/fd4_texture.c | 99 +++++++++++++++++++-----
 src/gallium/drivers/freedreno/a4xx/fd4_texture.h |  1 +
 2 files changed, 80 insertions(+), 20 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index 213b29c9181..5ee022b30a9 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -35,32 +35,31 @@
 #include "fd4_texture.h"
 #include "fd4_format.h"
 
-/* TODO do we need to emulate clamp-to-edge like a3xx? */
 static enum a4xx_tex_clamp
-tex_clamp(unsigned wrap)
+tex_clamp(unsigned wrap, bool clamp_to_edge)
 {
-	/* hardware probably supports more, but we can't coax all the
-	 * wrap/clamp modes out of the GLESv2 blob driver.
-	 *
-	 * TODO once we have basics working, go back and just try
-	 * different values and see what happens
-	 */
+	/* Hardware does not support _CLAMP, but we emulate it: */
+	if (wrap == PIPE_TEX_WRAP_CLAMP) {
+		wrap = (clamp_to_edge) ?
+			PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER;
+	}
+
 	switch (wrap) {
 	case PIPE_TEX_WRAP_REPEAT:
 		return A4XX_TEX_REPEAT;
-	case PIPE_TEX_WRAP_CLAMP:
 	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 		return A4XX_TEX_CLAMP_TO_EDGE;
-	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-// TODO
-//		return A4XX_TEX_CLAMP_TO_BORDER;
-	case PIPE_TEX_WRAP_MIRROR_CLAMP:
-	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
-// TODO
-//		return A4XX_TEX_MIRROR_CLAMP;
+		/* only works for PoT.. need to emulate otherwise! */
+		return A4XX_TEX_MIRROR_CLAMP;
 	case PIPE_TEX_WRAP_MIRROR_REPEAT:
 		return A4XX_TEX_MIRROR_REPEAT;
+	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+	case PIPE_TEX_WRAP_MIRROR_CLAMP:
+	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+		/* these two we could perhaps emulate, but we currently
+		 * just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
+		 */
 	default:
 		DBG("invalid wrap: %u", wrap);
 		return 0;
@@ -88,6 +87,7 @@ fd4_sampler_state_create(struct pipe_context *pctx,
 	struct fd4_sampler_stateobj *so = CALLOC_STRUCT(fd4_sampler_stateobj);
 	unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
 	bool miplinear = false;
+	bool clamp_to_edge;
 
 	if (!so)
 		return NULL;
@@ -97,14 +97,29 @@ fd4_sampler_state_create(struct pipe_context *pctx,
 
 	so->base = *cso;
 
+	/*
+	 * For nearest filtering, _CLAMP means _CLAMP_TO_EDGE;  for linear
+	 * filtering, _CLAMP means _CLAMP_TO_BORDER while additionally
+	 * clamping the texture coordinates to [0.0, 1.0].
+	 *
+	 * The clamping will be taken care of in the shaders.  There are two
+	 * filters here, but let the minification one have a say.
+	 */
+	clamp_to_edge = (cso->min_img_filter == PIPE_TEX_FILTER_NEAREST);
+	if (!clamp_to_edge) {
+		so->saturate_s = (cso->wrap_s == PIPE_TEX_WRAP_CLAMP);
+		so->saturate_t = (cso->wrap_t == PIPE_TEX_WRAP_CLAMP);
+		so->saturate_r = (cso->wrap_r == PIPE_TEX_WRAP_CLAMP);
+	}
+
 	so->texsamp0 =
 		COND(miplinear, A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
 		A4XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
 		A4XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
 		A4XX_TEX_SAMP_0_ANISO(aniso) |
-		A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) |
-		A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t)) |
-		A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r));
+		A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, clamp_to_edge)) |
+		A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, clamp_to_edge)) |
+		A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, clamp_to_edge));
 
 	so->texsamp1 =
 //		COND(miplinear, A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) |
@@ -122,6 +137,50 @@ fd4_sampler_state_create(struct pipe_context *pctx,
 	return so;
 }
 
+static void
+fd4_sampler_states_bind(struct pipe_context *pctx,
+		unsigned shader, unsigned start,
+		unsigned nr, void **hwcso)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct fd4_context *fd4_ctx = fd4_context(ctx);
+	uint16_t saturate_s = 0, saturate_t = 0, saturate_r = 0;
+	unsigned i;
+
+	for (i = 0; i < nr; i++) {
+		if (hwcso[i]) {
+			struct fd4_sampler_stateobj *sampler =
+					fd4_sampler_stateobj(hwcso[i]);
+			if (sampler->saturate_s)
+				saturate_s |= (1 << i);
+			if (sampler->saturate_t)
+				saturate_t |= (1 << i);
+			if (sampler->saturate_r)
+				saturate_r |= (1 << i);
+		}
+	}
+
+	fd_sampler_states_bind(pctx, shader, start, nr, hwcso);
+
+	if (shader == PIPE_SHADER_FRAGMENT) {
+		fd4_ctx->fsaturate =
+			(saturate_s != 0) ||
+			(saturate_t != 0) ||
+			(saturate_r != 0);
+		fd4_ctx->fsaturate_s = saturate_s;
+		fd4_ctx->fsaturate_t = saturate_t;
+		fd4_ctx->fsaturate_r = saturate_r;
+	} else if (shader == PIPE_SHADER_VERTEX) {
+		fd4_ctx->vsaturate =
+			(saturate_s != 0) ||
+			(saturate_t != 0) ||
+			(saturate_r != 0);
+		fd4_ctx->vsaturate_s = saturate_s;
+		fd4_ctx->vsaturate_t = saturate_t;
+		fd4_ctx->vsaturate_r = saturate_r;
+	}
+}
+
 static enum a4xx_tex_type
 tex_type(unsigned target)
 {
@@ -209,7 +268,7 @@ void
 fd4_texture_init(struct pipe_context *pctx)
 {
 	pctx->create_sampler_state = fd4_sampler_state_create;
-	pctx->bind_sampler_states = fd_sampler_states_bind;
+	pctx->bind_sampler_states = fd4_sampler_states_bind;
 	pctx->create_sampler_view = fd4_sampler_view_create;
 	pctx->set_sampler_views = fd_set_sampler_views;
 }
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
index 84ee7ecb50c..31955770a85 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
@@ -40,6 +40,7 @@
 struct fd4_sampler_stateobj {
 	struct pipe_sampler_state base;
 	uint32_t texsamp0, texsamp1;
+	bool saturate_s, saturate_t, saturate_r;
 };
 
 static inline struct fd4_sampler_stateobj *
-- 
cgit v1.2.3


From d85267c4bb5416dc3fbac7798b4bb68247340508 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Tue, 15 Sep 2015 17:25:47 -0400
Subject: freedreno/a4xx: border-color support

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/a4xx/fd4_context.c |  5 +++++
 src/gallium/drivers/freedreno/a4xx/fd4_context.h |  5 +++++
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c    | 20 +++++++++++++++++++-
 src/gallium/drivers/freedreno/a4xx/fd4_texture.c |  3 ++-
 4 files changed, 31 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.c b/src/gallium/drivers/freedreno/a4xx/fd4_context.c
index 625512ccd1b..e53e0c56c9a 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_context.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.c
@@ -55,6 +55,8 @@ fd4_context_destroy(struct pipe_context *pctx)
 	pipe_resource_reference(&fd4_ctx->solid_vbuf, NULL);
 	pipe_resource_reference(&fd4_ctx->blit_texcoord_vbuf, NULL);
 
+	u_upload_destroy(fd4_ctx->border_color_uploader);
+
 	fd_context_destroy(pctx);
 }
 
@@ -169,5 +171,8 @@ fd4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
 
 	fd4_query_context_init(pctx);
 
+	fd4_ctx->border_color_uploader = u_upload_create(pctx, 4096,
+			2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, 0);
+
 	return pctx;
 }
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.h b/src/gallium/drivers/freedreno/a4xx/fd4_context.h
index af9475699db..074c5a752bf 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_context.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.h
@@ -29,6 +29,8 @@
 #ifndef FD4_CONTEXT_H_
 #define FD4_CONTEXT_H_
 
+#include "util/u_upload_mgr.h"
+
 #include "freedreno_drmif.h"
 
 #include "freedreno_context.h"
@@ -70,6 +72,9 @@ struct fd4_context {
 	 */
 	struct fd_vertex_state blit_vbuf_state;
 
+	struct u_upload_mgr *border_color_uploader;
+	struct pipe_resource *border_color_buf;
+
 	/* if *any* of bits are set in {v,f}saturate_{s,t,r} */
 	bool vsaturate, fsaturate;
 
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index b75be29e523..5f36cef3e9a 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -124,7 +124,20 @@ static void
 emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		enum adreno_state_block sb, struct fd_texture_stateobj *tex)
 {
-	unsigned i;
+	static const uint32_t bcolor_reg[] = {
+			[SB_VERT_TEX] = REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR,
+			[SB_FRAG_TEX] = REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
+	};
+	struct fd4_context *fd4_ctx = fd4_context(ctx);
+	unsigned i, off;
+	void *ptr;
+
+	u_upload_alloc(fd4_ctx->border_color_uploader,
+			0, 2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, &off,
+			&fd4_ctx->border_color_buf,
+			&ptr);
+
+	fd_setup_border_colors(tex, ptr, 0);
 
 	if (tex->num_samplers > 0) {
 		int num_samplers;
@@ -190,6 +203,11 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 			OUT_RING(ring, 0x00000000);
 		}
 	}
+
+	OUT_PKT0(ring, bcolor_reg[sb], 1);
+	OUT_RELOC(ring, fd_resource(fd4_ctx->border_color_buf)->bo, off, 0, 0);
+
+	u_upload_unmap(fd4_ctx->border_color_uploader);
 }
 
 /* emit texture state for mem->gmem restore operation.. eventually it would
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index 5ee022b30a9..dbff5a738fd 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -49,12 +49,13 @@ tex_clamp(unsigned wrap, bool clamp_to_edge)
 		return A4XX_TEX_REPEAT;
 	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 		return A4XX_TEX_CLAMP_TO_EDGE;
+	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+		return A4XX_TEX_CLAMP_TO_BORDER;
 	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 		/* only works for PoT.. need to emulate otherwise! */
 		return A4XX_TEX_MIRROR_CLAMP;
 	case PIPE_TEX_WRAP_MIRROR_REPEAT:
 		return A4XX_TEX_MIRROR_REPEAT;
-	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 	case PIPE_TEX_WRAP_MIRROR_CLAMP:
 	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 		/* these two we could perhaps emulate, but we currently
-- 
cgit v1.2.3


From 18385bc3ac867bf7fb4070fe0f90bdf8e3e515a4 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Mon, 14 Sep 2015 15:15:06 -0400
Subject: freedreno/a4xx: more texture formats

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/a4xx/fd4_format.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index 6c9e217d5ad..847d4fb6d63 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -89,13 +89,14 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
 	_T(L8_UNORM,   8_UNORM, R8_UNORM, WZYX),
 	_T(I8_UNORM,   8_UNORM, NONE,     WZYX),
 
-	/* NOTE: should be TFMT_8_UINT (which then gets remapped to
-	 * TFMT_8_UNORM for mem2gmem in _gmem_restore_format()), but
-	 * we don't know TFMT_8_UINT yet.. so just use TFMT_8_UNORM
-	 * for now.. sampling from stencil as a texture might not
-	 * work right, but at least should be fine for zsbuf..
-	 */
-	_T(S8_UINT,    8_UNORM,  R8_UNORM, WZYX),
+	_T(A8_UINT,    8_UINT,  NONE,     WZYX),
+	_T(A8_SINT,    8_SINT,  NONE,     WZYX),
+	_T(L8_UINT,    8_UINT,  NONE,     WZYX),
+	_T(L8_SINT,    8_SINT,  NONE,     WZYX),
+	_T(I8_UINT,    8_UINT,  NONE,     WZYX),
+	_T(I8_SINT,    8_SINT,  NONE,     WZYX),
+
+	_T(S8_UINT,    8_UINT,  R8_UNORM, WZYX),
 
 	/* 16-bit */
 	V_(R16_UNORM,   16_UNORM, NONE,     WZYX),
-- 
cgit v1.2.3


From cb503c322754dd9dba016e703cf8b30177ed157b Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Tue, 15 Sep 2015 12:09:06 -0700
Subject: nir/builder: Use a normal temporary array in nir_channel

C++ gets cranky if we take references of temporaries.  This isn't a problem
yet in master because nir_builder is never used from C++.  However, it will
be in the future so we should fix it now.

Reviewed-by: Rob Clark <robclark@freedesktop.org>
---
 src/glsl/nir/nir_builder.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h
index cf50f699eae..44134cf4c29 100644
--- a/src/glsl/nir/nir_builder.h
+++ b/src/glsl/nir/nir_builder.h
@@ -219,7 +219,8 @@ nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4],
 static inline nir_ssa_def *
 nir_channel(nir_builder *b, nir_ssa_def *def, int c)
 {
-   return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false);
+   unsigned swizzle[4] = {c, c, c, c};
+   return nir_swizzle(b, def, swizzle, 1, false);
 }
 
 /**
-- 
cgit v1.2.3


From 47e18a595731c054ac254e26066e6dea804f34e8 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Tue, 15 Sep 2015 14:01:17 -0700
Subject: i965/fs: The barrier send uses only 1 payload register

When preparing the barrier payload, the instructions should operate in
simd8 mode since we only use 1 payload register.

fs_inst::regs_read is also updated to indicate that it only reads one
register for SHADER_OPCODE_BARRIER.

These issues were flagged by:

commit cadd7dd384b33a779d46bd664f456bed4a21a5b7
Author: Jason Ekstrand <jason.ekstrand@intel.com>
Date:   Thu Jul 2 15:41:02 2015 -0700

    i965/fs: Add a very basic validation pass

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp         | 1 +
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 6 ++++--
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 63bee0aa5fd..b4d05674260 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -797,6 +797,7 @@ fs_inst::regs_read(int arg) const
       break;
 
    case CS_OPCODE_CS_TERMINATE:
+   case SHADER_OPCODE_BARRIER:
       return 1;
 
    default:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 8b61c867602..b85b52b38d8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1045,12 +1045,14 @@ fs_visitor::emit_barrier()
 
    fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
 
+   const fs_builder pbld = bld.exec_all().group(8, 0);
+
    /* Clear the message payload */
-   bld.exec_all().MOV(payload, fs_reg(0u));
+   pbld.MOV(payload, fs_reg(0u));
 
    /* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */
    fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD));
-   bld.exec_all().AND(component(payload, 2), r0_2, fs_reg(0x0f000000u));
+   pbld.AND(component(payload, 2), r0_2, fs_reg(0x0f000000u));
 
    /* Emit a gateway "barrier" message using the payload we set up, followed
     * by a wait instruction.
-- 
cgit v1.2.3


From d9efe40dc924b8bfd93c0572bd70c0585f823628 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Mon, 14 Sep 2015 11:13:19 -0400
Subject: nir: add lowering for ffract

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/glsl/nir/nir.h                | 3 +++
 src/glsl/nir/nir_opt_algebraic.py | 1 +
 2 files changed, 4 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 4e4543ad5ec..fffb2f45719 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1440,6 +1440,9 @@ typedef struct nir_shader_compiler_options {
     */
    bool fdot_replicates;
 
+   /** lowers ffract to fsub+ffloor: */
+   bool lower_ffract;
+
    /**
     * Does the driver support real 32-bit integers?  (Otherwise, integers
     * are simulated by floats.)
diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py
index acc3b04b118..43558a547b4 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -76,6 +76,7 @@ optimizations = [
    (('flrp', a, a, b), a),
    (('flrp', 0.0, a, b), ('fmul', a, b)),
    (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
+   (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
    (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
    (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
    (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
-- 
cgit v1.2.3


From b3958f9f8387b5967530ff77a08120074042c8e5 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Mon, 14 Sep 2015 11:54:05 -0400
Subject: freedreno/ir3: use NIR to lower ffract instead of tgsi_lowering

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 5a069fb5378..7ce1c976e9c 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -134,6 +134,7 @@ static struct nir_shader *to_nir(const struct tgsi_token *tokens)
 			.lower_fsat = true,
 			.lower_scmp = true,
 			.lower_flrp = true,
+			.lower_ffract = true,
 			.native_integers = true,
 	};
 	bool progress;
@@ -187,7 +188,6 @@ lower_tgsi(struct ir3_compile *ctx, const struct tgsi_token *tokens,
 	struct tgsi_shader_info info;
 	struct tgsi_lowering_config lconfig = {
 			.color_two_side = so->key.color_two_side,
-			.lower_FRC = true,
 	};
 
 	switch (so->type) {
-- 
cgit v1.2.3


From 5bb41d9094b3c9bdf0669fd55418981ed83347e3 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Fri, 4 Sep 2015 11:35:33 -0400
Subject: freedreno: one screen to rule them all

Similar to fee0686c21c631d96d6042741267a3c218c23ffc, but in this case to
ensure that drm_gralloc and libGLES_mesa are sharing a single screen.

Bumps libdrm_freedreno version dependency, as it requires the new
fd_device_fd() API.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 configure.ac                                       |   2 +-
 src/gallium/drivers/freedreno/freedreno_screen.c   |   1 +
 src/gallium/drivers/freedreno/freedreno_screen.h   |  10 ++
 src/gallium/targets/dri/dri.sym                    |   1 +
 .../winsys/freedreno/drm/freedreno_drm_winsys.c    | 117 ++++++++++++++++++++-
 5 files changed, 126 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/configure.ac b/configure.ac
index d8e56439a54..71c9b6c97fa 100644
--- a/configure.ac
+++ b/configure.ac
@@ -74,7 +74,7 @@ LIBDRM_AMDGPU_REQUIRED=2.4.63
 LIBDRM_INTEL_REQUIRED=2.4.61
 LIBDRM_NVVIEUX_REQUIRED=2.4.33
 LIBDRM_NOUVEAU_REQUIRED=2.4.62
-LIBDRM_FREEDRENO_REQUIRED=2.4.64
+LIBDRM_FREEDRENO_REQUIRED=2.4.65
 DRI2PROTO_REQUIRED=2.6
 DRI3PROTO_REQUIRED=1.0
 PRESENTPROTO_REQUIRED=1.0
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 8000279ae80..a3dede2500e 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -483,6 +483,7 @@ fd_screen_create(struct fd_device *dev)
 	pscreen = &screen->base;
 
 	screen->dev = dev;
+	screen->refcnt = 1;
 
 	// maybe this should be in context?
 	screen->pipe = fd_pipe_new(screen->dev, FD_PIPE_3D);
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h
index 4e5c3a61958..8fb096a10dd 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.h
+++ b/src/gallium/drivers/freedreno/freedreno_screen.h
@@ -42,6 +42,16 @@ struct fd_bo;
 struct fd_screen {
 	struct pipe_screen base;
 
+	/* it would be tempting to use pipe_reference here, but that
+	 * really doesn't work well if it isn't the first member of
+	 * the struct, so not quite so awesome to be adding refcnting
+	 * further down the inheritance hierarchy:
+	 */
+	int refcnt;
+
+	/* place for winsys to stash its own stuff: */
+	void *winsys_priv;
+
 	uint32_t gmemsize_bytes;
 	uint32_t device_id;
 	uint32_t gpu_id;         /* 220, 305, etc */
diff --git a/src/gallium/targets/dri/dri.sym b/src/gallium/targets/dri/dri.sym
index 8e26fb960b7..1fdf18beee7 100644
--- a/src/gallium/targets/dri/dri.sym
+++ b/src/gallium/targets/dri/dri.sym
@@ -5,6 +5,7 @@
 		nouveau_drm_screen_create;
 		radeon_drm_winsys_create;
 		amdgpu_winsys_create;
+		fd_drm_screen_create;
 	local:
 		*;
 };
diff --git a/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c b/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c
index 9eb974451d1..e4785f83d96 100644
--- a/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c
+++ b/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c
@@ -1,18 +1,127 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include <sys/stat.h>
+
 #include "pipe/p_context.h"
 #include "pipe/p_state.h"
 #include "util/u_format.h"
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
+#include "util/u_hash_table.h"
+#include "os/os_thread.h"
 
 #include "freedreno_drm_public.h"
 
 #include "freedreno/freedreno_screen.h"
 
+static struct util_hash_table *fd_tab = NULL;
+
+pipe_static_mutex(fd_screen_mutex);
+
+static void
+fd_drm_screen_destroy(struct pipe_screen *pscreen)
+{
+	struct fd_screen *screen = fd_screen(pscreen);
+	boolean destroy;
+
+	pipe_mutex_lock(fd_screen_mutex);
+	destroy = --screen->refcnt == 0;
+	if (destroy) {
+		int fd = fd_device_fd(screen->dev);
+		util_hash_table_remove(fd_tab, intptr_to_pointer(fd));
+	}
+	pipe_mutex_unlock(fd_screen_mutex);
+
+	if (destroy) {
+		pscreen->destroy = screen->winsys_priv;
+		pscreen->destroy(pscreen);
+	}
+}
+
+static unsigned hash_fd(void *key)
+{
+	int fd = pointer_to_intptr(key);
+	struct stat stat;
+	fstat(fd, &stat);
+
+	return stat.st_dev ^ stat.st_ino ^ stat.st_rdev;
+}
+
+static int compare_fd(void *key1, void *key2)
+{
+	int fd1 = pointer_to_intptr(key1);
+	int fd2 = pointer_to_intptr(key2);
+	struct stat stat1, stat2;
+	fstat(fd1, &stat1);
+	fstat(fd2, &stat2);
+
+	return stat1.st_dev != stat2.st_dev ||
+			stat1.st_ino != stat2.st_ino ||
+			stat1.st_rdev != stat2.st_rdev;
+}
+
 struct pipe_screen *
 fd_drm_screen_create(int fd)
 {
-	struct fd_device *dev = fd_device_new_dup(fd);
-	if (!dev)
-		return NULL;
-	return fd_screen_create(dev);
+	struct pipe_screen *pscreen = NULL;
+
+	pipe_mutex_lock(fd_screen_mutex);
+	if (!fd_tab) {
+		fd_tab = util_hash_table_create(hash_fd, compare_fd);
+		if (!fd_tab)
+			goto unlock;
+	}
+
+	pscreen = util_hash_table_get(fd_tab, intptr_to_pointer(fd));
+	if (pscreen) {
+		fd_screen(pscreen)->refcnt++;
+	} else {
+		struct fd_device *dev = fd_device_new_dup(fd);
+		if (!dev)
+			goto unlock;
+
+		pscreen = fd_screen_create(dev);
+		if (pscreen) {
+			int fd = fd_device_fd(dev);
+
+			util_hash_table_set(fd_tab, intptr_to_pointer(fd), pscreen);
+
+			/* Bit of a hack: to avoid a circular linkage dependency
+			 * (i.e. the pipe driver having to call into the winsys), we
+			 * override the pipe driver's screen->destroy():
+			 */
+			fd_screen(pscreen)->winsys_priv = pscreen->destroy;
+			pscreen->destroy = fd_drm_screen_destroy;
+		}
+	}
+
+unlock:
+	pipe_mutex_unlock(fd_screen_mutex);
+	return pscreen;
 }
-- 
cgit v1.2.3


From f2533f2f8cc059e6d239e414f410483a53970cd3 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Fri, 11 Sep 2015 12:48:05 -0400
Subject: glsl: shader-enum to name debug fxns

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/Makefile.am           |   1 +
 src/glsl/shader_enums.c   | 204 ++++++++++++++++++++++++++++++++++++++++++++++
 src/glsl/shader_enums.h   |  53 ++++++++++++
 src/mesa/Makefile.sources |   4 +-
 4 files changed, 261 insertions(+), 1 deletion(-)
 create mode 100644 src/glsl/shader_enums.c

(limited to 'src')

diff --git a/src/Makefile.am b/src/Makefile.am
index 0d49bcd19ed..9e15cca5ea4 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -62,6 +62,7 @@ AM_CPPFLAGS = \
 noinst_LTLIBRARIES = libglsl_util.la
 
 libglsl_util_la_SOURCES = \
+	glsl/shader_enums.c \
 	mesa/main/imports.c \
 	mesa/program/prog_hash_table.c \
 	mesa/program/symbol_table.c \
diff --git a/src/glsl/shader_enums.c b/src/glsl/shader_enums.c
new file mode 100644
index 00000000000..530fd9e4081
--- /dev/null
+++ b/src/glsl/shader_enums.c
@@ -0,0 +1,204 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "glsl/shader_enums.h"
+#include "util/macros.h"
+
+#define ENUM(x) [x] = #x
+#define NAME(val) ((((val) < ARRAY_SIZE(names)) && names[(val)]) ? names[(val)] : "UNKNOWN")
+
+const char * gl_shader_stage_name(gl_shader_stage stage)
+{
+   static const char *names[] = {
+      ENUM(MESA_SHADER_VERTEX),
+      ENUM(MESA_SHADER_TESS_CTRL),
+      ENUM(MESA_SHADER_TESS_EVAL),
+      ENUM(MESA_SHADER_GEOMETRY),
+      ENUM(MESA_SHADER_FRAGMENT),
+      ENUM(MESA_SHADER_COMPUTE),
+   };
+   return NAME(stage);
+}
+
+const char * gl_vert_attrib_name(gl_vert_attrib attrib)
+{
+   static const char *names[] = {
+      ENUM(VERT_ATTRIB_POS),
+      ENUM(VERT_ATTRIB_WEIGHT),
+      ENUM(VERT_ATTRIB_NORMAL),
+      ENUM(VERT_ATTRIB_COLOR0),
+      ENUM(VERT_ATTRIB_COLOR1),
+      ENUM(VERT_ATTRIB_FOG),
+      ENUM(VERT_ATTRIB_COLOR_INDEX),
+      ENUM(VERT_ATTRIB_EDGEFLAG),
+      ENUM(VERT_ATTRIB_TEX0),
+      ENUM(VERT_ATTRIB_TEX1),
+      ENUM(VERT_ATTRIB_TEX2),
+      ENUM(VERT_ATTRIB_TEX3),
+      ENUM(VERT_ATTRIB_TEX4),
+      ENUM(VERT_ATTRIB_TEX5),
+      ENUM(VERT_ATTRIB_TEX6),
+      ENUM(VERT_ATTRIB_TEX7),
+      ENUM(VERT_ATTRIB_POINT_SIZE),
+      ENUM(VERT_ATTRIB_GENERIC0),
+      ENUM(VERT_ATTRIB_GENERIC1),
+      ENUM(VERT_ATTRIB_GENERIC2),
+      ENUM(VERT_ATTRIB_GENERIC3),
+      ENUM(VERT_ATTRIB_GENERIC4),
+      ENUM(VERT_ATTRIB_GENERIC5),
+      ENUM(VERT_ATTRIB_GENERIC6),
+      ENUM(VERT_ATTRIB_GENERIC7),
+      ENUM(VERT_ATTRIB_GENERIC8),
+      ENUM(VERT_ATTRIB_GENERIC9),
+      ENUM(VERT_ATTRIB_GENERIC10),
+      ENUM(VERT_ATTRIB_GENERIC11),
+      ENUM(VERT_ATTRIB_GENERIC12),
+      ENUM(VERT_ATTRIB_GENERIC13),
+      ENUM(VERT_ATTRIB_GENERIC14),
+      ENUM(VERT_ATTRIB_GENERIC15),
+   };
+   return NAME(attrib);
+}
+
+const char * gl_varying_slot_name(gl_varying_slot slot)
+{
+   static const char *names[] = {
+      ENUM(VARYING_SLOT_POS),
+      ENUM(VARYING_SLOT_COL0),
+      ENUM(VARYING_SLOT_COL1),
+      ENUM(VARYING_SLOT_FOGC),
+      ENUM(VARYING_SLOT_TEX0),
+      ENUM(VARYING_SLOT_TEX1),
+      ENUM(VARYING_SLOT_TEX2),
+      ENUM(VARYING_SLOT_TEX3),
+      ENUM(VARYING_SLOT_TEX4),
+      ENUM(VARYING_SLOT_TEX5),
+      ENUM(VARYING_SLOT_TEX6),
+      ENUM(VARYING_SLOT_TEX7),
+      ENUM(VARYING_SLOT_PSIZ),
+      ENUM(VARYING_SLOT_BFC0),
+      ENUM(VARYING_SLOT_BFC1),
+      ENUM(VARYING_SLOT_EDGE),
+      ENUM(VARYING_SLOT_CLIP_VERTEX),
+      ENUM(VARYING_SLOT_CLIP_DIST0),
+      ENUM(VARYING_SLOT_CLIP_DIST1),
+      ENUM(VARYING_SLOT_PRIMITIVE_ID),
+      ENUM(VARYING_SLOT_LAYER),
+      ENUM(VARYING_SLOT_VIEWPORT),
+      ENUM(VARYING_SLOT_FACE),
+      ENUM(VARYING_SLOT_PNTC),
+      ENUM(VARYING_SLOT_TESS_LEVEL_OUTER),
+      ENUM(VARYING_SLOT_TESS_LEVEL_INNER),
+      ENUM(VARYING_SLOT_VAR0),
+      ENUM(VARYING_SLOT_VAR1),
+      ENUM(VARYING_SLOT_VAR2),
+      ENUM(VARYING_SLOT_VAR3),
+      ENUM(VARYING_SLOT_VAR4),
+      ENUM(VARYING_SLOT_VAR5),
+      ENUM(VARYING_SLOT_VAR6),
+      ENUM(VARYING_SLOT_VAR7),
+      ENUM(VARYING_SLOT_VAR8),
+      ENUM(VARYING_SLOT_VAR9),
+      ENUM(VARYING_SLOT_VAR10),
+      ENUM(VARYING_SLOT_VAR11),
+      ENUM(VARYING_SLOT_VAR12),
+      ENUM(VARYING_SLOT_VAR13),
+      ENUM(VARYING_SLOT_VAR14),
+      ENUM(VARYING_SLOT_VAR15),
+      ENUM(VARYING_SLOT_VAR16),
+      ENUM(VARYING_SLOT_VAR17),
+      ENUM(VARYING_SLOT_VAR18),
+      ENUM(VARYING_SLOT_VAR19),
+      ENUM(VARYING_SLOT_VAR20),
+      ENUM(VARYING_SLOT_VAR21),
+      ENUM(VARYING_SLOT_VAR22),
+      ENUM(VARYING_SLOT_VAR23),
+      ENUM(VARYING_SLOT_VAR24),
+      ENUM(VARYING_SLOT_VAR25),
+      ENUM(VARYING_SLOT_VAR26),
+      ENUM(VARYING_SLOT_VAR27),
+      ENUM(VARYING_SLOT_VAR28),
+      ENUM(VARYING_SLOT_VAR29),
+      ENUM(VARYING_SLOT_VAR30),
+      ENUM(VARYING_SLOT_VAR31),
+   };
+   return NAME(slot);
+}
+
+const char * gl_system_value_name(gl_system_value sysval)
+{
+   static const char *names[] = {
+     ENUM(SYSTEM_VALUE_VERTEX_ID),
+     ENUM(SYSTEM_VALUE_INSTANCE_ID),
+     ENUM(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE),
+     ENUM(SYSTEM_VALUE_BASE_VERTEX),
+     ENUM(SYSTEM_VALUE_INVOCATION_ID),
+     ENUM(SYSTEM_VALUE_FRONT_FACE),
+     ENUM(SYSTEM_VALUE_SAMPLE_ID),
+     ENUM(SYSTEM_VALUE_SAMPLE_POS),
+     ENUM(SYSTEM_VALUE_SAMPLE_MASK_IN),
+     ENUM(SYSTEM_VALUE_TESS_COORD),
+     ENUM(SYSTEM_VALUE_VERTICES_IN),
+     ENUM(SYSTEM_VALUE_PRIMITIVE_ID),
+     ENUM(SYSTEM_VALUE_TESS_LEVEL_OUTER),
+     ENUM(SYSTEM_VALUE_TESS_LEVEL_INNER),
+     ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_ID),
+     ENUM(SYSTEM_VALUE_WORK_GROUP_ID),
+   };
+   return NAME(sysval);
+}
+
+const char * glsl_interp_qualifier_name(enum glsl_interp_qualifier qual)
+{
+   static const char *names[] = {
+      ENUM(INTERP_QUALIFIER_NONE),
+      ENUM(INTERP_QUALIFIER_SMOOTH),
+      ENUM(INTERP_QUALIFIER_FLAT),
+      ENUM(INTERP_QUALIFIER_NOPERSPECTIVE),
+   };
+   return NAME(qual);
+}
+
+const char * gl_frag_result_name(gl_frag_result result)
+{
+   static const char *names[] = {
+      ENUM(FRAG_RESULT_DEPTH),
+      ENUM(FRAG_RESULT_STENCIL),
+      ENUM(FRAG_RESULT_COLOR),
+      ENUM(FRAG_RESULT_SAMPLE_MASK),
+      ENUM(FRAG_RESULT_DATA0),
+      ENUM(FRAG_RESULT_DATA1),
+      ENUM(FRAG_RESULT_DATA2),
+      ENUM(FRAG_RESULT_DATA3),
+      ENUM(FRAG_RESULT_DATA4),
+      ENUM(FRAG_RESULT_DATA5),
+      ENUM(FRAG_RESULT_DATA6),
+      ENUM(FRAG_RESULT_DATA7),
+   };
+   return NAME(result);
+}
diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h
index 7c598b64595..385ca972809 100644
--- a/src/glsl/shader_enums.h
+++ b/src/glsl/shader_enums.h
@@ -43,6 +43,8 @@ typedef enum
    MESA_SHADER_COMPUTE = 5,
 } gl_shader_stage;
 
+const char * gl_shader_stage_name(gl_shader_stage stage);
+
 #define MESA_SHADER_STAGES (MESA_SHADER_COMPUTE + 1)
 
 
@@ -91,6 +93,8 @@ typedef enum
    VERT_ATTRIB_MAX = 33
 } gl_vert_attrib;
 
+const char * gl_vert_attrib_name(gl_vert_attrib attrib);
+
 /**
  * Symbolic constats to help iterating over
  * specific blocks of vertex attributes.
@@ -193,8 +197,43 @@ typedef enum
    VARYING_SLOT_TESS_LEVEL_OUTER, /* Only appears as TCS output. */
    VARYING_SLOT_TESS_LEVEL_INNER, /* Only appears as TCS output. */
    VARYING_SLOT_VAR0, /* First generic varying slot */
+   /* the remaining are simply for the benefit of gl_varying_slot_name()
+    * and not to be construed as an upper bound:
+    */
+   VARYING_SLOT_VAR1,
+   VARYING_SLOT_VAR2,
+   VARYING_SLOT_VAR3,
+   VARYING_SLOT_VAR4,
+   VARYING_SLOT_VAR5,
+   VARYING_SLOT_VAR6,
+   VARYING_SLOT_VAR7,
+   VARYING_SLOT_VAR8,
+   VARYING_SLOT_VAR9,
+   VARYING_SLOT_VAR10,
+   VARYING_SLOT_VAR11,
+   VARYING_SLOT_VAR12,
+   VARYING_SLOT_VAR13,
+   VARYING_SLOT_VAR14,
+   VARYING_SLOT_VAR15,
+   VARYING_SLOT_VAR16,
+   VARYING_SLOT_VAR17,
+   VARYING_SLOT_VAR18,
+   VARYING_SLOT_VAR19,
+   VARYING_SLOT_VAR20,
+   VARYING_SLOT_VAR21,
+   VARYING_SLOT_VAR22,
+   VARYING_SLOT_VAR23,
+   VARYING_SLOT_VAR24,
+   VARYING_SLOT_VAR25,
+   VARYING_SLOT_VAR26,
+   VARYING_SLOT_VAR27,
+   VARYING_SLOT_VAR28,
+   VARYING_SLOT_VAR29,
+   VARYING_SLOT_VAR30,
+   VARYING_SLOT_VAR31,
 } gl_varying_slot;
 
+const char * gl_varying_slot_name(gl_varying_slot slot);
 
 /**
  * Bitflags for varying slots.
@@ -376,6 +415,7 @@ typedef enum
    SYSTEM_VALUE_MAX             /**< Number of values */
 } gl_system_value;
 
+const char * gl_system_value_name(gl_system_value sysval);
 
 /**
  * The possible interpolation qualifiers that can be applied to a fragment
@@ -393,6 +433,8 @@ enum glsl_interp_qualifier
    INTERP_QUALIFIER_COUNT /**< Number of interpolation qualifiers */
 };
 
+const char * glsl_interp_qualifier_name(enum glsl_interp_qualifier qual);
+
 /**
  * Fragment program results
  */
@@ -409,8 +451,19 @@ typedef enum
    /* FRAG_RESULT_DATAn are the per-render-target (GLSL gl_FragData[n]
     * or ARB_fragment_program fragment.color[n]) color results.  If
     * any are written, FRAG_RESULT_COLOR will not be written.
+    * FRAG_RESULT_DATA1 and up are simply for the benefit of
+    * gl_frag_result_name() and not to be construed as an upper bound
     */
    FRAG_RESULT_DATA0 = 4,
+   FRAG_RESULT_DATA1,
+   FRAG_RESULT_DATA2,
+   FRAG_RESULT_DATA3,
+   FRAG_RESULT_DATA4,
+   FRAG_RESULT_DATA5,
+   FRAG_RESULT_DATA6,
+   FRAG_RESULT_DATA7,
 } gl_frag_result;
 
+const char * gl_frag_result_name(gl_frag_result result);
+
 #endif /* SHADER_ENUMS_H */
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index ed9848c5454..2a719a0bd89 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -523,7 +523,9 @@ PROGRAM_FILES = \
 	program/sampler.h \
 	program/string_to_uint_map.cpp \
 	program/symbol_table.c \
-	program/symbol_table.h
+	program/symbol_table.h \
+	../glsl/shader_enums.c \
+	../glsl/shader_enums.h
 
 PROGRAM_NIR_FILES = \
 	program/prog_to_nir.c \
-- 
cgit v1.2.3


From 840df72f9336994b2ca9dde981ec75fe10811a4e Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Tue, 15 Sep 2015 18:50:41 -0400
Subject: nir/print: bit of state refactoring

Rename print_var_state to print_state, and stuff FILE ptr into the state
object.  This avoids passing around an extra parameter everywhere.

v2: even more extensive conversion.. use state *everywhere* instead of
FILE ptr, and convert nir_print_instr() to use state as well

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---
 src/glsl/nir/nir_print.c | 261 +++++++++++++++++++++++++++--------------------
 1 file changed, 152 insertions(+), 109 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index 69cadbae2b6..bdecc3ccdf0 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -37,6 +37,7 @@ print_tabs(unsigned num_tabs, FILE *fp)
 }
 
 typedef struct {
+   FILE *fp;
    /** map from nir_variable -> printable name */
    struct hash_table *ht;
 
@@ -45,11 +46,12 @@ typedef struct {
 
    /* an index used to make new non-conflicting names */
    unsigned index;
-} print_var_state;
+} print_state;
 
 static void
-print_register(nir_register *reg, FILE *fp)
+print_register(nir_register *reg, print_state *state)
 {
+   FILE *fp = state->fp;
    if (reg->name != NULL)
       fprintf(fp, "/* %s */ ", reg->name);
    if (reg->is_global)
@@ -61,90 +63,97 @@ print_register(nir_register *reg, FILE *fp)
 static const char *sizes[] = { "error", "vec1", "vec2", "vec3", "vec4" };
 
 static void
-print_register_decl(nir_register *reg, FILE *fp)
+print_register_decl(nir_register *reg, print_state *state)
 {
+   FILE *fp = state->fp;
    fprintf(fp, "decl_reg %s ", sizes[reg->num_components]);
    if (reg->is_packed)
       fprintf(fp, "(packed) ");
-   print_register(reg, fp);
+   print_register(reg, state);
    if (reg->num_array_elems != 0)
       fprintf(fp, "[%u]", reg->num_array_elems);
    fprintf(fp, "\n");
 }
 
 static void
-print_ssa_def(nir_ssa_def *def, FILE *fp)
+print_ssa_def(nir_ssa_def *def, print_state *state)
 {
+   FILE *fp = state->fp;
    if (def->name != NULL)
       fprintf(fp, "/* %s */ ", def->name);
    fprintf(fp, "%s ssa_%u", sizes[def->num_components], def->index);
 }
 
 static void
-print_ssa_use(nir_ssa_def *def, FILE *fp)
+print_ssa_use(nir_ssa_def *def, print_state *state)
 {
+   FILE *fp = state->fp;
    if (def->name != NULL)
       fprintf(fp, "/* %s */ ", def->name);
    fprintf(fp, "ssa_%u", def->index);
 }
 
-static void print_src(nir_src *src, FILE *fp);
+static void print_src(nir_src *src, print_state *state);
 
 static void
-print_reg_src(nir_reg_src *src, FILE *fp)
+print_reg_src(nir_reg_src *src, print_state *state)
 {
-   print_register(src->reg, fp);
+   FILE *fp = state->fp;
+   print_register(src->reg, state);
    if (src->reg->num_array_elems != 0) {
       fprintf(fp, "[%u", src->base_offset);
       if (src->indirect != NULL) {
          fprintf(fp, " + ");
-         print_src(src->indirect, fp);
+         print_src(src->indirect, state);
       }
       fprintf(fp, "]");
    }
 }
 
 static void
-print_reg_dest(nir_reg_dest *dest, FILE *fp)
+print_reg_dest(nir_reg_dest *dest, print_state *state)
 {
-   print_register(dest->reg, fp);
+   FILE *fp = state->fp;
+   print_register(dest->reg, state);
    if (dest->reg->num_array_elems != 0) {
       fprintf(fp, "[%u", dest->base_offset);
       if (dest->indirect != NULL) {
          fprintf(fp, " + ");
-         print_src(dest->indirect, fp);
+         print_src(dest->indirect, state);
       }
       fprintf(fp, "]");
    }
 }
 
 static void
-print_src(nir_src *src, FILE *fp)
+print_src(nir_src *src, print_state *state)
 {
    if (src->is_ssa)
-      print_ssa_use(src->ssa, fp);
+      print_ssa_use(src->ssa, state);
    else
-      print_reg_src(&src->reg, fp);
+      print_reg_src(&src->reg, state);
 }
 
 static void
-print_dest(nir_dest *dest, FILE *fp)
+print_dest(nir_dest *dest, print_state *state)
 {
    if (dest->is_ssa)
-      print_ssa_def(&dest->ssa, fp);
+      print_ssa_def(&dest->ssa, state);
    else
-      print_reg_dest(&dest->reg, fp);
+      print_reg_dest(&dest->reg, state);
 }
 
 static void
-print_alu_src(nir_alu_instr *instr, unsigned src, FILE *fp)
+print_alu_src(nir_alu_instr *instr, unsigned src, print_state *state)
 {
+   FILE *fp = state->fp;
+
    if (instr->src[src].negate)
       fprintf(fp, "-");
    if (instr->src[src].abs)
       fprintf(fp, "abs(");
 
-   print_src(&instr->src[src].src, fp);
+   print_src(&instr->src[src].src, state);
 
    bool print_swizzle = false;
    for (unsigned i = 0; i < 4; i++) {
@@ -172,11 +181,12 @@ print_alu_src(nir_alu_instr *instr, unsigned src, FILE *fp)
 }
 
 static void
-print_alu_dest(nir_alu_dest *dest, FILE *fp)
+print_alu_dest(nir_alu_dest *dest, print_state *state)
 {
+   FILE *fp = state->fp;
    /* we're going to print the saturate modifier later, after the opcode */
 
-   print_dest(&dest->dest, fp);
+   print_dest(&dest->dest, state);
 
    if (!dest->dest.is_ssa &&
        dest->write_mask != (1 << dest->dest.reg.reg->num_components) - 1) {
@@ -188,9 +198,11 @@ print_alu_dest(nir_alu_dest *dest, FILE *fp)
 }
 
 static void
-print_alu_instr(nir_alu_instr *instr, FILE *fp)
+print_alu_instr(nir_alu_instr *instr, print_state *state)
 {
-   print_alu_dest(&instr->dest, fp);
+   FILE *fp = state->fp;
+
+   print_alu_dest(&instr->dest, state);
 
    fprintf(fp, " = %s", nir_op_infos[instr->op].name);
    if (instr->dest.saturate)
@@ -201,13 +213,15 @@ print_alu_instr(nir_alu_instr *instr, FILE *fp)
       if (i != 0)
          fprintf(fp, ", ");
 
-      print_alu_src(instr, i, fp);
+      print_alu_src(instr, i, state);
    }
 }
 
 static void
-print_var_decl(nir_variable *var, print_var_state *state, FILE *fp)
+print_var_decl(nir_variable *var, print_state *state)
 {
+   FILE *fp = state->fp;
+
    fprintf(fp, "decl_var ");
 
    const char *const cent = (var->data.centroid) ? "centroid " : "";
@@ -223,7 +237,7 @@ print_var_decl(nir_variable *var, print_var_state *state, FILE *fp)
    glsl_print_type(var->type, fp);
 
    struct set_entry *entry = NULL;
-   if (state)
+   if (state->syms)
       entry = _mesa_set_search(state->syms, var->name);
 
    char *name;
@@ -253,10 +267,11 @@ print_var_decl(nir_variable *var, print_var_state *state, FILE *fp)
 }
 
 static void
-print_var(nir_variable *var, print_var_state *state, FILE *fp)
+print_var(nir_variable *var, print_state *state)
 {
+   FILE *fp = state->fp;
    const char *name;
-   if (state) {
+   if (state->ht) {
       struct hash_entry *entry = _mesa_hash_table_search(state->ht, var);
 
       assert(entry != NULL);
@@ -269,14 +284,15 @@ print_var(nir_variable *var, print_var_state *state, FILE *fp)
 }
 
 static void
-print_deref_var(nir_deref_var *deref, print_var_state *state, FILE *fp)
+print_deref_var(nir_deref_var *deref, print_state *state)
 {
-   print_var(deref->var, state, fp);
+   print_var(deref->var, state);
 }
 
 static void
-print_deref_array(nir_deref_array *deref, print_var_state *state, FILE *fp)
+print_deref_array(nir_deref_array *deref, print_state *state)
 {
+   FILE *fp = state->fp;
    fprintf(fp, "[");
    switch (deref->deref_array_type) {
    case nir_deref_array_type_direct:
@@ -285,7 +301,7 @@ print_deref_array(nir_deref_array *deref, print_var_state *state, FILE *fp)
    case nir_deref_array_type_indirect:
       if (deref->base_offset != 0)
          fprintf(fp, "%u + ", deref->base_offset);
-      print_src(&deref->indirect, fp);
+      print_src(&deref->indirect, state);
       break;
    case nir_deref_array_type_wildcard:
       fprintf(fp, "*");
@@ -296,13 +312,14 @@ print_deref_array(nir_deref_array *deref, print_var_state *state, FILE *fp)
 
 static void
 print_deref_struct(nir_deref_struct *deref, const struct glsl_type *parent_type,
-                   print_var_state *state, FILE *fp)
+                   print_state *state)
 {
+   FILE *fp = state->fp;
    fprintf(fp, ".%s", glsl_get_struct_elem_name(parent_type, deref->index));
 }
 
 static void
-print_deref(nir_deref_var *deref, print_var_state *state, FILE *fp)
+print_deref(nir_deref_var *deref, print_state *state)
 {
    nir_deref *tail = &deref->deref;
    nir_deref *pretail = NULL;
@@ -311,18 +328,18 @@ print_deref(nir_deref_var *deref, print_var_state *state, FILE *fp)
       case nir_deref_type_var:
          assert(pretail == NULL);
          assert(tail == &deref->deref);
-         print_deref_var(deref, state, fp);
+         print_deref_var(deref, state);
          break;
 
       case nir_deref_type_array:
          assert(pretail != NULL);
-         print_deref_array(nir_deref_as_array(tail), state, fp);
+         print_deref_array(nir_deref_as_array(tail), state);
          break;
 
       case nir_deref_type_struct:
          assert(pretail != NULL);
          print_deref_struct(nir_deref_as_struct(tail),
-                            pretail->type, state, fp);
+                            pretail->type, state);
          break;
 
       default:
@@ -335,13 +352,13 @@ print_deref(nir_deref_var *deref, print_var_state *state, FILE *fp)
 }
 
 static void
-print_intrinsic_instr(nir_intrinsic_instr *instr, print_var_state *state,
-                      FILE *fp)
+print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
 {
    unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+   FILE *fp = state->fp;
 
    if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
-      print_dest(&instr->dest, fp);
+      print_dest(&instr->dest, state);
       fprintf(fp, " = ");
    }
 
@@ -351,7 +368,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_var_state *state,
       if (i != 0)
          fprintf(fp, ", ");
 
-      print_src(&instr->src[i], fp);
+      print_src(&instr->src[i], state);
    }
 
    fprintf(fp, ") (");
@@ -362,7 +379,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_var_state *state,
       if (i != 0)
          fprintf(fp, ", ");
 
-      print_deref(instr->variables[i], state, fp);
+      print_deref(instr->variables[i], state);
    }
 
    fprintf(fp, ") (");
@@ -380,9 +397,11 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_var_state *state,
 }
 
 static void
-print_tex_instr(nir_tex_instr *instr, print_var_state *state, FILE *fp)
+print_tex_instr(nir_tex_instr *instr, print_state *state)
 {
-   print_dest(&instr->dest, fp);
+   FILE *fp = state->fp;
+
+   print_dest(&instr->dest, state);
 
    fprintf(fp, " = ");
 
@@ -427,7 +446,7 @@ print_tex_instr(nir_tex_instr *instr, print_var_state *state, FILE *fp)
    }
 
    for (unsigned i = 0; i < instr->num_srcs; i++) {
-      print_src(&instr->src[i].src, fp);
+      print_src(&instr->src[i].src, state);
 
       fprintf(fp, " ");
 
@@ -490,7 +509,7 @@ print_tex_instr(nir_tex_instr *instr, print_var_state *state, FILE *fp)
    }
 
    if (instr->sampler) {
-      print_deref(instr->sampler, state, fp);
+      print_deref(instr->sampler, state);
    } else {
       fprintf(fp, "%u", instr->sampler_index);
    }
@@ -499,29 +518,33 @@ print_tex_instr(nir_tex_instr *instr, print_var_state *state, FILE *fp)
 }
 
 static void
-print_call_instr(nir_call_instr *instr, print_var_state *state, FILE *fp)
+print_call_instr(nir_call_instr *instr, print_state *state)
 {
+   FILE *fp = state->fp;
+
    fprintf(fp, "call %s ", instr->callee->function->name);
 
    for (unsigned i = 0; i < instr->num_params; i++) {
       if (i != 0)
          fprintf(fp, ", ");
 
-      print_deref(instr->params[i], state, fp);
+      print_deref(instr->params[i], state);
    }
 
    if (instr->return_deref != NULL) {
       if (instr->num_params != 0)
          fprintf(fp, ", ");
       fprintf(fp, "returning ");
-      print_deref(instr->return_deref, state, fp);
+      print_deref(instr->return_deref, state);
    }
 }
 
 static void
-print_load_const_instr(nir_load_const_instr *instr, unsigned tabs, FILE *fp)
+print_load_const_instr(nir_load_const_instr *instr, print_state *state)
 {
-   print_ssa_def(&instr->def, fp);
+   FILE *fp = state->fp;
+
+   print_ssa_def(&instr->def, state);
 
    fprintf(fp, " = load_const (");
 
@@ -542,8 +565,10 @@ print_load_const_instr(nir_load_const_instr *instr, unsigned tabs, FILE *fp)
 }
 
 static void
-print_jump_instr(nir_jump_instr *instr, FILE *fp)
+print_jump_instr(nir_jump_instr *instr, print_state *state)
 {
+   FILE *fp = state->fp;
+
    switch (instr->type) {
    case nir_jump_break:
       fprintf(fp, "break");
@@ -560,79 +585,83 @@ print_jump_instr(nir_jump_instr *instr, FILE *fp)
 }
 
 static void
-print_ssa_undef_instr(nir_ssa_undef_instr* instr, FILE *fp)
+print_ssa_undef_instr(nir_ssa_undef_instr* instr, print_state *state)
 {
-   print_ssa_def(&instr->def, fp);
+   FILE *fp = state->fp;
+   print_ssa_def(&instr->def, state);
    fprintf(fp, " = undefined");
 }
 
 static void
-print_phi_instr(nir_phi_instr *instr, FILE *fp)
+print_phi_instr(nir_phi_instr *instr, print_state *state)
 {
-   print_dest(&instr->dest, fp);
+   FILE *fp = state->fp;
+   print_dest(&instr->dest, state);
    fprintf(fp, " = phi ");
    nir_foreach_phi_src(instr, src) {
       if (&src->node != exec_list_get_head(&instr->srcs))
          fprintf(fp, ", ");
 
       fprintf(fp, "block_%u: ", src->pred->index);
-      print_src(&src->src, fp);
+      print_src(&src->src, state);
    }
 }
 
 static void
-print_parallel_copy_instr(nir_parallel_copy_instr *instr, FILE *fp)
+print_parallel_copy_instr(nir_parallel_copy_instr *instr, print_state *state)
 {
+   FILE *fp = state->fp;
    nir_foreach_parallel_copy_entry(instr, entry) {
       if (&entry->node != exec_list_get_head(&instr->entries))
          fprintf(fp, "; ");
 
-      print_dest(&entry->dest, fp);
+      print_dest(&entry->dest, state);
       fprintf(fp, " = ");
-      print_src(&entry->src, fp);
+      print_src(&entry->src, state);
    }
 }
 
 static void
-print_instr(const nir_instr *instr, print_var_state *state, unsigned tabs, FILE *fp)
+print_instr(const nir_instr *instr, print_state *state, unsigned tabs)
 {
+   FILE *fp = state->fp;
    print_tabs(tabs, fp);
 
    switch (instr->type) {
    case nir_instr_type_alu:
-      print_alu_instr(nir_instr_as_alu(instr), fp);
+      print_alu_instr(nir_instr_as_alu(instr), state);
       break;
 
    case nir_instr_type_call:
-      print_call_instr(nir_instr_as_call(instr), state, fp);
+      print_call_instr(nir_instr_as_call(instr), state);
       break;
 
    case nir_instr_type_intrinsic:
-      print_intrinsic_instr(nir_instr_as_intrinsic(instr), state, fp);
+      print_intrinsic_instr(nir_instr_as_intrinsic(instr), state);
       break;
 
    case nir_instr_type_tex:
-      print_tex_instr(nir_instr_as_tex(instr), state, fp);
+      print_tex_instr(nir_instr_as_tex(instr), state);
       break;
 
    case nir_instr_type_load_const:
-      print_load_const_instr(nir_instr_as_load_const(instr), tabs, fp);
+      print_load_const_instr(nir_instr_as_load_const(instr), state);
       break;
 
    case nir_instr_type_jump:
-      print_jump_instr(nir_instr_as_jump(instr), fp);
+      print_jump_instr(nir_instr_as_jump(instr), state);
       break;
 
    case nir_instr_type_ssa_undef:
-      print_ssa_undef_instr(nir_instr_as_ssa_undef(instr), fp);
+      print_ssa_undef_instr(nir_instr_as_ssa_undef(instr), state);
       break;
 
    case nir_instr_type_phi:
-      print_phi_instr(nir_instr_as_phi(instr), fp);
+      print_phi_instr(nir_instr_as_phi(instr), state);
       break;
 
    case nir_instr_type_parallel_copy:
-      print_parallel_copy_instr(nir_instr_as_parallel_copy(instr), fp);
+      print_parallel_copy_instr(nir_instr_as_parallel_copy(instr), state);
       break;
 
    default:
@@ -650,12 +679,14 @@ compare_block_index(const void *p1, const void *p2)
    return (int) block1->index - (int) block2->index;
 }
 
-static void print_cf_node(nir_cf_node *node, print_var_state *state,
-                          unsigned tabs, FILE *fp);
+static void print_cf_node(nir_cf_node *node, print_state *state,
+                          unsigned tabs);
 
 static void
-print_block(nir_block *block, print_var_state *state, unsigned tabs, FILE *fp)
+print_block(nir_block *block, print_state *state, unsigned tabs)
 {
+   FILE *fp = state->fp;
+
    print_tabs(tabs, fp);
    fprintf(fp, "block block_%u:\n", block->index);
 
@@ -683,7 +714,7 @@ print_block(nir_block *block, print_var_state *state, unsigned tabs, FILE *fp)
    free(preds);
 
    nir_foreach_instr(block, instr) {
-      print_instr(instr, state, tabs, fp);
+      print_instr(instr, state, tabs);
       fprintf(fp, "\n");
    }
 
@@ -697,51 +728,54 @@ print_block(nir_block *block, print_var_state *state, unsigned tabs, FILE *fp)
 }
 
 static void
-print_if(nir_if *if_stmt, print_var_state *state, unsigned tabs, FILE *fp)
+print_if(nir_if *if_stmt, print_state *state, unsigned tabs)
 {
+   FILE *fp = state->fp;
+
    print_tabs(tabs, fp);
    fprintf(fp, "if ");
-   print_src(&if_stmt->condition, fp);
+   print_src(&if_stmt->condition, state);
    fprintf(fp, " {\n");
    foreach_list_typed(nir_cf_node, node, node, &if_stmt->then_list) {
-      print_cf_node(node, state, tabs + 1, fp);
+      print_cf_node(node, state, tabs + 1);
    }
    print_tabs(tabs, fp);
    fprintf(fp, "} else {\n");
    foreach_list_typed(nir_cf_node, node, node, &if_stmt->else_list) {
-      print_cf_node(node, state, tabs + 1, fp);
+      print_cf_node(node, state, tabs + 1);
    }
    print_tabs(tabs, fp);
    fprintf(fp, "}\n");
 }
 
 static void
-print_loop(nir_loop *loop, print_var_state *state, unsigned tabs, FILE *fp)
+print_loop(nir_loop *loop, print_state *state, unsigned tabs)
 {
+   FILE *fp = state->fp;
+
    print_tabs(tabs, fp);
    fprintf(fp, "loop {\n");
    foreach_list_typed(nir_cf_node, node, node, &loop->body) {
-      print_cf_node(node, state, tabs + 1, fp);
+      print_cf_node(node, state, tabs + 1);
    }
    print_tabs(tabs, fp);
    fprintf(fp, "}\n");
 }
 
 static void
-print_cf_node(nir_cf_node *node, print_var_state *state, unsigned int tabs,
-              FILE *fp)
+print_cf_node(nir_cf_node *node, print_state *state, unsigned int tabs)
 {
    switch (node->type) {
    case nir_cf_node_block:
-      print_block(nir_cf_node_as_block(node), state, tabs, fp);
+      print_block(nir_cf_node_as_block(node), state, tabs);
       break;
 
    case nir_cf_node_if:
-      print_if(nir_cf_node_as_if(node), state, tabs, fp);
+      print_if(nir_cf_node_as_if(node), state, tabs);
       break;
 
    case nir_cf_node_loop:
-      print_loop(nir_cf_node_as_loop(node), state, tabs, fp);
+      print_loop(nir_cf_node_as_loop(node), state, tabs);
       break;
 
    default:
@@ -750,40 +784,42 @@ print_cf_node(nir_cf_node *node, print_var_state *state, unsigned int tabs,
 }
 
 static void
-print_function_impl(nir_function_impl *impl, print_var_state *state, FILE *fp)
+print_function_impl(nir_function_impl *impl, print_state *state)
 {
+   FILE *fp = state->fp;
+
    fprintf(fp, "\nimpl %s ", impl->overload->function->name);
 
    for (unsigned i = 0; i < impl->num_params; i++) {
       if (i != 0)
          fprintf(fp, ", ");
 
-      print_var(impl->params[i], state, fp);
+      print_var(impl->params[i], state);
    }
 
    if (impl->return_var != NULL) {
       if (impl->num_params != 0)
          fprintf(fp, ", ");
       fprintf(fp, "returning ");
-      print_var(impl->return_var, state, fp);
+      print_var(impl->return_var, state);
    }
 
    fprintf(fp, "{\n");
 
    foreach_list_typed(nir_variable, var, node, &impl->locals) {
       fprintf(fp, "\t");
-      print_var_decl(var, state, fp);
+      print_var_decl(var, state);
    }
 
    foreach_list_typed(nir_register, reg, node, &impl->registers) {
       fprintf(fp, "\t");
-      print_register_decl(reg, fp);
+      print_register_decl(reg, state);
    }
 
    nir_index_blocks(impl);
 
    foreach_list_typed(nir_cf_node, node, node, &impl->body) {
-      print_cf_node(node, state, 1, fp);
+      print_cf_node(node, state, 1);
    }
 
    fprintf(fp, "\tblock block_%u:\n}\n\n", impl->end_block->index);
@@ -791,8 +827,10 @@ print_function_impl(nir_function_impl *impl, print_var_state *state, FILE *fp)
 
 static void
 print_function_overload(nir_function_overload *overload,
-                        print_var_state *state, FILE *fp)
+                        print_state *state)
 {
+   FILE *fp = state->fp;
+
    fprintf(fp, "decl_overload %s ", overload->function->name);
 
    for (unsigned i = 0; i < overload->num_params; i++) {
@@ -826,22 +864,23 @@ print_function_overload(nir_function_overload *overload,
    fprintf(fp, "\n");
 
    if (overload->impl != NULL) {
-      print_function_impl(overload->impl, state, fp);
+      print_function_impl(overload->impl, state);
       return;
    }
 }
 
 static void
-print_function(nir_function *func, print_var_state *state, FILE *fp)
+print_function(nir_function *func, print_state *state)
 {
    foreach_list_typed(nir_function_overload, overload, node, &func->overload_list) {
-      print_function_overload(overload, state, fp);
+      print_function_overload(overload, state);
    }
 }
 
 static void
-init_print_state(print_var_state *state)
+init_print_state(print_state *state, nir_shader *shader, FILE *fp)
 {
+   state->fp = fp;
    state->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                        _mesa_key_pointer_equal);
    state->syms = _mesa_set_create(NULL, _mesa_key_hash_string,
@@ -850,7 +889,7 @@ init_print_state(print_var_state *state)
 }
 
 static void
-destroy_print_state(print_var_state *state)
+destroy_print_state(print_state *state)
 {
    _mesa_hash_table_destroy(state->ht, NULL);
    _mesa_set_destroy(state->syms, NULL);
@@ -859,35 +898,35 @@ destroy_print_state(print_var_state *state)
 void
 nir_print_shader(nir_shader *shader, FILE *fp)
 {
-   print_var_state state;
-   init_print_state(&state);
+   print_state state;
+   init_print_state(&state, shader, fp);
 
    foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
-      print_var_decl(var, &state, fp);
+      print_var_decl(var, &state);
    }
 
    foreach_list_typed(nir_variable, var, node, &shader->inputs) {
-      print_var_decl(var, &state, fp);
+      print_var_decl(var, &state);
    }
 
    foreach_list_typed(nir_variable, var, node, &shader->outputs) {
-      print_var_decl(var, &state, fp);
+      print_var_decl(var, &state);
    }
 
    foreach_list_typed(nir_variable, var, node, &shader->globals) {
-      print_var_decl(var, &state, fp);
+      print_var_decl(var, &state);
    }
 
    foreach_list_typed(nir_variable, var, node, &shader->system_values) {
-      print_var_decl(var, &state, fp);
+      print_var_decl(var, &state);
    }
 
    foreach_list_typed(nir_register, reg, node, &shader->registers) {
-      print_register_decl(reg, fp);
+      print_register_decl(reg, &state);
    }
 
    foreach_list_typed(nir_function, func, node, &shader->functions) {
-      print_function(func, &state, fp);
+      print_function(func, &state);
    }
 
    destroy_print_state(&state);
@@ -896,5 +935,9 @@ nir_print_shader(nir_shader *shader, FILE *fp)
 void
 nir_print_instr(const nir_instr *instr, FILE *fp)
 {
-   print_instr(instr, NULL, 0, fp);
+   print_state state = {
+      .fp = fp,
+   };
+   print_instr(instr, &state, 0);
+
 }
-- 
cgit v1.2.3


From aecbc93f2d1ff9de4e03a2b216e86dcb9a4ce414 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Tue, 15 Sep 2015 18:55:48 -0400
Subject: nir/print: print symbolic names from shader-enum

v2: split out moving of FILE *fp into state structure into its own
(more complete) patch to reduce the noise in this one

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/glsl/nir/nir_print.c | 45 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 42 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index bdecc3ccdf0..ca8059fc920 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -26,6 +26,7 @@
  */
 
 #include "nir.h"
+#include "shader_enums.h"
 #include <stdio.h>
 #include <stdlib.h>
 
@@ -38,6 +39,7 @@ print_tabs(unsigned num_tabs, FILE *fp)
 
 typedef struct {
    FILE *fp;
+   nir_shader *shader;
    /** map from nir_variable -> printable name */
    struct hash_table *ht;
 
@@ -229,10 +231,10 @@ print_var_decl(nir_variable *var, print_state *state)
    const char *const inv = (var->data.invariant) ? "invariant " : "";
    const char *const mode[] = { "shader_in ", "shader_out ", "", "",
                                 "uniform ", "shader_storage", "system " };
-   const char *const interp[] = { "", "smooth", "flat", "noperspective" };
 
    fprintf(fp, "%s%s%s%s%s ",
-      cent, samp, inv, mode[var->data.mode], interp[var->data.interpolation]);
+      cent, samp, inv, mode[var->data.mode],
+      glsl_interp_qualifier_name(var->data.interpolation));
 
    glsl_print_type(var->type, fp);
 
@@ -255,7 +257,41 @@ print_var_decl(nir_variable *var, print_state *state)
        var->data.mode == nir_var_shader_out ||
        var->data.mode == nir_var_uniform ||
        var->data.mode == nir_var_shader_storage) {
-      fprintf(fp, " (%u, %u)", var->data.location, var->data.driver_location);
+      const char *loc = NULL;
+      char buf[12]; /* up to 10 decimal digits of a 32-bit "%u" plus NUL */
+
+      switch (state->shader->stage) {
+      case MESA_SHADER_VERTEX:
+         if (var->data.mode == nir_var_shader_in)
+            loc = gl_vert_attrib_name(var->data.location);
+         else if (var->data.mode == nir_var_shader_out)
+            loc = gl_varying_slot_name(var->data.location);
+         break;
+      case MESA_SHADER_GEOMETRY:
+         if ((var->data.mode == nir_var_shader_in) ||
+             (var->data.mode == nir_var_shader_out))
+            loc = gl_varying_slot_name(var->data.location);
+         break;
+      case MESA_SHADER_FRAGMENT:
+         if (var->data.mode == nir_var_shader_in)
+            loc = gl_varying_slot_name(var->data.location);
+         else if (var->data.mode == nir_var_shader_out)
+            loc = gl_frag_result_name(var->data.location);
+         break;
+      case MESA_SHADER_TESS_CTRL:
+      case MESA_SHADER_TESS_EVAL:
+      case MESA_SHADER_COMPUTE:
+      default:
+         /* TODO */
+         break;
+      }
+
+      if (!loc) {
+         snprintf(buf, sizeof(buf), "%u", var->data.location);
+         loc = buf;
+      }
+
+      fprintf(fp, " (%s, %u)", loc, var->data.driver_location);
    }
 
    fprintf(fp, "\n");
@@ -881,6 +917,7 @@ static void
 init_print_state(print_state *state, nir_shader *shader, FILE *fp)
 {
    state->fp = fp;
+   state->shader = shader;
    state->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                        _mesa_key_pointer_equal);
    state->syms = _mesa_set_create(NULL, _mesa_key_hash_string,
@@ -901,6 +938,8 @@ nir_print_shader(nir_shader *shader, FILE *fp)
    print_state state;
    init_print_state(&state, shader, fp);
 
+   fprintf(fp, "shader: %s\n", gl_shader_stage_name(shader->stage));
+
    foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
       print_var_decl(var, &state);
    }
-- 
cgit v1.2.3


From edc01c6704b39619ba3856ad801584611869a9d8 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 15 Sep 2015 14:03:04 -0600
Subject: mesa: whitespace, line wrap fixes in varray.c

Trivial.
---
 src/mesa/main/varray.c | 32 ++++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c
index 7a1dddc9ccf..c3c2424574c 100644
--- a/src/mesa/main/varray.c
+++ b/src/mesa/main/varray.c
@@ -657,7 +657,7 @@ _mesa_PointSizePointerOES(GLenum type, GLsizei stride, const GLvoid *ptr)
                   "glPointSizePointer(ES 1.x only)");
       return;
    }
-      
+
    update_array(ctx, "glPointSizePointer", VERT_ATTRIB_POINT_SIZE,
                 legalTypes, 1, 1,
                 1, type, stride, GL_FALSE, GL_FALSE, GL_FALSE, ptr);
@@ -933,7 +933,8 @@ get_current_attrib(struct gl_context *ctx, GLuint index, const char *function)
       return NULL;
    }
 
-   assert(VERT_ATTRIB_GENERIC(index) < ARRAY_SIZE(ctx->Array.VAO->VertexAttrib));
+   assert(VERT_ATTRIB_GENERIC(index) <
+          ARRAY_SIZE(ctx->Array.VAO->VertexAttrib));
 
    FLUSH_CURRENT(ctx, 0);
    return ctx->Current.Attrib[VERT_ATTRIB_GENERIC(index)];
@@ -985,7 +986,9 @@ _mesa_GetVertexAttribLdv(GLuint index, GLenum pname, GLdouble *params)
    GET_CURRENT_CONTEXT(ctx);
 
    if (pname == GL_CURRENT_VERTEX_ATTRIB_ARB) {
-      const GLdouble *v = (const GLdouble *)get_current_attrib(ctx, index, "glGetVertexAttribLdv");
+      const GLdouble *v =
+         (const GLdouble *)get_current_attrib(ctx, index,
+                                              "glGetVertexAttribLdv");
       if (v != NULL) {
          params[0] = v[0];
          params[1] = v[1];
@@ -1080,9 +1083,11 @@ _mesa_GetVertexAttribPointerv(GLuint index, GLenum pname, GLvoid **pointer)
       return;
    }
 
-   assert(VERT_ATTRIB_GENERIC(index) < ARRAY_SIZE(ctx->Array.VAO->VertexAttrib));
+   assert(VERT_ATTRIB_GENERIC(index) <
+          ARRAY_SIZE(ctx->Array.VAO->VertexAttrib));
 
-   *pointer = (GLvoid *) ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_GENERIC(index)].Ptr;
+   *pointer = (GLvoid *)
+      ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_GENERIC(index)].Ptr;
 }
 
 
@@ -1193,8 +1198,8 @@ _mesa_GetVertexArrayIndexed64iv(GLuint vaobj, GLuint index,
     * required to be the same, so in practice this doesn't matter.
     */
    if (index >= ctx->Const.MaxVertexAttribBindings) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glGetVertexArrayIndexed64iv("
-                  "index %d >= the value of GL_MAX_VERTEX_ATTRIB_BINDINGS (%d))",
+      _mesa_error(ctx, GL_INVALID_VALUE, "glGetVertexArrayIndexed64iv(index "
+                  "%d >= the value of GL_MAX_VERTEX_ATTRIB_BINDINGS (%d))",
                   index, ctx->Const.MaxVertexAttribBindings);
       return;
    }
@@ -1637,7 +1642,8 @@ _mesa_primitive_restart_index(const struct gl_context *ctx, GLenum ib_type)
  * GL_ARB_vertex_attrib_binding
  */
 static void
-vertex_array_vertex_buffer(struct gl_context *ctx, struct gl_vertex_array_object *vao,
+vertex_array_vertex_buffer(struct gl_context *ctx,
+                           struct gl_vertex_array_object *vao,
                            GLuint bindingIndex, GLuint buffer, GLintptr offset,
                            GLsizei stride, const char *func)
 {
@@ -1683,7 +1689,8 @@ vertex_array_vertex_buffer(struct gl_context *ctx, struct gl_vertex_array_object
       return;
    }
 
-   if (buffer == vao->VertexBinding[VERT_ATTRIB_GENERIC(bindingIndex)].BufferObj->Name) {
+   if (buffer ==
+       vao->VertexBinding[VERT_ATTRIB_GENERIC(bindingIndex)].BufferObj->Name) {
       vbo = vao->VertexBinding[VERT_ATTRIB_GENERIC(bindingIndex)].BufferObj;
    } else if (buffer != 0) {
       vbo = _mesa_lookup_bufferobj(ctx, buffer);
@@ -2223,7 +2230,8 @@ _mesa_VertexBindingDivisor(GLuint bindingIndex, GLuint divisor)
 
 
 void GLAPIENTRY
-_mesa_VertexArrayBindingDivisor(GLuint vaobj, GLuint bindingIndex, GLuint divisor)
+_mesa_VertexArrayBindingDivisor(GLuint vaobj, GLuint bindingIndex,
+                                GLuint divisor)
 {
    struct gl_vertex_array_object *vao;
    GET_CURRENT_CONTEXT(ctx);
@@ -2343,7 +2351,7 @@ _mesa_print_arrays(struct gl_context *ctx)
 /**
  * Initialize vertex array state for given context.
  */
-void 
+void
 _mesa_init_varray(struct gl_context *ctx)
 {
    ctx->Array.DefaultVAO = ctx->Driver.NewArrayObject(ctx, 0);
@@ -2369,7 +2377,7 @@ delete_arrayobj_cb(GLuint id, void *data, void *userData)
 /**
  * Free vertex array state for given context.
  */
-void 
+void
 _mesa_free_varray_data(struct gl_context *ctx)
 {
    _mesa_HashDeleteAll(ctx->Array.Objects, delete_arrayobj_cb, ctx);
-- 
cgit v1.2.3


From 8faed71830302bbc640af9ef120bba276043a0a9 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 15 Sep 2015 14:04:58 -0600
Subject: mesa: remove trailing whitespace in bufferobj.c

Trivial.
---
 src/mesa/main/bufferobj.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 7c6c70ac1f8..96590f5d5b1 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -391,7 +391,7 @@ convert_clear_buffer_data(struct gl_context *ctx,
 
 /**
  * Allocate and initialize a new buffer object.
- * 
+ *
  * Default callback for the \c dd_function_table::NewBufferObject() hook.
  */
 static struct gl_buffer_object *
@@ -409,7 +409,7 @@ _mesa_new_buffer_object(struct gl_context *ctx, GLuint name)
 
 /**
  * Delete a buffer object.
- * 
+ *
  * Default callback for the \c dd_function_table::DeleteBuffer() hook.
  */
 static void
@@ -980,7 +980,7 @@ bind_buffer_object(struct gl_context *ctx, GLenum target, GLuint buffer)
                                         &newBufObj, "glBindBuffer"))
          return;
    }
-   
+
    /* bind new buffer */
    _mesa_reference_buffer_object(ctx, bindTarget, newBufObj);
 }
@@ -988,7 +988,7 @@ bind_buffer_object(struct gl_context *ctx, GLenum target, GLuint buffer)
 
 /**
  * Update the default buffer objects in the given context to reference those
- * specified in the shared state and release those referencing the old 
+ * specified in the shared state and release those referencing the old
  * shared state.
  */
 void
@@ -1192,7 +1192,7 @@ _mesa_BindBuffer(GLenum target, GLuint buffer)
 
 /**
  * Delete a set of buffer objects.
- * 
+ *
  * \param n      Number of buffer objects to delete.
  * \param ids    Array of \c n buffer object IDs.
  */
@@ -1410,9 +1410,9 @@ _mesa_CreateBuffers(GLsizei n, GLuint *buffers)
 
 /**
  * Determine if ID is the name of a buffer object.
- * 
+ *
  * \param id  ID of the potential buffer object.
- * \return  \c GL_TRUE if \c id is the name of a buffer object, 
+ * \return  \c GL_TRUE if \c id is the name of a buffer object,
  *          \c GL_FALSE otherwise.
  */
 GLboolean GLAPIENTRY
-- 
cgit v1.2.3


From 1aff899a874abddc2d79e595242a233e874e4a96 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 15 Sep 2015 14:28:38 -0600
Subject: mesa: move GL_APPLE_object_purgeable functions to new file

Move this code out of bufferobj.c since it's not strongly connected to
buffer objects.

Acked-by: Matt Turner <mattst88@gmail.com>
---
 src/mapi/glapi/gen/gl_genexec.py |   1 +
 src/mesa/Makefile.sources        |   2 +
 src/mesa/main/bufferobj.c        | 378 -----------------------------------
 src/mesa/main/bufferobj.h        |  10 -
 src/mesa/main/objectpurge.c      | 416 +++++++++++++++++++++++++++++++++++++++
 src/mesa/main/objectpurge.h      |  42 ++++
 6 files changed, 461 insertions(+), 388 deletions(-)
 create mode 100644 src/mesa/main/objectpurge.c
 create mode 100644 src/mesa/main/objectpurge.h

(limited to 'src')

diff --git a/src/mapi/glapi/gen/gl_genexec.py b/src/mapi/glapi/gen/gl_genexec.py
index 26d8e7bfb3a..6c66779c222 100644
--- a/src/mapi/glapi/gen/gl_genexec.py
+++ b/src/mapi/glapi/gen/gl_genexec.py
@@ -88,6 +88,7 @@ header = """/**
 #include "main/matrix.h"
 #include "main/multisample.h"
 #include "main/objectlabel.h"
+#include "main/objectpurge.h"
 #include "main/performance_monitor.h"
 #include "main/pipelineobj.h"
 #include "main/pixel.h"
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 2a719a0bd89..0915594cea6 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -134,6 +134,8 @@ MAIN_FILES = \
 	main/multisample.h \
 	main/objectlabel.c \
 	main/objectlabel.h \
+	main/objectpurge.c \
+	main/objectpurge.h \
 	main/pack.c \
 	main/pack.h \
 	main/pbo.c \
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 96590f5d5b1..3b87654c5d2 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -36,17 +36,13 @@
 #include "enums.h"
 #include "hash.h"
 #include "imports.h"
-#include "image.h"
 #include "context.h"
 #include "bufferobj.h"
-#include "fbobject.h"
 #include "mtypes.h"
-#include "texobj.h"
 #include "teximage.h"
 #include "glformats.h"
 #include "texstore.h"
 #include "transformfeedback.h"
-#include "dispatch.h"
 
 
 /* Debug flags */
@@ -2634,380 +2630,6 @@ _mesa_FlushMappedNamedBufferRange(GLuint buffer, GLintptr offset,
 }
 
 
-static GLenum
-buffer_object_purgeable(struct gl_context *ctx, GLuint name, GLenum option)
-{
-   struct gl_buffer_object *bufObj;
-   GLenum retval;
-
-   bufObj = _mesa_lookup_bufferobj(ctx, name);
-   if (!bufObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectPurgeable(name = 0x%x)", name);
-      return 0;
-   }
-   if (!_mesa_is_bufferobj(bufObj)) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glObjectPurgeable(buffer 0)" );
-      return 0;
-   }
-
-   if (bufObj->Purgeable) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glObjectPurgeable(name = 0x%x) is already purgeable", name);
-      return GL_VOLATILE_APPLE;
-   }
-
-   bufObj->Purgeable = GL_TRUE;
-
-   retval = GL_VOLATILE_APPLE;
-   if (ctx->Driver.BufferObjectPurgeable)
-      retval = ctx->Driver.BufferObjectPurgeable(ctx, bufObj, option);
-
-   return retval;
-}
-
-
-static GLenum
-renderbuffer_purgeable(struct gl_context *ctx, GLuint name, GLenum option)
-{
-   struct gl_renderbuffer *bufObj;
-   GLenum retval;
-
-   bufObj = _mesa_lookup_renderbuffer(ctx, name);
-   if (!bufObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectUnpurgeable(name = 0x%x)", name);
-      return 0;
-   }
-
-   if (bufObj->Purgeable) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glObjectPurgeable(name = 0x%x) is already purgeable", name);
-      return GL_VOLATILE_APPLE;
-   }
-
-   bufObj->Purgeable = GL_TRUE;
-
-   retval = GL_VOLATILE_APPLE;
-   if (ctx->Driver.RenderObjectPurgeable)
-      retval = ctx->Driver.RenderObjectPurgeable(ctx, bufObj, option);
-
-   return retval;
-}
-
-
-static GLenum
-texture_object_purgeable(struct gl_context *ctx, GLuint name, GLenum option)
-{
-   struct gl_texture_object *bufObj;
-   GLenum retval;
-
-   bufObj = _mesa_lookup_texture(ctx, name);
-   if (!bufObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectPurgeable(name = 0x%x)", name);
-      return 0;
-   }
-
-   if (bufObj->Purgeable) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glObjectPurgeable(name = 0x%x) is already purgeable", name);
-      return GL_VOLATILE_APPLE;
-   }
-
-   bufObj->Purgeable = GL_TRUE;
-
-   retval = GL_VOLATILE_APPLE;
-   if (ctx->Driver.TextureObjectPurgeable)
-      retval = ctx->Driver.TextureObjectPurgeable(ctx, bufObj, option);
-
-   return retval;
-}
-
-
-GLenum GLAPIENTRY
-_mesa_ObjectPurgeableAPPLE(GLenum objectType, GLuint name, GLenum option)
-{
-   GLenum retval;
-
-   GET_CURRENT_CONTEXT(ctx);
-   ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0);
-
-   if (name == 0) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectPurgeable(name = 0x%x)", name);
-      return 0;
-   }
-
-   switch (option) {
-   case GL_VOLATILE_APPLE:
-   case GL_RELEASED_APPLE:
-      /* legal */
-      break;
-   default:
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glObjectPurgeable(name = 0x%x) invalid option: %d",
-                  name, option);
-      return 0;
-   }
-
-   switch (objectType) {
-   case GL_TEXTURE:
-      retval = texture_object_purgeable(ctx, name, option);
-      break;
-   case GL_RENDERBUFFER_EXT:
-      retval = renderbuffer_purgeable(ctx, name, option);
-      break;
-   case GL_BUFFER_OBJECT_APPLE:
-      retval = buffer_object_purgeable(ctx, name, option);
-      break;
-   default:
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glObjectPurgeable(name = 0x%x) invalid type: %d",
-                  name, objectType);
-      return 0;
-   }
-
-   /* In strict conformance to the spec, we must only return VOLATILE when
-    * when passed the VOLATILE option. Madness.
-    *
-    * XXX First fix the spec, then fix me.
-    */
-   return option == GL_VOLATILE_APPLE ? GL_VOLATILE_APPLE : retval;
-}
-
-
-static GLenum
-buffer_object_unpurgeable(struct gl_context *ctx, GLuint name, GLenum option)
-{
-   struct gl_buffer_object *bufObj;
-   GLenum retval;
-
-   bufObj = _mesa_lookup_bufferobj(ctx, name);
-   if (!bufObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectUnpurgeable(name = 0x%x)", name);
-      return 0;
-   }
-
-   if (! bufObj->Purgeable) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glObjectUnpurgeable(name = 0x%x) object is "
-                  " already \"unpurged\"", name);
-      return 0;
-   }
-
-   bufObj->Purgeable = GL_FALSE;
-
-   retval = option;
-   if (ctx->Driver.BufferObjectUnpurgeable)
-      retval = ctx->Driver.BufferObjectUnpurgeable(ctx, bufObj, option);
-
-   return retval;
-}
-
-
-static GLenum
-renderbuffer_unpurgeable(struct gl_context *ctx, GLuint name, GLenum option)
-{
-   struct gl_renderbuffer *bufObj;
-   GLenum retval;
-
-   bufObj = _mesa_lookup_renderbuffer(ctx, name);
-   if (!bufObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectUnpurgeable(name = 0x%x)", name);
-      return 0;
-   }
-
-   if (! bufObj->Purgeable) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glObjectUnpurgeable(name = 0x%x) object is "
-                  " already \"unpurged\"", name);
-      return 0;
-   }
-
-   bufObj->Purgeable = GL_FALSE;
-
-   retval = option;
-   if (ctx->Driver.RenderObjectUnpurgeable)
-      retval = ctx->Driver.RenderObjectUnpurgeable(ctx, bufObj, option);
-
-   return retval;
-}
-
-
-static GLenum
-texture_object_unpurgeable(struct gl_context *ctx, GLuint name, GLenum option)
-{
-   struct gl_texture_object *bufObj;
-   GLenum retval;
-
-   bufObj = _mesa_lookup_texture(ctx, name);
-   if (!bufObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectUnpurgeable(name = 0x%x)", name);
-      return 0;
-   }
-
-   if (! bufObj->Purgeable) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glObjectUnpurgeable(name = 0x%x) object is"
-                  " already \"unpurged\"", name);
-      return 0;
-   }
-
-   bufObj->Purgeable = GL_FALSE;
-
-   retval = option;
-   if (ctx->Driver.TextureObjectUnpurgeable)
-      retval = ctx->Driver.TextureObjectUnpurgeable(ctx, bufObj, option);
-
-   return retval;
-}
-
-
-GLenum GLAPIENTRY
-_mesa_ObjectUnpurgeableAPPLE(GLenum objectType, GLuint name, GLenum option)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0);
-
-   if (name == 0) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectUnpurgeable(name = 0x%x)", name);
-      return 0;
-   }
-
-   switch (option) {
-   case GL_RETAINED_APPLE:
-   case GL_UNDEFINED_APPLE:
-      /* legal */
-      break;
-   default:
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glObjectUnpurgeable(name = 0x%x) invalid option: %d",
-                  name, option);
-      return 0;
-   }
-
-   switch (objectType) {
-   case GL_BUFFER_OBJECT_APPLE:
-      return buffer_object_unpurgeable(ctx, name, option);
-   case GL_TEXTURE:
-      return texture_object_unpurgeable(ctx, name, option);
-   case GL_RENDERBUFFER_EXT:
-      return renderbuffer_unpurgeable(ctx, name, option);
-   default:
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glObjectUnpurgeable(name = 0x%x) invalid type: %d",
-                  name, objectType);
-      return 0;
-   }
-}
-
-
-static void
-get_buffer_object_parameteriv(struct gl_context *ctx, GLuint name,
-                              GLenum pname, GLint *params)
-{
-   struct gl_buffer_object *bufObj = _mesa_lookup_bufferobj(ctx, name);
-   if (!bufObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glGetObjectParameteriv(name = 0x%x) invalid object", name);
-      return;
-   }
-
-   switch (pname) {
-   case GL_PURGEABLE_APPLE:
-      *params = bufObj->Purgeable;
-      break;
-   default:
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glGetObjectParameteriv(name = 0x%x) invalid enum: %d",
-                  name, pname);
-      break;
-   }
-}
-
-
-static void
-get_renderbuffer_parameteriv(struct gl_context *ctx, GLuint name,
-                             GLenum pname, GLint *params)
-{
-   struct gl_renderbuffer *rb = _mesa_lookup_renderbuffer(ctx, name);
-   if (!rb) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectUnpurgeable(name = 0x%x)", name);
-      return;
-   }
-
-   switch (pname) {
-   case GL_PURGEABLE_APPLE:
-      *params = rb->Purgeable;
-      break;
-   default:
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glGetObjectParameteriv(name = 0x%x) invalid enum: %d",
-                  name, pname);
-      break;
-   }
-}
-
-
-static void
-get_texture_object_parameteriv(struct gl_context *ctx, GLuint name,
-                               GLenum pname, GLint *params)
-{
-   struct gl_texture_object *texObj = _mesa_lookup_texture(ctx, name);
-   if (!texObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectUnpurgeable(name = 0x%x)", name);
-      return;
-   }
-
-   switch (pname) {
-   case GL_PURGEABLE_APPLE:
-      *params = texObj->Purgeable;
-      break;
-   default:
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glGetObjectParameteriv(name = 0x%x) invalid enum: %d",
-                  name, pname);
-      break;
-   }
-}
-
-
-void GLAPIENTRY
-_mesa_GetObjectParameterivAPPLE(GLenum objectType, GLuint name, GLenum pname,
-                                GLint *params)
-{
-   GET_CURRENT_CONTEXT(ctx);
-
-   if (name == 0) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glGetObjectParameteriv(name = 0x%x)", name);
-      return;
-   }
-
-   switch (objectType) {
-   case GL_TEXTURE:
-      get_texture_object_parameteriv(ctx, name, pname, params);
-      break;
-   case GL_BUFFER_OBJECT_APPLE:
-      get_buffer_object_parameteriv(ctx, name, pname, params);
-      break;
-   case GL_RENDERBUFFER_EXT:
-      get_renderbuffer_parameteriv(ctx, name, pname, params);
-      break;
-   default:
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glGetObjectParameteriv(name = 0x%x) invalid type: %d",
-                  name, objectType);
-   }
-}
-
 /**
  * Binds a buffer object to a uniform buffer binding point.
  *
diff --git a/src/mesa/main/bufferobj.h b/src/mesa/main/bufferobj.h
index b7154c5d889..3eac96df23e 100644
--- a/src/mesa/main/bufferobj.h
+++ b/src/mesa/main/bufferobj.h
@@ -317,16 +317,6 @@ void GLAPIENTRY
 _mesa_FlushMappedNamedBufferRange(GLuint buffer, GLintptr offset,
                                   GLsizeiptr length);
 
-GLenum GLAPIENTRY
-_mesa_ObjectPurgeableAPPLE(GLenum objectType, GLuint name, GLenum option);
-
-GLenum GLAPIENTRY
-_mesa_ObjectUnpurgeableAPPLE(GLenum objectType, GLuint name, GLenum option);
-
-void GLAPIENTRY
-_mesa_GetObjectParameterivAPPLE(GLenum objectType, GLuint name,
-                                GLenum pname, GLint* params);
-
 void GLAPIENTRY
 _mesa_BindBufferRange(GLenum target, GLuint index,
                       GLuint buffer, GLintptr offset, GLsizeiptr size);
diff --git a/src/mesa/main/objectpurge.c b/src/mesa/main/objectpurge.c
new file mode 100644
index 00000000000..d730f46b8ec
--- /dev/null
+++ b/src/mesa/main/objectpurge.c
@@ -0,0 +1,416 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Code related to the GL_APPLE_object_purgeable extension.
+ */
+
+
+#include "glheader.h"
+#include "enums.h"
+#include "hash.h"
+#include "imports.h"
+#include "context.h"
+#include "bufferobj.h"
+#include "fbobject.h"
+#include "mtypes.h"
+#include "objectpurge.h"
+#include "texobj.h"
+#include "teximage.h"
+
+
+/* Flag buffer object `name` as purgeable.  An unknown name, or an object
+ * rejected by _mesa_is_bufferobj(), raises a GL error and returns 0;
+ * otherwise GL_VOLATILE_APPLE or the driver hook's value is returned. */
+static GLenum
+buffer_object_purgeable(struct gl_context *ctx, GLuint name, GLenum option)
+{
+   struct gl_buffer_object *bufObj;
+   GLenum retval;
+
+   bufObj = _mesa_lookup_bufferobj(ctx, name);
+   if (!bufObj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectPurgeable(name = 0x%x)", name);
+      return 0;
+   }
+   if (!_mesa_is_bufferobj(bufObj)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glObjectPurgeable(buffer 0)" );
+      return 0;
+   }
+   if (bufObj->Purgeable) {   /* error raised, but VOLATILE is returned */
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glObjectPurgeable(name = 0x%x) is already purgeable", name);
+      return GL_VOLATILE_APPLE;
+   }
+
+   bufObj->Purgeable = GL_TRUE;
+   retval = GL_VOLATILE_APPLE;   /* default when no driver hook is set */
+   if (ctx->Driver.BufferObjectPurgeable)
+      retval = ctx->Driver.BufferObjectPurgeable(ctx, bufObj, option);
+   return retval;
+}
+
+
+/* Flag renderbuffer `name` as purgeable.  An unknown name raises
+ * GL_INVALID_VALUE and returns 0; otherwise GL_VOLATILE_APPLE or the
+ * driver hook's value is returned. */
+static GLenum
+renderbuffer_purgeable(struct gl_context *ctx, GLuint name, GLenum option)
+{
+   struct gl_renderbuffer *bufObj;
+   GLenum retval;
+
+   bufObj = _mesa_lookup_renderbuffer(ctx, name);
+   if (!bufObj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectPurgeable(name = 0x%x)", name);
+      return 0;
+   }
+   if (bufObj->Purgeable) {   /* error raised, but VOLATILE is returned */
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glObjectPurgeable(name = 0x%x) is already purgeable", name);
+      return GL_VOLATILE_APPLE;
+   }
+
+   bufObj->Purgeable = GL_TRUE;
+   retval = GL_VOLATILE_APPLE;   /* default when no driver hook is set */
+   if (ctx->Driver.RenderObjectPurgeable)
+      retval = ctx->Driver.RenderObjectPurgeable(ctx, bufObj, option);
+   return retval;
+}
+
+
+/* Flag texture object `name` as purgeable.  An unknown name raises
+ * GL_INVALID_VALUE and returns 0; otherwise GL_VOLATILE_APPLE or the
+ * driver hook's value is returned. */
+static GLenum
+texture_object_purgeable(struct gl_context *ctx, GLuint name, GLenum option)
+{
+   struct gl_texture_object *bufObj;
+   GLenum retval;
+
+   bufObj = _mesa_lookup_texture(ctx, name);
+   if (!bufObj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectPurgeable(name = 0x%x)", name);
+      return 0;
+   }
+   if (bufObj->Purgeable) {   /* error raised, but VOLATILE is returned */
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glObjectPurgeable(name = 0x%x) is already purgeable", name);
+      return GL_VOLATILE_APPLE;
+   }
+
+   bufObj->Purgeable = GL_TRUE;
+   retval = GL_VOLATILE_APPLE;   /* default when no driver hook is set */
+   if (ctx->Driver.TextureObjectPurgeable)
+      retval = ctx->Driver.TextureObjectPurgeable(ctx, bufObj, option);
+   return retval;
+}
+
+
+/* glObjectPurgeableAPPLE entry point: validates name, option and object
+ * type (each failure returns 0), then defers to the per-type helper. */
+GLenum GLAPIENTRY
+_mesa_ObjectPurgeableAPPLE(GLenum objectType, GLuint name, GLenum option)
+{
+   GLenum retval;
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0);
+
+   if (name == 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectPurgeable(name = 0x%x)", name);
+      return 0;
+   }
+   switch (option) {
+   case GL_VOLATILE_APPLE:
+   case GL_RELEASED_APPLE:
+      /* legal */
+      break;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glObjectPurgeable(name = 0x%x) invalid option: %d",
+                  name, option);
+      return 0;
+   }
+
+   switch (objectType) {
+   case GL_TEXTURE:
+      retval = texture_object_purgeable(ctx, name, option);
+      break;
+   case GL_RENDERBUFFER_EXT:
+      retval = renderbuffer_purgeable(ctx, name, option);
+      break;
+   case GL_BUFFER_OBJECT_APPLE:
+      retval = buffer_object_purgeable(ctx, name, option);
+      break;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glObjectPurgeable(name = 0x%x) invalid type: %d",
+                  name, objectType);
+      return 0;
+   }
+
+   /* In strict conformance to the spec, we must only return VOLATILE
+    * when passed the VOLATILE option. Madness.
+    *
+    * XXX First fix the spec, then fix me.
+    */
+   return option == GL_VOLATILE_APPLE ? GL_VOLATILE_APPLE : retval;
+}
+
+
+/* Clear the purgeable flag on buffer object `name`.  An unknown name or an
+ * object that is not currently purgeable raises a GL error and returns 0;
+ * otherwise `option` is returned unless a driver hook supplies a value. */
+static GLenum
+buffer_object_unpurgeable(struct gl_context *ctx, GLuint name, GLenum option)
+{
+   struct gl_buffer_object *bufObj;
+   GLenum retval;
+
+   bufObj = _mesa_lookup_bufferobj(ctx, name);
+   if (!bufObj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectUnpurgeable(name = 0x%x)", name);
+      return 0;
+   }
+   if (! bufObj->Purgeable) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glObjectUnpurgeable(name = 0x%x) object is "
+                  " already \"unpurged\"", name);
+      return 0;
+   }
+
+   bufObj->Purgeable = GL_FALSE;
+   retval = option;   /* echoed back when no driver hook is set */
+   if (ctx->Driver.BufferObjectUnpurgeable)
+      retval = ctx->Driver.BufferObjectUnpurgeable(ctx, bufObj, option);
+   return retval;
+}
+
+
+/* Clear the purgeable flag on renderbuffer `name`.  An unknown name or an
+ * object that is not currently purgeable raises a GL error and returns 0;
+ * otherwise `option` is returned unless a driver hook supplies a value. */
+static GLenum
+renderbuffer_unpurgeable(struct gl_context *ctx, GLuint name, GLenum option)
+{
+   struct gl_renderbuffer *bufObj;
+   GLenum retval;
+
+   bufObj = _mesa_lookup_renderbuffer(ctx, name);
+   if (!bufObj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectUnpurgeable(name = 0x%x)", name);
+      return 0;
+   }
+   if (! bufObj->Purgeable) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glObjectUnpurgeable(name = 0x%x) object is "
+                  " already \"unpurged\"", name);
+      return 0;
+   }
+
+   bufObj->Purgeable = GL_FALSE;
+   retval = option;   /* echoed back when no driver hook is set */
+   if (ctx->Driver.RenderObjectUnpurgeable)
+      retval = ctx->Driver.RenderObjectUnpurgeable(ctx, bufObj, option);
+   return retval;
+}
+
+
+/* Clear the purgeable flag on texture object `name`.  An unknown name or an
+ * object that is not currently purgeable raises a GL error and returns 0;
+ * otherwise `option` is returned unless a driver hook supplies a value. */
+static GLenum
+texture_object_unpurgeable(struct gl_context *ctx, GLuint name, GLenum option)
+{
+   struct gl_texture_object *bufObj;
+   GLenum retval;
+
+   bufObj = _mesa_lookup_texture(ctx, name);
+   if (!bufObj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectUnpurgeable(name = 0x%x)", name);
+      return 0;
+   }
+   if (! bufObj->Purgeable) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glObjectUnpurgeable(name = 0x%x) object is"
+                  " already \"unpurged\"", name);
+      return 0;
+   }
+
+   bufObj->Purgeable = GL_FALSE;
+   retval = option;   /* echoed back when no driver hook is set */
+   if (ctx->Driver.TextureObjectUnpurgeable)
+      retval = ctx->Driver.TextureObjectUnpurgeable(ctx, bufObj, option);
+   return retval;
+}
+
+
+/* glObjectUnpurgeableAPPLE entry point; each validation failure returns 0. */
+GLenum GLAPIENTRY
+_mesa_ObjectUnpurgeableAPPLE(GLenum objectType, GLuint name, GLenum option)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0);
+
+   if (name == 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectUnpurgeable(name = 0x%x)", name);
+      return 0;
+   }
+   switch (option) {
+   case GL_RETAINED_APPLE:
+   case GL_UNDEFINED_APPLE:
+      /* legal */
+      break;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glObjectUnpurgeable(name = 0x%x) invalid option: %d",
+                  name, option);
+      return 0;
+   }
+
+   switch (objectType) {   /* the per-type helper's result is returned as-is */
+   case GL_BUFFER_OBJECT_APPLE:
+      return buffer_object_unpurgeable(ctx, name, option);
+   case GL_TEXTURE:
+      return texture_object_unpurgeable(ctx, name, option);
+   case GL_RENDERBUFFER_EXT:
+      return renderbuffer_unpurgeable(ctx, name, option);
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glObjectUnpurgeable(name = 0x%x) invalid type: %d",
+                  name, objectType);
+      return 0;
+   }
+}
+
+
+/* Answers GL_PURGEABLE_APPLE from bufObj->Purgeable; other pnames are errors. */
+static void
+get_buffer_object_parameteriv(struct gl_context *ctx, GLuint name,
+                              GLenum pname, GLint *params)
+{
+   struct gl_buffer_object *bufObj = _mesa_lookup_bufferobj(ctx, name);
+   if (!bufObj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glGetObjectParameteriv(name = 0x%x) invalid object", name);
+      return;
+   }
+   switch (pname) {
+   case GL_PURGEABLE_APPLE:
+      *params = bufObj->Purgeable;
+      break;
+   default:   /* *params is left unwritten on this path */
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glGetObjectParameteriv(name = 0x%x) invalid enum: %d",
+                  name, pname);
+      break;
+   }
+}
+
+
+/* Answers GL_PURGEABLE_APPLE from rb->Purgeable; other pnames are errors. */
+static void
+get_renderbuffer_parameteriv(struct gl_context *ctx, GLuint name,
+                             GLenum pname, GLint *params)
+{
+   struct gl_renderbuffer *rb = _mesa_lookup_renderbuffer(ctx, name);
+   if (!rb) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glGetObjectParameteriv(name = 0x%x) invalid object", name);
+      return;
+   }
+   switch (pname) {
+   case GL_PURGEABLE_APPLE:
+      *params = rb->Purgeable;
+      break;
+   default:   /* *params is left unwritten on this path */
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glGetObjectParameteriv(name = 0x%x) invalid enum: %d",
+                  name, pname);
+      break;
+   }
+}
+
+
+/* Answers GL_PURGEABLE_APPLE from texObj->Purgeable; other pnames are errors. */
+static void
+get_texture_object_parameteriv(struct gl_context *ctx, GLuint name,
+                               GLenum pname, GLint *params)
+{
+   struct gl_texture_object *texObj = _mesa_lookup_texture(ctx, name);
+   if (!texObj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glGetObjectParameteriv(name = 0x%x) invalid object", name);
+      return;
+   }
+   switch (pname) {
+   case GL_PURGEABLE_APPLE:
+      *params = texObj->Purgeable;
+      break;
+   default:   /* *params is left unwritten on this path */
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glGetObjectParameteriv(name = 0x%x) invalid enum: %d",
+                  name, pname);
+      break;
+   }
+}
+
+
+/* glGetObjectParameterivAPPLE entry point: rejects name 0, then dispatches. */
+void GLAPIENTRY
+_mesa_GetObjectParameterivAPPLE(GLenum objectType, GLuint name, GLenum pname,
+                                GLint *params)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   if (name == 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glGetObjectParameteriv(name = 0x%x)", name);
+      return;
+   }
+   switch (objectType) {   /* each helper validates pname and writes *params */
+   case GL_TEXTURE:
+      get_texture_object_parameteriv(ctx, name, pname, params);
+      break;
+   case GL_BUFFER_OBJECT_APPLE:
+      get_buffer_object_parameteriv(ctx, name, pname, params);
+      break;
+   case GL_RENDERBUFFER_EXT:
+      get_renderbuffer_parameteriv(ctx, name, pname, params);
+      break;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glGetObjectParameteriv(name = 0x%x) invalid type: %d",
+                  name, objectType);
+   }
+}
diff --git a/src/mesa/main/objectpurge.h b/src/mesa/main/objectpurge.h
new file mode 100644
index 00000000000..f0490121ef6
--- /dev/null
+++ b/src/mesa/main/objectpurge.h
@@ -0,0 +1,42 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef OBJECTPURGE_H
+#define OBJECTPURGE_H
+
+#include "glheader.h"   /* for the GL types and GLAPIENTRY used below */
+
+GLenum GLAPIENTRY
+_mesa_ObjectPurgeableAPPLE(GLenum objectType, GLuint name, GLenum option);
+
+GLenum GLAPIENTRY
+_mesa_ObjectUnpurgeableAPPLE(GLenum objectType, GLuint name, GLenum option);
+
+void GLAPIENTRY
+_mesa_GetObjectParameterivAPPLE(GLenum objectType, GLuint name,
+                                GLenum pname, GLint* params);
+
+#endif /* OBJECTPURGE_H */
-- 
cgit v1.2.3


From eb081681df248750727a8a76436760d617b4a6a9 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Tue, 15 Sep 2015 19:32:10 -0400
Subject: st/mesa: avoid integer overflows with buffers >= 512MB

This fixes failures with the newly-submitted max-size texture buffer
piglit test for GPUs exposing >= 128M max texels.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Glenn Kennard <glenn.kennard@gmail.com>
---
 src/mesa/state_tracker/st_atom_texture.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c
index 31e0f6ba06c..3e3775200a8 100644
--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -274,8 +274,8 @@ st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe,
          return NULL;
       size = MIN2(stObj->pt->width0 - base, (unsigned)stObj->base.BufferSize);
 
-      f = ((base * 8) / desc->block.bits) * desc->block.width;
-      n = ((size * 8) / desc->block.bits) * desc->block.width;
+      f = (base / (desc->block.bits / 8)) * desc->block.width;
+      n = (size / (desc->block.bits / 8)) * desc->block.width;
       if (!n)
          return NULL;
       templ.u.buf.first_element = f;
-- 
cgit v1.2.3


From 7a275fcda8ffa3d69b7be6f356469f4af272a6ad Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Tue, 15 Sep 2015 19:39:25 -0400
Subject: nv50, nvc0: fix max texture buffer size to 128M elements

This is what the hardware supports, there never was any sort of 64K
limit.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/nouveau/nv50/nv50_screen.c | 2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 9068ae1afaf..c3bbc833f5b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -100,7 +100,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_MAX_TEXEL_OFFSET:
       return 7;
    case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
-      return 65536;
+      return 128 * 1024 * 1024;
    case PIPE_CAP_GLSL_FEATURE_LEVEL:
       return 330;
    case PIPE_CAP_MAX_RENDER_TARGETS:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index ead43f86262..1909b914d02 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -87,7 +87,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
       return 31;
    case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
-      return 65536;
+      return 128 * 1024 * 1024;
    case PIPE_CAP_GLSL_FEATURE_LEVEL:
       return 410;
    case PIPE_CAP_MAX_RENDER_TARGETS:
-- 
cgit v1.2.3


From 8fd3e53f3dc40e4013348e63a0cc7a2787410899 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 4 Aug 2015 14:28:02 -0700
Subject: gallium/ttn: Convert to using VARYING_SLOT_* / FRAG_RESULT_*.

This avoids exceeding the size of the .index bitfield since it got
truncated, and should make our NIR look more like the NIR that the rest of
the NIR developers are working on.

v2: split out vc4 updates, first patch uses varying_slot_to_tgsi_semantic()
    helper, and second patch does the actual conversion.
v3: add frag_result_to_tgsi_semantic() helper and don't try to map
    frag_results to semantic name/index as if they were varying_slot's
v4: use VERT_ATTRIB_ for VS inputs
v5: Fix vc4 build.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/auxiliary/nir/tgsi_to_nir.c            | 181 +++++++++++++++++++--
 src/gallium/auxiliary/nir/tgsi_to_nir.h            |   6 +
 .../drivers/freedreno/ir3/ir3_compiler_nir.c       |  61 ++++---
 src/gallium/drivers/vc4/vc4_nir_lower_blend.c      |   6 +-
 src/gallium/drivers/vc4/vc4_nir_lower_io.c         |  13 +-
 src/gallium/drivers/vc4/vc4_program.c              |  19 ++-
 6 files changed, 239 insertions(+), 47 deletions(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index cccc5606ccf..cf43ef2506f 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -94,6 +94,128 @@ struct ttn_compile {
 #define ttn_channel(b, src, swiz) \
    nir_swizzle(b, src, SWIZ(swiz, swiz, swiz, swiz), 1, false)
 
+static gl_varying_slot
+tgsi_varying_semantic_to_slot(unsigned semantic, unsigned index)
+{
+   switch (semantic) {   /* translate a TGSI name/index pair to VARYING_SLOT_* */
+   case TGSI_SEMANTIC_POSITION:
+      return VARYING_SLOT_POS;
+   case TGSI_SEMANTIC_COLOR:   /* any nonzero index selects COL1 */
+      if (index == 0)
+         return VARYING_SLOT_COL0;
+      else
+         return VARYING_SLOT_COL1;
+   case TGSI_SEMANTIC_BCOLOR:   /* any nonzero index selects BFC1 */
+      if (index == 0)
+         return VARYING_SLOT_BFC0;
+      else
+         return VARYING_SLOT_BFC1;
+   case TGSI_SEMANTIC_FOG:
+      return VARYING_SLOT_FOGC;
+   case TGSI_SEMANTIC_PSIZE:
+      return VARYING_SLOT_PSIZ;
+   case TGSI_SEMANTIC_GENERIC:   /* index is added without a range check */
+      return VARYING_SLOT_VAR0 + index;
+   case TGSI_SEMANTIC_FACE:
+      return VARYING_SLOT_FACE;
+   case TGSI_SEMANTIC_EDGEFLAG:
+      return VARYING_SLOT_EDGE;
+   case TGSI_SEMANTIC_PRIMID:
+      return VARYING_SLOT_PRIMITIVE_ID;
+   case TGSI_SEMANTIC_CLIPDIST:   /* any nonzero index selects CLIP_DIST1 */
+      if (index == 0)
+         return VARYING_SLOT_CLIP_DIST0;
+      else
+         return VARYING_SLOT_CLIP_DIST1;
+   case TGSI_SEMANTIC_CLIPVERTEX:
+      return VARYING_SLOT_CLIP_VERTEX;
+   case TGSI_SEMANTIC_TEXCOORD:   /* index is added without a range check */
+      return VARYING_SLOT_TEX0 + index;
+   case TGSI_SEMANTIC_PCOORD:
+      return VARYING_SLOT_PNTC;
+   case TGSI_SEMANTIC_VIEWPORT_INDEX:
+      return VARYING_SLOT_VIEWPORT;
+   case TGSI_SEMANTIC_LAYER:
+      return VARYING_SLOT_LAYER;
+   default:   /* unmapped semantic: report on stderr and abort */
+      fprintf(stderr, "Bad TGSI semantic: %d/%d\n", semantic, index);
+      abort();
+   }
+}
+
+/* Temporary helper to remap back to TGSI style semantic name/index
+ * values, for use in drivers that haven't been converted to using
+ * VARYING_SLOT_
+ */
+void
+varying_slot_to_tgsi_semantic(gl_varying_slot slot,
+                              unsigned *semantic_name, unsigned *semantic_index)
+{
+   static const unsigned map[][2] = {
+      [VARYING_SLOT_POS] = { TGSI_SEMANTIC_POSITION, 0 },
+      [VARYING_SLOT_COL0] = { TGSI_SEMANTIC_COLOR, 0 },
+      [VARYING_SLOT_COL1] = { TGSI_SEMANTIC_COLOR, 1 },
+      [VARYING_SLOT_BFC0] = { TGSI_SEMANTIC_BCOLOR, 0 },
+      [VARYING_SLOT_BFC1] = { TGSI_SEMANTIC_BCOLOR, 1 },
+      [VARYING_SLOT_FOGC] = { TGSI_SEMANTIC_FOG, 0 },
+      [VARYING_SLOT_PSIZ] = { TGSI_SEMANTIC_PSIZE, 0 },
+      [VARYING_SLOT_FACE] = { TGSI_SEMANTIC_FACE, 0 },
+      [VARYING_SLOT_EDGE] = { TGSI_SEMANTIC_EDGEFLAG, 0 },
+      [VARYING_SLOT_PRIMITIVE_ID] = { TGSI_SEMANTIC_PRIMID, 0 },
+      [VARYING_SLOT_CLIP_DIST0] = { TGSI_SEMANTIC_CLIPDIST, 0 },
+      [VARYING_SLOT_CLIP_DIST1] = { TGSI_SEMANTIC_CLIPDIST, 1 },
+      [VARYING_SLOT_CLIP_VERTEX] = { TGSI_SEMANTIC_CLIPVERTEX, 0 },
+      [VARYING_SLOT_PNTC] = { TGSI_SEMANTIC_PCOORD, 0 },
+      [VARYING_SLOT_VIEWPORT] = { TGSI_SEMANTIC_VIEWPORT_INDEX, 0 },
+      [VARYING_SLOT_LAYER] = { TGSI_SEMANTIC_LAYER, 0 },
+   };
+   /* Tested first: every slot at or above VAR0 is reported as GENERIC. */
+   if (slot >= VARYING_SLOT_VAR0) {
+      *semantic_name = TGSI_SEMANTIC_GENERIC;
+      *semantic_index = slot - VARYING_SLOT_VAR0;
+      return;
+   }
+   /* TEX0..TEX7 become TEXCOORD, with the offset from TEX0 as the index. */
+   if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
+      *semantic_name = TGSI_SEMANTIC_TEXCOORD;
+      *semantic_index = slot - VARYING_SLOT_TEX0;
+      return;
+   }
+   /* Slots past the table abort; unlisted slots inside it read { 0, 0 }. */
+   if (slot >= ARRAY_SIZE(map)) {
+      fprintf(stderr, "Unknown varying slot %d\n", slot);
+      abort();
+   }
+
+   *semantic_name = map[slot][0];
+   *semantic_index = map[slot][1];
+}
+
+/* Temporary helper to remap back to TGSI style semantic name/index
+ * values, for use in drivers that haven't been converted to using
+ * FRAG_RESULT_
+ */
+void
+frag_result_to_tgsi_semantic(gl_frag_result slot,
+                             unsigned *semantic_name, unsigned *semantic_index)
+{
+   static const unsigned map[][2] = {
+      [FRAG_RESULT_DEPTH] = { TGSI_SEMANTIC_POSITION, 0 },
+      [FRAG_RESULT_COLOR] = { TGSI_SEMANTIC_COLOR, -1 },   /* -1 converts to UINT_MAX */
+      [FRAG_RESULT_DATA0 + 0] = { TGSI_SEMANTIC_COLOR, 0 },
+      [FRAG_RESULT_DATA0 + 1] = { TGSI_SEMANTIC_COLOR, 1 },
+      [FRAG_RESULT_DATA0 + 2] = { TGSI_SEMANTIC_COLOR, 2 },
+      [FRAG_RESULT_DATA0 + 3] = { TGSI_SEMANTIC_COLOR, 3 },
+      [FRAG_RESULT_DATA0 + 4] = { TGSI_SEMANTIC_COLOR, 4 },
+      [FRAG_RESULT_DATA0 + 5] = { TGSI_SEMANTIC_COLOR, 5 },
+      [FRAG_RESULT_DATA0 + 6] = { TGSI_SEMANTIC_COLOR, 6 },
+      [FRAG_RESULT_DATA0 + 7] = { TGSI_SEMANTIC_COLOR, 7 },
+   };
+   /* NOTE(review): slot indexes map without a range check; confirm callers. */
+   *semantic_name = map[slot][0];
+   *semantic_index = map[slot][1];
+}
+
 static nir_ssa_def *
 ttn_src_for_dest(nir_builder *b, nir_alu_dest *dest)
 {
@@ -216,12 +338,15 @@ ttn_emit_declaration(struct ttn_compile *c)
             var->data.mode = nir_var_shader_in;
             var->name = ralloc_asprintf(var, "in_%d", idx);
 
-            /* We should probably translate to a VERT_ATTRIB_* or VARYING_SLOT_*
-             * instead, but nothing in NIR core is looking at the value
-             * currently, and this is less change to drivers.
-             */
-            var->data.location = decl->Semantic.Name;
-            var->data.index = decl->Semantic.Index;
+            if (c->scan->processor == TGSI_PROCESSOR_FRAGMENT) {
+               var->data.location =
+                  tgsi_varying_semantic_to_slot(decl->Semantic.Name,
+                                                decl->Semantic.Index);
+            } else {
+               assert(!decl->Declaration.Semantic);
+               var->data.location = VERT_ATTRIB_GENERIC0 + idx;
+            }
+            var->data.index = 0;
 
             /* We definitely need to translate the interpolation field, because
              * nir_print will decode it.
@@ -241,6 +366,8 @@ ttn_emit_declaration(struct ttn_compile *c)
             exec_list_push_tail(&b->shader->inputs, &var->node);
             break;
          case TGSI_FILE_OUTPUT: {
+            int semantic_name = decl->Semantic.Name;
+            int semantic_index = decl->Semantic.Index;
             /* Since we can't load from outputs in the IR, we make temporaries
              * for the outputs and emit stores to the real outputs at the end of
              * the shader.
@@ -252,14 +379,40 @@ ttn_emit_declaration(struct ttn_compile *c)
 
             var->data.mode = nir_var_shader_out;
             var->name = ralloc_asprintf(var, "out_%d", idx);
-
-            var->data.location = decl->Semantic.Name;
-            if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
-                decl->Semantic.Index == 0 &&
-                c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
-               var->data.index = -1;
-            else
-               var->data.index = decl->Semantic.Index;
+            var->data.index = 0;
+
+            if (c->scan->processor == TGSI_PROCESSOR_FRAGMENT) {
+               switch (semantic_name) {
+               case TGSI_SEMANTIC_COLOR: {
+                  /* TODO tgsi loses some information, so we cannot
+                   * actually differentiate here between DSB and MRT
+                   * at this point.  But so far no drivers using tgsi-
+                   * to-nir support dual source blend:
+                   */
+                  bool dual_src_blend = false;
+                  if (dual_src_blend && (semantic_index == 1)) {
+                     var->data.location = FRAG_RESULT_DATA0;
+                     var->data.index = 1;
+                  } else {
+                     if (c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
+                        var->data.location = FRAG_RESULT_COLOR;
+                     else
+                        var->data.location = FRAG_RESULT_DATA0 + semantic_index;
+                  }
+                  break;
+               }
+               case TGSI_SEMANTIC_POSITION:
+                  var->data.location = FRAG_RESULT_DEPTH;
+                  break;
+               default:
+                  fprintf(stderr, "Bad TGSI semantic: %d/%d\n",
+                          decl->Semantic.Name, decl->Semantic.Index);
+                  abort();
+               }
+            } else {
+               var->data.location =
+                  tgsi_varying_semantic_to_slot(semantic_name, semantic_index);
+            }
 
             if (is_array) {
                unsigned j;
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.h b/src/gallium/auxiliary/nir/tgsi_to_nir.h
index 687348a80ef..1a185a83219 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.h
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.h
@@ -28,3 +28,9 @@ struct nir_shader_compiler_options *options;
 struct nir_shader *
 tgsi_to_nir(const void *tgsi_tokens,
             const struct nir_shader_compiler_options *options);
+void
+varying_slot_to_tgsi_semantic(gl_varying_slot slot,
+                              unsigned *semantic_name, unsigned *semantic_index);
+void
+frag_result_to_tgsi_semantic(gl_frag_result slot,
+                             unsigned *semantic_name, unsigned *semantic_index);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 7ce1c976e9c..83a138515b5 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -2133,17 +2133,12 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
 	struct ir3_shader_variant *so = ctx->so;
 	unsigned array_len = MAX2(glsl_get_length(in->type), 1);
 	unsigned ncomp = glsl_get_components(in->type);
-	/* XXX: map loc slots to semantics */
-	unsigned semantic_name = in->data.location;
-	unsigned semantic_index = in->data.index;
 	unsigned n = in->data.driver_location;
+	unsigned slot = in->data.location;
 
-	DBG("; in: %u:%u, len=%ux%u, loc=%u",
-			semantic_name, semantic_index, array_len,
-			ncomp, n);
+	DBG("; in: slot=%u, len=%ux%u, drvloc=%u",
+			slot, array_len, ncomp, n);
 
-	so->inputs[n].semantic =
-			ir3_semantic_name(semantic_name, semantic_index);
 	so->inputs[n].compmask = (1 << ncomp) - 1;
 	so->inputs[n].inloc = ctx->next_inloc;
 	so->inputs[n].interpolate = 0;
@@ -2164,11 +2159,19 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
 		break;
 	}
 
-	for (int i = 0; i < ncomp; i++) {
-		struct ir3_instruction *instr = NULL;
-		unsigned idx = (n * 4) + i;
+	if (ctx->so->type == SHADER_FRAGMENT) {
+		unsigned semantic_name, semantic_index;
+
+		varying_slot_to_tgsi_semantic(slot,
+				&semantic_name, &semantic_index);
+
+		so->inputs[n].semantic =
+				ir3_semantic_name(semantic_name, semantic_index);
+
+		for (int i = 0; i < ncomp; i++) {
+			struct ir3_instruction *instr = NULL;
+			unsigned idx = (n * 4) + i;
 
-		if (ctx->so->type == SHADER_FRAGMENT) {
 			if (semantic_name == TGSI_SEMANTIC_POSITION) {
 				so->inputs[n].bary = false;
 				so->frag_coord = true;
@@ -2208,11 +2211,17 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
 				instr = create_frag_input(ctx,
 						so->inputs[n].inloc + i - 8, use_ldlv);
 			}
-		} else {
-			instr = create_input(ctx->block, idx);
-		}
 
-		ctx->ir->inputs[idx] = instr;
+			ctx->ir->inputs[idx] = instr;
+		}
+	} else if (ctx->so->type == SHADER_VERTEX) {
+		so->inputs[n].semantic = 0;
+		for (int i = 0; i < ncomp; i++) {
+			unsigned idx = (n * 4) + i;
+			ctx->ir->inputs[idx] = create_input(ctx->block, idx);
+		}
+	} else {
+		compile_error(ctx, "unknown shader type: %d\n", ctx->so->type);
 	}
 
 	if (so->inputs[n].bary || (ctx->so->type == SHADER_VERTEX)) {
@@ -2227,17 +2236,18 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
 	struct ir3_shader_variant *so = ctx->so;
 	unsigned array_len = MAX2(glsl_get_length(out->type), 1);
 	unsigned ncomp = glsl_get_components(out->type);
-	/* XXX: map loc slots to semantics */
-	unsigned semantic_name = out->data.location;
-	unsigned semantic_index = out->data.index;
+	unsigned semantic_name, semantic_index;
 	unsigned n = out->data.driver_location;
+	unsigned slot = out->data.location;
 	unsigned comp = 0;
 
-	DBG("; out: %u:%u, len=%ux%u, loc=%u",
-			semantic_name, semantic_index, array_len,
-			ncomp, n);
+	DBG("; out: slot=%u, len=%ux%u, drvloc=%u",
+			slot, array_len, ncomp, n);
 
 	if (ctx->so->type == SHADER_VERTEX) {
+		varying_slot_to_tgsi_semantic(slot,
+				&semantic_name, &semantic_index);
+
 		switch (semantic_name) {
 		case TGSI_SEMANTIC_POSITION:
 			so->writes_pos = true;
@@ -2255,7 +2265,10 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
 			compile_error(ctx, "unknown VS semantic name: %s\n",
 					tgsi_semantic_names[semantic_name]);
 		}
-	} else {
+	} else if (ctx->so->type == SHADER_FRAGMENT) {
+		frag_result_to_tgsi_semantic(slot,
+				&semantic_name, &semantic_index);
+
 		switch (semantic_name) {
 		case TGSI_SEMANTIC_POSITION:
 			comp = 2;  /* tgsi will write to .z component */
@@ -2271,6 +2284,8 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
 			compile_error(ctx, "unknown FS semantic name: %s\n",
 					tgsi_semantic_names[semantic_name]);
 		}
+	} else {
+		compile_error(ctx, "unknown shader type: %d\n", ctx->so->type);
 	}
 
 	compile_assert(ctx, n < ARRAY_SIZE(so->outputs));
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
index 808cbea8fde..f8c3c5f65bf 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -38,6 +38,7 @@
 #include "util/u_format.h"
 #include "vc4_qir.h"
 #include "glsl/nir/nir_builder.h"
+#include "nir/tgsi_to_nir.h"
 #include "vc4_context.h"
 
 /** Emits a load of the previous fragment color from the tile buffer. */
@@ -400,7 +401,10 @@ vc4_nir_lower_blend_block(nir_block *block, void *state)
                         }
                 }
                 assert(output_var);
-                unsigned semantic_name = output_var->data.location;
+                unsigned semantic_name, semantic_index;
+
+                varying_slot_to_tgsi_semantic(output_var->data.location,
+                                              &semantic_name, &semantic_index);
 
                 if (semantic_name != TGSI_SEMANTIC_COLOR)
                         continue;
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index c401415fda7..31ac64b0f7a 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -22,6 +22,7 @@
  */
 
 #include "vc4_qir.h"
+#include "nir/tgsi_to_nir.h"
 #include "tgsi/tgsi_info.h"
 #include "glsl/nir/nir_builder.h"
 
@@ -71,8 +72,11 @@ vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
                 }
         }
         assert(input_var);
-        int semantic_name = input_var->data.location;
-        int semantic_index = input_var->data.index;
+        unsigned semantic_name, semantic_index;
+
+        varying_slot_to_tgsi_semantic(input_var->data.location,
+                                      &semantic_name, &semantic_index);
+
 
         /* All TGSI-to-NIR inputs are vec4. */
         assert(intr->num_components == 4);
@@ -141,7 +145,10 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
                 }
         }
         assert(output_var);
-        unsigned semantic_name = output_var->data.location;
+        unsigned semantic_name, semantic_index;
+
+        varying_slot_to_tgsi_semantic(output_var->data.location,
+                                      &semantic_name, &semantic_index);
 
         if (c->stage == QSTAGE_COORD &&
             (semantic_name != TGSI_SEMANTIC_POSITION &&
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index e002983fdbb..7d59a2f0702 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1412,11 +1412,12 @@ ntq_setup_inputs(struct vc4_compile *c)
         for (unsigned i = 0; i < num_entries; i++) {
                 nir_variable *var = vars[i];
                 unsigned array_len = MAX2(glsl_get_length(var->type), 1);
-                /* XXX: map loc slots to semantics */
-                unsigned semantic_name = var->data.location;
-                unsigned semantic_index = var->data.index;
+                unsigned semantic_name, semantic_index;
                 unsigned loc = var->data.driver_location;
 
+                varying_slot_to_tgsi_semantic(var->data.location,
+                                              &semantic_name, &semantic_index);
+
                 assert(array_len == 1);
                 (void)array_len;
                 resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
@@ -1448,11 +1449,17 @@ ntq_setup_outputs(struct vc4_compile *c)
 {
         foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
                 unsigned array_len = MAX2(glsl_get_length(var->type), 1);
-                /* XXX: map loc slots to semantics */
-                unsigned semantic_name = var->data.location;
-                unsigned semantic_index = var->data.index;
+                unsigned semantic_name, semantic_index;
                 unsigned loc = var->data.driver_location * 4;
 
+                if (c->stage == QSTAGE_FRAG) {
+                        frag_result_to_tgsi_semantic(var->data.location,
+                                                     &semantic_name, &semantic_index);
+                } else {
+                        varying_slot_to_tgsi_semantic(var->data.location,
+                                                      &semantic_name, &semantic_index);
+                }
+
                 assert(array_len == 1);
                 (void)array_len;
 
-- 
cgit v1.2.3


From cfa980f49356eb2d94178f8cc9d67d01b4e3d695 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 9 Sep 2015 13:23:55 -0400
Subject: vc4: convert from tgsi semantic/index to varying-slot

(originally part of previous patch, split out to separate patch by Rob)

v2: squash in some fixes from Eric
v3: Another fix from Eric for point coords.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/vc4/vc4_context.h         |   9 +-
 src/gallium/drivers/vc4/vc4_nir_lower_blend.c |   9 +-
 src/gallium/drivers/vc4/vc4_nir_lower_io.c    |  29 ++---
 src/gallium/drivers/vc4/vc4_opt_dead_code.c   |   2 +-
 src/gallium/drivers/vc4/vc4_program.c         | 181 +++++++++++---------------
 src/gallium/drivers/vc4/vc4_qir.c             |   2 +-
 src/gallium/drivers/vc4/vc4_qir.h             |  21 ++-
 7 files changed, 106 insertions(+), 147 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index 33b6ec2b92d..7502293180a 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -158,18 +158,17 @@ struct vc4_compiled_shader {
          * It doesn't include those that aren't part of the VPM, like
          * point/line coordinates.
          */
-        struct vc4_varying_semantic *input_semantics;
+        struct vc4_varying_slot *input_slots;
 };
 
 struct vc4_program_stateobj {
         struct vc4_uncompiled_shader *bind_vs, *bind_fs;
         struct vc4_compiled_shader *cs, *vs, *fs;
         uint8_t num_exports;
-        /* Indexed by semantic name or TGSI_SEMANTIC_COUNT + semantic index
-         * for TGSI_SEMANTIC_GENERIC.  Special vs exports (position and point-
-         * size) are not included in this
+        /* Indexed by slot.  Special vs exports (position and pointsize) are
+         * not included in this array.
          */
-        uint8_t export_linkage[63];
+        uint8_t export_linkage[VARYING_SLOT_VAR0 + 8];
 };
 
 struct vc4_constbuf_stateobj {
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
index f8c3c5f65bf..a842d604a51 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -38,7 +38,6 @@
 #include "util/u_format.h"
 #include "vc4_qir.h"
 #include "glsl/nir/nir_builder.h"
-#include "nir/tgsi_to_nir.h"
 #include "vc4_context.h"
 
 /** Emits a load of the previous fragment color from the tile buffer. */
@@ -401,13 +400,11 @@ vc4_nir_lower_blend_block(nir_block *block, void *state)
                         }
                 }
                 assert(output_var);
-                unsigned semantic_name, semantic_index;
 
-                varying_slot_to_tgsi_semantic(output_var->data.location,
-                                              &semantic_name, &semantic_index);
-
-                if (semantic_name != TGSI_SEMANTIC_COLOR)
+                if (output_var->data.location != FRAG_RESULT_COLOR &&
+                    output_var->data.location != FRAG_RESULT_DATA0) {
                         continue;
+                }
 
                 nir_function_impl *impl =
                         nir_cf_node_get_function(&block->cf_node);
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index 31ac64b0f7a..a98d70da7d8 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -22,8 +22,6 @@
  */
 
 #include "vc4_qir.h"
-#include "nir/tgsi_to_nir.h"
-#include "tgsi/tgsi_info.h"
 #include "glsl/nir/nir_builder.h"
 
 /**
@@ -72,11 +70,6 @@ vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
                 }
         }
         assert(input_var);
-        unsigned semantic_name, semantic_index;
-
-        varying_slot_to_tgsi_semantic(input_var->data.location,
-                                      &semantic_name, &semantic_index);
-
 
         /* All TGSI-to-NIR inputs are vec4. */
         assert(intr->num_components == 4);
@@ -96,8 +89,7 @@ vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
 
         switch (c->stage) {
         case QSTAGE_FRAG:
-                switch (semantic_name) {
-                case TGSI_SEMANTIC_FACE:
+                if (input_var->data.location == VARYING_SLOT_FACE) {
                         dests[0] = nir_fsub(b,
                                             nir_imm_float(b, 1.0),
                                             nir_fmul(b,
@@ -106,10 +98,10 @@ vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
                         dests[1] = nir_imm_float(b, 0.0);
                         dests[2] = nir_imm_float(b, 0.0);
                         dests[3] = nir_imm_float(b, 1.0);
-                        break;
-                case TGSI_SEMANTIC_GENERIC:
+                } else if (input_var->data.location >= VARYING_SLOT_VAR0) {
                         if (c->fs_key->point_sprite_mask &
-                            (1 << semantic_index)) {
+                            (1 << (input_var->data.location -
+                                   VARYING_SLOT_VAR0))) {
                                 if (!c->fs_key->is_points) {
                                         dests[0] = nir_imm_float(b, 0.0);
                                         dests[1] = nir_imm_float(b, 0.0);
@@ -122,7 +114,6 @@ vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
                                 dests[2] = nir_imm_float(b, 0.0);
                                 dests[3] = nir_imm_float(b, 1.0);
                         }
-                        break;
                 }
                 break;
         case QSTAGE_COORD:
@@ -145,20 +136,18 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
                 }
         }
         assert(output_var);
-        unsigned semantic_name, semantic_index;
-
-        varying_slot_to_tgsi_semantic(output_var->data.location,
-                                      &semantic_name, &semantic_index);
 
         if (c->stage == QSTAGE_COORD &&
-            (semantic_name != TGSI_SEMANTIC_POSITION &&
-             semantic_name != TGSI_SEMANTIC_PSIZE)) {
+            output_var->data.location != VARYING_SLOT_POS &&
+            output_var->data.location != VARYING_SLOT_PSIZ) {
                 nir_instr_remove(&intr->instr);
                 return;
         }
 
         /* Color output is lowered by vc4_nir_lower_blend(). */
-        if (c->stage == QSTAGE_FRAG && semantic_name == TGSI_SEMANTIC_COLOR) {
+        if (c->stage == QSTAGE_FRAG &&
+            (output_var->data.location == FRAG_RESULT_COLOR ||
+             output_var->data.location == FRAG_RESULT_DATA0)) {
                 intr->const_index[0] *= 4;
                 return;
         }
diff --git a/src/gallium/drivers/vc4/vc4_opt_dead_code.c b/src/gallium/drivers/vc4/vc4_opt_dead_code.c
index ffd42422de8..9e79a2d1b2e 100644
--- a/src/gallium/drivers/vc4/vc4_opt_dead_code.c
+++ b/src/gallium/drivers/vc4/vc4_opt_dead_code.c
@@ -69,7 +69,7 @@ has_nonremovable_reads(struct vc4_compile *c, struct qinst *inst)
                 }
 
                 if (inst->src[i].file == QFILE_VARY &&
-                    c->input_semantics[inst->src[i].index].semantic == 0xff) {
+                    c->input_slots[inst->src[i].index].slot == 0xff) {
                         return true;
                 }
         }
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 7d59a2f0702..01ea7544984 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -30,7 +30,6 @@
 #include "util/ralloc.h"
 #include "util/hash_table.h"
 #include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_info.h"
 #include "tgsi/tgsi_lowering.h"
 #include "tgsi/tgsi_parse.h"
 #include "glsl/nir/nir.h"
@@ -738,41 +737,36 @@ emit_fragcoord_input(struct vc4_compile *c, int attr)
 }
 
 static struct qreg
-emit_fragment_varying(struct vc4_compile *c, uint8_t semantic,
-                      uint8_t index, uint8_t swizzle)
+emit_fragment_varying(struct vc4_compile *c, gl_varying_slot slot,
+                      uint8_t swizzle)
 {
-        uint32_t i = c->num_input_semantics++;
+        uint32_t i = c->num_input_slots++;
         struct qreg vary = {
                 QFILE_VARY,
                 i
         };
 
-        if (c->num_input_semantics >= c->input_semantics_array_size) {
-                c->input_semantics_array_size =
-                        MAX2(4, c->input_semantics_array_size * 2);
+        if (c->num_input_slots >= c->input_slots_array_size) {
+                c->input_slots_array_size =
+                        MAX2(4, c->input_slots_array_size * 2);
 
-                c->input_semantics = reralloc(c, c->input_semantics,
-                                              struct vc4_varying_semantic,
-                                              c->input_semantics_array_size);
+                c->input_slots = reralloc(c, c->input_slots,
+                                          struct vc4_varying_slot,
+                                          c->input_slots_array_size);
         }
 
-        c->input_semantics[i].semantic = semantic;
-        c->input_semantics[i].index = index;
-        c->input_semantics[i].swizzle = swizzle;
+        c->input_slots[i].slot = slot;
+        c->input_slots[i].swizzle = swizzle;
 
         return qir_VARY_ADD_C(c, qir_FMUL(c, vary, qir_FRAG_W(c)));
 }
 
 static void
-emit_fragment_input(struct vc4_compile *c, int attr,
-                    unsigned semantic_name, unsigned semantic_index)
+emit_fragment_input(struct vc4_compile *c, int attr, gl_varying_slot slot)
 {
         for (int i = 0; i < 4; i++) {
                 c->inputs[attr * 4 + i] =
-                        emit_fragment_varying(c,
-                                              semantic_name,
-                                              semantic_index,
-                                              i);
+                        emit_fragment_varying(c, slot, i);
                 c->num_inputs++;
         }
 }
@@ -780,24 +774,22 @@ emit_fragment_input(struct vc4_compile *c, int attr,
 static void
 add_output(struct vc4_compile *c,
            uint32_t decl_offset,
-           uint8_t semantic_name,
-           uint8_t semantic_index,
-           uint8_t semantic_swizzle)
+           uint8_t slot,
+           uint8_t swizzle)
 {
         uint32_t old_array_size = c->outputs_array_size;
         resize_qreg_array(c, &c->outputs, &c->outputs_array_size,
                           decl_offset + 1);
 
         if (old_array_size != c->outputs_array_size) {
-                c->output_semantics = reralloc(c,
-                                               c->output_semantics,
-                                               struct vc4_varying_semantic,
-                                               c->outputs_array_size);
+                c->output_slots = reralloc(c,
+                                           c->output_slots,
+                                           struct vc4_varying_slot,
+                                           c->outputs_array_size);
         }
 
-        c->output_semantics[decl_offset].semantic = semantic_name;
-        c->output_semantics[decl_offset].index = semantic_index;
-        c->output_semantics[decl_offset].swizzle = semantic_swizzle;
+        c->output_slots[decl_offset].slot = slot;
+        c->output_slots[decl_offset].swizzle = swizzle;
 }
 
 static void
@@ -1129,10 +1121,10 @@ clip_distance_discard(struct vc4_compile *c)
                 if (!(c->key->ucp_enables & (1 << i)))
                         continue;
 
-                struct qreg dist = emit_fragment_varying(c,
-                                                         TGSI_SEMANTIC_CLIPDIST,
-                                                         i,
-                                                         TGSI_SWIZZLE_X);
+                struct qreg dist =
+                        emit_fragment_varying(c,
+                                              VARYING_SLOT_CLIP_DIST0 + (i / 4),
+                                              i % 4);
 
                 qir_SF(c, dist);
 
@@ -1285,9 +1277,8 @@ emit_ucp_clipdistance(struct vc4_compile *c)
                  */
                 uint32_t output_index = c->num_outputs++;
                 add_output(c, output_index,
-                           TGSI_SEMANTIC_CLIPDIST,
-                           plane,
-                           TGSI_SWIZZLE_X);
+                           VARYING_SLOT_CLIP_DIST0 + plane / 4,
+                           plane % 4);
 
 
                 struct qreg dist = qir_uniform_f(c, 0.0);
@@ -1305,7 +1296,7 @@ emit_ucp_clipdistance(struct vc4_compile *c)
 
 static void
 emit_vert_end(struct vc4_compile *c,
-              struct vc4_varying_semantic *fs_inputs,
+              struct vc4_varying_slot *fs_inputs,
               uint32_t num_fs_inputs)
 {
         struct qreg rcp_w = qir_RCP(c, c->outputs[c->output_position_index + 3]);
@@ -1320,15 +1311,14 @@ emit_vert_end(struct vc4_compile *c,
                 emit_point_size_write(c);
 
         for (int i = 0; i < num_fs_inputs; i++) {
-                struct vc4_varying_semantic *input = &fs_inputs[i];
+                struct vc4_varying_slot *input = &fs_inputs[i];
                 int j;
 
                 for (j = 0; j < c->num_outputs; j++) {
-                        struct vc4_varying_semantic *output =
-                                &c->output_semantics[j];
+                        struct vc4_varying_slot *output =
+                                &c->output_slots[j];
 
-                        if (input->semantic == output->semantic &&
-                            input->index == output->index &&
+                        if (input->slot == output->slot &&
                             input->swizzle == output->swizzle) {
                                 qir_VPM_WRITE(c, c->outputs[j]);
                                 break;
@@ -1412,31 +1402,26 @@ ntq_setup_inputs(struct vc4_compile *c)
         for (unsigned i = 0; i < num_entries; i++) {
                 nir_variable *var = vars[i];
                 unsigned array_len = MAX2(glsl_get_length(var->type), 1);
-                unsigned semantic_name, semantic_index;
                 unsigned loc = var->data.driver_location;
 
-                varying_slot_to_tgsi_semantic(var->data.location,
-                                              &semantic_name, &semantic_index);
-
                 assert(array_len == 1);
                 (void)array_len;
                 resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
                                   (loc + 1) * 4);
 
                 if (c->stage == QSTAGE_FRAG) {
-                        if (semantic_name == TGSI_SEMANTIC_POSITION) {
+                        if (var->data.location == VARYING_SLOT_POS) {
                                 emit_fragcoord_input(c, loc);
-                        } else if (semantic_name == TGSI_SEMANTIC_FACE) {
+                        } else if (var->data.location == VARYING_SLOT_FACE) {
                                 c->inputs[loc * 4 + 0] = qir_FRAG_REV_FLAG(c);
-                        } else if (semantic_name == TGSI_SEMANTIC_GENERIC &&
+                        } else if (var->data.location >= VARYING_SLOT_VAR0 &&
                                    (c->fs_key->point_sprite_mask &
-                                    (1 << semantic_index))) {
+                                    (1 << (var->data.location -
+                                           VARYING_SLOT_VAR0)))) {
                                 c->inputs[loc * 4 + 0] = c->point_x;
                                 c->inputs[loc * 4 + 1] = c->point_y;
                         } else {
-                                emit_fragment_input(c, loc,
-                                                    semantic_name,
-                                                    semantic_index);
+                                emit_fragment_input(c, loc, var->data.location);
                         }
                 } else {
                         emit_vertex_input(c, loc);
@@ -1449,49 +1434,37 @@ ntq_setup_outputs(struct vc4_compile *c)
 {
         foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
                 unsigned array_len = MAX2(glsl_get_length(var->type), 1);
-                unsigned semantic_name, semantic_index;
                 unsigned loc = var->data.driver_location * 4;
 
-                if (c->stage == QSTAGE_FRAG) {
-                        frag_result_to_tgsi_semantic(var->data.location,
-                                                     &semantic_name, &semantic_index);
-                } else {
-                        varying_slot_to_tgsi_semantic(var->data.location,
-                                                      &semantic_name, &semantic_index);
-                }
-
                 assert(array_len == 1);
                 (void)array_len;
 
-                /* NIR hack to pass through
-                 * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS */
-                if (semantic_name == TGSI_SEMANTIC_COLOR &&
-                    semantic_index == -1)
-                        semantic_index = 0;
-
-                for (int i = 0; i < 4; i++) {
-                        add_output(c,
-                                   loc + i,
-                                   semantic_name,
-                                   semantic_index,
-                                   i);
-                }
+                for (int i = 0; i < 4; i++)
+                        add_output(c, loc + i, var->data.location, i);
 
-                switch (semantic_name) {
-                case TGSI_SEMANTIC_POSITION:
-                        c->output_position_index = loc;
-                        break;
-                case TGSI_SEMANTIC_CLIPVERTEX:
-                        c->output_clipvertex_index = loc;
-                        break;
-                case TGSI_SEMANTIC_COLOR:
-                        c->output_color_index = loc;
-                        break;
-                case TGSI_SEMANTIC_PSIZE:
-                        c->output_point_size_index = loc;
-                        break;
+                if (c->stage == QSTAGE_FRAG) {
+                        switch (var->data.location) {
+                        case FRAG_RESULT_COLOR:
+                        case FRAG_RESULT_DATA0:
+                                c->output_color_index = loc;
+                                break;
+                        case FRAG_RESULT_DEPTH:
+                                c->output_position_index = loc;
+                                break;
+                        }
+                } else {
+                        switch (var->data.location) {
+                        case VARYING_SLOT_POS:
+                                c->output_position_index = loc;
+                                break;
+                        case VARYING_SLOT_CLIP_VERTEX:
+                                c->output_clipvertex_index = loc;
+                                break;
+                        case VARYING_SLOT_PSIZ:
+                                c->output_point_size_index = loc;
+                                break;
+                        }
                 }
-
         }
 }
 
@@ -1750,10 +1723,10 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
         case QSTAGE_FRAG:
                 c->fs_key = (struct vc4_fs_key *)key;
                 if (c->fs_key->is_points) {
-                        c->point_x = emit_fragment_varying(c, ~0, ~0, 0);
-                        c->point_y = emit_fragment_varying(c, ~0, ~0, 0);
+                        c->point_x = emit_fragment_varying(c, ~0, 0);
+                        c->point_y = emit_fragment_varying(c, ~0, 0);
                 } else if (c->fs_key->is_lines) {
-                        c->line_x = emit_fragment_varying(c, ~0, ~0, 0);
+                        c->line_x = emit_fragment_varying(c, ~0, 0);
                 }
                 break;
         case QSTAGE_VERT:
@@ -1831,7 +1804,7 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
                 break;
         case QSTAGE_VERT:
                 emit_vert_end(c,
-                              vc4->prog.fs->input_semantics,
+                              vc4->prog.fs->input_slots,
                               vc4->prog.fs->num_inputs);
                 break;
         case QSTAGE_COORD:
@@ -1932,7 +1905,7 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
 
         shader->program_id = vc4->next_compiled_program_id++;
         if (stage == QSTAGE_FRAG) {
-                bool input_live[c->num_input_semantics];
+                bool input_live[c->num_input_slots];
 
                 memset(input_live, 0, sizeof(input_live));
                 list_for_each_entry(struct qinst, inst, &c->instructions, link) {
@@ -1942,26 +1915,28 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
                         }
                 }
 
-                shader->input_semantics = ralloc_array(shader,
-                                                       struct vc4_varying_semantic,
-                                                       c->num_input_semantics);
+                shader->input_slots = ralloc_array(shader,
+                                                   struct vc4_varying_slot,
+                                                   c->num_input_slots);
 
-                for (int i = 0; i < c->num_input_semantics; i++) {
-                        struct vc4_varying_semantic *sem = &c->input_semantics[i];
+                for (int i = 0; i < c->num_input_slots; i++) {
+                        struct vc4_varying_slot *slot = &c->input_slots[i];
 
                         if (!input_live[i])
                                 continue;
 
                         /* Skip non-VS-output inputs. */
-                        if (sem->semantic == (uint8_t)~0)
+                        if (slot->slot == (uint8_t)~0)
                                 continue;
 
-                        if (sem->semantic == TGSI_SEMANTIC_COLOR ||
-                            sem->semantic == TGSI_SEMANTIC_BCOLOR) {
+                        if (slot->slot == VARYING_SLOT_COL0 ||
+                            slot->slot == VARYING_SLOT_COL1 ||
+                            slot->slot == VARYING_SLOT_BFC0 ||
+                            slot->slot == VARYING_SLOT_BFC1) {
                                 shader->color_inputs |= (1 << shader->num_inputs);
                         }
 
-                        shader->input_semantics[shader->num_inputs] = *sem;
+                        shader->input_slots[shader->num_inputs] = *slot;
                         shader->num_inputs++;
                 }
         } else {
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 073ba5ffc23..e385fbb65ae 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -147,7 +147,7 @@ qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst)
          */
         for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
                 if (inst->src[i].file == QFILE_VARY &&
-                    c->input_semantics[inst->src[i].index].semantic == 0xff) {
+                    c->input_slots[inst->src[i].index].slot == 0xff) {
                         return true;
                 }
 
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index a2b21fa17bb..ddde96db6b4 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -254,9 +254,8 @@ enum quniform_contents {
         QUNIFORM_ALPHA_REF,
 };
 
-struct vc4_varying_semantic {
-        uint8_t semantic;
-        uint8_t index;
+struct vc4_varying_slot {
+        uint8_t slot;
         uint8_t swizzle;
 };
 
@@ -372,21 +371,21 @@ struct vc4_compile {
         uint8_t vattr_sizes[8];
 
         /**
-         * Array of the TGSI semantics of all FS QFILE_VARY reads.
+         * Array of the VARYING_SLOT_* of all FS QFILE_VARY reads.
          *
          * This includes those that aren't part of the VPM varyings, like
          * point/line coordinates.
          */
-        struct vc4_varying_semantic *input_semantics;
-        uint32_t num_input_semantics;
-        uint32_t input_semantics_array_size;
+        struct vc4_varying_slot *input_slots;
+        uint32_t num_input_slots;
+        uint32_t input_slots_array_size;
 
         /**
-         * An entry per outputs[] in the VS indicating what the semantic of
-         * the output is.  Used to emit from the VS in the order that the FS
-         * needs.
+         * An entry per outputs[] in the VS indicating what the VARYING_SLOT_*
+         * of the output is.  Used to emit from the VS in the order that the
+         * FS needs.
          */
-        struct vc4_varying_semantic *output_semantics;
+        struct vc4_varying_slot *output_slots;
 
         struct pipe_shader_state *shader_state;
         struct vc4_key *key;
-- 
cgit v1.2.3


From ee6b95c82c3dbfad07e75ecdfd993e60fec2ac4f Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sun, 13 Sep 2015 19:50:45 -0400
Subject: freedreno/a3xx: add support for dual-source blending

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 docs/relnotes/11.1.0.html                         |  1 +
 src/gallium/drivers/freedreno/a3xx/fd3_blend.c    |  4 ++++
 src/gallium/drivers/freedreno/a3xx/fd3_blend.h    |  1 +
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c     | 14 ++++++++++----
 src/gallium/drivers/freedreno/freedreno_context.h |  1 +
 src/gallium/drivers/freedreno/freedreno_screen.c  |  3 ++-
 src/gallium/drivers/freedreno/freedreno_state.c   | 10 ++++++++++
 src/gallium/drivers/freedreno/freedreno_util.c    |  5 ++++-
 8 files changed, 33 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 6abdf7ad855..24fdf2e9683 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -44,6 +44,7 @@ Note: some of the new features are only available with certain drivers.
 </p>
 
 <ul>
+<li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
 <li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600</li>
 <li>GL_ARB_texture_query_lod on softpipe</li>
 <li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_blend.c b/src/gallium/drivers/freedreno/a3xx/fd3_blend.c
index 6f5de9d47f7..35360f33822 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_blend.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_blend.c
@@ -28,6 +28,7 @@
 
 #include "pipe/p_state.h"
 #include "util/u_blend.h"
+#include "util/u_dual_blend.h"
 #include "util/u_string.h"
 #include "util/u_memory.h"
 
@@ -131,5 +132,8 @@ fd3_blend_state_create(struct pipe_context *pctx,
 			so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS);
 	}
 
+	if (cso->rt[0].blend_enable && util_blend_state_is_dual(cso, 0))
+		so->rb_render_control = A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE;
+
 	return so;
 }
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_blend.h b/src/gallium/drivers/freedreno/a3xx/fd3_blend.h
index 142df7c300f..59e0010372d 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_blend.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_blend.h
@@ -36,6 +36,7 @@
 
 struct fd3_blend_stateobj {
 	struct pipe_blend_state base;
+	uint32_t rb_render_control;
 	struct {
 		/* Blend control bits for color if there is an alpha channel */
 		uint32_t blend_control_rgb;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index b81bc5a90a4..40301971d90 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -443,8 +443,10 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 				A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(ctx->sample_mask));
 	}
 
-	if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) {
-		uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_render_control;
+	if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG | FD_DIRTY_BLEND_DUAL)) &&
+		!emit->key.binning_pass) {
+		uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_render_control |
+			fd3_blend_stateobj(ctx->blend)->rb_render_control;
 
 		val |= COND(fp->frag_face, A3XX_RB_RENDER_CONTROL_FACENESS);
 		val |= COND(fp->frag_coord, A3XX_RB_RENDER_CONTROL_XCOORD |
@@ -590,9 +592,13 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
 	}
 
-	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
+	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_BLEND_DUAL)) {
 		struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
-		fd3_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs);
+		int nr_cbufs = pfb->nr_cbufs;
+		if (fd3_blend_stateobj(ctx->blend)->rb_render_control &
+			A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE)
+			nr_cbufs++;
+		fd3_program_emit(ring, emit, nr_cbufs, pfb->cbufs);
 	}
 
 	/* TODO we should not need this or fd_wfi() before emit_constants():
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 3486c2fd1b7..61c4c6d6e24 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -335,6 +335,7 @@ struct fd_context {
 		FD_DIRTY_SCISSOR     = (1 << 17),
 		FD_DIRTY_STREAMOUT   = (1 << 18),
 		FD_DIRTY_UCP         = (1 << 19),
+		FD_DIRTY_BLEND_DUAL  = (1 << 20),
 	} dirty;
 
 	struct pipe_blend_state *blend;
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index a3dede2500e..9a684d4ffbb 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -163,7 +163,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_TEXTURE_MULTISAMPLE:
 	case PIPE_CAP_TEXTURE_BARRIER:
 	case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
-	case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
 	case PIPE_CAP_START_INSTANCE:
 	case PIPE_CAP_COMPUTE:
 		return 0;
@@ -278,6 +277,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	/* Render targets. */
 	case PIPE_CAP_MAX_RENDER_TARGETS:
 		return screen->max_rts;
+	case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+		return is_a3xx(screen) ? 1 : 0;
 
 	/* Queries. */
 	case PIPE_CAP_QUERY_TIME_ELAPSED:
diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c
index e75865a9387..685d3a75659 100644
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -27,6 +27,7 @@
  */
 
 #include "pipe/p_state.h"
+#include "util/u_dual_blend.h"
 #include "util/u_string.h"
 #include "util/u_memory.h"
 #include "util/u_helpers.h"
@@ -225,8 +226,17 @@ static void
 fd_blend_state_bind(struct pipe_context *pctx, void *hwcso)
 {
 	struct fd_context *ctx = fd_context(pctx);
+	struct pipe_blend_state *cso = hwcso;
+	bool old_is_dual = ctx->blend ?
+		ctx->blend->rt[0].blend_enable && util_blend_state_is_dual(ctx->blend, 0) :
+		false;
+	bool new_is_dual = cso ?
+		cso->rt[0].blend_enable && util_blend_state_is_dual(cso, 0) :
+		false;
 	ctx->blend = hwcso;
 	ctx->dirty |= FD_DIRTY_BLEND;
+	if (old_is_dual != new_is_dual)
+		ctx->dirty |= FD_DIRTY_BLEND_DUAL;
 }
 
 static void
diff --git a/src/gallium/drivers/freedreno/freedreno_util.c b/src/gallium/drivers/freedreno/freedreno_util.c
index 2acce06d148..c8f2127c910 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.c
+++ b/src/gallium/drivers/freedreno/freedreno_util.c
@@ -104,10 +104,13 @@ fd_blend_factor(unsigned factor)
 	case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
 		return FACTOR_ONE_MINUS_CONSTANT_ALPHA;
 	case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+		return FACTOR_ONE_MINUS_SRC1_COLOR;
 	case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+		return FACTOR_ONE_MINUS_SRC1_ALPHA;
 	case PIPE_BLENDFACTOR_SRC1_COLOR:
+		return FACTOR_SRC1_COLOR;
 	case PIPE_BLENDFACTOR_SRC1_ALPHA:
-		/* I don't think these are supported */
+		return FACTOR_SRC1_ALPHA;
 	default:
 		DBG("invalid blend factor: %x", factor);
 		return 0;
-- 
cgit v1.2.3


From 545a3cbb011e0e7722c2accb330c0994aea5cc38 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Mon, 14 Sep 2015 01:59:01 -0400
Subject: freedreno/a3xx: fix blending of L8 format

Even though luminance formats don't have alpha, we still want the alpha
output to go to the blender. This fixes the luminance blending tests.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/freedreno/a3xx/fd3_format.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src')

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
index 04cb9b98fb7..857d156c869 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
@@ -355,6 +355,8 @@ fd3_fs_output_format(enum pipe_format format)
 	case PIPE_FORMAT_R16G16_FLOAT:
 	case PIPE_FORMAT_R11G11B10_FLOAT:
 		return RB_R16G16B16A16_FLOAT;
+	case PIPE_FORMAT_L8_UNORM:
+		return RB_R8G8B8A8_UNORM;
 	default:
 		return fd3_pipe2color(format);
 	}
-- 
cgit v1.2.3


From 37d0becfd91c0bb8202dd6994b6bba7e5d761990 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Wed, 16 Sep 2015 14:19:21 -0400
Subject: freedreno/a3xx: use NUM_USER_CLIP_PLANES helper instead of magic
 number

Use the helper from the newly-updated generated header file.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 40301971d90..dbddb293a9c 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -517,7 +517,8 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
 				A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
 		/* TODO only use if prog doesn't use clipvertex/clipdist */
-		val |= MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6) << 26;
+		val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(
+				MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6));
 		OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
 		OUT_RING(ring, val);
 	}
-- 
cgit v1.2.3


From b5df52b1128049bf688cace6e581a16d5f1ad5bb Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@gmail.com>
Date: Thu, 17 Sep 2015 06:55:31 +1000
Subject: Revert "mesa/extensions: restrict GL_OES_EGL_image to GLES"

This reverts commit 48961fa3ba37999a6f8fd812458b735e39604a95.

glamor/Xwayland use this, the spec saying something when it
was written, and the fact that the comment says Mesa relies on it
hasn't changed.

I also don't have a copy of this patch in my mail archive, which
seems weird, did it get posted to mesa-dev?

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/mesa/main/extensions.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 767c50e28c0..b2c88c37366 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -307,7 +307,8 @@ static const struct extension extension_table[] = {
    { "GL_OES_depth_texture_cube_map",              o(OES_depth_texture_cube_map),                         ES2, 2012 },
    { "GL_OES_draw_texture",                        o(OES_draw_texture),                             ES1,       2004 },
    { "GL_OES_EGL_sync",                            o(dummy_true),                                   ES1 | ES2, 2010 },
-   { "GL_OES_EGL_image",                           o(OES_EGL_image),                                ES1 | ES2, 2006 },
+   /*  FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
+   { "GL_OES_EGL_image",                           o(OES_EGL_image),                           GL | ES1 | ES2, 2006 },
    { "GL_OES_EGL_image_external",                  o(OES_EGL_image_external),                       ES1 | ES2, 2010 },
    { "GL_OES_element_index_uint",                  o(dummy_true),                                   ES1 | ES2, 2005 },
    { "GL_OES_fbo_render_mipmap",                   o(dummy_true),                                   ES1 | ES2, 2005 },
-- 
cgit v1.2.3


From 874a0217fd8bba83b0bc2448f5156fdb82f77d7c Mon Sep 17 00:00:00 2001
From: Timothy Arceri <t_arceri@yahoo.com.au>
Date: Sun, 30 Aug 2015 12:49:46 +1000
Subject: glsl: order indices for samplers inside a struct array

This allows the correct offset to be easily calculated for indirect
indexing when a struct array contains multiple samplers, or any crazy
nesting.

The indices for the following struct will now look like this:
Sampler index: 0 Name: s[0].tex
Sampler index: 1 Name: s[1].tex
Sampler index: 2 Name: s[0].si.tex
Sampler index: 3 Name: s[1].si.tex
Sampler index: 4 Name: s[0].si.tex2
Sampler index: 5 Name: s[1].si.tex2

Before this change it looked like this:
Sampler index: 0 Name: s[0].tex
Sampler index: 3 Name: s[1].tex
Sampler index: 1 Name: s[0].si.tex
Sampler index: 4 Name: s[1].si.tex
Sampler index: 2 Name: s[0].si.tex2
Sampler index: 5 Name: s[1].si.tex2

struct S_inner {
   sampler2D tex;
   sampler2D tex2;
};

struct S {
   sampler2D tex;
   S_inner si;
};

uniform S s[2];

V3: Update comments with suggestions from Jason

V2: rename struct array counter to have better name

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/link_uniforms.cpp | 117 ++++++++++++++++++++++++++++++++++++++-------
 src/glsl/linker.h          |   4 +-
 2 files changed, 103 insertions(+), 18 deletions(-)

(limited to 'src')

diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index a0cb6182925..c21ce22622e 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -28,6 +28,7 @@
 #include "glsl_symbol_table.h"
 #include "program/hash_table.h"
 #include "program.h"
+#include "util/hash_table.h"
 
 /**
  * \file link_uniforms.cpp
@@ -62,14 +63,17 @@ program_resource_visitor::process(const glsl_type *type, const char *name)
    assert(type->without_array()->is_record()
           || type->without_array()->is_interface());
 
+   unsigned record_array_count = 1;
    char *name_copy = ralloc_strdup(NULL, name);
-   recursion(type, &name_copy, strlen(name), false, NULL, false);
+   recursion(type, &name_copy, strlen(name), false, NULL, false,
+             record_array_count);
    ralloc_free(name_copy);
 }
 
 void
 program_resource_visitor::process(ir_variable *var)
 {
+   unsigned record_array_count = 1;
    const glsl_type *t = var->type;
    const bool row_major =
       var->data.matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;
@@ -110,7 +114,8 @@ program_resource_visitor::process(ir_variable *var)
           * lowering is only applied to non-uniform interface blocks, so we
           * can safely pass false for row_major.
           */
-         recursion(var->type, &name, new_length, row_major, NULL, false);
+         recursion(var->type, &name, new_length, row_major, NULL, false,
+                   record_array_count);
       }
       ralloc_free(name);
    } else if (var->data.from_named_ifc_block_nonarray) {
@@ -134,19 +139,23 @@ program_resource_visitor::process(ir_variable *var)
        * is only applied to non-uniform interface blocks, so we can safely
        * pass false for row_major.
        */
-      recursion(var->type, &name, strlen(name), row_major, NULL, false);
+      recursion(var->type, &name, strlen(name), row_major, NULL, false,
+                record_array_count);
       ralloc_free(name);
    } else if (t->without_array()->is_record()) {
       char *name = ralloc_strdup(NULL, var->name);
-      recursion(var->type, &name, strlen(name), row_major, NULL, false);
+      recursion(var->type, &name, strlen(name), row_major, NULL, false,
+                record_array_count);
       ralloc_free(name);
    } else if (t->is_interface()) {
       char *name = ralloc_strdup(NULL, var->type->name);
-      recursion(var->type, &name, strlen(name), row_major, NULL, false);
+      recursion(var->type, &name, strlen(name), row_major, NULL, false,
+                record_array_count);
       ralloc_free(name);
    } else if (t->is_array() && t->fields.array->is_interface()) {
       char *name = ralloc_strdup(NULL, var->type->fields.array->name);
-      recursion(var->type, &name, strlen(name), row_major, NULL, false);
+      recursion(var->type, &name, strlen(name), row_major, NULL, false,
+                record_array_count);
       ralloc_free(name);
    } else {
       this->visit_field(t, var->name, row_major, NULL, false);
@@ -157,7 +166,8 @@ void
 program_resource_visitor::recursion(const glsl_type *t, char **name,
                                     size_t name_length, bool row_major,
                                     const glsl_type *record_type,
-                                    bool last_field)
+                                    bool last_field,
+                                    unsigned record_array_count)
 {
    /* Records need to have each field processed individually.
     *
@@ -204,7 +214,7 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
          recursion(t->fields.structure[i].type, name, new_length,
                    field_row_major,
                    record_type,
-                   (i + 1) == t->length);
+                   (i + 1) == t->length, record_array_count);
 
          /* Only the first leaf-field of the record gets called with the
           * record type pointer.
@@ -221,6 +231,8 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
       if (record_type == NULL && t->fields.array->is_record())
          record_type = t->fields.array;
 
+      record_array_count *= t->length;
+
       for (unsigned i = 0; i < t->length; i++) {
 	 size_t new_length = name_length;
 
@@ -229,7 +241,7 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
 
          recursion(t->fields.array, name, new_length, row_major,
                    record_type,
-                   (i + 1) == t->length);
+                   (i + 1) == t->length, record_array_count);
 
          /* Only the first leaf-field of the record gets called with the
           * record type pointer.
@@ -237,6 +249,7 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
          record_type = NULL;
       }
    } else {
+      this->set_record_array_count(record_array_count);
       this->visit_field(t, *name, row_major, record_type, last_field);
    }
 }
@@ -267,6 +280,11 @@ program_resource_visitor::leave_record(const glsl_type *, const char *, bool)
 {
 }
 
+void
+program_resource_visitor::set_record_array_count(unsigned)
+{
+}
+
 namespace {
 
 /**
@@ -431,6 +449,7 @@ public:
       this->next_sampler = 0;
       this->next_image = 0;
       this->next_subroutine = 0;
+      this->record_array_count = 1;
       memset(this->targets, 0, sizeof(this->targets));
    }
 
@@ -439,6 +458,7 @@ public:
    {
       current_var = var;
       field_counter = 0;
+      this->record_next_sampler = new string_to_uint_map;
 
       ubo_block_index = -1;
       if (var->is_in_buffer_block()) {
@@ -492,6 +512,8 @@ public:
             process(var);
       } else
          process(var);
+      }
+      delete this->record_next_sampler;
    }
 
    int ubo_block_index;
@@ -500,17 +522,65 @@ public:
 
 private:
    void handle_samplers(const glsl_type *base_type,
-                        struct gl_uniform_storage *uniform)
+                        struct gl_uniform_storage *uniform, const char *name)
    {
       if (base_type->is_sampler()) {
-         uniform->sampler[shader_type].index = this->next_sampler;
          uniform->sampler[shader_type].active = true;
 
-         /* Increment the sampler by 1 for non-arrays and by the number of
-          * array elements for arrays.
-          */
-         this->next_sampler +=
-               MAX2(1, uniform->array_elements);
+         /* Handle multiple samplers inside struct arrays */
+         if (this->record_array_count > 1) {
+            unsigned inner_array_size = MAX2(1, uniform->array_elements);
+            char *name_copy = ralloc_strdup(NULL, name);
+
+            /* Remove all array subscripts from the sampler name */
+            char *str_start;
+            const char *str_end;
+            while((str_start = strchr(name_copy, '[')) &&
+                  (str_end = strchr(name_copy, ']'))) {
+               memmove(str_start, str_end + 1, 1 + strlen(str_end));
+            }
+
+            unsigned index = 0;
+            if (this->record_next_sampler->get(index, name_copy)) {
+               /* In this case, we've already seen this uniform so we just use
+                * the next sampler index recorded the last time we visited.
+                */
+               uniform->sampler[shader_type].index = index;
+               index = inner_array_size + uniform->sampler[shader_type].index;
+               this->record_next_sampler->put(index, name_copy);
+
+               ralloc_free(name_copy);
+               /* Return as everything else has already been initialised in a
+                * previous pass.
+                */
+               return;
+            } else {
+               /* We've never seen this uniform before so we need to allocate
+                * enough indices to store it.
+                *
+                * Nested struct arrays behave like arrays of arrays so we need
+                * to increase the index by the total number of elements of the
+                * sampler in case there is more than one sampler inside the
+                * structs. This allows the offset to be easily calculated for
+                * indirect indexing.
+                */
+               uniform->sampler[shader_type].index = this->next_sampler;
+               this->next_sampler +=
+                  inner_array_size * this->record_array_count;
+
+               /* Store the next index for future passes over the struct array
+                */
+               index = uniform->sampler[shader_type].index + inner_array_size;
+               this->record_next_sampler->put(index, name_copy);
+               ralloc_free(name_copy);
+            }
+         } else {
+            /* Increment the sampler by 1 for non-arrays and by the number of
+             * array elements for arrays.
+             */
+            uniform->sampler[shader_type].index = this->next_sampler;
+            this->next_sampler += MAX2(1, uniform->array_elements);
+         }
 
          const gl_texture_index target = base_type->sampler_index();
          const unsigned shadow = base_type->sampler_shadow;
@@ -563,6 +633,11 @@ private:
       }
    }
 
+   virtual void set_record_array_count(unsigned record_array_count)
+   {
+      this->record_array_count = record_array_count;
+   }
+
    virtual void visit_field(const glsl_type *type, const char *name,
                             bool row_major)
    {
@@ -614,7 +689,7 @@ private:
       }
 
       /* This assigns uniform indices to sampler and image uniforms. */
-      handle_samplers(base_type, &this->uniforms[id]);
+      handle_samplers(base_type, &this->uniforms[id], name);
       handle_images(base_type, &this->uniforms[id]);
       handle_subroutines(base_type, &this->uniforms[id]);
 
@@ -703,6 +778,14 @@ private:
    unsigned next_image;
    unsigned next_subroutine;
 
+   /* Stores total struct array elements including nested structs */
+   unsigned record_array_count;
+
+   /* Map for temporarily storing next sampler index when handling samplers in
+    * struct arrays.
+    */
+   struct string_to_uint_map *record_next_sampler;
+
 public:
    union gl_constant_value *values;
 
diff --git a/src/glsl/linker.h b/src/glsl/linker.h
index 0999878c65a..b31052e767e 100644
--- a/src/glsl/linker.h
+++ b/src/glsl/linker.h
@@ -178,6 +178,8 @@ protected:
    virtual void leave_record(const glsl_type *type, const char *name,
                              bool row_major);
 
+   virtual void set_record_array_count(unsigned record_array_count);
+
 private:
    /**
     * \param name_length  Length of the current name \b not including the
@@ -188,7 +190,7 @@ private:
     */
    void recursion(const glsl_type *t, char **name, size_t name_length,
                   bool row_major, const glsl_type *record_type,
-                  bool last_field);
+                  bool last_field, unsigned record_array_count);
 };
 
 void
-- 
cgit v1.2.3


From 9788700caf61ff8beee5fd836f5efd98a931a976 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <t_arceri@yahoo.com.au>
Date: Wed, 2 Sep 2015 11:29:11 +1000
Subject: glsl: assign hidden uniforms their slot id earlier

This is required so that the next patch can safely assign the slot id
to the var.

The ids are now assigned in the order we want before allocating storage
so there is no need to sort the storage array and move things around.

V2: rename variable to make code easier to follow as suggested by Jason

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/link_uniforms.cpp | 90 +++++++++++++++++++++-------------------------
 1 file changed, 41 insertions(+), 49 deletions(-)

(limited to 'src')

diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index c21ce22622e..6d277fa43ca 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -299,11 +299,12 @@ namespace {
  */
 class count_uniform_size : public program_resource_visitor {
 public:
-   count_uniform_size(struct string_to_uint_map *map)
-      : num_active_uniforms(0), num_values(0), num_shader_samplers(0),
-        num_shader_images(0), num_shader_uniform_components(0),
-        num_shader_subroutines(0),
-        is_ubo_var(false), map(map)
+   count_uniform_size(struct string_to_uint_map *map,
+                      struct string_to_uint_map *hidden_map)
+      : num_active_uniforms(0), num_hidden_uniforms(0), num_values(0),
+        num_shader_samplers(0), num_shader_images(0),
+        num_shader_uniform_components(0), num_shader_subroutines(0),
+        is_ubo_var(false), map(map), hidden_map(hidden_map)
    {
       /* empty */
    }
@@ -318,6 +319,7 @@ public:
 
    void process(ir_variable *var)
    {
+      this->current_var = var;
       this->is_ubo_var = var->is_in_buffer_block();
       if (var->is_interface_instance())
          program_resource_visitor::process(var->get_interface_type(),
@@ -331,6 +333,8 @@ public:
     */
    unsigned num_active_uniforms;
 
+   unsigned num_hidden_uniforms;
+
    /**
     * Number of data values required to back the storage for the active uniforms
     */
@@ -358,6 +362,8 @@ public:
 
    bool is_ubo_var;
 
+   struct string_to_uint_map *map;
+
 private:
    virtual void visit_field(const glsl_type *type, const char *name,
                             bool row_major)
@@ -401,7 +407,13 @@ private:
       if (this->map->get(id, name))
 	 return;
 
-      this->map->put(this->num_active_uniforms, name);
+      if (this->current_var->data.how_declared == ir_var_hidden) {
+         this->hidden_map->put(this->num_hidden_uniforms, name);
+         this->num_hidden_uniforms++;
+      } else {
+         this->map->put(this->num_active_uniforms-this->num_hidden_uniforms,
+                        name);
+      }
 
       /* Each leaf uniform occupies one entry in the list of active
        * uniforms.
@@ -410,7 +422,12 @@ private:
       this->num_values += values;
    }
 
-   struct string_to_uint_map *map;
+   struct string_to_uint_map *hidden_map;
+
+   /**
+    * Current variable being processed.
+    */
+   ir_variable *current_var;
 };
 
 } /* anonymous namespace */
@@ -965,47 +982,19 @@ link_set_image_access_qualifiers(struct gl_shader_program *prog)
 }
 
 /**
- * Sort the array of uniform storage so that the non-hidden uniforms are first
- *
- * This function sorts the list "in place."  This is important because some of
- * the storage accessible from \c uniforms has \c uniforms as its \c ralloc
- * context.  If \c uniforms is freed, some other storage will also be freed.
+ * Combine the hidden uniform hash map with the uniform hash map so that the
+ * hidden uniforms will be given indicies at the end of the uniform storage
+ * array.
  */
-static unsigned
-move_hidden_uniforms_to_end(struct gl_shader_program *prog,
-                            struct gl_uniform_storage *uniforms,
-                            unsigned num_elements)
+static void
+assign_hidden_uniform_slot_id(const char *name, unsigned hidden_id,
+                              void *closure)
 {
-   struct gl_uniform_storage *sorted_uniforms =
-      ralloc_array(prog, struct gl_uniform_storage, num_elements);
-   unsigned hidden_uniforms = 0;
-   unsigned j = 0;
-
-   /* Add the non-hidden uniforms. */
-   for (unsigned i = 0; i < num_elements; i++) {
-      if (!uniforms[i].hidden)
-         sorted_uniforms[j++] = uniforms[i];
-   }
-
-   /* Add and count the hidden uniforms. */
-   for (unsigned i = 0; i < num_elements; i++) {
-      if (uniforms[i].hidden) {
-         sorted_uniforms[j++] = uniforms[i];
-         hidden_uniforms++;
-      }
-   }
+   count_uniform_size *uniform_size = (count_uniform_size *) closure;
+   unsigned hidden_uniform_start = uniform_size->num_active_uniforms -
+      uniform_size->num_hidden_uniforms;
 
-   assert(prog->UniformHash != NULL);
-   prog->UniformHash->clear();
-   for (unsigned i = 0; i < num_elements; i++) {
-      if (sorted_uniforms[i].name != NULL)
-         prog->UniformHash->put(i, sorted_uniforms[i].name);
-   }
-
-   memcpy(uniforms, sorted_uniforms, sizeof(uniforms[0]) * num_elements);
-   ralloc_free(sorted_uniforms);
-
-   return hidden_uniforms;
+   uniform_size->map->put(hidden_uniform_start + hidden_id, name);
 }
 
 void
@@ -1029,7 +1018,8 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
     * Note: this is *NOT* the index that is returned to the application by
     * glGetUniformLocation.
     */
-   count_uniform_size uniform_size(prog->UniformHash);
+   struct string_to_uint_map *hiddenUniforms = new string_to_uint_map;
+   count_uniform_size uniform_size(prog->UniformHash, hiddenUniforms);
    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
       struct gl_shader *sh = prog->_LinkedShaders[i];
 
@@ -1077,12 +1067,17 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
 
    const unsigned num_uniforms = uniform_size.num_active_uniforms;
    const unsigned num_data_slots = uniform_size.num_values;
+   const unsigned hidden_uniforms = uniform_size.num_hidden_uniforms;
 
    /* On the outside chance that there were no uniforms, bail out.
     */
    if (num_uniforms == 0)
       return;
 
+   /* assign hidden uniforms a slot id */
+   hiddenUniforms->iterate(assign_hidden_uniform_slot_id, &uniform_size);
+   delete hiddenUniforms;
+
    struct gl_uniform_storage *uniforms =
       rzalloc_array(prog, struct gl_uniform_storage, num_uniforms);
    union gl_constant_value *data =
@@ -1116,9 +1111,6 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
              sizeof(prog->_LinkedShaders[i]->SamplerTargets));
    }
 
-   const unsigned hidden_uniforms =
-      move_hidden_uniforms_to_end(prog, uniforms, num_uniforms);
-
    /* Reserve all the explicit locations of the active uniforms. */
    for (unsigned i = 0; i < num_uniforms; i++) {
       if (uniforms[i].type->is_subroutine())
-- 
cgit v1.2.3


From dcd9cd03837545055ce2a315e7e8840cc3254d1a Mon Sep 17 00:00:00 2001
From: Timothy Arceri <t_arceri@yahoo.com.au>
Date: Sun, 30 Aug 2015 12:50:34 +1000
Subject: glsl: store uniform slot id in var location field

This will allow us to access the uniform later on without resorting to
building a name string and looking it up in UniformHash.

V3: remove line wrap change from this patch

V2: store slot number for all non-UBO uniforms to make code more
consistent, renamed explicit_binding to explicit_location and added
comment about what it does. Store the location at every shader stage.
Updated data.location comments in ir/nir.h.

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/ir.h              |  2 ++
 src/glsl/link_uniforms.cpp | 24 +++++++++++++++++++++---
 src/glsl/nir/nir.h         |  1 +
 3 files changed, 24 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index f9ddf7442b0..cf1954b1257 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -819,6 +819,8 @@ public:
        *   - Fragment shader output: one of the values from \c gl_frag_result.
        *   - Uniforms: Per-stage uniform slot number for default uniform block.
        *   - Uniforms: Index within the uniform block definition for UBO members.
+       *   - Non-UBO Uniforms: explicit location until linking then reused to
+       *     store uniform slot number.
        *   - Other: This field is not currently used.
        *
        * If the variable is a uniform, shader input, or shader output, and the
diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index 6d277fa43ca..879c4885384 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -527,7 +527,13 @@ public:
                     var->get_interface_type()->name);
          else
             process(var);
-      } else
+      } else {
+         /* Store any explicit location and reset data location so we can
+          * reuse this variable for storing the uniform slot number.
+          */
+         this->explicit_location = current_var->data.location;
+         current_var->data.location = -1;
+
          process(var);
       }
       delete this->record_next_sampler;
@@ -710,6 +716,13 @@ private:
       handle_images(base_type, &this->uniforms[id]);
       handle_subroutines(base_type, &this->uniforms[id]);
 
+      /* For array of arrays or struct arrays the base location may have
+       * already been set so dont set it again.
+       */
+      if (ubo_block_index == -1 && current_var->data.location == -1) {
+         current_var->data.location = id;
+      }
+
       /* If there is already storage associated with this uniform or if the
        * uniform is set as builtin, it means that it was set while processing
        * an earlier shader stage.  For example, we may be processing the
@@ -726,10 +739,10 @@ private:
          if (record_type != NULL) {
             const unsigned entries = MAX2(1, this->uniforms[id].array_elements);
             this->uniforms[id].remap_location =
-               current_var->data.location + field_counter;
+               this->explicit_location + field_counter;
             field_counter += entries;
          } else {
-            this->uniforms[id].remap_location = current_var->data.location;
+         this->uniforms[id].remap_location = this->explicit_location;
          }
       } else {
          /* Initialize to to indicate that no location is set */
@@ -795,6 +808,11 @@ private:
    unsigned next_image;
    unsigned next_subroutine;
 
+   /* Used to store the explicit location from current_var so that we can
+    * reuse the location field for storing the uniform slot id.
+    */
+   int explicit_location;
+
    /* Stores total struct array elements including nested structs */
    unsigned record_array_count;
 
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index fffb2f45719..3a19bd34ef0 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -278,6 +278,7 @@ typedef struct {
        *   - Fragment shader output: one of the values from \c gl_frag_result.
        *   - Uniforms: Per-stage uniform slot number for default uniform block.
        *   - Uniforms: Index within the uniform block definition for UBO members.
+       *   - Non-UBO Uniforms: uniform slot number.
        *   - Other: This field is not currently used.
        *
        * If the variable is a uniform, shader input, or shader output, and the
-- 
cgit v1.2.3


From 12af915e27e4f10bc4c29f1cc8119b28ba27d874 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <t_arceri@yahoo.com.au>
Date: Tue, 1 Sep 2015 15:52:10 +1000
Subject: glsl: make variables private

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/link_uniforms.cpp | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'src')

diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index 879c4885384..238546ba22e 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -808,6 +808,17 @@ private:
    unsigned next_image;
    unsigned next_subroutine;
 
+   /**
+    * Field counter is used to take care that uniform structures
+    * with explicit locations get sequential locations.
+    */
+   unsigned field_counter;
+
+   /**
+    * Current variable being processed.
+    */
+   ir_variable *current_var;
+
    /* Used to store the explicit location from current_var so that we can
     * reuse the location field for storing the uniform slot id.
     */
@@ -826,17 +837,6 @@ public:
 
    gl_texture_index targets[MAX_SAMPLERS];
 
-   /**
-    * Current variable being processed.
-    */
-   ir_variable *current_var;
-
-   /**
-    * Field counter is used to take care that uniform structures
-    * with explicit locations get sequential locations.
-    */
-   unsigned field_counter;
-
    /**
     * Mask of samplers used by the current shader stage.
     */
-- 
cgit v1.2.3


From 0ad44ce3735aa39391ab866c6a692eb76115b8c1 Mon Sep 17 00:00:00 2001
From: Timothy <t_arceri@yahoo.com.au>
Date: Sat, 12 Sep 2015 07:33:27 +1000
Subject: glsl: add helper for calculating offsets for struct members

V2: update comments

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/glsl_types.cpp | 26 ++++++++++++++++++++++++++
 src/glsl/glsl_types.h   |  8 ++++++++
 2 files changed, 34 insertions(+)

(limited to 'src')

diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
index 755618ac28b..97c79fa4ca1 100644
--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -1039,6 +1039,32 @@ glsl_type::component_slots() const
    return 0;
 }
 
+unsigned
+glsl_type::record_location_offset(unsigned length) const
+{
+   unsigned offset = 0;
+   const glsl_type *t = this->without_array();
+   if (t->is_record()) {
+      assert(length <= t->length);
+
+      for (unsigned i = 0; i < length; i++) {
+         const glsl_type *st = t->fields.structure[i].type;
+         const glsl_type *wa = st->without_array();
+         if (wa->is_record()) {
+            unsigned r_offset = wa->record_location_offset(wa->length);
+            offset += st->is_array() ? st->length * r_offset : r_offset;
+         } else {
+            /* We don't worry about arrays here because unless the array
+             * contains a structure or another array it only takes up a single
+             * uniform slot.
+             */
+            offset += 1;
+         }
+      }
+   }
+   return offset;
+}
+
 unsigned
 glsl_type::uniform_locations() const
 {
diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h
index 02a398f6112..860276a2b17 100644
--- a/src/glsl/glsl_types.h
+++ b/src/glsl/glsl_types.h
@@ -291,6 +291,14 @@ struct glsl_type {
     */
    unsigned component_slots() const;
 
+   /**
+    * Calculate offset between the base location of the struct in
+    * uniform storage and a struct member.
+    * For the initial call, length is the index of the member to find the
+    * offset for.
+    */
+   unsigned record_location_offset(unsigned length) const;
+
    /**
     * Calculate the number of unique values from glGetUniformLocation for the
     * elements of the type.
-- 
cgit v1.2.3


From ef8eebc6ad5d86e524426f0755c0f7d0b4c0cd3e Mon Sep 17 00:00:00 2001
From: Timothy Arceri <t_arceri@yahoo.com.au>
Date: Wed, 26 Aug 2015 22:18:36 +1000
Subject: nir: support indirect indexing samplers in struct arrays

As a bonus we get indirect support for arrays of arrays for free.

V5: couple of small clean-ups suggested by Jason.

V4: fix struct member location calculation, use nir_ssa_def rather than
nir_src for the indirect as suggested by Jason

V3: Use nir_instr_rewrite_src() with empty src rather than clearing
the use_link list directly for the old indirects as suggested by Jason

V2: Fixed validation error in debug build

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_lower_samplers.cpp | 164 +++++++++++++++++++-----------------
 1 file changed, 88 insertions(+), 76 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_lower_samplers.cpp b/src/glsl/nir/nir_lower_samplers.cpp
index 9583b457d8d..c8e1faa0334 100644
--- a/src/glsl/nir/nir_lower_samplers.cpp
+++ b/src/glsl/nir/nir_lower_samplers.cpp
@@ -24,6 +24,7 @@
  */
 
 #include "nir.h"
+#include "nir_builder.h"
 #include "../program.h"
 #include "program/hash_table.h"
 #include "ir_uniform.h"
@@ -35,105 +36,116 @@ extern "C" {
 #include "program/program.h"
 }
 
-static unsigned
-get_sampler_index(const struct gl_shader_program *shader_program,
-                  gl_shader_stage stage, const char *name)
+/* Calculate the sampler index based on array indices and also
+ * calculate the base uniform location for struct members.
+ */
+static void
+calc_sampler_offsets(nir_deref *tail, nir_tex_instr *instr,
+                     unsigned *array_elements, nir_ssa_def **indirect,
+                     nir_builder *b, unsigned *location)
 {
-   unsigned location;
-   if (!shader_program->UniformHash->get(location, name)) {
-      assert(!"failed to find sampler");
-      return 0;
+   if (tail->child == NULL)
+      return;
+
+   switch (tail->child->deref_type) {
+   case nir_deref_type_array: {
+      nir_deref_array *deref_array = nir_deref_as_array(tail->child);
+
+      assert(deref_array->deref_array_type != nir_deref_array_type_wildcard);
+
+      calc_sampler_offsets(tail->child, instr, array_elements,
+                           indirect, b, location);
+      instr->sampler_index += deref_array->base_offset * *array_elements;
+
+      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+         nir_ssa_def *mul =
+            nir_imul(b, nir_imm_int(b, *array_elements),
+                     nir_ssa_for_src(b, deref_array->indirect, 1));
+
+         nir_instr_rewrite_src(&instr->instr, &deref_array->indirect,
+                               NIR_SRC_INIT);
+
+         if (*indirect) {
+            *indirect = nir_iadd(b, *indirect, mul);
+         } else {
+            *indirect = mul;
+         }
+      }
+
+      *array_elements *= glsl_get_length(tail->type);
+       break;
    }
 
-   if (!shader_program->UniformStorage[location].sampler[stage].active) {
-      assert(!"cannot return a sampler");
-      return 0;
+   case nir_deref_type_struct: {
+      nir_deref_struct *deref_struct = nir_deref_as_struct(tail->child);
+      *location += tail->type->record_location_offset(deref_struct->index);
+      calc_sampler_offsets(tail->child, instr, array_elements,
+                           indirect, b, location);
+      break;
    }
 
-   return shader_program->UniformStorage[location].sampler[stage].index;
+   default:
+      unreachable("Invalid deref type");
+      break;
+   }
 }
 
 static void
 lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program,
-              gl_shader_stage stage, void *mem_ctx)
+              gl_shader_stage stage, nir_builder *builder)
 {
    if (instr->sampler == NULL)
       return;
 
-   /* Get the name and the offset */
    instr->sampler_index = 0;
-   char *name = ralloc_strdup(mem_ctx, instr->sampler->var->name);
+   unsigned location = instr->sampler->var->data.location;
+   unsigned array_elements = 1;
+   nir_ssa_def *indirect = NULL;
+
+   builder->cursor = nir_before_instr(&instr->instr);
+   calc_sampler_offsets(&instr->sampler->deref, instr, &array_elements,
+                        &indirect, builder, &location);
+
+   if (indirect) {
+      /* First, we have to resize the array of texture sources */
+      nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src,
+                                            instr->num_srcs + 1);
+
+      for (unsigned i = 0; i < instr->num_srcs; i++) {
+         new_srcs[i].src_type = instr->src[i].src_type;
+         nir_instr_move_src(&instr->instr, &new_srcs[i].src,
+                            &instr->src[i].src);
+      }
 
-   for (nir_deref *deref = &instr->sampler->deref;
-        deref->child; deref = deref->child) {
-      switch (deref->child->deref_type) {
-      case nir_deref_type_array: {
-         nir_deref_array *deref_array = nir_deref_as_array(deref->child);
+      ralloc_free(instr->src);
+      instr->src = new_srcs;
 
-         assert(deref_array->deref_array_type != nir_deref_array_type_wildcard);
+      /* Now we can go ahead and move the source over to being a
+       * first-class texture source.
+       */
+      instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
+      instr->num_srcs++;
+      nir_instr_rewrite_src(&instr->instr,
+                            &instr->src[instr->num_srcs - 1].src,
+                            nir_src_for_ssa(indirect));
 
-         if (deref_array->deref.child) {
-            ralloc_asprintf_append(&name, "[%u]",
-               deref_array->deref_array_type == nir_deref_array_type_direct ?
-                  deref_array->base_offset : 0);
-         } else {
-            assert(deref->child->type->base_type == GLSL_TYPE_SAMPLER);
-            instr->sampler_index = deref_array->base_offset;
-         }
-
-         /* XXX: We're assuming here that the indirect is the last array
-          * thing we have.  This should be ok for now as we don't support
-          * arrays_of_arrays yet.
-          */
-         if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
-            /* First, we have to resize the array of texture sources */
-            nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src,
-                                                  instr->num_srcs + 1);
-
-            for (unsigned i = 0; i < instr->num_srcs; i++) {
-               new_srcs[i].src_type = instr->src[i].src_type;
-               nir_instr_move_src(&instr->instr, &new_srcs[i].src,
-                                  &instr->src[i].src);
-            }
-
-            ralloc_free(instr->src);
-            instr->src = new_srcs;
-
-            /* Now we can go ahead and move the source over to being a
-             * first-class texture source.
-             */
-            instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
-            instr->num_srcs++;
-            nir_instr_move_src(&instr->instr,
-                               &instr->src[instr->num_srcs - 1].src,
-                               &deref_array->indirect);
-
-            instr->sampler_array_size = glsl_get_length(deref->type);
-         }
-         break;
-      }
-
-      case nir_deref_type_struct: {
-         nir_deref_struct *deref_struct = nir_deref_as_struct(deref->child);
-         const char *field = glsl_get_struct_elem_name(deref->type,
-                                                       deref_struct->index);
-         ralloc_asprintf_append(&name, ".%s", field);
-         break;
-      }
+      instr->sampler_array_size = array_elements;
+   }
 
-      default:
-         unreachable("Invalid deref type");
-         break;
-      }
+   if (location > shader_program->NumUniformStorage - 1 ||
+       !shader_program->UniformStorage[location].sampler[stage].active) {
+      assert(!"cannot return a sampler");
+      return;
    }
 
-   instr->sampler_index += get_sampler_index(shader_program, stage, name);
+   instr->sampler_index +=
+      shader_program->UniformStorage[location].sampler[stage].index;
 
    instr->sampler = NULL;
 }
 
 typedef struct {
-   void *mem_ctx;
+   nir_builder builder;
    const struct gl_shader_program *shader_program;
    gl_shader_stage stage;
 } lower_state;
@@ -147,7 +159,7 @@ lower_block_cb(nir_block *block, void *_state)
       if (instr->type == nir_instr_type_tex) {
          nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
          lower_sampler(tex_instr, state->shader_program, state->stage,
-                       state->mem_ctx);
+                       &state->builder);
       }
    }
 
@@ -160,7 +172,7 @@ lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_progr
 {
    lower_state state;
 
-   state.mem_ctx = ralloc_parent(impl);
+   nir_builder_init(&state.builder, impl);
    state.shader_program = shader_program;
    state.stage = stage;
 
-- 
cgit v1.2.3


From 8f1ae9abeba6b6eaf0c14f4c78c674e5afbc38aa Mon Sep 17 00:00:00 2001
From: Tapani Pälli <tapani.palli@intel.com>
Date: Mon, 14 Sep 2015 10:46:01 +0300
Subject: mesa: enable query of PROGRAM_PIPELINE_BINDING for ES 3.1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Specified in OpenGL ES 3.1 spec, Table 23.32: Program Object State.

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Marta Lofstedt <marta.lofstedt@intel.com>
---
 src/mesa/main/get_hash_params.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index c06835a1e64..e2fb2a1e3c9 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -459,6 +459,9 @@ descriptor=[
 # GL_ARB_explicit_uniform_location / GLES 3.1
   [ "MAX_UNIFORM_LOCATIONS", "CONTEXT_INT(Const.MaxUserAssignableUniformLocations), extra_ARB_explicit_uniform_location" ],
 
+# GL_ARB_separate_shader_objects / GLES 3.1
+  [ "PROGRAM_PIPELINE_BINDING", "LOC_CUSTOM, TYPE_INT, GL_PROGRAM_PIPELINE_BINDING, NO_EXTRA" ],
+
 # GL_ARB_vertex_attrib_binding / GLES 3.1
   [ "MAX_VERTEX_ATTRIB_RELATIVE_OFFSET", "CONTEXT_ENUM(Const.MaxVertexAttribRelativeOffset), NO_EXTRA" ],
   [ "MAX_VERTEX_ATTRIB_BINDINGS", "CONTEXT_ENUM(Const.MaxVertexAttribBindings), NO_EXTRA" ],
@@ -803,9 +806,6 @@ descriptor=[
 # GL_ARB_texture_gather
   [ "MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB", "CONTEXT_INT(Const.MaxProgramTextureGatherComponents), extra_ARB_texture_gather"],
 
-# GL_ARB_separate_shader_objects
-  [ "PROGRAM_PIPELINE_BINDING", "LOC_CUSTOM, TYPE_INT, GL_PROGRAM_PIPELINE_BINDING, NO_EXTRA" ],
-
 # GL_ARB_shader_atomic_counters
   [ "MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
   [ "MAX_GEOMETRY_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
-- 
cgit v1.2.3


From d9689be5c67e71518ef8687fd7fc4bc6df25b93f Mon Sep 17 00:00:00 2001
From: Tapani Pälli <tapani.palli@intel.com>
Date: Tue, 1 Sep 2015 13:53:43 +0300
Subject: mesa: return initial value for PROGRAM_SEPARABLE when not linked
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From OpenGL ES 3.1 spec (7.12):

    "Most properties set within program objects are specified not to
    take effect until the next call to LinkProgram or ProgramBinary.
    Some properties further require a successful call to either of
    these commands before taking effect. GetProgramiv returns the
    properties currently in effect for program, which may differ from
    the properties set within program since the most recent call to
    LinkProgram or ProgramBinary, which have not yet taken effect. If
    there has been no such call putting changes to pname into effect,
    initial values are returned."

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Marta Lofstedt <marta.lofstedt@intel.com>
---
 src/mesa/main/shaderapi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 7680b5875b2..f31980b40d9 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -771,7 +771,8 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname,
       return;
    }
    case GL_PROGRAM_SEPARABLE:
-      *params = shProg->SeparateShader;
+      /* If the program has not been linked, return initial value 0. */
+      *params = (shProg->LinkStatus == GL_FALSE) ? 0 : shProg->SeparateShader;
       return;
 
    /* ARB_tessellation_shader */
-- 
cgit v1.2.3


From ba02f7a3b6a0e4314753a8e5080db61241563f9c Mon Sep 17 00:00:00 2001
From: Tapani Pälli <tapani.palli@intel.com>
Date: Tue, 1 Sep 2015 13:53:44 +0300
Subject: mesa: return initial value for VALIDATE_STATUS if pipe not bound
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From OpenGL 4.5 Core spec (7.13):

    "If pipeline is a name that has been generated (without subsequent
    deletion) by GenProgramPipelines, but refers to a program pipeline
    object that has not been previously bound, the GL first creates a
    new state vector in the same manner as when BindProgramPipeline
    creates a new program pipeline object."

I interpret this as "If GetProgramPipelineiv gets called without a
bound (but valid) pipeline object, the state should reflect initial
state of a new pipeline object." This is also expected behaviour by
ES31-CTS.sepshaderobjs.PipelineApi conformance test.

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Marta Lofstedt <marta.lofstedt@intel.com>
---
 src/mesa/main/pipelineobj.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c
index 07acbf10c1d..c2e1d29ad80 100644
--- a/src/mesa/main/pipelineobj.c
+++ b/src/mesa/main/pipelineobj.c
@@ -614,7 +614,8 @@ _mesa_GetProgramPipelineiv(GLuint pipeline, GLenum pname, GLint *params)
       *params = pipe->InfoLog ? strlen(pipe->InfoLog) + 1 : 0;
       return;
    case GL_VALIDATE_STATUS:
-      *params = pipe->Validated;
+      /* If pipeline is not bound, return initial value 0. */
+      *params = (ctx->_Shader->Name != pipe->Name) ? 0 : pipe->Validated;
       return;
    case GL_VERTEX_SHADER:
       *params = pipe->CurrentProgram[MESA_SHADER_VERTEX]
-- 
cgit v1.2.3


From 7e2865064916b85243788fc69040bb981f53c4f9 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Wed, 16 Sep 2015 05:53:13 -0700
Subject: st/xa: Use PIPE_FORMAT_R8_UNORM when available

XA has been using L8_UNORM for a8 and yuv component surfaces.
This commit instead makes XA prefer R8_UNORM since it's assumed to have a
higher availability.

Also, neither of these formats is suitable as a destination format when
using destination alpha blending, so reject those operations.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/state_trackers/xa/xa_composite.c | 40 ++++++++++------------------
 src/gallium/state_trackers/xa/xa_tracker.c   | 28 +++++++++++++------
 2 files changed, 34 insertions(+), 34 deletions(-)

(limited to 'src')

diff --git a/src/gallium/state_trackers/xa/xa_composite.c b/src/gallium/state_trackers/xa/xa_composite.c
index 7cfd1e136d1..e81eebaf541 100644
--- a/src/gallium/state_trackers/xa/xa_composite.c
+++ b/src/gallium/state_trackers/xa/xa_composite.c
@@ -78,26 +78,6 @@ static const struct xa_composite_blend xa_blends[] = {
       0, 0, PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_ONE},
 };
 
-
-/*
- * The alpha value stored in a luminance texture is read by the
- * hardware as color.
- */
-static unsigned
-xa_convert_blend_for_luminance(unsigned factor)
-{
-    switch(factor) {
-    case PIPE_BLENDFACTOR_DST_ALPHA:
-	return PIPE_BLENDFACTOR_DST_COLOR;
-    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-	return PIPE_BLENDFACTOR_INV_DST_COLOR;
-    default:
-	break;
-    }
-    return factor;
-}
-
-
 static boolean
 blend_for_op(struct xa_composite_blend *blend,
 	     enum xa_composite_op op,
@@ -131,10 +111,16 @@ blend_for_op(struct xa_composite_blend *blend,
     if (!dst_pic->srf)
 	return supported;
 
-    if (dst_pic->srf->tex->format == PIPE_FORMAT_L8_UNORM) {
-	blend->rgb_src = xa_convert_blend_for_luminance(blend->rgb_src);
-	blend->rgb_dst = xa_convert_blend_for_luminance(blend->rgb_dst);
-    }
+    /*
+     * None of the hardware formats we might use for dst A8 are
+     * suitable for dst_alpha blending, since they present the
+     * alpha channel either in all color channels (L8_UNORM) or
+     * in the red channel only (R8_UNORM)
+     */
+    if ((dst_pic->srf->tex->format == PIPE_FORMAT_L8_UNORM ||
+         dst_pic->srf->tex->format == PIPE_FORMAT_R8_UNORM) &&
+        blend->alpha_dst)
+        return FALSE;
 
     /*
      * If there's no dst alpha channel, adjust the blend op so that we'll treat
@@ -298,7 +284,8 @@ picture_format_fixups(struct xa_picture *src_pic,
 	ret |= mask ? FS_MASK_SET_ALPHA : FS_SRC_SET_ALPHA;
 
     if (src_hw_format == src_pic_format) {
-	if (src->tex->format == PIPE_FORMAT_L8_UNORM)
+	if (src->tex->format == PIPE_FORMAT_L8_UNORM ||
+            src->tex->format == PIPE_FORMAT_R8_UNORM)
 	    return ((mask) ? FS_MASK_LUMINANCE : FS_SRC_LUMINANCE);
 
 	return ret;
@@ -372,7 +359,8 @@ bind_shaders(struct xa_context *ctx, const struct xa_composite *comp)
 	fs_traits |= picture_format_fixups(mask_pic, 1);
     }
 
-    if (ctx->srf->format == PIPE_FORMAT_L8_UNORM)
+    if (ctx->srf->format == PIPE_FORMAT_L8_UNORM ||
+        ctx->srf->format == PIPE_FORMAT_R8_UNORM)
 	fs_traits |= FS_DST_LUMINANCE;
 
     shader = xa_shaders_get(ctx->shaders, vs_traits, fs_traits);
diff --git a/src/gallium/state_trackers/xa/xa_tracker.c b/src/gallium/state_trackers/xa/xa_tracker.c
index 2944b16858c..4fdbdc96ae6 100644
--- a/src/gallium/state_trackers/xa/xa_tracker.c
+++ b/src/gallium/state_trackers/xa/xa_tracker.c
@@ -82,7 +82,7 @@ static const unsigned int stype_bind[XA_LAST_SURFACE_TYPE] = { 0,
 };
 
 static struct xa_format_descriptor
-xa_get_pipe_format(enum xa_formats xa_format)
+xa_get_pipe_format(struct xa_tracker *xa, enum xa_formats xa_format)
 {
     struct xa_format_descriptor fdesc;
 
@@ -102,7 +102,13 @@ xa_get_pipe_format(enum xa_formats xa_format)
 	fdesc.format = PIPE_FORMAT_B5G5R5A1_UNORM;
 	break;
     case xa_format_a8:
-	fdesc.format = PIPE_FORMAT_L8_UNORM;
+        if (xa->screen->is_format_supported(xa->screen, PIPE_FORMAT_R8_UNORM,
+                                            PIPE_TEXTURE_2D, 0,
+                                            stype_bind[xa_type_a] |
+                                            PIPE_BIND_RENDER_TARGET))
+            fdesc.format = PIPE_FORMAT_R8_UNORM;
+        else
+            fdesc.format = PIPE_FORMAT_L8_UNORM;
 	break;
     case xa_format_z24:
 	fdesc.format = PIPE_FORMAT_Z24X8_UNORM;
@@ -126,7 +132,12 @@ xa_get_pipe_format(enum xa_formats xa_format)
 	fdesc.format = PIPE_FORMAT_S8_UINT_Z24_UNORM;
 	break;
     case xa_format_yuv8:
-	fdesc.format = PIPE_FORMAT_L8_UNORM;
+        if (xa->screen->is_format_supported(xa->screen, PIPE_FORMAT_R8_UNORM,
+                                            PIPE_TEXTURE_2D, 0,
+                                            stype_bind[xa_type_yuv_component]))
+            fdesc.format = PIPE_FORMAT_R8_UNORM;
+        else
+            fdesc.format = PIPE_FORMAT_L8_UNORM;
 	break;
     default:
 	fdesc.xa_format = xa_format_unknown;
@@ -184,7 +195,8 @@ xa_tracker_create(int drm_fd)
 	for (i = 0; i < num_preferred[stype]; ++i) {
 	    xa_format = preferred[stype][i];
 
-	    struct xa_format_descriptor fdesc = xa_get_pipe_format(xa_format);
+	    struct xa_format_descriptor fdesc =
+                xa_get_pipe_format(xa, xa_format);
 
 	    if (xa->screen->is_format_supported(xa->screen, fdesc.format,
 						PIPE_TEXTURE_2D, 0, bind)) {
@@ -259,7 +271,7 @@ xa_get_format_stype_depth(struct xa_tracker *xa,
     int found = 0;
 
     for (i = xa->format_map[stype][0]; i <= xa->format_map[stype][1]; ++i) {
-	fdesc = xa_get_pipe_format(xa->supported_formats[i]);
+	fdesc = xa_get_pipe_format(xa, xa->supported_formats[i]);
 	if (fdesc.xa_format != xa_format_unknown &&
 	    xa_format_depth(fdesc.xa_format) == depth) {
 	    found = 1;
@@ -277,7 +289,7 @@ XA_EXPORT int
 xa_format_check_supported(struct xa_tracker *xa,
 			  enum xa_formats xa_format, unsigned int flags)
 {
-    struct xa_format_descriptor fdesc = xa_get_pipe_format(xa_format);
+    struct xa_format_descriptor fdesc = xa_get_pipe_format(xa, xa_format);
     unsigned int bind;
 
     if (fdesc.xa_format == xa_format_unknown)
@@ -328,7 +340,7 @@ surface_create(struct xa_tracker *xa,
     if (xa_format == xa_format_unknown)
 	fdesc = xa_get_format_stype_depth(xa, stype, depth);
     else
-	fdesc = xa_get_pipe_format(xa_format);
+	fdesc = xa_get_pipe_format(xa, xa_format);
 
     if (fdesc.xa_format == xa_format_unknown)
 	return NULL;
@@ -440,7 +452,7 @@ xa_surface_redefine(struct xa_surface *srf,
     if (xa_format == xa_format_unknown)
 	fdesc = xa_get_format_stype_depth(xa, stype, depth);
     else
-	fdesc = xa_get_pipe_format(xa_format);
+	fdesc = xa_get_pipe_format(xa, xa_format);
 
     if (width == template->width0 && height == template->height0 &&
 	template->format == fdesc.format &&
-- 
cgit v1.2.3


From bd016a2601a741799bc76734deae0cb9ebcb2b8f Mon Sep 17 00:00:00 2001
From: Ulrich Weigand <uweigand@de.ibm.com>
Date: Tue, 15 Sep 2015 15:23:26 +0200
Subject: mesa: Fix texture compression on big-endian systems

Various pieces of code to create compressed textures will first
generate an uncompressed RGBA texture into a temporary buffer,
and then read from that buffer while creating the final compressed
texture in the requested format.

The code reading from the temporary buffer assumes the buffer is
formatted as an array of bytes in RGBA order.  However, the buffer
is filled using a _mesa_texstore call with MESA_FORMAT_R8G8B8A8_UNORM
format -- this is defined as an array of *integers* holding the
RGBA values in packed format (least-significant to most-significant).
This means incorrect bytes are accessed on big-endian systems.

This patch fixes this by using the MESA_FORMAT_A8B8G8R8_UNORM format
instead on big-endian systems when filling the buffer.  This fixes
about 100 piglit test case failures on s390x for me.

Signed-off-by: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Tested-by: Oded Gabbay <oded.gabbay@gmail.com>
Cc: "10.6" "11.0" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Dave Airlie <airlied@gmail.com>
---
 src/mesa/main/texcompress_bptc.c | 3 ++-
 src/mesa/main/texcompress_fxt1.c | 3 ++-
 src/mesa/main/texcompress_rgtc.c | 6 ++++--
 src/mesa/main/texcompress_s3tc.c | 9 ++++++---
 4 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/texcompress_bptc.c b/src/mesa/main/texcompress_bptc.c
index a600180e236..f0f6553a01b 100644
--- a/src/mesa/main/texcompress_bptc.c
+++ b/src/mesa/main/texcompress_bptc.c
@@ -1291,7 +1291,8 @@ _mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS)
       tempImageSlices[0] = (GLubyte *) tempImage;
       _mesa_texstore(ctx, dims,
                      baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                      rgbaRowStride, tempImageSlices,
                      srcWidth, srcHeight, srcDepth,
                      srcFormat, srcType, srcAddr,
diff --git a/src/mesa/main/texcompress_fxt1.c b/src/mesa/main/texcompress_fxt1.c
index d605e255962..ae339e11d39 100644
--- a/src/mesa/main/texcompress_fxt1.c
+++ b/src/mesa/main/texcompress_fxt1.c
@@ -130,7 +130,8 @@ _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
       tempImageSlices[0] = (GLubyte *) tempImage;
       _mesa_texstore(ctx, dims,
                      baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                      rgbaRowStride, tempImageSlices,
                      srcWidth, srcHeight, srcDepth,
                      srcFormat, srcType, srcAddr,
diff --git a/src/mesa/main/texcompress_rgtc.c b/src/mesa/main/texcompress_rgtc.c
index 66de1f17d8b..8cab7a56b1a 100644
--- a/src/mesa/main/texcompress_rgtc.c
+++ b/src/mesa/main/texcompress_rgtc.c
@@ -196,9 +196,11 @@ _mesa_texstore_rg_rgtc2(TEXSTORE_PARAMS)
           dstFormat == MESA_FORMAT_LA_LATC2_UNORM);
 
    if (baseInternalFormat == GL_RG)
-      tempFormat = MESA_FORMAT_R8G8_UNORM;
+      tempFormat = _mesa_little_endian() ? MESA_FORMAT_R8G8_UNORM
+                                         : MESA_FORMAT_G8R8_UNORM;
    else
-      tempFormat = MESA_FORMAT_L8A8_UNORM;
+      tempFormat = _mesa_little_endian() ? MESA_FORMAT_L8A8_UNORM
+                                         : MESA_FORMAT_A8L8_UNORM;
 
    rgRowStride = 2 * srcWidth * sizeof(GLubyte);
    tempImage = malloc(srcWidth * srcHeight * 2 * sizeof(GLubyte));
diff --git a/src/mesa/main/texcompress_s3tc.c b/src/mesa/main/texcompress_s3tc.c
index 6cfe06a9910..7ddb0ed38c0 100644
--- a/src/mesa/main/texcompress_s3tc.c
+++ b/src/mesa/main/texcompress_s3tc.c
@@ -198,7 +198,8 @@ _mesa_texstore_rgba_dxt1(TEXSTORE_PARAMS)
       tempImageSlices[0] = (GLubyte *) tempImage;
       _mesa_texstore(ctx, dims,
                      baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                      rgbaRowStride, tempImageSlices,
                      srcWidth, srcHeight, srcDepth,
                      srcFormat, srcType, srcAddr,
@@ -255,7 +256,8 @@ _mesa_texstore_rgba_dxt3(TEXSTORE_PARAMS)
       tempImageSlices[0] = (GLubyte *) tempImage;
       _mesa_texstore(ctx, dims,
                      baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                      rgbaRowStride, tempImageSlices,
                      srcWidth, srcHeight, srcDepth,
                      srcFormat, srcType, srcAddr,
@@ -311,7 +313,8 @@ _mesa_texstore_rgba_dxt5(TEXSTORE_PARAMS)
       tempImageSlices[0] = (GLubyte *) tempImage;
       _mesa_texstore(ctx, dims,
                      baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                      rgbaRowStride, tempImageSlices,
                      srcWidth, srcHeight, srcDepth,
                      srcFormat, srcType, srcAddr,
-- 
cgit v1.2.3


From 8ecaef967d6f2775bf7114d857baf3aaa13643a7 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Tue, 8 Sep 2015 16:43:51 -0700
Subject: nir: Add a generic instruction index

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/nir.c | 18 ++++++++++++++++++
 src/glsl/nir/nir.h |  4 ++++
 2 files changed, 22 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 96cf94c11d5..1ff8f217407 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1435,6 +1435,24 @@ nir_index_ssa_defs(nir_function_impl *impl)
    impl->ssa_alloc = index;
 }
 
+static bool
+index_instrs_block(nir_block *block, void *state)
+{
+   unsigned *index = state;
+   nir_foreach_instr(block, instr)
+      instr->index = (*index)++;
+
+   return true;
+}
+
+unsigned
+nir_index_instrs(nir_function_impl *impl)
+{
+   unsigned index = 0;
+   nir_foreach_block(impl, index_instrs_block, &index);
+   return index;
+}
+
 gl_system_value
 nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
 {
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 3a19bd34ef0..37d36822679 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -418,6 +418,9 @@ typedef struct nir_instr {
    nir_instr_type type;
    struct nir_block *block;
 
+   /** generic instruction index. */
+   unsigned index;
+
    /* A temporary for optimization and analysis passes to use for storing
     * flags.  For instance, DCE uses this to store the "dead/live" info.
     */
@@ -1773,6 +1776,7 @@ nir_loop *nir_block_get_following_loop(nir_block *block);
 void nir_index_local_regs(nir_function_impl *impl);
 void nir_index_global_regs(nir_shader *shader);
 void nir_index_ssa_defs(nir_function_impl *impl);
+unsigned nir_index_instrs(nir_function_impl *impl);
 
 void nir_index_blocks(nir_function_impl *impl);
 
-- 
cgit v1.2.3


From ddffe30f404b7dffef2b90879023433ecc7df3e3 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Mon, 14 Sep 2015 12:25:28 -0700
Subject: nir: Add comments to nir_index_instrs and nir_index_ssa_defs

The provided indices have the very nice property that if A dominates B then
A->index <= B->index.  We should document that somewhere.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/nir.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 1ff8f217407..23071eff65b 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1427,6 +1427,10 @@ index_ssa_block(nir_block *block, void *state)
    return true;
 }
 
+/**
+ * The indices are applied top-to-bottom which has the very nice property
+ * that, if A dominates B, then A->index <= B->index.
+ */
 void
 nir_index_ssa_defs(nir_function_impl *impl)
 {
@@ -1445,6 +1449,10 @@ index_instrs_block(nir_block *block, void *state)
    return true;
 }
 
+/**
+ * The indices are applied top-to-bottom which has the very nice property
+ * that, if A dominates B, then A->index <= B->index.
+ */
 unsigned
 nir_index_instrs(nir_function_impl *impl)
 {
-- 
cgit v1.2.3


From a6c467d6c574baf6636814b51f1dafe2557e22f6 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Tue, 8 Sep 2015 15:18:01 -0700
Subject: nir: Add a pass to rewrite uses of vecN sources to the vecN
 destination

v2 (Jason Ekstrand):
 - Handle non-SSA sources and destinations

Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
---
 src/glsl/Makefile.sources                    |   1 +
 src/glsl/nir/nir.h                           |   1 +
 src/glsl/nir/nir_move_vec_src_uses_to_dest.c | 197 +++++++++++++++++++++++++++
 3 files changed, 199 insertions(+)
 create mode 100644 src/glsl/nir/nir_move_vec_src_uses_to_dest.c

(limited to 'src')

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index da7fdf95902..07877bb7993 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -50,6 +50,7 @@ NIR_FILES = \
 	nir/nir_lower_var_copies.c \
 	nir/nir_lower_vec_to_movs.c \
 	nir/nir_metadata.c \
+	nir/nir_move_vec_src_uses_to_dest.c \
 	nir/nir_normalize_cubemap_coords.c \
 	nir/nir_opt_constant_folding.c \
 	nir/nir_opt_copy_propagate.c \
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 37d36822679..f64bb2837c3 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1825,6 +1825,7 @@ void nir_lower_vars_to_ssa(nir_shader *shader);
 
 void nir_remove_dead_variables(nir_shader *shader);
 
+void nir_move_vec_src_uses_to_dest(nir_shader *shader);
 void nir_lower_vec_to_movs(nir_shader *shader);
 void nir_lower_alu_to_scalar(nir_shader *shader);
 void nir_lower_load_const_to_scalar(nir_shader *shader);
diff --git a/src/glsl/nir/nir_move_vec_src_uses_to_dest.c b/src/glsl/nir/nir_move_vec_src_uses_to_dest.c
new file mode 100644
index 00000000000..4c9032dfaf3
--- /dev/null
+++ b/src/glsl/nir/nir_move_vec_src_uses_to_dest.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements a pass that tries to move uses of vecN sources to their
+ * destinations.  This is kind of like an inverse copy-propagation pass.
+ * For instance, if you have
+ *
+ * ssa_1 = vec4(a, b, c, d)
+ * ssa_2 = fadd(a, b)
+ *
+ * This will be turned into
+ *
+ * ssa_1 = vec4(a, b, c, d)
+ * ssa_2 = fadd(ssa_1.x, ssa_1.y)
+ *
+ * While this is "worse" because it adds a bunch of unneeded dependencies, it
+ * actually makes it much easier for vec4-based backends to coalesce the MOV's
+ * that result from the vec4 operation because it doesn't have to worry about
+ * quite as many reads.
+ */
+
+/* Returns true if the given SSA def dominates the instruction.  An SSA def is
+ * considered to *not* dominate the instruction that defines it.
+ */
+static bool
+ssa_def_dominates_instr(nir_ssa_def *def, nir_instr *instr)
+{
+   if (instr->index <= def->parent_instr->index) {
+      return false;
+   } else if (def->parent_instr->block == instr->block) {
+      return def->parent_instr->index < instr->index;
+   } else {
+      return nir_block_dominates(def->parent_instr->block, instr->block);
+   }
+}
+
+static bool
+move_vec_src_uses_to_dest_block(nir_block *block, void *shader)
+{
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+
+      nir_alu_instr *vec = nir_instr_as_alu(instr);
+
+      switch (vec->op) {
+      case nir_op_vec2:
+      case nir_op_vec3:
+      case nir_op_vec4:
+         break;
+      default:
+         continue; /* The loop */
+      }
+
+      /* Can't handle non-SSA vec operations */
+      if (!vec->dest.dest.is_ssa)
+         continue;
+
+      /* Can't handle saturation */
+      if (vec->dest.saturate)
+         continue;
+
+      /* First, mark all of the sources we are going to consider for rewriting
+       * to the destination
+       */
+      int srcs_remaining = 0;
+      for (unsigned i = 0; i < nir_op_infos[vec->op].num_inputs; i++) {
+         /* We can't rewrite a source if it's not in SSA form */
+         if (!vec->src[i].src.is_ssa)
+            continue;
+
+         /* We can't rewrite a source if it has modifiers */
+         if (vec->src[i].abs || vec->src[i].negate)
+            continue;
+
+         srcs_remaining |= 1 << i;
+      }
+
+      /* We can't actually do anything with this instruction */
+      if (srcs_remaining == 0)
+         continue;
+
+      for (unsigned i; i = ffs(srcs_remaining) - 1, srcs_remaining;) {
+         int8_t swizzle[4] = { -1, -1, -1, -1 };
+
+         for (unsigned j = i; j < nir_op_infos[vec->op].num_inputs; j++) {
+            if (vec->src[j].src.ssa != vec->src[i].src.ssa)
+               continue;
+
+            /* Mark the given channel as having been handled */
+            srcs_remaining &= ~(1 << j);
+
+            /* Mark the appropriate channel as coming from src j */
+            swizzle[vec->src[j].swizzle[0]] = j;
+         }
+
+         nir_foreach_use_safe(vec->src[i].src.ssa, use) {
+            if (use->parent_instr == &vec->instr)
+               continue;
+
+            /* We need to dominate the use if we are going to rewrite it */
+            if (!ssa_def_dominates_instr(&vec->dest.dest.ssa, use->parent_instr))
+               continue;
+
+            /* For now, we'll just rewrite ALU instructions */
+            if (use->parent_instr->type != nir_instr_type_alu)
+               continue;
+
+            assert(use->is_ssa);
+
+            nir_alu_instr *use_alu = nir_instr_as_alu(use->parent_instr);
+
+            /* Figure out which source we're actually looking at */
+            nir_alu_src *use_alu_src = exec_node_data(nir_alu_src, use, src);
+            unsigned src_idx = use_alu_src - use_alu->src;
+            assert(src_idx < nir_op_infos[use_alu->op].num_inputs);
+
+            bool can_reswizzle = true;
+            for (unsigned j = 0; j < 4; j++) {
+               if (!nir_alu_instr_channel_used(use_alu, src_idx, j))
+                  continue;
+
+               if (swizzle[use_alu_src->swizzle[j]] == -1) {
+                  can_reswizzle = false;
+                  break;
+               }
+            }
+
+            if (!can_reswizzle)
+               continue;
+
+            /* At this point, we have determined that the given use can be
+             * reswizzled to actually use the destination of the vecN operation.
+             * Go ahead and rewrite it as needed.
+             */
+            nir_instr_rewrite_src(use->parent_instr, use,
+                                  nir_src_for_ssa(&vec->dest.dest.ssa));
+            for (unsigned j = 0; j < 4; j++) {
+               if (!nir_alu_instr_channel_used(use_alu, src_idx, j))
+                  continue;
+
+               use_alu_src->swizzle[j] = swizzle[use_alu_src->swizzle[j]];
+            }
+         }
+      }
+   }
+
+   return true;
+}
+
+static void
+nir_move_vec_src_uses_to_dest_impl(nir_shader *shader, nir_function_impl *impl)
+{
+   nir_metadata_require(impl, nir_metadata_dominance);
+
+   nir_index_instrs(impl);
+   nir_foreach_block(impl, move_vec_src_uses_to_dest_block, shader);
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+}
+
+void
+nir_move_vec_src_uses_to_dest(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         nir_move_vec_src_uses_to_dest_impl(shader, overload->impl);
+   }
+}
-- 
cgit v1.2.3


From fc11dbe13f3470ff2a4cb91c6b063db2456664da Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Tue, 8 Sep 2015 16:45:57 -0700
Subject: i965/vec4: Use nir_move_vec_src_uses_to_dest

The idea here is that it gives register coalescing a little bit of a
helping hand.  It doesn't actually fix the coalescing problems, but it
seems to help a good bit.

Shader-db results for vec4 programs on Haswell:

   total instructions in shared programs: 1746280 -> 1683959 (-3.57%)
   instructions in affected programs:     1259166 -> 1196845 (-4.95%)
   helped:                                11363
   HURT:                                  148

v2 (Jason Ekstrand):
 - Run nir_move_vec_src_uses_to_dest after going out of SSA
 - New shader-db numbers

Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
---
 src/mesa/drivers/dri/i965/brw_nir.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index f326b239d74..9a0bbb06847 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -187,6 +187,9 @@ brw_create_nir(struct brw_context *brw,
    nir_validate_shader(nir);
 
    if (!is_scalar) {
+      nir_move_vec_src_uses_to_dest(nir);
+      nir_validate_shader(nir);
+
       nir_lower_vec_to_movs(nir);
       nir_validate_shader(nir);
    }
-- 
cgit v1.2.3


From 8d3b92af21afb58b6a65e18bb05785d7aae72c27 Mon Sep 17 00:00:00 2001
From: Boyan Ding <boyan.j.ding@gmail.com>
Date: Sun, 30 Aug 2015 15:07:33 +0800
Subject: vc4: Try to pair up instructions when only one of them has PM bit

Instructions with difference in PM field can actually be paired up if
the one without PM doesn't do packing/unpacking and non-NOP
packing/unpacking operations from PM instruction aren't added to the
other without PM.

total instructions in shared programs: 48209 -> 47460 (-1.55%)
instructions in affected programs:     11688 -> 10939 (-6.41%)

Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/gallium/drivers/vc4/vc4_qpu.c | 123 +++++++++++++++++++++++---------------
 1 file changed, 76 insertions(+), 47 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/vc4/vc4_qpu.c b/src/gallium/drivers/vc4/vc4_qpu.c
index f67e3f8b768..6aa6b24d94c 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.c
+++ b/src/gallium/drivers/vc4/vc4_qpu.c
@@ -454,8 +454,7 @@ qpu_merge_inst(uint64_t a, uint64_t b)
                                 QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
 
         /* Misc fields that have to match exactly. */
-        ok = ok && merge_fields(&merge, a, b, QPU_SF | QPU_PM,
-                                ~0);
+        ok = ok && merge_fields(&merge, a, b, QPU_SF, ~0);
 
         if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK,
                           QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) {
@@ -493,64 +492,94 @@ qpu_merge_inst(uint64_t a, uint64_t b)
                         return 0;
         }
 
-        /* packing: Make sure that non-NOP packs agree, then deal with
-         * special-case failing of adding a non-NOP pack to something with a
-         * NOP pack.
-         */
-        if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0))
-                return 0;
-        bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) !=
-                           QPU_GET_FIELD(merge, QPU_PACK));
-        bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) !=
-                           QPU_GET_FIELD(merge, QPU_PACK));
-        if (!(merge & QPU_PM)) {
-                /* Make sure we're not going to be putting a new
-                 * a-file packing on either half.
+        if (!merge_fields(&merge, a, b, QPU_PM, ~0)) {
+                /* If one instruction has PM bit set and the other not, the
+                 * one without PM shouldn't do packing/unpacking, and we
+                 * have to make sure non-NOP packing/unpacking from PM
+                 * instruction aren't added to it.
                  */
-                if (new_a_pack && writes_a_file(a))
-                        return 0;
+                uint64_t temp;
 
-                if (new_b_pack && writes_a_file(b))
-                        return 0;
-        } else {
-                /* Make sure we're not going to be putting new MUL packing on
-                 * either half.
-                 */
-                if (new_a_pack && QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP)
-                        return 0;
+                /* Let a be the one with PM bit */
+                if (!(a & QPU_PM)) {
+                        temp = a;
+                        a = b;
+                        b = temp;
+                }
 
-                if (new_b_pack && QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
+                if ((b & (QPU_PACK_MASK | QPU_UNPACK_MASK)) != 0)
                         return 0;
-        }
 
-        /* unpacking: Make sure that non-NOP unpacks agree, then deal with
-         * special-case failing of adding a non-NOP unpack to something with a
-         * NOP unpack.
-         */
-        if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0))
-                return 0;
-        bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) !=
-                             QPU_GET_FIELD(merge, QPU_UNPACK));
-        bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) !=
-                             QPU_GET_FIELD(merge, QPU_UNPACK));
-        if (!(merge & QPU_PM)) {
-                /* Make sure we're not going to be putting a new
-                 * a-file packing on either half.
-                 */
-                if (new_a_unpack && QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP)
+                if ((a & QPU_PACK_MASK) != 0 &&
+                    QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
                         return 0;
 
-                if (new_b_unpack && QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP)
+                if ((a & QPU_UNPACK_MASK) != 0 && reads_r4(b))
                         return 0;
         } else {
-                /* Make sure we're not going to be putting new r4 unpack on
-                 * either half.
+                /* packing: Make sure that non-NOP packs agree, then deal with
+                 * special-case failing of adding a non-NOP pack to something
+                 * with a NOP pack.
                  */
-                if (new_a_unpack && reads_r4(a))
+                if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0))
                         return 0;
+                bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) !=
+                                QPU_GET_FIELD(merge, QPU_PACK));
+                bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) !=
+                                QPU_GET_FIELD(merge, QPU_PACK));
+                if (!(merge & QPU_PM)) {
+                        /* Make sure we're not going to be putting a new
+                         * a-file packing on either half.
+                         */
+                        if (new_a_pack && writes_a_file(a))
+                                return 0;
+
+                        if (new_b_pack && writes_a_file(b))
+                                return 0;
+                } else {
+                        /* Make sure we're not going to be putting new MUL
+                         * packing on either half.
+                         */
+                        if (new_a_pack &&
+                            QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP)
+                                return 0;
+
+                        if (new_b_pack &&
+                            QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
+                                return 0;
+                }
 
-                if (new_b_unpack && reads_r4(b))
+                /* unpacking: Make sure that non-NOP unpacks agree, then deal
+                 * with special-case failing of adding a non-NOP unpack to
+                 * something with a NOP unpack.
+                 */
+                if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0))
                         return 0;
+                bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) !=
+                                QPU_GET_FIELD(merge, QPU_UNPACK));
+                bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) !=
+                                QPU_GET_FIELD(merge, QPU_UNPACK));
+                if (!(merge & QPU_PM)) {
+                        /* Make sure we're not going to be putting a new
+                         * a-file unpacking on either half.
+                         */
+                        if (new_a_unpack &&
+                            QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP)
+                                return 0;
+
+                        if (new_b_unpack &&
+                            QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP)
+                                return 0;
+                } else {
+                        /* Make sure we're not going to be putting new r4
+                         * unpack on either half.
+                         */
+                        if (new_a_unpack && reads_r4(a))
+                                return 0;
+
+                        if (new_b_unpack && reads_r4(b))
+                                return 0;
+                }
         }
 
         if (ok)
-- 
cgit v1.2.3


From 323c91250682ac931941047f282a613c74b1ba26 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Tue, 15 Sep 2015 01:32:40 -0400
Subject: nv50,nvc0: detect underlying resource changes and update tic

When updating texture buffers, we might end up replacing the whole
buffer. Check that the tic address matches the resource address, and if
not, update the tic and reupload it.

This fixes:
  arb_direct_state_access-texture-buffer
  arb_texture_buffer_object-data-sync

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/nouveau/nv50/nv50_tex.c | 21 +++++++++++++++++++++
 src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 22 ++++++++++++++++++++++
 2 files changed, 43 insertions(+)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
index fc6374d1b1b..6083ea995c8 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
@@ -221,6 +221,26 @@ nv50_create_texture_view(struct pipe_context *pipe,
    return &view->pipe;
 }
 
+static void
+nv50_update_tic(struct nv50_context *nv50, struct nv50_tic_entry *tic,
+                struct nv04_resource *res)
+{
+   uint64_t address = res->address;
+   if (res->base.target != PIPE_BUFFER)
+      return;
+   address += tic->pipe.u.buf.first_element *
+      util_format_get_blocksize(tic->pipe.format);
+   if (tic->tic[1] == (uint32_t)address &&
+       (tic->tic[2] & 0xff) == address >> 32)
+      return;
+
+   nv50_screen_tic_unlock(nv50->screen, tic);
+   tic->id = -1;
+   tic->tic[1] = address;
+   tic->tic[2] &= 0xffffff00;
+   tic->tic[2] |= address >> 32;
+}
+
 static bool
 nv50_validate_tic(struct nv50_context *nv50, int s)
 {
@@ -240,6 +260,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
          continue;
       }
       res = &nv50_miptree(tic->pipe.texture)->base;
+      nv50_update_tic(nv50, tic, res);
 
       if (tic->id < 0) {
          tic->id = nv50_screen_tic_alloc(nv50->screen, tic);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index d19082e0e15..2dd100ffdc7 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -226,6 +226,26 @@ nvc0_create_texture_view(struct pipe_context *pipe,
    return &view->pipe;
 }
 
+static void
+nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
+                struct nv04_resource *res)
+{
+   uint64_t address = res->address;
+   if (res->base.target != PIPE_BUFFER)
+      return;
+   address += tic->pipe.u.buf.first_element *
+      util_format_get_blocksize(tic->pipe.format);
+   if (tic->tic[1] == (uint32_t)address &&
+       (tic->tic[2] & 0xff) == address >> 32)
+      return;
+
+   nvc0_screen_tic_unlock(nvc0->screen, tic);
+   tic->id = -1;
+   tic->tic[1] = address;
+   tic->tic[2] &= 0xffffff00;
+   tic->tic[2] |= address >> 32;
+}
+
 static bool
 nvc0_validate_tic(struct nvc0_context *nvc0, int s)
 {
@@ -247,6 +267,7 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
          continue;
       }
       res = nv04_resource(tic->pipe.texture);
+      nvc0_update_tic(nvc0, tic, res);
 
       if (tic->id < 0) {
          tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
@@ -313,6 +334,7 @@ nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
          continue;
       }
       res = nv04_resource(tic->pipe.texture);
+      nvc0_update_tic(nvc0, tic, res);
 
       if (tic->id < 0) {
          tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
-- 
cgit v1.2.3


From e844e1007d3baac09ff2cc78879d6974be18ecaf Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Wed, 16 Sep 2015 22:17:18 -0400
Subject: nv50,nvc0: flush texture cache in presence of coherent bufs

This fixes the newly-added arb_texture_buffer_object-bufferstorage
piglit test.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 19 +++++++++++++++++++
 src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 20 ++++++++++++++++++++
 2 files changed, 39 insertions(+)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index e7984734af9..f5f47087bef 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -768,6 +768,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 {
    struct nv50_context *nv50 = nv50_context(pipe);
    struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   bool tex_dirty = false;
    int i, s;
 
    /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
@@ -797,6 +798,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 
    push->kick_notify = nv50_draw_vbo_kick_notify;
 
+   /* TODO: Instead of iterating over all the buffer resources looking for
+    * coherent buffers, keep track of a context-wide count.
+    */
    for (s = 0; s < 3 && !nv50->cb_dirty; ++s) {
       uint32_t valid = nv50->constbuf_valid[s];
 
@@ -824,6 +828,21 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       nv50->cb_dirty = false;
    }
 
+   for (s = 0; s < 3 && !tex_dirty; ++s) {
+      for (i = 0; i < nv50->num_textures[s] && !tex_dirty; ++i) {
+         if (!nv50->textures[s][i] ||
+             nv50->textures[s][i]->texture->target != PIPE_BUFFER)
+            continue;
+         if (nv50->textures[s][i]->texture->flags &
+             PIPE_RESOURCE_FLAG_MAP_COHERENT)
+            tex_dirty = true;
+      }
+   }
+   if (tex_dirty) {
+      BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
+      PUSH_DATA (push, 0x20);
+   }
+
    if (nv50->vbo_fifo) {
       nv50_push_vbo(nv50, info);
       push->kick_notify = nv50_default_kick_notify;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index 6f9e7906713..188c7d7cdc8 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -899,6 +899,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 
    push->kick_notify = nvc0_draw_vbo_kick_notify;
 
+   /* TODO: Instead of iterating over all the buffer resources looking for
+    * coherent buffers, keep track of a context-wide count.
+    */
    for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) {
       uint32_t valid = nvc0->constbuf_valid[s];
 
@@ -924,6 +927,23 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       nvc0->cb_dirty = false;
    }
 
+   for (s = 0; s < 5; ++s) {
+      for (int i = 0; i < nvc0->num_textures[s]; ++i) {
+         struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
+         struct pipe_resource *res;
+         if (!tic)
+            continue;
+         res = nvc0->textures[s][i]->texture;
+         if (res->target != PIPE_BUFFER ||
+             !(res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
+            continue;
+
+         BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
+         PUSH_DATA (push, (tic->id << 4) | 1);
+         NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
+      }
+   }
+
    if (nvc0->state.vbo_mode) {
       nvc0_push_vbo(nvc0, info);
       push->kick_notify = nvc0_default_kick_notify;
-- 
cgit v1.2.3


From e523f69b1d2f0cb3ff7659e3c55b9a2e40240c9c Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 10 Sep 2015 17:25:18 -0400
Subject: freedreno/ir3: switch to shader_enums.h interp constants

A small step towards un-TGSI'ifying ir3.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/a3xx/fd3_program.c   |  5 ++-
 src/gallium/drivers/freedreno/a4xx/fd4_program.c   |  5 ++-
 .../drivers/freedreno/ir3/ir3_compiler_nir.c       | 40 +++++-----------------
 src/gallium/drivers/freedreno/ir3/ir3_shader.h     | 11 +++---
 4 files changed, 20 insertions(+), 41 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index b5360797745..ef8a849617f 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -394,7 +394,6 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
 
 		/* figure out VARYING_INTERP / FLAT_SHAD register values: */
 		for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count; ) {
-			uint32_t interp = fp->inputs[j].interpolate;
 
 			/* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
 			 * instead.. rather than -8 everywhere else..
@@ -406,8 +405,8 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
 			 */
 			debug_assert((inloc % 4) == 0);
 
-			if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
-					((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
+			if ((fp->inputs[j].interpolate == INTERP_QUALIFIER_FLAT) ||
+					(fp->inputs[j].rasterflat && emit->rasterflat)) {
 				uint32_t loc = inloc;
 				for (i = 0; i < 4; i++, loc++) {
 					vinterp[loc / 16] |= FLAT << ((loc % 16) * 2);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
index a3d7123ccee..619eb860111 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
@@ -492,7 +492,6 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
 		 */
 		/* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
 		for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
-			uint32_t interp = s[FS].v->inputs[j].interpolate;
 
 			/* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
 			 * instead.. rather than -8 everywhere else..
@@ -504,8 +503,8 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
 			 */
 			debug_assert((inloc % 4) == 0);
 
-			if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
-					((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
+			if ((s[FS].v->inputs[j].interpolate == INTERP_QUALIFIER_FLAT) ||
+					(s[FS].v->inputs[j].rasterflat && emit->rasterflat)) {
 				uint32_t loc = inloc;
 
 				for (i = 0; i < 4; i++, loc++) {
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 83a138515b5..e4dbe64f753 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1359,7 +1359,7 @@ static void add_sysval_input(struct ir3_compile *ctx, unsigned name,
 	so->inputs[n].semantic = ir3_semantic_name(name, 0);
 	so->inputs[n].compmask = 1;
 	so->inputs[n].regid = r;
-	so->inputs[n].interpolate = TGSI_INTERPOLATE_CONSTANT;
+	so->inputs[n].interpolate = INTERP_QUALIFIER_FLAT;
 	so->total_in++;
 
 	ctx->ir->ninputs = MAX2(ctx->ir->ninputs, r + 1);
@@ -2141,23 +2141,9 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
 
 	so->inputs[n].compmask = (1 << ncomp) - 1;
 	so->inputs[n].inloc = ctx->next_inloc;
-	so->inputs[n].interpolate = 0;
+	so->inputs[n].interpolate = INTERP_QUALIFIER_NONE;
 	so->inputs_count = MAX2(so->inputs_count, n + 1);
-
-	/* the fdN_program_emit() code expects tgsi consts here, so map
-	 * things back to tgsi for now:
-	 */
-	switch (in->data.interpolation) {
-	case INTERP_QUALIFIER_FLAT:
-		so->inputs[n].interpolate = TGSI_INTERPOLATE_CONSTANT;
-		break;
-	case INTERP_QUALIFIER_NOPERSPECTIVE:
-		so->inputs[n].interpolate = TGSI_INTERPOLATE_LINEAR;
-		break;
-	case INTERP_QUALIFIER_SMOOTH:
-		so->inputs[n].interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
-		break;
-	}
+	so->inputs[n].interpolate = in->data.interpolation;
 
 	if (ctx->so->type == SHADER_FRAGMENT) {
 		unsigned semantic_name, semantic_index;
@@ -2183,27 +2169,19 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
 			} else {
 				bool use_ldlv = false;
 
-				/* with NIR, we need to infer TGSI_INTERPOLATE_COLOR
-				 * from the semantic name:
+				/* detect the special case for front/back colors where
+				 * we need to do flat vs smooth shading depending on
+				 * rast state:
 				 */
 				if ((in->data.interpolation == INTERP_QUALIFIER_NONE) &&
 						((semantic_name == TGSI_SEMANTIC_COLOR) ||
 							(semantic_name == TGSI_SEMANTIC_BCOLOR)))
-					so->inputs[n].interpolate = TGSI_INTERPOLATE_COLOR;
+					so->inputs[n].rasterflat = true;
 
 				if (ctx->flat_bypass) {
-					/* with NIR, we need to infer TGSI_INTERPOLATE_COLOR
-					 * from the semantic name:
-					 */
-					switch (so->inputs[n].interpolate) {
-					case TGSI_INTERPOLATE_COLOR:
-						if (!ctx->so->key.rasterflat)
-							break;
-						/* fallthrough */
-					case TGSI_INTERPOLATE_CONSTANT:
+					if ((so->inputs[n].interpolate == INTERP_QUALIFIER_FLAT) ||
+							(so->inputs[n].rasterflat && ctx->so->key.rasterflat))
 						use_ldlv = true;
-						break;
-					}
 				}
 
 				so->inputs[n].bary = true;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 1bbbdbd224d..13b3f6a2a85 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -30,6 +30,7 @@
 #define IR3_SHADER_H_
 
 #include "pipe/p_state.h"
+#include "glsl/shader_enums.h"
 
 #include "ir3.h"
 #include "disasm.h"
@@ -82,8 +83,8 @@ struct ir3_shader_key {
 			 */
 			unsigned color_two_side : 1;
 			unsigned half_precision : 1;
-			/* used when shader needs to handle flat varyings (a4xx),
-			 * for TGSI_INTERPOLATE_COLOR:
+			/* used when shader needs to handle flat varyings (a4xx)
+			 * for front/back color inputs to frag shader:
 			 */
 			unsigned rasterflat : 1;
 		};
@@ -174,8 +175,10 @@ struct ir3_shader_variant {
 		 * spots where inloc is used.
 		 */
 		uint8_t inloc;
-		uint8_t bary;
-		uint8_t interpolate;
+		/* fragment shader specfic: */
+		bool    bary       : 1;   /* fetched varying (vs one loaded into reg) */
+		bool    rasterflat : 1;   /* special handling for emit->rasterflat */
+		enum glsl_interp_qualifier interpolate;
 	} inputs[16 + 2];  /* +POSITION +FACE */
 
 	unsigned total_in;       /* sum of inputs (scalar) */
-- 
cgit v1.2.3


From 4a121e1a90dc34677d9a68e8877dfe53f1e889a2 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Fri, 11 Sep 2015 17:01:23 -0400
Subject: glsl: add SYSTEM_VALUE_VERTEX_CNT

Used internally in freedreno/ir3 to calculate the stream-out position.
Seems like a generic enough way to implement stream-out (using str
instrs), plus it avoids the compiler warnings that come from sneaking a
non-enum value into switch statements.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/glsl/shader_enums.c | 1 +
 src/glsl/shader_enums.h | 6 ++++++
 2 files changed, 7 insertions(+)

(limited to 'src')

diff --git a/src/glsl/shader_enums.c b/src/glsl/shader_enums.c
index 530fd9e4081..c196b791d4f 100644
--- a/src/glsl/shader_enums.c
+++ b/src/glsl/shader_enums.c
@@ -169,6 +169,7 @@ const char * gl_system_value_name(gl_system_value sysval)
      ENUM(SYSTEM_VALUE_TESS_LEVEL_INNER),
      ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_ID),
      ENUM(SYSTEM_VALUE_WORK_GROUP_ID),
+     ENUM(SYSTEM_VALUE_VERTEX_CNT),
    };
    return NAME(sysval);
 }
diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h
index 385ca972809..39780076e74 100644
--- a/src/glsl/shader_enums.h
+++ b/src/glsl/shader_enums.h
@@ -412,6 +412,12 @@ typedef enum
    SYSTEM_VALUE_WORK_GROUP_ID,
    /*@}*/
 
+   /**
+    * Driver internal vertex-count, used (for example) for drivers to
+    * calculate stride for stream-out outputs.  Not externally visible.
+    */
+   SYSTEM_VALUE_VERTEX_CNT,
+
    SYSTEM_VALUE_MAX             /**< Number of values */
 } gl_system_value;
 
-- 
cgit v1.2.3


From c4572b7dfe7a4ae9dc6e900f89786fa9cf7769df Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Fri, 11 Sep 2015 17:20:48 -0400
Subject: freedreno/ir3: convert from tgsi semantic/index to varying-slot

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c      |  30 ++++--
 src/gallium/drivers/freedreno/a3xx/fd3_program.c   |  49 +++++----
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c      |  31 ++++--
 src/gallium/drivers/freedreno/a4xx/fd4_program.c   |  56 +++++------
 .../drivers/freedreno/ir3/ir3_compiler_nir.c       | 112 ++++++++++-----------
 src/gallium/drivers/freedreno/ir3/ir3_shader.c     |  89 +++++++++++-----
 src/gallium/drivers/freedreno/ir3/ir3_shader.h     |  60 +++++------
 7 files changed, 234 insertions(+), 193 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index dbddb293a9c..6153d92dc21 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -351,15 +351,27 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
 	unsigned vtxcnt_regid = regid(63, 0);
 
 	for (i = 0; i < vp->inputs_count; i++) {
-		uint8_t semantic = sem2name(vp->inputs[i].semantic);
-		if (semantic == TGSI_SEMANTIC_VERTEXID_NOBASE)
-			vertex_regid = vp->inputs[i].regid;
-		else if (semantic == TGSI_SEMANTIC_INSTANCEID)
-			instance_regid = vp->inputs[i].regid;
-		else if (semantic == IR3_SEMANTIC_VTXCNT)
-			vtxcnt_regid = vp->inputs[i].regid;
-		else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask)
+		if (vp->inputs[i].sysval) {
+			switch(vp->inputs[i].slot) {
+			case SYSTEM_VALUE_BASE_VERTEX:
+				/* handled elsewhere */
+				break;
+			case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
+				vertex_regid = vp->inputs[i].regid;
+				break;
+			case SYSTEM_VALUE_INSTANCE_ID:
+				instance_regid = vp->inputs[i].regid;
+				break;
+			case SYSTEM_VALUE_VERTEX_CNT:
+				vtxcnt_regid = vp->inputs[i].regid;
+				break;
+			default:
+				unreachable("invalid system value");
+				break;
+			}
+		} else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) {
 			last = i;
+		}
 	}
 
 	/* hw doesn't like to be configured for zero vbo's, it seems: */
@@ -370,7 +382,7 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
 		return;
 
 	for (i = 0, j = 0; i <= last; i++) {
-		assert(sem2name(vp->inputs[i].semantic) == 0);
+		assert(!vp->inputs[i].sysval);
 		if (vp->inputs[i].compmask) {
 			struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
 			const struct pipe_vertex_buffer *vb =
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index ef8a849617f..4ed04b38dea 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -194,24 +194,17 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
 	/* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */
 	constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;
 
-	pos_regid = ir3_find_output_regid(vp,
-		ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
-	posz_regid = ir3_find_output_regid(fp,
-		ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
-	psize_regid = ir3_find_output_regid(vp,
-		ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
+	pos_regid = ir3_find_output_regid(vp, VARYING_SLOT_POS);
+	posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
+	psize_regid = ir3_find_output_regid(vp, VARYING_SLOT_PSIZ);
 	if (fp->color0_mrt) {
 		color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
-			ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+			ir3_find_output_regid(fp, FRAG_RESULT_COLOR);
 	} else {
-		for (i = 0; i < fp->outputs_count; i++) {
-			ir3_semantic sem = fp->outputs[i].semantic;
-			unsigned idx = sem2idx(sem);
-			if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
-				continue;
-			debug_assert(idx < ARRAY_SIZE(color_regid));
-			color_regid[idx] = fp->outputs[i].regid;
-		}
+		color_regid[0] = ir3_find_output_regid(fp, FRAG_RESULT_DATA0);
+		color_regid[1] = ir3_find_output_regid(fp, FRAG_RESULT_DATA1);
+		color_regid[2] = ir3_find_output_regid(fp, FRAG_RESULT_DATA2);
+		color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3);
 	}
 
 	/* adjust regids for alpha output formats. there is no alpha render
@@ -280,14 +273,14 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
 
 		j = ir3_next_varying(fp, j);
 		if (j < fp->inputs_count) {
-			k = ir3_find_output(vp, fp->inputs[j].semantic);
+			k = ir3_find_output(vp, fp->inputs[j].slot);
 			reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[k].regid);
 			reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[j].compmask);
 		}
 
 		j = ir3_next_varying(fp, j);
 		if (j < fp->inputs_count) {
-			k = ir3_find_output(vp, fp->inputs[j].semantic);
+			k = ir3_find_output(vp, fp->inputs[j].slot);
 			reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[k].regid);
 			reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[j].compmask);
 		}
@@ -414,14 +407,20 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
 				}
 			}
 
-			/* Replace the .xy coordinates with S/T from the point sprite. Set
-			 * interpolation bits for .zw such that they become .01
-			 */
-			if (emit->sprite_coord_enable & (1 << sem2idx(fp->inputs[j].semantic))) {
-				vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
-					<< ((inloc % 16) * 2);
-				vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
-				vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+			gl_varying_slot slot = fp->inputs[j].slot;
+
+			/* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
+			if (slot >= VARYING_SLOT_VAR0) {
+				unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
+				/* Replace the .xy coordinates with S/T from the point sprite. Set
+				 * interpolation bits for .zw such that they become .01
+				 */
+				if (emit->sprite_coord_enable & texmask) {
+					vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
+							<< ((inloc % 16) * 2);
+					vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
+					vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+				}
 			}
 		}
 
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 5f36cef3e9a..c7ed1d2e379 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -333,17 +333,30 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
 	unsigned vtxcnt_regid = regid(63, 0);
 
 	for (i = 0; i < vp->inputs_count; i++) {
-		uint8_t semantic = sem2name(vp->inputs[i].semantic);
-		if (semantic == TGSI_SEMANTIC_VERTEXID_NOBASE)
-			vertex_regid = vp->inputs[i].regid;
-		else if (semantic == TGSI_SEMANTIC_INSTANCEID)
-			instance_regid = vp->inputs[i].regid;
-		else if (semantic == IR3_SEMANTIC_VTXCNT)
-			vtxcnt_regid = vp->inputs[i].regid;
-		else if ((i < vtx->vtx->num_elements) && vp->inputs[i].compmask)
+		if (vp->inputs[i].sysval) {
+			switch(vp->inputs[i].slot) {
+			case SYSTEM_VALUE_BASE_VERTEX:
+				/* handled elsewhere */
+				break;
+			case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
+				vertex_regid = vp->inputs[i].regid;
+				break;
+			case SYSTEM_VALUE_INSTANCE_ID:
+				instance_regid = vp->inputs[i].regid;
+				break;
+			case SYSTEM_VALUE_VERTEX_CNT:
+				vtxcnt_regid = vp->inputs[i].regid;
+				break;
+			default:
+				unreachable("invalid system value");
+				break;
+			}
+		} else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) {
 			last = i;
+		}
 	}
 
+
 	/* hw doesn't like to be configured for zero vbo's, it seems: */
 	if ((vtx->vtx->num_elements == 0) &&
 			(vertex_regid == regid(63, 0)) &&
@@ -352,7 +365,7 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
 		return;
 
 	for (i = 0, j = 0; i <= last; i++) {
-		assert(sem2name(vp->inputs[i].semantic) == 0);
+		assert(!vp->inputs[i].sysval);
 		if (vp->inputs[i].compmask) {
 			struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
 			const struct pipe_vertex_buffer *vb =
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
index 619eb860111..e3d5dabab4c 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
@@ -227,27 +227,22 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
 	/* blob seems to always use constmode currently: */
 	constmode = 1;
 
-	pos_regid = ir3_find_output_regid(s[VS].v,
-		ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
-	posz_regid = ir3_find_output_regid(s[FS].v,
-		ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
-	psize_regid = ir3_find_output_regid(s[VS].v,
-		ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
+	pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS);
+	posz_regid = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DEPTH);
+	psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ);
 	if (s[FS].v->color0_mrt) {
 		color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
 		color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
-			ir3_find_output_regid(s[FS].v, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+			ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR);
 	} else {
-		const struct ir3_shader_variant *fp = s[FS].v;
-		memset(color_regid, 0, sizeof(color_regid));
-		for (i = 0; i < fp->outputs_count; i++) {
-			ir3_semantic sem = fp->outputs[i].semantic;
-			unsigned idx = sem2idx(sem);
-			if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
-				continue;
-			debug_assert(idx < ARRAY_SIZE(color_regid));
-			color_regid[idx] = fp->outputs[i].regid;
-		}
+		color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0);
+		color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1);
+		color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2);
+		color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3);
+		color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4);
+		color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5);
+		color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6);
+		color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
 	}
 
 	/* adjust regids for alpha output formats. there is no alpha render
@@ -257,7 +252,6 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
 		if (util_format_is_alpha(pipe_surface_format(bufs[i])))
 			color_regid[i] += 3;
 
-
 	/* TODO get these dynamically: */
 	face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
 	coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
@@ -348,14 +342,14 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
 
 		j = ir3_next_varying(s[FS].v, j);
 		if (j < s[FS].v->inputs_count) {
-			k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].semantic);
+			k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].slot);
 			reg |= A4XX_SP_VS_OUT_REG_A_REGID(s[VS].v->outputs[k].regid);
 			reg |= A4XX_SP_VS_OUT_REG_A_COMPMASK(s[FS].v->inputs[j].compmask);
 		}
 
 		j = ir3_next_varying(s[FS].v, j);
 		if (j < s[FS].v->inputs_count) {
-			k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].semantic);
+			k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].slot);
 			reg |= A4XX_SP_VS_OUT_REG_B_REGID(s[VS].v->outputs[k].regid);
 			reg |= A4XX_SP_VS_OUT_REG_B_COMPMASK(s[FS].v->inputs[j].compmask);
 		}
@@ -513,14 +507,20 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
 				}
 			}
 
-			/* Replace the .xy coordinates with S/T from the point sprite. Set
-			 * interpolation bits for .zw such that they become .01
-			 */
-			if (emit->sprite_coord_enable & (1 << sem2idx(s[FS].v->inputs[j].semantic))) {
-				vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
-					<< ((inloc % 16) * 2);
-				vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
-				vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+			gl_varying_slot slot = s[FS].v->inputs[j].slot;
+
+			/* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
+			if (slot >= VARYING_SLOT_VAR0) {
+				unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
+				/* Replace the .xy coordinates with S/T from the point sprite. Set
+				 * interpolation bits for .zw such that they become .01
+				 */
+				if (emit->sprite_coord_enable & texmask) {
+					vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
+						<< ((inloc % 16) * 2);
+					vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
+					vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+				}
 			}
 		}
 
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index e4dbe64f753..17bac4106e9 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1349,14 +1349,15 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 	}
 }
 
-static void add_sysval_input(struct ir3_compile *ctx, unsigned name,
+static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot,
 		struct ir3_instruction *instr)
 {
 	struct ir3_shader_variant *so = ctx->so;
 	unsigned r = regid(so->inputs_count, 0);
 	unsigned n = so->inputs_count++;
 
-	so->inputs[n].semantic = ir3_semantic_name(name, 0);
+	so->inputs[n].sysval = true;
+	so->inputs[n].slot = slot;
 	so->inputs[n].compmask = 1;
 	so->inputs[n].regid = r;
 	so->inputs[n].interpolate = INTERP_QUALIFIER_FLAT;
@@ -1437,7 +1438,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 	case nir_intrinsic_load_base_vertex:
 		if (!ctx->basevertex) {
 			ctx->basevertex = create_driver_param(ctx, IR3_DP_VTXID_BASE);
-			add_sysval_input(ctx, TGSI_SEMANTIC_BASEVERTEX,
+			add_sysval_input(ctx, SYSTEM_VALUE_BASE_VERTEX,
 					ctx->basevertex);
 		}
 		dst[0] = ctx->basevertex;
@@ -1445,7 +1446,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 	case nir_intrinsic_load_vertex_id_zero_base:
 		if (!ctx->vertex_id) {
 			ctx->vertex_id = create_input(ctx->block, 0);
-			add_sysval_input(ctx, TGSI_SEMANTIC_VERTEXID_NOBASE,
+			add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
 					ctx->vertex_id);
 		}
 		dst[0] = ctx->vertex_id;
@@ -1453,7 +1454,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 	case nir_intrinsic_load_instance_id:
 		if (!ctx->instance_id) {
 			ctx->instance_id = create_input(ctx->block, 0);
-			add_sysval_input(ctx, TGSI_SEMANTIC_INSTANCEID,
+			add_sysval_input(ctx, SYSTEM_VALUE_INSTANCE_ID,
 					ctx->instance_id);
 		}
 		dst[0] = ctx->instance_id;
@@ -2021,7 +2022,7 @@ emit_stream_out(struct ir3_compile *ctx)
 	 * of the shader:
 	 */
 	vtxcnt = create_input(ctx->in_block, 0);
-	add_sysval_input(ctx, IR3_SEMANTIC_VTXCNT, vtxcnt);
+	add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_CNT, vtxcnt);
 
 	maxvtxcnt = create_driver_param(ctx, IR3_DP_VTXCNT_MAX);
 
@@ -2139,6 +2140,7 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
 	DBG("; in: slot=%u, len=%ux%u, drvloc=%u",
 			slot, array_len, ncomp, n);
 
+	so->inputs[n].slot = slot;
 	so->inputs[n].compmask = (1 << ncomp) - 1;
 	so->inputs[n].inloc = ctx->next_inloc;
 	so->inputs[n].interpolate = INTERP_QUALIFIER_NONE;
@@ -2146,23 +2148,15 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
 	so->inputs[n].interpolate = in->data.interpolation;
 
 	if (ctx->so->type == SHADER_FRAGMENT) {
-		unsigned semantic_name, semantic_index;
-
-		varying_slot_to_tgsi_semantic(slot,
-				&semantic_name, &semantic_index);
-
-		so->inputs[n].semantic =
-				ir3_semantic_name(semantic_name, semantic_index);
-
 		for (int i = 0; i < ncomp; i++) {
 			struct ir3_instruction *instr = NULL;
 			unsigned idx = (n * 4) + i;
 
-			if (semantic_name == TGSI_SEMANTIC_POSITION) {
+			if (slot == VARYING_SLOT_POS) {
 				so->inputs[n].bary = false;
 				so->frag_coord = true;
 				instr = create_frag_coord(ctx, i);
-			} else if (semantic_name == TGSI_SEMANTIC_FACE) {
+			} else if (slot == VARYING_SLOT_FACE) {
 				so->inputs[n].bary = false;
 				so->frag_face = true;
 				instr = create_frag_face(ctx, i);
@@ -2173,10 +2167,18 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
 				 * we need to do flat vs smooth shading depending on
 				 * rast state:
 				 */
-				if ((in->data.interpolation == INTERP_QUALIFIER_NONE) &&
-						((semantic_name == TGSI_SEMANTIC_COLOR) ||
-							(semantic_name == TGSI_SEMANTIC_BCOLOR)))
-					so->inputs[n].rasterflat = true;
+				if (in->data.interpolation == INTERP_QUALIFIER_NONE) {
+					switch (slot) {
+					case VARYING_SLOT_COL0:
+					case VARYING_SLOT_COL1:
+					case VARYING_SLOT_BFC0:
+					case VARYING_SLOT_BFC1:
+						so->inputs[n].rasterflat = true;
+						break;
+					default:
+						break;
+					}
+				}
 
 				if (ctx->flat_bypass) {
 					if ((so->inputs[n].interpolate == INTERP_QUALIFIER_FLAT) ||
@@ -2193,7 +2195,6 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
 			ctx->ir->inputs[idx] = instr;
 		}
 	} else if (ctx->so->type == SHADER_VERTEX) {
-		so->inputs[n].semantic = 0;
 		for (int i = 0; i < ncomp; i++) {
 			unsigned idx = (n * 4) + i;
 			ctx->ir->inputs[idx] = create_input(ctx->block, idx);
@@ -2214,7 +2215,6 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
 	struct ir3_shader_variant *so = ctx->so;
 	unsigned array_len = MAX2(glsl_get_length(out->type), 1);
 	unsigned ncomp = glsl_get_components(out->type);
-	unsigned semantic_name, semantic_index;
 	unsigned n = out->data.driver_location;
 	unsigned slot = out->data.location;
 	unsigned comp = 0;
@@ -2222,45 +2222,42 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
 	DBG("; out: slot=%u, len=%ux%u, drvloc=%u",
 			slot, array_len, ncomp, n);
 
-	if (ctx->so->type == SHADER_VERTEX) {
-		varying_slot_to_tgsi_semantic(slot,
-				&semantic_name, &semantic_index);
-
-		switch (semantic_name) {
-		case TGSI_SEMANTIC_POSITION:
+	if (ctx->so->type == SHADER_FRAGMENT) {
+		switch (slot) {
+		case FRAG_RESULT_DEPTH:
+			comp = 2;  /* tgsi will write to .z component */
 			so->writes_pos = true;
 			break;
-		case TGSI_SEMANTIC_PSIZE:
-			so->writes_psize = true;
-			break;
-		case TGSI_SEMANTIC_COLOR:
-		case TGSI_SEMANTIC_BCOLOR:
-		case TGSI_SEMANTIC_GENERIC:
-		case TGSI_SEMANTIC_FOG:
-		case TGSI_SEMANTIC_TEXCOORD:
+		case FRAG_RESULT_COLOR:
+			so->color0_mrt = 1;
 			break;
 		default:
-			compile_error(ctx, "unknown VS semantic name: %s\n",
-					tgsi_semantic_names[semantic_name]);
+			if (slot >= FRAG_RESULT_DATA0)
+				break;
+			compile_error(ctx, "unknown FS output name: %s\n",
+					gl_frag_result_name(slot));
 		}
-	} else if (ctx->so->type == SHADER_FRAGMENT) {
-		frag_result_to_tgsi_semantic(slot,
-				&semantic_name, &semantic_index);
-
-		switch (semantic_name) {
-		case TGSI_SEMANTIC_POSITION:
-			comp = 2;  /* tgsi will write to .z component */
+	} else if (ctx->so->type == SHADER_VERTEX) {
+		switch (slot) {
+		case VARYING_SLOT_POS:
 			so->writes_pos = true;
 			break;
-		case TGSI_SEMANTIC_COLOR:
-			if (semantic_index == -1) {
-				semantic_index = 0;
-				so->color0_mrt = 1;
-			}
+		case VARYING_SLOT_PSIZ:
+			so->writes_psize = true;
+			break;
+		case VARYING_SLOT_COL0:
+		case VARYING_SLOT_COL1:
+		case VARYING_SLOT_BFC0:
+		case VARYING_SLOT_BFC1:
+		case VARYING_SLOT_FOGC:
 			break;
 		default:
-			compile_error(ctx, "unknown FS semantic name: %s\n",
-					tgsi_semantic_names[semantic_name]);
+			if (slot >= VARYING_SLOT_VAR0)
+				break;
+			if ((VARYING_SLOT_TEX0 <= slot) && (slot <= VARYING_SLOT_TEX7))
+				break;
+			compile_error(ctx, "unknown VS output name: %s\n",
+					gl_varying_slot_name(slot));
 		}
 	} else {
 		compile_error(ctx, "unknown shader type: %d\n", ctx->so->type);
@@ -2268,8 +2265,7 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
 
 	compile_assert(ctx, n < ARRAY_SIZE(so->outputs));
 
-	so->outputs[n].semantic =
-			ir3_semantic_name(semantic_name, semantic_index);
+	so->outputs[n].slot = slot;
 	so->outputs[n].regid = regid(n, comp);
 	so->outputs_count = MAX2(so->outputs_count, n + 1);
 
@@ -2462,12 +2458,10 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 	/* at this point, for binning pass, throw away unneeded outputs: */
 	if (so->key.binning_pass) {
 		for (i = 0, j = 0; i < so->outputs_count; i++) {
-			unsigned name = sem2name(so->outputs[i].semantic);
-			unsigned idx = sem2idx(so->outputs[i].semantic);
+			unsigned slot = so->outputs[i].slot;
 
 			/* throw away everything but first position/psize */
-			if ((idx == 0) && ((name == TGSI_SEMANTIC_POSITION) ||
-					(name == TGSI_SEMANTIC_PSIZE))) {
+			if ((slot == VARYING_SLOT_POS) || (slot == VARYING_SLOT_PSIZ)) {
 				if (i != j) {
 					so->outputs[j] = so->outputs[i];
 					ir->outputs[(j*4)+0] = ir->outputs[(i*4)+0];
@@ -2566,7 +2560,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 		 * but what we give the hw is the scalar register:
 		 */
 		if ((so->type == SHADER_FRAGMENT) &&
-			(sem2name(so->outputs[i].semantic) == TGSI_SEMANTIC_POSITION))
+			(so->outputs[i].slot == FRAG_RESULT_DEPTH))
 			so->outputs[i].regid += 2;
 	}
 
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index 312174c0c6d..7b250509135 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -300,11 +300,11 @@ static void dump_reg(const char *name, uint32_t r)
 		debug_printf("; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]);
 }
 
-static void dump_semantic(struct ir3_shader_variant *so,
-		unsigned sem, const char *name)
+static void dump_output(struct ir3_shader_variant *so,
+		unsigned slot, const char *name)
 {
 	uint32_t regid;
-	regid = ir3_find_output_regid(so, ir3_semantic_name(sem, 0));
+	regid = ir3_find_output_regid(so, slot);
 	dump_reg(name, regid);
 }
 
@@ -355,27 +355,51 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin)
 
 	disasm_a3xx(bin, so->info.sizedwords, 0, so->type);
 
-	debug_printf("; %s: outputs:", type);
-	for (i = 0; i < so->outputs_count; i++) {
-		uint8_t regid = so->outputs[i].regid;
-		ir3_semantic sem = so->outputs[i].semantic;
-		debug_printf(" r%d.%c (%u:%u)",
-				(regid >> 2), "xyzw"[regid & 0x3],
-				sem2name(sem), sem2idx(sem));
-	}
-	debug_printf("\n");
-	debug_printf("; %s: inputs:", type);
-	for (i = 0; i < so->inputs_count; i++) {
-		uint8_t regid = so->inputs[i].regid;
-		ir3_semantic sem = so->inputs[i].semantic;
-		debug_printf(" r%d.%c (%u:%u,cm=%x,il=%u,b=%u)",
-				(regid >> 2), "xyzw"[regid & 0x3],
-				sem2name(sem), sem2idx(sem),
-				so->inputs[i].compmask,
-				so->inputs[i].inloc,
-				so->inputs[i].bary);
+	switch (so->type) {
+	case SHADER_VERTEX:
+		debug_printf("; %s: outputs:", type);
+		for (i = 0; i < so->outputs_count; i++) {
+			uint8_t regid = so->outputs[i].regid;
+			debug_printf(" r%d.%c (%s)",
+					(regid >> 2), "xyzw"[regid & 0x3],
+					gl_varying_slot_name(so->outputs[i].slot));
+		}
+		debug_printf("\n");
+		debug_printf("; %s: inputs:", type);
+		for (i = 0; i < so->inputs_count; i++) {
+			uint8_t regid = so->inputs[i].regid;
+			debug_printf(" r%d.%c (cm=%x,il=%u,b=%u)",
+					(regid >> 2), "xyzw"[regid & 0x3],
+					so->inputs[i].compmask,
+					so->inputs[i].inloc,
+					so->inputs[i].bary);
+		}
+		debug_printf("\n");
+		break;
+	case SHADER_FRAGMENT:
+		debug_printf("; %s: outputs:", type);
+		for (i = 0; i < so->outputs_count; i++) {
+			uint8_t regid = so->outputs[i].regid;
+			debug_printf(" r%d.%c (%s)",
+					(regid >> 2), "xyzw"[regid & 0x3],
+					gl_frag_result_name(so->outputs[i].slot));
+		}
+		debug_printf("\n");
+		debug_printf("; %s: inputs:", type);
+		for (i = 0; i < so->inputs_count; i++) {
+			uint8_t regid = so->inputs[i].regid;
+			debug_printf(" r%d.%c (%s,cm=%x,il=%u,b=%u)",
+					(regid >> 2), "xyzw"[regid & 0x3],
+					gl_varying_slot_name(so->inputs[i].slot),
+					so->inputs[i].compmask,
+					so->inputs[i].inloc,
+					so->inputs[i].bary);
+		}
+		debug_printf("\n");
+		break;
+	case SHADER_COMPUTE:
+		break;
 	}
-	debug_printf("\n");
 
 	/* print generic shader info: */
 	debug_printf("; %s prog %d/%d: %u instructions, %d half, %d full\n",
@@ -391,13 +415,24 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin)
 	/* print shader type specific info: */
 	switch (so->type) {
 	case SHADER_VERTEX:
-		dump_semantic(so, TGSI_SEMANTIC_POSITION, "pos");
-		dump_semantic(so, TGSI_SEMANTIC_PSIZE, "psize");
+		dump_output(so, VARYING_SLOT_POS, "pos");
+		dump_output(so, VARYING_SLOT_PSIZ, "psize");
 		break;
 	case SHADER_FRAGMENT:
 		dump_reg("pos (bary)", so->pos_regid);
-		dump_semantic(so, TGSI_SEMANTIC_POSITION, "posz");
-		dump_semantic(so, TGSI_SEMANTIC_COLOR, "color");
+		dump_output(so, FRAG_RESULT_DEPTH, "posz");
+		if (so->color0_mrt) {
+			dump_output(so, FRAG_RESULT_COLOR, "color");
+		} else {
+			dump_output(so, FRAG_RESULT_DATA0, "data0");
+			dump_output(so, FRAG_RESULT_DATA1, "data1");
+			dump_output(so, FRAG_RESULT_DATA2, "data2");
+			dump_output(so, FRAG_RESULT_DATA3, "data3");
+			dump_output(so, FRAG_RESULT_DATA4, "data4");
+			dump_output(so, FRAG_RESULT_DATA5, "data5");
+			dump_output(so, FRAG_RESULT_DATA6, "data6");
+			dump_output(so, FRAG_RESULT_DATA7, "data7");
+		}
 		/* these two are hard-coded since we don't know how to
 		 * program them to anything but all 0's...
 		 */
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 13b3f6a2a85..39b8864329b 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -41,28 +41,6 @@ enum ir3_driver_param {
 	IR3_DP_VTXCNT_MAX = 1,
 };
 
-/* internal semantic used for passing vtxcnt to vertex shader to
- * implement transform feedback:
- */
-#define IR3_SEMANTIC_VTXCNT (TGSI_SEMANTIC_COUNT + 0)
-
-typedef uint16_t ir3_semantic;  /* semantic name + index */
-static inline ir3_semantic
-ir3_semantic_name(uint8_t name, uint16_t index)
-{
-	return (name << 8) | (index & 0xff);
-}
-
-static inline uint8_t sem2name(ir3_semantic sem)
-{
-	return sem >> 8;
-}
-
-static inline uint16_t sem2idx(ir3_semantic sem)
-{
-	return sem & 0xff;
-}
-
 /* Configuration key used to identify a shader variant.. different
  * shader variants can be used to implement features not supported
  * in hw (two sided color), binning-pass vertex shader, etc.
@@ -148,10 +126,16 @@ struct ir3_shader_variant {
 	uint8_t pos_regid;
 	bool frag_coord, frag_face, color0_mrt;
 
+	/* NOTE: for input/outputs, slot is:
+	 *   gl_vert_attrib  - for VS inputs
+	 *   gl_varying_slot - for VS output / FS input
+	 *   gl_frag_result  - for FS output
+	 */
+
 	/* varyings/outputs: */
 	unsigned outputs_count;
 	struct {
-		ir3_semantic semantic;
+		uint8_t slot;
 		uint8_t regid;
 	} outputs[16 + 2];  /* +POSITION +PSIZE */
 	bool writes_pos, writes_psize;
@@ -159,7 +143,7 @@ struct ir3_shader_variant {
 	/* vertices/inputs: */
 	unsigned inputs_count;
 	struct {
-		ir3_semantic semantic;
+		uint8_t slot;
 		uint8_t regid;
 		uint8_t compmask;
 		uint8_t ncomp;
@@ -175,7 +159,9 @@ struct ir3_shader_variant {
 		 * spots where inloc is used.
 		 */
 		uint8_t inloc;
-		/* fragment shader specfic: */
+		/* vertex shader specific: */
+		bool    sysval     : 1;   /* slot is a gl_system_value */
+		/* fragment shader specific: */
 		bool    bary       : 1;   /* fetched varying (vs one loaded into reg) */
 		bool    rasterflat : 1;   /* special handling for emit->rasterflat */
 		enum glsl_interp_qualifier interpolate;
@@ -257,12 +243,12 @@ ir3_shader_stage(struct ir3_shader *shader)
 #include "pipe/p_shader_tokens.h"
 
 static inline int
-ir3_find_output(const struct ir3_shader_variant *so, ir3_semantic semantic)
+ir3_find_output(const struct ir3_shader_variant *so, gl_varying_slot slot)
 {
 	int j;
 
 	for (j = 0; j < so->outputs_count; j++)
-		if (so->outputs[j].semantic == semantic)
+		if (so->outputs[j].slot == slot)
 			return j;
 
 	/* it seems optional to have a OUT.BCOLOR[n] for each OUT.COLOR[n]
@@ -272,18 +258,20 @@ ir3_find_output(const struct ir3_shader_variant *so, ir3_semantic semantic)
 	 * OUT.COLOR[n] to IN.BCOLOR[n].  And visa versa if there is only
 	 * a OUT.BCOLOR[n] but no matching OUT.COLOR[n]
 	 */
-	if (sem2name(semantic) == TGSI_SEMANTIC_BCOLOR) {
-		unsigned idx = sem2idx(semantic);
-		semantic = ir3_semantic_name(TGSI_SEMANTIC_COLOR, idx);
-	} else if (sem2name(semantic) == TGSI_SEMANTIC_COLOR) {
-		unsigned idx = sem2idx(semantic);
-		semantic = ir3_semantic_name(TGSI_SEMANTIC_BCOLOR, idx);
+	if (slot == VARYING_SLOT_BFC0) {
+		slot = VARYING_SLOT_COL0;
+	} else if (slot == VARYING_SLOT_BFC1) {
+		slot = VARYING_SLOT_COL1;
+	} else if (slot == VARYING_SLOT_COL0) {
+		slot = VARYING_SLOT_BFC0;
+	} else if (slot == VARYING_SLOT_COL1) {
+		slot = VARYING_SLOT_BFC1;
 	} else {
 		return 0;
 	}
 
 	for (j = 0; j < so->outputs_count; j++)
-		if (so->outputs[j].semantic == semantic)
+		if (so->outputs[j].slot == slot)
 			return j;
 
 	debug_assert(0);
@@ -301,11 +289,11 @@ ir3_next_varying(const struct ir3_shader_variant *so, int i)
 }
 
 static inline uint32_t
-ir3_find_output_regid(const struct ir3_shader_variant *so, ir3_semantic semantic)
+ir3_find_output_regid(const struct ir3_shader_variant *so, unsigned slot)
 {
 	int j;
 	for (j = 0; j < so->outputs_count; j++)
-		if (so->outputs[j].semantic == semantic)
+		if (so->outputs[j].slot == slot)
 			return so->outputs[j].regid;
 	return regid(63, 0);
 }
-- 
cgit v1.2.3


From 53671a37238fd34f08d91d5d079d26437f21a473 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 27 Aug 2015 17:42:40 -0400
Subject: nir: add sysval for user-clip-planes

For lowering user-clip-planes, we need a way to pass the enabled/used
user-clip-planes in to shader.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
---
 src/glsl/nir/nir_intrinsics.h | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index b981df1a71b..70cae4256a3 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -128,21 +128,22 @@ INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0,
 INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,
           NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
-#define SYSTEM_VALUE(name, components) \
-   INTRINSIC(load_##name, 0, ARR(), true, components, 0, 0, \
+#define SYSTEM_VALUE(name, components, num_indices) \
+   INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
    NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
-SYSTEM_VALUE(front_face, 1)
-SYSTEM_VALUE(vertex_id, 1)
-SYSTEM_VALUE(vertex_id_zero_base, 1)
-SYSTEM_VALUE(base_vertex, 1)
-SYSTEM_VALUE(instance_id, 1)
-SYSTEM_VALUE(sample_id, 1)
-SYSTEM_VALUE(sample_pos, 2)
-SYSTEM_VALUE(sample_mask_in, 1)
-SYSTEM_VALUE(invocation_id, 1)
-SYSTEM_VALUE(local_invocation_id, 3)
-SYSTEM_VALUE(work_group_id, 3)
+SYSTEM_VALUE(front_face, 1, 0)
+SYSTEM_VALUE(vertex_id, 1, 0)
+SYSTEM_VALUE(vertex_id_zero_base, 1, 0)
+SYSTEM_VALUE(base_vertex, 1, 0)
+SYSTEM_VALUE(instance_id, 1, 0)
+SYSTEM_VALUE(sample_id, 1, 0)
+SYSTEM_VALUE(sample_pos, 2, 0)
+SYSTEM_VALUE(sample_mask_in, 1, 0)
+SYSTEM_VALUE(invocation_id, 1, 0)
+SYSTEM_VALUE(local_invocation_id, 3, 0)
+SYSTEM_VALUE(work_group_id, 3, 0)
+SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */
 
 /*
  * The format of the indices depends on the type of the load.  For uniforms,
-- 
cgit v1.2.3


From 509e0c4505813ebeaad1c95822156ed2df627487 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Wed, 9 Sep 2015 14:57:15 -0400
Subject: nir: add lowering stage for user-clip-planes / clipdist

The vertex shader lowering adds calculation for CLIPDIST, if needed
(ie. user-clip-planes), and the frag shader lowering adds conditional
kills based on CLIPDIST value (which should be treated as a normal
interpolated varying by the driver).

Note that this won't quite do the right thing in the face of MSAA plus
user-clip-planes, since all the samples would be killed or not (rather
than potentially only a portion of them).  But it's better than no UCP
support at all for drivers that don't have this in hw.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/glsl/Makefile.sources     |   1 +
 src/glsl/nir/nir.h            |   3 +
 src/glsl/nir/nir_lower_clip.c | 340 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 344 insertions(+)
 create mode 100644 src/glsl/nir/nir_lower_clip.c

(limited to 'src')

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 07877bb7993..5134ab73800 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -35,6 +35,7 @@ NIR_FILES = \
 	nir/nir_live_variables.c \
 	nir/nir_lower_alu_to_scalar.c \
 	nir/nir_lower_atomics.c \
+	nir/nir_lower_clip.c \
 	nir/nir_lower_global_vars_to_local.c \
 	nir/nir_lower_load_const_to_scalar.c \
 	nir/nir_lower_locals_to_regs.c \
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index f64bb2837c3..916226791e6 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1839,6 +1839,9 @@ void nir_lower_system_values(nir_shader *shader);
 void nir_lower_tex_projector(nir_shader *shader);
 void nir_lower_idiv(nir_shader *shader);
 
+void nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables);
+void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables);
+
 void nir_lower_atomics(nir_shader *shader);
 void nir_lower_to_source_mods(nir_shader *shader);
 
diff --git a/src/glsl/nir/nir_lower_clip.c b/src/glsl/nir/nir_lower_clip.c
new file mode 100644
index 00000000000..94d12b77af4
--- /dev/null
+++ b/src/glsl/nir/nir_lower_clip.c
@@ -0,0 +1,340 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+#define MAX_CLIP_PLANES 8
+
+/* Generates the lowering code for user-clip-planes, computing CLIPDIST
+ * from UCP[n] + CLIPVERTEX or POSITION.  Additionally, there is an
+ * optional pass for fragment shaders that inserts conditional kills
+ * based on the interpolated CLIPDIST.
+ *
+ * NOTE: should be run after nir_lower_outputs_to_temporaries() (or at
+ * least in scenarios where you can count on each output written once
+ * and only once).
+ */
+
+
+static nir_variable *
+create_clipdist_var(nir_shader *shader, unsigned drvloc,
+                    bool output, gl_varying_slot slot)
+{
+   nir_variable *var = rzalloc(shader, nir_variable);
+
+   var->data.driver_location = drvloc;
+   var->type = glsl_vec4_type();
+   var->data.mode = output ? nir_var_shader_out : nir_var_shader_in;
+   var->name = ralloc_asprintf(var, "clipdist_%d", drvloc);
+   var->data.index = 0;
+   var->data.location = slot;
+
+   if (output) {
+      exec_list_push_tail(&shader->outputs, &var->node);
+   }
+   else {
+      exec_list_push_tail(&shader->inputs, &var->node);
+   }
+   return var;
+}
+
+static void
+store_clipdist_output(nir_builder *b, nir_variable *out, nir_ssa_def **val)
+{
+   nir_intrinsic_instr *store;
+
+   store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+   store->num_components = 4;
+   store->const_index[0] = out->data.driver_location;
+   store->src[0].ssa = nir_vec4(b, val[0], val[1], val[2], val[3]);
+   store->src[0].is_ssa = true;
+   nir_builder_instr_insert(b, &store->instr);
+}
+
+static void
+load_clipdist_input(nir_builder *b, nir_variable *in, nir_ssa_def **val)
+{
+   nir_intrinsic_instr *load;
+
+   load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
+   load->num_components = 4;
+   load->const_index[0] = in->data.driver_location;
+   nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
+   nir_builder_instr_insert(b, &load->instr);
+
+   val[0] = nir_channel(b, &load->dest.ssa, 0);
+   val[1] = nir_channel(b, &load->dest.ssa, 1);
+   val[2] = nir_channel(b, &load->dest.ssa, 2);
+   val[3] = nir_channel(b, &load->dest.ssa, 3);
+}
+
+struct find_output_state
+{
+   unsigned drvloc;
+   nir_ssa_def *def;
+};
+
+static bool
+find_output_in_block(nir_block *block, void *void_state)
+{
+   struct find_output_state *state = void_state;
+   nir_foreach_instr(block, instr) {
+
+      if (instr->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+         if ((intr->intrinsic == nir_intrinsic_store_output) &&
+             intr->const_index[0] == state->drvloc) {
+            assert(state->def == NULL);
+            assert(intr->src[0].is_ssa);
+            state->def = intr->src[0].ssa;
+
+#if !defined(DEBUG)
+            /* for debug builds, scan entire shader to assert
+             * if output is written multiple times.  For release
+             * builds just assume all is well and bail when we
+             * find first:
+             */
+            return false;
+#endif
+         }
+      }
+   }
+
+   return true;
+}
+
+/* TODO: maybe this would be a useful helper?
+ * NOTE: assumes each output is written exactly once (and unconditionally),
+ * so run nir_lower_outputs_to_temporaries() first if needed.
+ */
+static nir_ssa_def *
+find_output(nir_shader *shader, unsigned drvloc)
+{
+   struct find_output_state state = {
+      .drvloc = drvloc,
+   };
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl) {
+         nir_foreach_block_reverse(overload->impl,
+                                   find_output_in_block, &state);
+      }
+   }
+
+   return state.def;
+}
+
+/*
+ * VS lowering
+ */
+
+static void
+lower_clip_vs(nir_function_impl *impl, unsigned ucp_enables,
+              nir_ssa_def *cv, nir_variable **out)
+{
+   nir_ssa_def *clipdist[MAX_CLIP_PLANES];
+   nir_builder b;
+
+   nir_builder_init(&b, impl);
+
+   /* NIR should ensure that, even in case of loops/if-else, there
+    * should be only a single predecessor block to end_block, which
+    * makes the perfect place to insert the clipdist calculations.
+    *
+    * NOTE: in case of early return's, these would have to be lowered
+    * to jumps to end_block predecessor in a previous pass.  Not sure
+    * if there is a good way to sanity check this, but for now the
+    * users of this pass don't support sub-routines.
+    */
+   assert(impl->end_block->predecessors->entries == 1);
+   b.cursor = nir_after_cf_list(&impl->body);
+
+   for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) {
+      if (ucp_enables & (1 << plane)) {
+         nir_intrinsic_instr *ucp;
+
+         /* insert intrinsic to fetch ucp[plane]: */
+         ucp = nir_intrinsic_instr_create(b.shader,
+                                          nir_intrinsic_load_user_clip_plane);
+         ucp->num_components = 4;
+         ucp->const_index[0] = plane;
+         nir_ssa_dest_init(&ucp->instr, &ucp->dest, 4, NULL);
+         nir_builder_instr_insert(&b, &ucp->instr);
+
+         /* calculate clipdist[plane] = dot(ucp, cv): */
+         clipdist[plane] = nir_fdot4(&b, &ucp->dest.ssa, cv);
+      }
+      else {
+         /* 0.0 == don't-clip == disabled: */
+         clipdist[plane] = nir_imm_float(&b, 0.0);
+      }
+   }
+
+   if (ucp_enables & 0x0f)
+      store_clipdist_output(&b, out[0], &clipdist[0]);
+   if (ucp_enables & 0xf0)
+      store_clipdist_output(&b, out[1], &clipdist[4]);
+
+   nir_metadata_preserve(impl, nir_metadata_dominance);
+}
+
+/* ucp_enables is bitmask of enabled ucp's.  Actual ucp values are
+ * passed in to shader via user_clip_plane system-values
+ */
+void
+nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables)
+{
+   int clipvertex = -1;
+   int position = -1;
+   int maxloc = -1;
+   nir_ssa_def *cv;
+   nir_variable *out[2];
+
+   if (!ucp_enables)
+      return;
+
+   /* find clipvertex/position outputs: */
+   foreach_list_typed(nir_variable, var, node, &shader->outputs) {
+      int loc = var->data.driver_location;
+
+      /* keep track of last used driver-location.. we'll be
+       * appending CLIP_DIST0/CLIP_DIST1 after last existing
+       * output:
+       */
+      maxloc = MAX2(maxloc, loc);
+
+      switch (var->data.location) {
+      case VARYING_SLOT_POS:
+         position = loc;
+         break;
+      case VARYING_SLOT_CLIP_VERTEX:
+         clipvertex = loc;
+         break;
+      case VARYING_SLOT_CLIP_DIST0:
+      case VARYING_SLOT_CLIP_DIST1:
+         /* if shader is already writing CLIPDIST, then
+          * there should be no user-clip-planes to deal
+          * with.
+          */
+         return;
+      }
+   }
+
+   if (clipvertex != -1)
+      cv = find_output(shader, clipvertex);
+   else if (position != -1)
+      cv = find_output(shader, position);
+   else
+      return;
+
+   /* insert CLIPDIST outputs: */
+   if (ucp_enables & 0x0f)
+      out[0] =
+         create_clipdist_var(shader, ++maxloc, true, VARYING_SLOT_CLIP_DIST0);
+   if (ucp_enables & 0xf0)
+      out[1] =
+         create_clipdist_var(shader, ++maxloc, true, VARYING_SLOT_CLIP_DIST1);
+
+   nir_foreach_overload(shader, overload) {
+      if (!strcmp(overload->function->name, "main"))
+         lower_clip_vs(overload->impl, ucp_enables, cv, out);
+   }
+}
+
+/*
+ * FS lowering
+ */
+
+static void
+lower_clip_fs(nir_function_impl *impl, unsigned ucp_enables,
+              nir_variable **in)
+{
+   nir_ssa_def *clipdist[MAX_CLIP_PLANES];
+   nir_builder b;
+
+   nir_builder_init(&b, impl);
+   b.cursor = nir_before_cf_list(&impl->body);
+
+   if (ucp_enables & 0x0f)
+      load_clipdist_input(&b, in[0], &clipdist[0]);
+   if (ucp_enables & 0xf0)
+      load_clipdist_input(&b, in[1], &clipdist[4]);
+
+   for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) {
+      if (ucp_enables & (1 << plane)) {
+         nir_intrinsic_instr *discard;
+         nir_ssa_def *cond;
+
+         cond = nir_flt(&b, clipdist[plane], nir_imm_float(&b, 0.0));
+
+         discard = nir_intrinsic_instr_create(b.shader,
+                                              nir_intrinsic_discard_if);
+         discard->src[0] = nir_src_for_ssa(cond);
+         nir_builder_instr_insert(&b, &discard->instr);
+      }
+   }
+}
+
+/* insert conditional kill based on interpolated CLIPDIST
+ */
+void
+nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables)
+{
+   nir_variable *in[2];
+   int maxloc = -1;
+
+   if (!ucp_enables)
+      return;
+
+   foreach_list_typed(nir_variable, var, node, &shader->inputs) {
+      int loc = var->data.driver_location;
+
+      /* keep track of last used driver-location.. we'll be
+       * appending CLIP_DIST0/CLIP_DIST1 after last existing
+       * input:
+       */
+      maxloc = MAX2(maxloc, loc);
+   }
+
+   /* The shader won't normally have CLIPDIST inputs, so we
+    * must add our own:
+    */
+   /* insert CLIPDIST inputs: */
+   if (ucp_enables & 0x0f)
+      in[0] =
+         create_clipdist_var(shader, ++maxloc, false,
+                             VARYING_SLOT_CLIP_DIST0);
+   if (ucp_enables & 0xf0)
+      in[1] =
+         create_clipdist_var(shader, ++maxloc, false,
+                             VARYING_SLOT_CLIP_DIST1);
+
+   nir_foreach_overload(shader, overload) {
+      if (!strcmp(overload->function->name, "main"))
+         lower_clip_fs(overload->impl, ucp_enables, in);
+   }
+}
-- 
cgit v1.2.3


From 91ec210ea8e35af8a7b30fa599b67b1faa55f34c Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 10 Sep 2015 16:09:13 -0400
Subject: freedreno/ir3: add support for ucp

Use nir_lower_clip pass for adding the VS/FS instructions to handle
user-clip-planes and CLIPDIST.  Wire up support for load_user_clip_plane
intrinsic to fetch ucp[plane] values as driver-params (passed as const's
to the shader).

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/ir3/ir3_cmdline.c    |  8 ++++++
 .../drivers/freedreno/ir3/ir3_compiler_nir.c       | 32 ++++++++++++++++------
 src/gallium/drivers/freedreno/ir3/ir3_shader.c     | 27 ++++++++++++++----
 src/gallium/drivers/freedreno/ir3/ir3_shader.h     | 26 ++++++++++++++++++
 4 files changed, 80 insertions(+), 13 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
index ede29f445dc..cbf748a00df 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
@@ -94,6 +94,7 @@ static void print_usage(void)
 	printf("    --saturate-t MASK - bitmask of samplers to saturate T coord\n");
 	printf("    --saturate-r MASK - bitmask of samplers to saturate R coord\n");
 	printf("    --stream-out      - enable stream-out (aka transform feedback)\n");
+	printf("    --ucp MASK        - bitmask of enabled user-clip-planes\n");
 	printf("    --help            - show this message\n");
 }
 
@@ -190,6 +191,13 @@ int main(int argc, char **argv)
 			continue;
 		}
 
+		if (!strcmp(argv[n], "--ucp")) {
+			debug_printf(" %s %s", argv[n], argv[n+1]);
+			key.ucp_enables = strtol(argv[n+1], NULL, 0);
+			n += 2;
+			continue;
+		}
+
 		if (!strcmp(argv[n], "--help")) {
 			print_usage();
 			return 0;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 17bac4106e9..d72464fb5a5 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -127,7 +127,8 @@ struct ir3_compile {
 static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
 static struct ir3_block * get_block(struct ir3_compile *ctx, nir_block *nblock);
 
-static struct nir_shader *to_nir(const struct tgsi_token *tokens)
+static struct nir_shader *to_nir(const struct tgsi_token *tokens,
+		struct ir3_shader_variant *so)
 {
 	struct nir_shader_compiler_options options = {
 			.lower_fpow = true,
@@ -149,6 +150,11 @@ static struct nir_shader *to_nir(const struct tgsi_token *tokens)
 
 	nir_opt_global_to_local(s);
 	nir_convert_to_ssa(s);
+	if (s->stage == MESA_SHADER_VERTEX) {
+		nir_lower_clip_vs(s, so->key.ucp_enables);
+	} else if (s->stage == MESA_SHADER_FRAGMENT) {
+		nir_lower_clip_fs(s, so->key.ucp_enables);
+	}
 	nir_lower_idiv(s);
 	nir_lower_load_const_to_scalar(s);
 
@@ -251,7 +257,7 @@ compile_init(struct ir3_compiler *compiler,
 	lowered_tokens = lower_tgsi(ctx, tokens, so);
 	if (!lowered_tokens)
 		lowered_tokens = tokens;
-	ctx->s = to_nir(lowered_tokens);
+	ctx->s = to_nir(lowered_tokens, so);
 
 	if (lowered_tokens != tokens)
 		free((void *)lowered_tokens);
@@ -263,7 +269,7 @@ compile_init(struct ir3_compiler *compiler,
 	 *    num_uniform * vec4  -  user consts
 	 *    4 * vec4            -  UBO addresses
 	 *    if (vertex shader) {
-	 *        1 * vec4        -  driver params (IR3_DP_*)
+	 *        N * vec4        -  driver params (IR3_DP_*)
 	 *        1 * vec4        -  stream-out addresses
 	 *    }
 	 *
@@ -275,8 +281,8 @@ compile_init(struct ir3_compiler *compiler,
 	so->first_immediate += 4;
 
 	if (so->type == SHADER_VERTEX) {
-		/* one (vec4) slot for driver params (see ir3_driver_param): */
-		so->first_immediate++;
+		/* driver params (see ir3_driver_param): */
+		so->first_immediate += IR3_DP_COUNT/4;  /* convert to vec4 */
 		/* one (vec4) slot for stream-output base addresses: */
 		so->first_immediate++;
 	}
@@ -828,7 +834,9 @@ static struct ir3_instruction *
 create_driver_param(struct ir3_compile *ctx, enum ir3_driver_param dp)
 {
 	/* first four vec4 sysval's reserved for UBOs: */
-	unsigned r = regid(ctx->so->first_driver_param + 4, dp);
+	/* NOTE: dp is a scalar index, but there can be >4 dp components: */
+	unsigned n = ctx->so->first_driver_param + IR3_DRIVER_PARAM_OFF;
+	unsigned r = regid(n + dp / 4, dp % 4);
 	return create_uniform(ctx, r);
 }
 
@@ -1199,7 +1207,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
 	struct ir3_block *b = ctx->block;
 	struct ir3_instruction *addr, *src0, *src1;
 	/* UBO addresses are the first driver params: */
-	unsigned ubo = regid(ctx->so->first_driver_param, 0);
+	unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0);
 	unsigned off = intr->const_index[0];
 
 	/* First src is ubo index, which could either be an immed or not: */
@@ -1459,6 +1467,12 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 		}
 		dst[0] = ctx->instance_id;
 		break;
+	case nir_intrinsic_load_user_clip_plane:
+		for (int i = 0; i < intr->num_components; i++) {
+			unsigned n = idx * 4 + i;
+			dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n);
+		}
+		break;
 	case nir_intrinsic_discard_if:
 	case nir_intrinsic_discard: {
 		struct ir3_instruction *cond, *kill;
@@ -2066,7 +2080,7 @@ emit_stream_out(struct ir3_compile *ctx)
 		unsigned stride = strmout->stride[i];
 		struct ir3_instruction *base, *off;
 
-		base = create_uniform(ctx, regid(v->first_driver_param + 5, i));
+		base = create_uniform(ctx, regid(v->first_driver_param + IR3_TFBOS_OFF, i));
 
 		/* 24-bit should be enough: */
 		off = ir3_MUL_U(ctx->block, vtxcnt, 0,
@@ -2250,6 +2264,8 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
 		case VARYING_SLOT_BFC0:
 		case VARYING_SLOT_BFC1:
 		case VARYING_SLOT_FOGC:
+		case VARYING_SLOT_CLIP_DIST0:
+		case VARYING_SLOT_CLIP_DIST1:
 			break;
 		default:
 			if (slot >= VARYING_SLOT_VAR0)
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index 7b250509135..7b565332256 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -501,7 +501,7 @@ static void
 emit_ubos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
 		struct fd_constbuf_stateobj *constbuf)
 {
-	uint32_t offset = v->first_driver_param;  /* UBOs after user consts */
+	uint32_t offset = v->first_driver_param + IR3_UBOS_OFF;
 	if (v->constlen > offset) {
 		struct fd_context *ctx = fd_context(v->shader->pctx);
 		uint32_t params = MIN2(4, v->constlen - offset) * 4;
@@ -554,7 +554,8 @@ emit_immediates(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
 static void
 emit_tfbos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
 {
-	uint32_t offset = v->first_driver_param + 5;  /* streamout addresses after driver-params*/
+	/* streamout addresses after driver-params: */
+	uint32_t offset = v->first_driver_param + IR3_TFBOS_OFF;
 	if (v->constlen > offset) {
 		struct fd_context *ctx = fd_context(v->shader->pctx);
 		struct fd_streamout_stateobj *so = &ctx->streamout;
@@ -657,17 +658,33 @@ ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
 	/* emit driver params every time: */
 	/* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
 	if (info && (v->type == SHADER_VERTEX)) {
-		uint32_t offset = v->first_driver_param + 4;  /* driver params after UBOs */
+		uint32_t offset = v->first_driver_param + IR3_DRIVER_PARAM_OFF;
 		if (v->constlen >= offset) {
-			uint32_t vertex_params[4] = {
+			uint32_t vertex_params[IR3_DP_COUNT] = {
 				[IR3_DP_VTXID_BASE] = info->indexed ?
 						info->index_bias : info->start,
 				[IR3_DP_VTXCNT_MAX] = max_tf_vtx(v),
 			};
+			/* if no user-clip-planes, we don't need to emit the
+			 * entire thing:
+			 */
+			uint32_t vertex_params_size = 4;
+
+			if (v->key.ucp_enables) {
+				struct pipe_clip_state *ucp = &ctx->ucp;
+				unsigned pos = IR3_DP_UCP0_X;
+				for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) {
+					for (unsigned j = 0; j < 4; j++) {
+						vertex_params[pos] = fui(ucp->ucp[i][j]);
+						pos++;
+					}
+				}
+				vertex_params_size = ARRAY_SIZE(vertex_params);
+			}
 
 			fd_wfi(ctx, ring);
 			ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0,
-					ARRAY_SIZE(vertex_params), vertex_params, NULL);
+					vertex_params_size, vertex_params, NULL);
 
 			/* if needed, emit stream-out buffer addresses: */
 			if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) {
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 39b8864329b..6dc0ce1133f 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -39,8 +39,29 @@
 enum ir3_driver_param {
 	IR3_DP_VTXID_BASE = 0,
 	IR3_DP_VTXCNT_MAX = 1,
+	/* user-clip-plane components, up to 8x vec4's: */
+	IR3_DP_UCP0_X     = 4,
+	/* .... */
+	IR3_DP_UCP7_W     = 35,
+	IR3_DP_COUNT      = 36   /* must be aligned to vec4 */
 };
 
+/* Layout of constant registers:
+ *
+ *    num_uniform * vec4  -  user consts
+ *    4 * vec4            -  UBO addresses
+ *    if (vertex shader) {
+ *        N * vec4        -  driver params (IR3_DP_*)
+ *        1 * vec4        -  stream-out addresses
+ *    }
+ *
+ * TODO this could be made more dynamic, to at least skip sections
+ * that we don't need..
+ */
+#define IR3_UBOS_OFF         0  /* UBOs after user consts */
+#define IR3_DRIVER_PARAM_OFF 4  /* driver params after UBOs */
+#define IR3_TFBOS_OFF       (IR3_DRIVER_PARAM_OFF + IR3_DP_COUNT/4)
+
 /* Configuration key used to identify a shader variant.. different
  * shader variants can be used to implement features not supported
  * in hw (two sided color), binning-pass vertex shader, etc.
@@ -48,6 +69,11 @@ enum ir3_driver_param {
 struct ir3_shader_key {
 	union {
 		struct {
+			/*
+			 * Combined Vertex/Fragment shader parameters:
+			 */
+			unsigned ucp_enables : 8;
+
 			/* do we need to check {v,f}saturate_{s,t,r}? */
 			unsigned has_per_samp : 1;
 
-- 
cgit v1.2.3


From c970ec0577649e2f1b0532c2a62e279b784f8687 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Sat, 12 Sep 2015 11:15:32 -0400
Subject: freedreno/a4xx: wire up ucp support

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/a4xx/fd4_draw.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
index 2bd2ca23d54..025753c037e 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
@@ -123,6 +123,7 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
 			// TODO set .half_precision based on render target format,
 			// ie. float16 and smaller use half, float32 use full..
 			.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
+			.ucp_enables = ctx->rasterizer ? ctx->rasterizer->clip_plane_enable : 0,
 			.has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate),
 			.vsaturate_s = fd4_ctx->vsaturate_s,
 			.vsaturate_t = fd4_ctx->vsaturate_t,
-- 
cgit v1.2.3


From c70ed861722c45753ae9a24cf4c00643f0640e2f Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Wed, 16 Sep 2015 13:57:26 -0400
Subject: freedreno/ir3: add --gpu arg to cmdline compiler

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/ir3/ir3_cmdline.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
index cbf748a00df..e768e6133a8 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
@@ -95,6 +95,7 @@ static void print_usage(void)
 	printf("    --saturate-r MASK - bitmask of samplers to saturate R coord\n");
 	printf("    --stream-out      - enable stream-out (aka transform feedback)\n");
 	printf("    --ucp MASK        - bitmask of enabled user-clip-planes\n");
+	printf("    --gpu GPU_ID      - specify gpu-id (default 320)\n");
 	printf("    --help            - show this message\n");
 }
 
@@ -108,6 +109,7 @@ int main(int argc, char **argv)
 	struct ir3_shader_variant v;
 	struct ir3_shader s;
 	struct ir3_shader_key key = {};
+	unsigned gpu_id = 320;
 	const char *info;
 	void *ptr;
 	size_t size;
@@ -198,6 +200,13 @@ int main(int argc, char **argv)
 			continue;
 		}
 
+		if (!strcmp(argv[n], "--gpu")) {
+			debug_printf(" %s %s", argv[n], argv[n+1]);
+			gpu_id = strtol(argv[n+1], NULL, 0);
+			n += 2;
+			continue;
+		}
+
 		if (!strcmp(argv[n], "--help")) {
 			print_usage();
 			return 0;
@@ -240,7 +249,7 @@ int main(int argc, char **argv)
 	}
 
 	/* TODO cmdline option to target different gpus: */
-	compiler = ir3_compiler_create(320);
+	compiler = ir3_compiler_create(gpu_id);
 
 	info = "NIR compiler";
 	ret = ir3_compile_shader_nir(compiler, &v);
-- 
cgit v1.2.3


From ba78260b0fbd291618df63bc7a54be0a36148b40 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 17 Sep 2015 18:18:19 -0400
Subject: nir: some comment fixups

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
---
 src/glsl/nir/nir.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 916226791e6..9c091e8d84b 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1455,13 +1455,13 @@ typedef struct nir_shader_compiler_options {
 } nir_shader_compiler_options;
 
 typedef struct nir_shader {
-   /** hash table of name -> uniform nir_variable */
+   /** list of uniforms (nir_variable) */
    struct exec_list uniforms;
 
-   /** hash table of name -> input nir_variable */
+   /** list of inputs (nir_variable) */
    struct exec_list inputs;
 
-   /** hash table of name -> output nir_variable */
+   /** list of outputs (nir_variable) */
    struct exec_list outputs;
 
    /** Set of driver-specific options for the shader.
@@ -1471,10 +1471,10 @@ typedef struct nir_shader {
     */
    const struct nir_shader_compiler_options *options;
 
-   /** list of global variables in the shader */
+   /** list of global variables in the shader (nir_variable) */
    struct exec_list globals;
 
-   /** list of system value variables in the shader */
+   /** list of system value variables in the shade (nir_variable) */
    struct exec_list system_values;
 
    struct exec_list functions; /** < list of nir_function */
-- 
cgit v1.2.3


From 5305603b9de52e59b9c5eec440f90a12cafa91a1 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 17 Sep 2015 18:18:45 -0400
Subject: nir/print: print variable names

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
---
 src/glsl/nir/nir_print.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index ca8059fc920..6e86140ed9d 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -430,6 +430,36 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
    }
 
    fprintf(fp, ")");
+
+   if (!state->shader)
+      return;
+
+   struct exec_list *var_list = NULL;
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_uniform:
+   case nir_intrinsic_load_uniform_indirect:
+      var_list = &state->shader->uniforms;
+      break;
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_input_indirect:
+      var_list = &state->shader->inputs;
+      break;
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_output_indirect:
+      var_list = &state->shader->outputs;
+      break;
+   default:
+      return;
+   }
+
+   foreach_list_typed(nir_variable, var, node, var_list) {
+      if ((var->data.driver_location == instr->const_index[0]) &&
+          var->name) {
+         fprintf(fp, "\t/* %s */", var->name);
+         break;
+      }
+   }
 }
 
 static void
-- 
cgit v1.2.3


From 7c72f593adc0bbe9570236636abf64b4fc18d88d Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 17 Sep 2015 21:06:11 -0400
Subject: nir: really actually fix comment this time

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/glsl/nir/nir.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 9c091e8d84b..83e31533e8b 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1474,7 +1474,7 @@ typedef struct nir_shader {
    /** list of global variables in the shader (nir_variable) */
    struct exec_list globals;
 
-   /** list of system value variables in the shade (nir_variable) */
+   /** list of system value variables in the shader (nir_variable) */
    struct exec_list system_values;
 
    struct exec_list functions; /** < list of nir_function */
-- 
cgit v1.2.3


From 2e4ab489b5963e57df01fa20bb95d67139de8b75 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 17 Sep 2015 21:07:41 -0400
Subject: nir/builder: fix c++11 compiler warning

Fixes:

   In file included from nir/nir_lower_samplers.cpp:27:0:
   nir/nir_builder.h: In function 'nir_ssa_def* nir_channel(nir_builder*, nir_ssa_def*, int)':
   nir/nir_builder.h:222:37: warning: narrowing conversion of 'c' from 'int' to 'unsigned int' inside { } is ill-formed in C++11 [-Wnarrowing]
       unsigned swizzle[4] = {c, c, c, c};

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/glsl/nir/nir_builder.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h
index 44134cf4c29..47533302a6d 100644
--- a/src/glsl/nir/nir_builder.h
+++ b/src/glsl/nir/nir_builder.h
@@ -217,7 +217,7 @@ nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4],
 }
 
 static inline nir_ssa_def *
-nir_channel(nir_builder *b, nir_ssa_def *def, int c)
+nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
 {
    unsigned swizzle[4] = {c, c, c, c};
    return nir_swizzle(b, def, swizzle, 1, false);
-- 
cgit v1.2.3


From afa1efdc8522d987e3af7c7a6272021caa33eb82 Mon Sep 17 00:00:00 2001
From: Tapani Pälli <tapani.palli@intel.com>
Date: Tue, 15 Sep 2015 09:17:20 +0300
Subject: mesa: fix errors when reading depth with glReadPixels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

OpenGL ES 3.0 spec 3.7.2 "Transfer of Pixel Rectangles" specifies
DEPTH_COMPONENT, UNSIGNED_INT as a valid format/type pair; validation
of the internal format is checked by is_float_depth().

Fix regression caused by 81d2fd91a90e5b2fd9fd74792a7a7c329f0e4d29 in:
   ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels

Test uses GL_DEPTH_COMPONENT, UNSIGNED_INT only when GL_NV_read_depth
extension is present.

v2: change check in _mesa_error_check_format_and_type to be explicit
    for ES 2.0+, desktop OpenGL does not allow this behaviour + uses
    this function for both glReadPixels and glDrawPixels validation.
    (No Piglit regressions seen with v2.)

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com> [v1]
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92009
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
---
 src/mesa/main/glformats.c | 7 ++++++-
 src/mesa/main/readpix.c   | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index c1d3c7df488..eb4a02acd17 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -494,7 +494,8 @@ _mesa_bytes_per_pixel(GLenum format, GLenum type)
       else
          return -1;
    case GL_UNSIGNED_INT_24_8_EXT:
-      if (format == GL_DEPTH_STENCIL_EXT)
+      if (format == GL_DEPTH_COMPONENT ||
+          format == GL_DEPTH_STENCIL_EXT)
          return sizeof(GLuint);
       else
          return -1;
@@ -1789,6 +1790,10 @@ _mesa_error_check_format_and_type(const struct gl_context *ctx,
       return GL_INVALID_OPERATION;
 
    case GL_UNSIGNED_INT_24_8:
+      /* Depth buffer OK to read in OpenGL ES (NV_read_depth). */
+      if (ctx->API == API_OPENGLES2 && format == GL_DEPTH_COMPONENT)
+         return GL_NO_ERROR;
+
       if (format != GL_DEPTH_STENCIL) {
          return GL_INVALID_OPERATION;
       }
diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
index 76ef8ee9487..81bb912beb6 100644
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -963,6 +963,7 @@ read_pixels_es3_error_check(GLenum format, GLenum type,
             return GL_NO_ERROR;
          break;
       case GL_UNSIGNED_SHORT:
+      case GL_UNSIGNED_INT:
       case GL_UNSIGNED_INT_24_8:
          if (!is_float_depth)
             return GL_NO_ERROR;
-- 
cgit v1.2.3


From f7ca52dd6d8b4b0fcdf01d8def5edf913ece6861 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Fri, 18 Sep 2015 11:02:34 +0200
Subject: i965/fs: Fix comparison between signed and unsigned integer
 expressions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

brw_fs_visitor.cpp: In member function 'void fs_visitor::emit_urb_writes()':
brw_fs_visitor.cpp:977:58: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]

Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index b85b52b38d8..5bf6449fa13 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -974,9 +974,9 @@ fs_visitor::emit_urb_writes()
                sources[length++] = reg;
             }
          } else {
-            for (int i = 0; i < output_components[varying]; i++)
+            for (unsigned i = 0; i < output_components[varying]; i++)
                sources[length++] = offset(this->outputs[varying], bld, i);
-            for (int i = output_components[varying]; i < 4; i++)
+            for (unsigned i = output_components[varying]; i < 4; i++)
                sources[length++] = fs_reg(0);
          }
          break;
-- 
cgit v1.2.3


From 06d31dceae611b7d5c11442aa9bf8178067bcb62 Mon Sep 17 00:00:00 2001
From: Alejandro Piñeiro <apinheiro@igalia.com>
Date: Wed, 16 Sep 2015 10:26:55 +0200
Subject: i965/vec4: Change types as needed to propagate source modifiers using
 current instruction

SEL and MOV instructions, as long as they don't have source modifiers, are
just copying bits around.  So those kinds of instructions can be propagated
even if there are type mismatches. This is needed because NIR generates
integer SEL and MOV instructions whenever it doesn't know what else to
generate.

This commit adds support for copy propagation using the current instruction
as a reference.

Equivalent to commit 472ef9 but for vec4.

v2: include check for saturate, as Jason Ekstrand suggested
v3: check that the dst.type and the src type are the same, in order to
    solve (among others) the following deqp regression with v2:
    dEQP-GLES3.functional.shaders.operator.unary_operator.minus.lowp_uint_vertex

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 .../drivers/dri/i965/brw_vec4_copy_propagation.cpp | 30 ++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 5a15eb89766..1522eeabb1c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -248,6 +248,18 @@ try_constant_propagate(const struct brw_device_info *devinfo,
    return false;
 }
 
+static bool
+can_change_source_types(vec4_instruction *inst)
+{
+   return inst->dst.type == inst->src[0].type &&
+      !inst->src[0].abs && !inst->src[0].negate && !inst->saturate &&
+      (inst->opcode == BRW_OPCODE_MOV ||
+       (inst->opcode == BRW_OPCODE_SEL &&
+        inst->dst.type == inst->src[1].type &&
+        inst->predicate != BRW_PREDICATE_NONE &&
+        !inst->src[1].abs && !inst->src[1].negate));
+}
+
 static bool
 try_copy_propagate(const struct brw_device_info *devinfo,
                    vec4_instruction *inst,
@@ -308,7 +320,9 @@ try_copy_propagate(const struct brw_device_info *devinfo,
         value.swizzle != BRW_SWIZZLE_XYZW) && !inst->can_do_source_mods(devinfo))
       return false;
 
-   if (has_source_modifiers && value.type != inst->src[arg].type)
+   if (has_source_modifiers &&
+       value.type != inst->src[arg].type &&
+       !can_change_source_types(inst))
       return false;
 
    if (has_source_modifiers &&
@@ -362,7 +376,19 @@ try_copy_propagate(const struct brw_device_info *devinfo,
       }
    }
 
-   value.type = inst->src[arg].type;
+   if (has_source_modifiers &&
+       value.type != inst->src[arg].type) {
+      /* We are propagating source modifiers from a MOV with a different
+       * type.  If we got here, then we can just change the source and
+       * destination types of the instruction and keep going.
+       */
+      assert(can_change_source_types(inst));
+      for (int i = 0; i < 3; i++) {
+         inst->src[i].type = value.type;
+      }
+      inst->dst.type = value.type;
+   } else
+      value.type = inst->src[arg].type;
    inst->src[arg] = value;
    return true;
 }
-- 
cgit v1.2.3


From d9b9ff76f17ee36b87b2722fa2a19e1d9f036c26 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 17 Sep 2015 07:54:35 -0400
Subject: nir: rename nir_lower_tex_projector

Since the following patches will add additional tex-lowering related
functionality, which doesn't make sense to split out into a separate
pass (as they would require duplication of the projector lowering
logic), let's give this pass a more generic name.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/Makefile.sources              |   2 +-
 src/glsl/nir/nir.h                     |   2 +-
 src/glsl/nir/nir_lower_tex.c           | 137 +++++++++++++++++++++++++++++++++
 src/glsl/nir/nir_lower_tex_projector.c | 137 ---------------------------------
 src/mesa/drivers/dri/i965/brw_nir.c    |   2 +-
 5 files changed, 140 insertions(+), 140 deletions(-)
 create mode 100644 src/glsl/nir/nir_lower_tex.c
 delete mode 100644 src/glsl/nir/nir_lower_tex_projector.c

(limited to 'src')

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 5134ab73800..b539b45e432 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -45,7 +45,7 @@ NIR_FILES = \
 	nir/nir_lower_phis_to_scalar.c \
 	nir/nir_lower_samplers.cpp \
 	nir/nir_lower_system_values.c \
-	nir/nir_lower_tex_projector.c \
+	nir/nir_lower_tex.c \
 	nir/nir_lower_to_source_mods.c \
 	nir/nir_lower_vars_to_ssa.c \
 	nir/nir_lower_var_copies.c \
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 83e31533e8b..c484d8e81ce 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1836,7 +1836,7 @@ void nir_lower_samplers(nir_shader *shader,
                         const struct gl_shader_program *shader_program);
 
 void nir_lower_system_values(nir_shader *shader);
-void nir_lower_tex_projector(nir_shader *shader);
+void nir_lower_tex(nir_shader *shader);
 void nir_lower_idiv(nir_shader *shader);
 
 void nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables);
diff --git a/src/glsl/nir/nir_lower_tex.c b/src/glsl/nir/nir_lower_tex.c
new file mode 100644
index 00000000000..b5ac1b2647d
--- /dev/null
+++ b/src/glsl/nir/nir_lower_tex.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * This lowering pass converts the coordinate division for texture projection
+ * to be done in ALU instructions instead of asking the texture operation to
+ * do so.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+static bool
+nir_lower_tex_block(nir_block *block, void *void_state)
+{
+   nir_builder *b = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_tex)
+         continue;
+
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+      b->cursor = nir_before_instr(&tex->instr);
+
+      /* Find the projector in the srcs list, if present. */
+      unsigned proj_index;
+      for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
+         if (tex->src[proj_index].src_type == nir_tex_src_projector)
+            break;
+      }
+      if (proj_index == tex->num_srcs)
+         continue;
+      nir_ssa_def *inv_proj =
+         nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
+
+      /* Walk through the sources projecting the arguments. */
+      for (unsigned i = 0; i < tex->num_srcs; i++) {
+         switch (tex->src[i].src_type) {
+         case nir_tex_src_coord:
+         case nir_tex_src_comparitor:
+            break;
+         default:
+            continue;
+         }
+         nir_ssa_def *unprojected =
+            nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
+         nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
+
+         /* Array indices don't get projected, so make an new vector with the
+          * coordinate's array index untouched.
+          */
+         if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
+            switch (tex->coord_components) {
+            case 4:
+               projected = nir_vec4(b,
+                                    nir_channel(b, projected, 0),
+                                    nir_channel(b, projected, 1),
+                                    nir_channel(b, projected, 2),
+                                    nir_channel(b, unprojected, 3));
+               break;
+            case 3:
+               projected = nir_vec3(b,
+                                    nir_channel(b, projected, 0),
+                                    nir_channel(b, projected, 1),
+                                    nir_channel(b, unprojected, 2));
+               break;
+            case 2:
+               projected = nir_vec2(b,
+                                    nir_channel(b, projected, 0),
+                                    nir_channel(b, unprojected, 1));
+               break;
+            default:
+               unreachable("bad texture coord count for array");
+               break;
+            }
+         }
+
+         nir_instr_rewrite_src(&tex->instr,
+                               &tex->src[i].src,
+                               nir_src_for_ssa(projected));
+      }
+
+      /* Now move the later tex sources down the array so that the projector
+       * disappears.
+       */
+      nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src,
+                            NIR_SRC_INIT);
+      for (unsigned i = proj_index + 1; i < tex->num_srcs; i++) {
+         tex->src[i-1].src_type = tex->src[i].src_type;
+         nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
+      }
+      tex->num_srcs--;
+   }
+
+   return true;
+}
+
+static void
+nir_lower_tex_impl(nir_function_impl *impl)
+{
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   nir_foreach_block(impl, nir_lower_tex_block, &b);
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+}
+
+void
+nir_lower_tex(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         nir_lower_tex_impl(overload->impl);
+   }
+}
diff --git a/src/glsl/nir/nir_lower_tex_projector.c b/src/glsl/nir/nir_lower_tex_projector.c
deleted file mode 100644
index 9afa42f23a9..00000000000
--- a/src/glsl/nir/nir_lower_tex_projector.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright © 2015 Broadcom
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/*
- * This lowering pass converts the coordinate division for texture projection
- * to be done in ALU instructions instead of asking the texture operation to
- * do so.
- */
-
-#include "nir.h"
-#include "nir_builder.h"
-
-static bool
-nir_lower_tex_projector_block(nir_block *block, void *void_state)
-{
-   nir_builder *b = void_state;
-
-   nir_foreach_instr_safe(block, instr) {
-      if (instr->type != nir_instr_type_tex)
-         continue;
-
-      nir_tex_instr *tex = nir_instr_as_tex(instr);
-      b->cursor = nir_before_instr(&tex->instr);
-
-      /* Find the projector in the srcs list, if present. */
-      unsigned proj_index;
-      for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
-         if (tex->src[proj_index].src_type == nir_tex_src_projector)
-            break;
-      }
-      if (proj_index == tex->num_srcs)
-         continue;
-      nir_ssa_def *inv_proj =
-         nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
-
-      /* Walk through the sources projecting the arguments. */
-      for (unsigned i = 0; i < tex->num_srcs; i++) {
-         switch (tex->src[i].src_type) {
-         case nir_tex_src_coord:
-         case nir_tex_src_comparitor:
-            break;
-         default:
-            continue;
-         }
-         nir_ssa_def *unprojected =
-            nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
-         nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
-
-         /* Array indices don't get projected, so make an new vector with the
-          * coordinate's array index untouched.
-          */
-         if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
-            switch (tex->coord_components) {
-            case 4:
-               projected = nir_vec4(b,
-                                    nir_channel(b, projected, 0),
-                                    nir_channel(b, projected, 1),
-                                    nir_channel(b, projected, 2),
-                                    nir_channel(b, unprojected, 3));
-               break;
-            case 3:
-               projected = nir_vec3(b,
-                                    nir_channel(b, projected, 0),
-                                    nir_channel(b, projected, 1),
-                                    nir_channel(b, unprojected, 2));
-               break;
-            case 2:
-               projected = nir_vec2(b,
-                                    nir_channel(b, projected, 0),
-                                    nir_channel(b, unprojected, 1));
-               break;
-            default:
-               unreachable("bad texture coord count for array");
-               break;
-            }
-         }
-
-         nir_instr_rewrite_src(&tex->instr,
-                               &tex->src[i].src,
-                               nir_src_for_ssa(projected));
-      }
-
-      /* Now move the later tex sources down the array so that the projector
-       * disappears.
-       */
-      nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src,
-                            NIR_SRC_INIT);
-      for (unsigned i = proj_index + 1; i < tex->num_srcs; i++) {
-         tex->src[i-1].src_type = tex->src[i].src_type;
-         nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
-      }
-      tex->num_srcs--;
-   }
-
-   return true;
-}
-
-static void
-nir_lower_tex_projector_impl(nir_function_impl *impl)
-{
-   nir_builder b;
-   nir_builder_init(&b, impl);
-
-   nir_foreach_block(impl, nir_lower_tex_projector_block, &b);
-
-   nir_metadata_preserve(impl, nir_metadata_block_index |
-                               nir_metadata_dominance);
-}
-
-void
-nir_lower_tex_projector(nir_shader *shader)
-{
-   nir_foreach_overload(shader, overload) {
-      if (overload->impl)
-         nir_lower_tex_projector_impl(overload->impl);
-   }
-}
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 9a0bbb06847..0d5b6dd7291 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -96,7 +96,7 @@ brw_create_nir(struct brw_context *brw,
    nir_lower_global_vars_to_local(nir);
    nir_validate_shader(nir);
 
-   nir_lower_tex_projector(nir);
+   nir_lower_tex(nir);
    nir_validate_shader(nir);
 
    nir_normalize_cubemap_coords(nir);
-- 
cgit v1.2.3


From f83ba7bc41a381a0e52c456431dbe1a15176ffb4 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Wed, 16 Sep 2015 12:53:12 -0400
Subject: nir/lower_tex: split out project_src() helper

Split this out to reduce noise in later patches.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/nir_lower_tex.c | 146 +++++++++++++++++++++++--------------------
 1 file changed, 77 insertions(+), 69 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_lower_tex.c b/src/glsl/nir/nir_lower_tex.c
index b5ac1b2647d..b3efb972cfe 100644
--- a/src/glsl/nir/nir_lower_tex.c
+++ b/src/glsl/nir/nir_lower_tex.c
@@ -30,6 +30,82 @@
 #include "nir.h"
 #include "nir_builder.h"
 
+static void
+project_src(nir_builder *b, nir_tex_instr *tex)
+{
+   /* Find the projector in the srcs list, if present. */
+   unsigned proj_index;
+   for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
+      if (tex->src[proj_index].src_type == nir_tex_src_projector)
+         break;
+   }
+   if (proj_index == tex->num_srcs)
+      return;
+
+   b->cursor = nir_before_instr(&tex->instr);
+
+   nir_ssa_def *inv_proj =
+      nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
+
+   /* Walk through the sources projecting the arguments. */
+   for (unsigned i = 0; i < tex->num_srcs; i++) {
+      switch (tex->src[i].src_type) {
+      case nir_tex_src_coord:
+      case nir_tex_src_comparitor:
+         break;
+      default:
+         continue;
+      }
+      nir_ssa_def *unprojected =
+         nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
+      nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
+
+      /* Array indices don't get projected, so make an new vector with the
+       * coordinate's array index untouched.
+       */
+      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
+         switch (tex->coord_components) {
+         case 4:
+            projected = nir_vec4(b,
+                                 nir_channel(b, projected, 0),
+                                 nir_channel(b, projected, 1),
+                                 nir_channel(b, projected, 2),
+                                 nir_channel(b, unprojected, 3));
+            break;
+         case 3:
+            projected = nir_vec3(b,
+                                 nir_channel(b, projected, 0),
+                                 nir_channel(b, projected, 1),
+                                 nir_channel(b, unprojected, 2));
+            break;
+         case 2:
+            projected = nir_vec2(b,
+                                 nir_channel(b, projected, 0),
+                                 nir_channel(b, unprojected, 1));
+            break;
+         default:
+            unreachable("bad texture coord count for array");
+            break;
+         }
+      }
+
+      nir_instr_rewrite_src(&tex->instr,
+                            &tex->src[i].src,
+                            nir_src_for_ssa(projected));
+   }
+
+   /* Now move the later tex sources down the array so that the projector
+    * disappears.
+    */
+   nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src,
+                         NIR_SRC_INIT);
+   for (unsigned i = proj_index + 1; i < tex->num_srcs; i++) {
+      tex->src[i-1].src_type = tex->src[i].src_type;
+      nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
+   }
+   tex->num_srcs--;
+}
+
 static bool
 nir_lower_tex_block(nir_block *block, void *void_state)
 {
@@ -40,76 +116,8 @@ nir_lower_tex_block(nir_block *block, void *void_state)
          continue;
 
       nir_tex_instr *tex = nir_instr_as_tex(instr);
-      b->cursor = nir_before_instr(&tex->instr);
 
-      /* Find the projector in the srcs list, if present. */
-      unsigned proj_index;
-      for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
-         if (tex->src[proj_index].src_type == nir_tex_src_projector)
-            break;
-      }
-      if (proj_index == tex->num_srcs)
-         continue;
-      nir_ssa_def *inv_proj =
-         nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
-
-      /* Walk through the sources projecting the arguments. */
-      for (unsigned i = 0; i < tex->num_srcs; i++) {
-         switch (tex->src[i].src_type) {
-         case nir_tex_src_coord:
-         case nir_tex_src_comparitor:
-            break;
-         default:
-            continue;
-         }
-         nir_ssa_def *unprojected =
-            nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
-         nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
-
-         /* Array indices don't get projected, so make an new vector with the
-          * coordinate's array index untouched.
-          */
-         if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
-            switch (tex->coord_components) {
-            case 4:
-               projected = nir_vec4(b,
-                                    nir_channel(b, projected, 0),
-                                    nir_channel(b, projected, 1),
-                                    nir_channel(b, projected, 2),
-                                    nir_channel(b, unprojected, 3));
-               break;
-            case 3:
-               projected = nir_vec3(b,
-                                    nir_channel(b, projected, 0),
-                                    nir_channel(b, projected, 1),
-                                    nir_channel(b, unprojected, 2));
-               break;
-            case 2:
-               projected = nir_vec2(b,
-                                    nir_channel(b, projected, 0),
-                                    nir_channel(b, unprojected, 1));
-               break;
-            default:
-               unreachable("bad texture coord count for array");
-               break;
-            }
-         }
-
-         nir_instr_rewrite_src(&tex->instr,
-                               &tex->src[i].src,
-                               nir_src_for_ssa(projected));
-      }
-
-      /* Now move the later tex sources down the array so that the projector
-       * disappears.
-       */
-      nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src,
-                            NIR_SRC_INIT);
-      for (unsigned i = proj_index + 1; i < tex->num_srcs; i++) {
-         tex->src[i-1].src_type = tex->src[i].src_type;
-         nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
-      }
-      tex->num_srcs--;
+      project_src(b, tex);
    }
 
    return true;
-- 
cgit v1.2.3


From faf5f174ddbc7680f6947ceababb94fdb552bcdb Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Wed, 16 Sep 2015 12:56:58 -0400
Subject: nir/lower_tex: support projector lowering per sampler type

Some hardware, such as adreno a3xx, supports txp on some but not all
sampler types.  In this case we want more fine grained control over
which texture projectors get lowered.

v2: split out nir_lower_tex_options struct to make it easier to
add the additional parameters coming in the following patches

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/nir.h                  | 13 ++++++++++++-
 src/glsl/nir/nir_lower_tex.c        | 26 ++++++++++++++++++--------
 src/mesa/drivers/dri/i965/brw_nir.c |  5 ++++-
 3 files changed, 34 insertions(+), 10 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index c484d8e81ce..4600fb0a744 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1836,7 +1836,18 @@ void nir_lower_samplers(nir_shader *shader,
                         const struct gl_shader_program *shader_program);
 
 void nir_lower_system_values(nir_shader *shader);
-void nir_lower_tex(nir_shader *shader);
+
+typedef struct nir_lower_tex_options {
+   /**
+    * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which
+    * sampler types a texture projector is lowered.
+    */
+   unsigned lower_txp;
+} nir_lower_tex_options;
+
+void nir_lower_tex(nir_shader *shader,
+                   const nir_lower_tex_options *options);
+
 void nir_lower_idiv(nir_shader *shader);
 
 void nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables);
diff --git a/src/glsl/nir/nir_lower_tex.c b/src/glsl/nir/nir_lower_tex.c
index b3efb972cfe..281fc9f3e8f 100644
--- a/src/glsl/nir/nir_lower_tex.c
+++ b/src/glsl/nir/nir_lower_tex.c
@@ -30,6 +30,11 @@
 #include "nir.h"
 #include "nir_builder.h"
 
+typedef struct {
+   nir_builder b;
+   const nir_lower_tex_options *options;
+} lower_tex_state;
+
 static void
 project_src(nir_builder *b, nir_tex_instr *tex)
 {
@@ -109,37 +114,42 @@ project_src(nir_builder *b, nir_tex_instr *tex)
 static bool
 nir_lower_tex_block(nir_block *block, void *void_state)
 {
-   nir_builder *b = void_state;
+   lower_tex_state *state = void_state;
+   nir_builder *b = &state->b;
 
    nir_foreach_instr_safe(block, instr) {
       if (instr->type != nir_instr_type_tex)
          continue;
 
       nir_tex_instr *tex = nir_instr_as_tex(instr);
+      bool lower_txp = !!(state->options->lower_txp & (1 << tex->sampler_dim));
+
+      if (lower_txp)
+         project_src(b, tex);
 
-      project_src(b, tex);
    }
 
    return true;
 }
 
 static void
-nir_lower_tex_impl(nir_function_impl *impl)
+nir_lower_tex_impl(nir_function_impl *impl, lower_tex_state *state)
 {
-   nir_builder b;
-   nir_builder_init(&b, impl);
+   nir_builder_init(&state->b, impl);
 
-   nir_foreach_block(impl, nir_lower_tex_block, &b);
+   nir_foreach_block(impl, nir_lower_tex_block, state);
 
    nir_metadata_preserve(impl, nir_metadata_block_index |
                                nir_metadata_dominance);
 }
 
 void
-nir_lower_tex(nir_shader *shader)
+nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
 {
+   lower_tex_state state;
+   state.options = options;
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         nir_lower_tex_impl(overload->impl);
+         nir_lower_tex_impl(overload->impl, &state);
    }
 }
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 0d5b6dd7291..b47b87e07dd 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -80,6 +80,9 @@ brw_create_nir(struct brw_context *brw,
    struct gl_context *ctx = &brw->ctx;
    const nir_shader_compiler_options *options =
       ctx->Const.ShaderCompilerOptions[stage].NirOptions;
+   static const nir_lower_tex_options tex_options = {
+      .lower_txp = ~0,
+   };
    struct gl_shader *shader = shader_prog ? shader_prog->_LinkedShaders[stage] : NULL;
    bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
    nir_shader *nir;
@@ -96,7 +99,7 @@ brw_create_nir(struct brw_context *brw,
    nir_lower_global_vars_to_local(nir);
    nir_validate_shader(nir);
 
-   nir_lower_tex(nir);
+   nir_lower_tex(nir, &tex_options);
    nir_validate_shader(nir);
 
    nir_normalize_cubemap_coords(nir);
-- 
cgit v1.2.3


From 1ce8060c25c7f2c7a54159fab6a6974c0ba182a8 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Wed, 16 Sep 2015 16:49:14 -0400
Subject: nir/lower_tex: support for lowering RECT textures

v2: comments/suggestions from Ilia and Eric, split out get_texture_size()
helper so we can use it in the next commit for clamping RECT textures.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/nir.h           |  7 ++++++
 src/glsl/nir/nir_lower_tex.c | 59 +++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 63 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 4600fb0a744..3c908b9f295 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1843,6 +1843,13 @@ typedef struct nir_lower_tex_options {
     * sampler types a texture projector is lowered.
     */
    unsigned lower_txp;
+
+   /**
+    * If true, lower rect textures to 2D, using txs to fetch the
+    * texture dimensions and dividing the texture coords by the
+    * texture dims to normalize.
+    */
+   bool lower_rect;
 } nir_lower_tex_options;
 
 void nir_lower_tex(nir_shader *shader,
diff --git a/src/glsl/nir/nir_lower_tex.c b/src/glsl/nir/nir_lower_tex.c
index 281fc9f3e8f..63f51bcbdc5 100644
--- a/src/glsl/nir/nir_lower_tex.c
+++ b/src/glsl/nir/nir_lower_tex.c
@@ -22,9 +22,13 @@
  */
 
 /*
- * This lowering pass converts the coordinate division for texture projection
- * to be done in ALU instructions instead of asking the texture operation to
- * do so.
+ * This lowering pass supports (as configured via nir_lower_tex_options)
+ * various texture related conversions:
+ *   + texture projector lowering: converts the coordinate division for
+ *     texture projection to be done in ALU instructions instead of
+ *     asking the texture operation to do so.
+ *   + lowering RECT: converts the un-normalized RECT texture coordinates
+ *     to normalized coordinates with txs plus ALU instructions
  */
 
 #include "nir.h"
@@ -111,6 +115,52 @@ project_src(nir_builder *b, nir_tex_instr *tex)
    tex->num_srcs--;
 }
 
+static nir_ssa_def *
+get_texture_size(nir_builder *b, nir_tex_instr *tex)
+{
+   b->cursor = nir_before_instr(&tex->instr);
+
+   /* RECT textures should not be array: */
+   assert(!tex->is_array);
+
+   nir_tex_instr *txs;
+
+   txs = nir_tex_instr_create(b->shader, 1);
+   txs->op = nir_texop_txs;
+   txs->sampler_dim = GLSL_SAMPLER_DIM_RECT;
+   txs->sampler_index = tex->sampler_index;
+
+   /* only single src, the lod: */
+   txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0));
+   txs->src[0].src_type = nir_tex_src_lod;
+
+   nir_ssa_dest_init(&txs->instr, &txs->dest, 2, NULL);
+   nir_builder_instr_insert(b, &txs->instr);
+
+   return nir_i2f(b, &txs->dest.ssa);
+}
+
+static void
+lower_rect(nir_builder *b, nir_tex_instr *tex)
+{
+   nir_ssa_def *txs = get_texture_size(b, tex);
+   nir_ssa_def *scale = nir_frcp(b, txs);
+
+   /* Walk through the sources normalizing the requested arguments. */
+   for (unsigned i = 0; i < tex->num_srcs; i++) {
+      if (tex->src[i].src_type != nir_tex_src_coord)
+         continue;
+
+      nir_ssa_def *coords =
+         nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
+      nir_instr_rewrite_src(&tex->instr,
+                            &tex->src[i].src,
+                            nir_src_for_ssa(nir_fmul(b, coords, scale)));
+   }
+
+   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+}
+
 static bool
 nir_lower_tex_block(nir_block *block, void *void_state)
 {
@@ -127,6 +177,9 @@ nir_lower_tex_block(nir_block *block, void *void_state)
       if (lower_txp)
          project_src(b, tex);
 
+      if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) &&
+          state->options->lower_rect)
+         lower_rect(b, tex);
    }
 
    return true;
-- 
cgit v1.2.3


From 3745c38425b3e1da5c94a5f900eb5fdc44da9439 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Fri, 18 Sep 2015 10:44:27 -0400
Subject: nir/lower_tex: add support to clamp texture coords

Some hardware needs to clamp texture coordinates to [0.0, 1.0] in the
shader to emulate GL_CLAMP.  This is added to lower_tex_proj since, in
the case of projected coords, the clamping needs to happen *after*
projection.

v2: comments/suggestions from Ilia and Eric, use txs to get texture size
and clamp RECT textures to their dimensions rather than [0.0, 1.0] to
avoid having to lower RECT textures to 2D.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/nir.h           | 18 ++++++++++
 src/glsl/nir/nir_lower_tex.c | 86 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 103 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 3c908b9f295..255d45585db 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1850,6 +1850,24 @@ typedef struct nir_lower_tex_options {
     * texture dims to normalize.
     */
    bool lower_rect;
+
+   /**
+    * To emulate certain texture wrap modes, this can be used
+    * to saturate the specified tex coord to [0.0, 1.0].  The
+    * bits are according to sampler #, ie. if, for example:
+    *
+    *   (conf->saturate_s & (1 << n))
+    *
+    * is true, then the s coord for sampler n is saturated.
+    *
+    * Note that clamping must happen *after* projector lowering
+    * so any projected texture sample instruction with a clamped
+    * coordinate gets automatically lowered, regardless of the
+    * 'lower_txp' setting.
+    */
+   unsigned saturate_s;
+   unsigned saturate_t;
+   unsigned saturate_r;
 } nir_lower_tex_options;
 
 void nir_lower_tex(nir_shader *shader,
diff --git a/src/glsl/nir/nir_lower_tex.c b/src/glsl/nir/nir_lower_tex.c
index 63f51bcbdc5..e2f095a5532 100644
--- a/src/glsl/nir/nir_lower_tex.c
+++ b/src/glsl/nir/nir_lower_tex.c
@@ -29,6 +29,10 @@
  *     asking the texture operation to do so.
  *   + lowering RECT: converts the un-normalized RECT texture coordinates
  *     to normalized coordinates with txs plus ALU instructions
+ *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
+ *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
+ *     Note that this automatically triggers texture projector lowering if
+ *     needed, since clamping must happen after projector lowering.
  */
 
 #include "nir.h"
@@ -161,6 +165,70 @@ lower_rect(nir_builder *b, nir_tex_instr *tex)
    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
 }
 
+static void
+saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
+{
+   b->cursor = nir_before_instr(&tex->instr);
+
+   /* Walk through the sources saturating the requested arguments. */
+   for (unsigned i = 0; i < tex->num_srcs; i++) {
+      if (tex->src[i].src_type != nir_tex_src_coord)
+         continue;
+
+      nir_ssa_def *src =
+         nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
+
+      /* split src into components: */
+      nir_ssa_def *comp[4];
+
+      for (unsigned j = 0; j < tex->coord_components; j++)
+         comp[j] = nir_channel(b, src, j);
+
+      /* clamp requested components, array index does not get clamped: */
+      unsigned ncomp = tex->coord_components;
+      if (tex->is_array)
+         ncomp--;
+
+      for (unsigned j = 0; j < ncomp; j++) {
+         if ((1 << j) & sat_mask) {
+            if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
+               /* non-normalized texture coords, so clamp to texture
+                * size rather than [0.0, 1.0]
+                */
+               nir_ssa_def *txs = get_texture_size(b, tex);
+               comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
+               comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
+            } else {
+               comp[j] = nir_fsat(b, comp[j]);
+            }
+         }
+      }
+
+      /* and move the result back into a single vecN: */
+      switch (tex->coord_components) {
+      case 4:
+         src = nir_vec4(b, comp[0], comp[1], comp[2], comp[3]);
+         break;
+      case 3:
+         src = nir_vec3(b, comp[0], comp[1], comp[2]);
+         break;
+      case 2:
+         src = nir_vec2(b, comp[0], comp[1]);
+         break;
+      case 1:
+         src = comp[0];
+         break;
+      default:
+         unreachable("bad texture coord count");
+         break;
+      }
+
+      nir_instr_rewrite_src(&tex->instr,
+                            &tex->src[i].src,
+                            nir_src_for_ssa(src));
+   }
+}
+
 static bool
 nir_lower_tex_block(nir_block *block, void *void_state)
 {
@@ -174,12 +242,28 @@ nir_lower_tex_block(nir_block *block, void *void_state)
       nir_tex_instr *tex = nir_instr_as_tex(instr);
       bool lower_txp = !!(state->options->lower_txp & (1 << tex->sampler_dim));
 
-      if (lower_txp)
+      /* mask of src coords to saturate (clamp): */
+      unsigned sat_mask = 0;
+
+      if ((1 << tex->sampler_index) & state->options->saturate_r)
+         sat_mask |= (1 << 2);    /* .z */
+      if ((1 << tex->sampler_index) & state->options->saturate_t)
+         sat_mask |= (1 << 1);    /* .y */
+      if ((1 << tex->sampler_index) & state->options->saturate_s)
+         sat_mask |= (1 << 0);    /* .x */
+
+      /* If we are clamping any coords, we must lower projector first
+       * as clamping happens *after* projection:
+       */
+      if (lower_txp || sat_mask)
          project_src(b, tex);
 
       if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) &&
           state->options->lower_rect)
          lower_rect(b, tex);
+
+      if (sat_mask)
+         saturate_src(b, tex, sat_mask);
    }
 
    return true;
-- 
cgit v1.2.3


From c71cb670ba786a4bc443d23e6aa4b866e069dcd2 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Wed, 16 Sep 2015 13:42:21 -0400
Subject: freedreno/ir3: lower txp/clamp in NIR

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 .../drivers/freedreno/ir3/ir3_compiler_nir.c       | 56 ++++++++++++----------
 1 file changed, 30 insertions(+), 26 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index d72464fb5a5..c609d3c77fd 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -127,10 +127,10 @@ struct ir3_compile {
 static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
 static struct ir3_block * get_block(struct ir3_compile *ctx, nir_block *nblock);
 
-static struct nir_shader *to_nir(const struct tgsi_token *tokens,
-		struct ir3_shader_variant *so)
+static struct nir_shader *to_nir(struct ir3_compile *ctx,
+		const struct tgsi_token *tokens, struct ir3_shader_variant *so)
 {
-	struct nir_shader_compiler_options options = {
+	static const nir_shader_compiler_options options = {
 			.lower_fpow = true,
 			.lower_fsat = true,
 			.lower_scmp = true,
@@ -138,8 +138,33 @@ static struct nir_shader *to_nir(const struct tgsi_token *tokens,
 			.lower_ffract = true,
 			.native_integers = true,
 	};
+	struct nir_lower_tex_options tex_options = {
+			.lower_rect = 0,
+	};
 	bool progress;
 
+	switch (so->type) {
+	case SHADER_FRAGMENT:
+	case SHADER_COMPUTE:
+		tex_options.saturate_s = so->key.fsaturate_s;
+		tex_options.saturate_t = so->key.fsaturate_t;
+		tex_options.saturate_r = so->key.fsaturate_r;
+		break;
+	case SHADER_VERTEX:
+		tex_options.saturate_s = so->key.vsaturate_s;
+		tex_options.saturate_t = so->key.vsaturate_t;
+		tex_options.saturate_r = so->key.vsaturate_r;
+		break;
+	}
+
+	if (ctx->compiler->gpu_id >= 400) {
+		/* a4xx seems to have *no* sam.p */
+		tex_options.lower_txp = ~0;  /* lower all txp */
+	} else {
+		/* a3xx just needs to avoid sam.p for 3d tex */
+		tex_options.lower_txp = (1 << GLSL_SAMPLER_DIM_3D);
+	}
+
 	struct nir_shader *s = tgsi_to_nir(tokens, &options);
 
 	if (fd_mesa_debug & FD_DBG_OPTMSGS) {
@@ -155,6 +180,7 @@ static struct nir_shader *to_nir(const struct tgsi_token *tokens,
 	} else if (s->stage == MESA_SHADER_FRAGMENT) {
 		nir_lower_clip_fs(s, so->key.ucp_enables);
 	}
+	nir_lower_tex(s, &tex_options);
 	nir_lower_idiv(s);
 	nir_lower_load_const_to_scalar(s);
 
@@ -196,28 +222,6 @@ lower_tgsi(struct ir3_compile *ctx, const struct tgsi_token *tokens,
 			.color_two_side = so->key.color_two_side,
 	};
 
-	switch (so->type) {
-	case SHADER_FRAGMENT:
-	case SHADER_COMPUTE:
-		lconfig.saturate_s = so->key.fsaturate_s;
-		lconfig.saturate_t = so->key.fsaturate_t;
-		lconfig.saturate_r = so->key.fsaturate_r;
-		break;
-	case SHADER_VERTEX:
-		lconfig.saturate_s = so->key.vsaturate_s;
-		lconfig.saturate_t = so->key.vsaturate_t;
-		lconfig.saturate_r = so->key.vsaturate_r;
-		break;
-	}
-
-	if (ctx->compiler->gpu_id >= 400) {
-		/* a4xx seems to have *no* sam.p */
-		lconfig.lower_TXP = ~0;  /* lower all txp */
-	} else {
-		/* a3xx just needs to avoid sam.p for 3d tex */
-		lconfig.lower_TXP = (1 << TGSI_TEXTURE_3D);
-	}
-
 	return tgsi_transform_lowering(&lconfig, tokens, &info);
 }
 
@@ -257,7 +261,7 @@ compile_init(struct ir3_compiler *compiler,
 	lowered_tokens = lower_tgsi(ctx, tokens, so);
 	if (!lowered_tokens)
 		lowered_tokens = tokens;
-	ctx->s = to_nir(lowered_tokens, so);
+	ctx->s = to_nir(ctx, lowered_tokens, so);
 
 	if (lowered_tokens != tokens)
 		free((void *)lowered_tokens);
-- 
cgit v1.2.3


From e4dfcdcbecee9d11709e85524222d748d0e27112 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Fri, 18 Sep 2015 13:23:36 -0400
Subject: nir/build: add nir_vec() helper

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---
 src/glsl/nir/nir_builder.h                    | 18 ++++++++++++++++++
 src/glsl/nir/nir_lower_load_const_to_scalar.c | 15 +--------------
 src/glsl/nir/nir_lower_tex.c                  | 18 +-----------------
 3 files changed, 20 insertions(+), 31 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h
index 47533302a6d..8db5fcf039d 100644
--- a/src/glsl/nir/nir_builder.h
+++ b/src/glsl/nir/nir_builder.h
@@ -173,6 +173,24 @@ nir_##op(nir_builder *build, nir_ssa_def *src0,                           \
 
 #include "nir_builder_opcodes.h"
 
+static inline nir_ssa_def *
+nir_vec(nir_builder *build, nir_ssa_def **comp, unsigned num_components)
+{
+   switch (num_components) {
+   case 4:
+      return nir_vec4(build, comp[0], comp[1], comp[2], comp[3]);
+   case 3:
+      return nir_vec3(build, comp[0], comp[1], comp[2]);
+   case 2:
+      return nir_vec2(build, comp[0], comp[1]);
+   case 1:
+      return comp[0];
+   default:
+      unreachable("bad component count");
+      return NULL;
+   }
+}
+
 /**
  * Similar to nir_fmov, but takes a nir_alu_src instead of a nir_ssa_def.
  */
diff --git a/src/glsl/nir/nir_lower_load_const_to_scalar.c b/src/glsl/nir/nir_lower_load_const_to_scalar.c
index 704f8cebfd8..84d0c1453cb 100644
--- a/src/glsl/nir/nir_lower_load_const_to_scalar.c
+++ b/src/glsl/nir/nir_lower_load_const_to_scalar.c
@@ -55,20 +55,7 @@ lower_load_const_instr_scalar(nir_load_const_instr *lower)
    }
 
    /* Batch things back together into a vector. */
-   nir_ssa_def *vec;
-   switch (lower->def.num_components) {
-   case 2:
-      vec = nir_vec2(&b, loads[0], loads[1]);
-      break;
-   case 3:
-      vec = nir_vec3(&b, loads[0], loads[1], loads[2]);
-      break;
-   case 4:
-      vec = nir_vec4(&b, loads[0], loads[1], loads[2], loads[3]);
-      break;
-   default:
-      unreachable("Unknown load_const component count.");
-   }
+   nir_ssa_def *vec = nir_vec(&b, loads, lower->def.num_components);
 
    /* Replace the old load with a reference to our reconstructed vector. */
    nir_ssa_def_rewrite_uses(&lower->def, nir_src_for_ssa(vec));
diff --git a/src/glsl/nir/nir_lower_tex.c b/src/glsl/nir/nir_lower_tex.c
index e2f095a5532..8aaa48ab568 100644
--- a/src/glsl/nir/nir_lower_tex.c
+++ b/src/glsl/nir/nir_lower_tex.c
@@ -205,23 +205,7 @@ saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
       }
 
       /* and move the result back into a single vecN: */
-      switch (tex->coord_components) {
-      case 4:
-         src = nir_vec4(b, comp[0], comp[1], comp[2], comp[3]);
-         break;
-      case 3:
-         src = nir_vec3(b, comp[0], comp[1], comp[2]);
-         break;
-      case 2:
-         src = nir_vec2(b, comp[0], comp[1]);
-         break;
-      case 1:
-         src = comp[0];
-         break;
-      default:
-         unreachable("bad texture coord count");
-         break;
-      }
+      src = nir_vec(b, comp, tex->coord_components);
 
       nir_instr_rewrite_src(&tex->instr,
                             &tex->src[i].src,
-- 
cgit v1.2.3


From e13ed3ffb4d5267e7b79eece448abf1db1438ea1 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 17 Sep 2015 13:17:08 -0400
Subject: nir: add two-sided-color lowering pass

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/glsl/Makefile.sources                |   1 +
 src/glsl/nir/nir.h                       |   2 +
 src/glsl/nir/nir_lower_two_sided_color.c | 208 +++++++++++++++++++++++++++++++
 3 files changed, 211 insertions(+)
 create mode 100644 src/glsl/nir/nir_lower_two_sided_color.c

(limited to 'src')

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index b539b45e432..1aaa062adea 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -47,6 +47,7 @@ NIR_FILES = \
 	nir/nir_lower_system_values.c \
 	nir/nir_lower_tex.c \
 	nir/nir_lower_to_source_mods.c \
+	nir/nir_lower_two_sided_color.c \
 	nir/nir_lower_vars_to_ssa.c \
 	nir/nir_lower_var_copies.c \
 	nir/nir_lower_vec_to_movs.c \
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 255d45585db..63b0b514c50 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1878,6 +1878,8 @@ void nir_lower_idiv(nir_shader *shader);
 void nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables);
 void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables);
 
+void nir_lower_two_sided_color(nir_shader *shader);
+
 void nir_lower_atomics(nir_shader *shader);
 void nir_lower_to_source_mods(nir_shader *shader);
 
diff --git a/src/glsl/nir/nir_lower_two_sided_color.c b/src/glsl/nir/nir_lower_two_sided_color.c
new file mode 100644
index 00000000000..131feef90af
--- /dev/null
+++ b/src/glsl/nir/nir_lower_two_sided_color.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+#define MAX_COLORS 2  /* VARYING_SLOT_COL0/COL1 */
+
+typedef struct {
+   nir_builder   b;
+   nir_shader   *shader;
+   nir_variable *face;
+   struct {
+      nir_variable *front;        /* COLn */
+      nir_variable *back;         /* BFCn */
+   } colors[MAX_COLORS];
+   int colors_count;
+} lower_2side_state;
+
+
+/* Lowering pass for fragment shaders to emulate two-sided-color.  For
+ * each COLOR input, a corresponding BCOLOR input is created, and a bcsel
+ * instruction is used to select the front or back color based on FACE.
+ */
+
+static nir_variable *
+create_input(nir_shader *shader, unsigned drvloc, gl_varying_slot slot)
+{
+   nir_variable *var = rzalloc(shader, nir_variable);
+
+   var->data.driver_location = drvloc;
+   var->type = glsl_vec4_type();
+   var->data.mode = nir_var_shader_in;
+   var->name = ralloc_asprintf(var, "in_%d", drvloc);
+   var->data.index = 0;
+   var->data.location = slot;
+
+   exec_list_push_tail(&shader->inputs, &var->node);
+
+   return var;
+}
+
+static nir_ssa_def *
+load_input(nir_builder *b, nir_variable *in)
+{
+   nir_intrinsic_instr *load;
+
+   load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
+   load->num_components = 4;
+   load->const_index[0] = in->data.driver_location;
+   nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
+   nir_builder_instr_insert(b, &load->instr);
+
+   return &load->dest.ssa;
+}
+
+static int
+setup_inputs(lower_2side_state *state)
+{
+   int maxloc = -1;
+
+   /* find color/face inputs: */
+   foreach_list_typed(nir_variable, var, node, &state->shader->inputs) {
+      int loc = var->data.driver_location;
+
+      /* keep track of the last used driver-location.. we'll be
+       * appending FACE/BFCn after the last existing input:
+       */
+      maxloc = MAX2(maxloc, loc);
+
+      switch (var->data.location) {
+      case VARYING_SLOT_COL0:
+      case VARYING_SLOT_COL1:
+         assert(state->colors_count < ARRAY_SIZE(state->colors));
+         state->colors[state->colors_count].front = var;
+         state->colors_count++;
+         break;
+      case VARYING_SLOT_FACE:
+         state->face = var;
+         break;
+      }
+   }
+
+   /* if we don't have any color inputs, nothing to do: */
+   if (state->colors_count == 0)
+      return -1;
+
+   /* if we don't already have one, insert a FACE input: */
+   if (!state->face) {
+      state->face = create_input(state->shader, ++maxloc, VARYING_SLOT_FACE);
+      state->face->data.interpolation = INTERP_QUALIFIER_FLAT;
+   }
+
+   /* add required back-face color inputs: */
+   for (int i = 0; i < state->colors_count; i++) {
+      gl_varying_slot slot;
+
+      if (state->colors[i].front->data.location == VARYING_SLOT_COL0)
+         slot = VARYING_SLOT_BFC0;
+      else
+         slot = VARYING_SLOT_BFC1;
+
+      state->colors[i].back = create_input(state->shader, ++maxloc, slot);
+   }
+
+   return 0;
+}
+
+static bool
+nir_lower_two_sided_color_block(nir_block *block, void *void_state)
+{
+   lower_2side_state *state = void_state;
+   nir_builder *b = &state->b;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+      if (intr->intrinsic != nir_intrinsic_load_input)
+         continue;
+
+      int idx;
+      for (idx = 0; idx < state->colors_count; idx++) {
+         unsigned drvloc =
+            state->colors[idx].front->data.driver_location;
+         if (intr->const_index[0] == drvloc) {
+            break;
+         }
+      }
+
+      if (idx == state->colors_count)
+         continue;
+
+      /* replace load_input(COLn) with
+       * bcsel(load_input(FACE).x < 0.0, load_input(BFCn), load_input(COLn))
+       */
+      b->cursor = nir_before_instr(&intr->instr);
+      nir_ssa_def *face  = nir_channel(b, load_input(b, state->face), 0);
+      nir_ssa_def *front = load_input(b, state->colors[idx].front);
+      nir_ssa_def *back  = load_input(b, state->colors[idx].back);
+      nir_ssa_def *cond  = nir_flt(b, face, nir_imm_float(b, 0.0));
+      nir_ssa_def *color = nir_bcsel(b, cond, back, front);
+
+      assert(intr->dest.is_ssa);
+      nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(color));
+   }
+
+   return true;
+}
+
+static void
+nir_lower_two_sided_color_impl(nir_function_impl *impl,
+                               lower_2side_state *state)
+{
+   nir_builder *b = &state->b;
+
+   nir_builder_init(b, impl);
+
+   nir_foreach_block(impl, nir_lower_two_sided_color_block, state);
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+}
+
+void
+nir_lower_two_sided_color(nir_shader *shader)
+{
+   lower_2side_state state = {
+      .shader = shader,
+   };
+
+   if (shader->stage != MESA_SHADER_FRAGMENT)
+      return;
+
+   if (setup_inputs(&state) != 0)
+      return;
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         nir_lower_two_sided_color_impl(overload->impl, &state);
+   }
+
+}
-- 
cgit v1.2.3


From 9ffc1049cae07e4e2d2dc8f4d1f5f113b4b1fdc4 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 17 Sep 2015 13:35:33 -0400
Subject: freedreno/ir3: use nir two-sided-color lowering

With this, we completely switch over to nir lowering passes instead of
tgsi_lowering.  So one step closer to supporting direct glsl or spirv to
nir support for freedreno a3xx/a4xx.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 .../drivers/freedreno/ir3/ir3_compiler_nir.c       | 24 +++-------------------
 1 file changed, 3 insertions(+), 21 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index c609d3c77fd..7eddbdd3825 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -181,6 +181,8 @@ static struct nir_shader *to_nir(struct ir3_compile *ctx,
 		nir_lower_clip_fs(s, so->key.ucp_enables);
 	}
 	nir_lower_tex(s, &tex_options);
+	if (so->key.color_two_side)
+		nir_lower_two_sided_color(s);
 	nir_lower_idiv(s);
 	nir_lower_load_const_to_scalar(s);
 
@@ -212,26 +214,12 @@ static struct nir_shader *to_nir(struct ir3_compile *ctx,
 	return s;
 }
 
-/* TODO nir doesn't lower everything for us yet, but ideally it would: */
-static const struct tgsi_token *
-lower_tgsi(struct ir3_compile *ctx, const struct tgsi_token *tokens,
-		struct ir3_shader_variant *so)
-{
-	struct tgsi_shader_info info;
-	struct tgsi_lowering_config lconfig = {
-			.color_two_side = so->key.color_two_side,
-	};
-
-	return tgsi_transform_lowering(&lconfig, tokens, &info);
-}
-
 static struct ir3_compile *
 compile_init(struct ir3_compiler *compiler,
 		struct ir3_shader_variant *so,
 		const struct tgsi_token *tokens)
 {
 	struct ir3_compile *ctx = rzalloc(NULL, struct ir3_compile);
-	const struct tgsi_token *lowered_tokens;
 
 	if (compiler->gpu_id >= 400) {
 		/* need special handling for "flat" */
@@ -258,13 +246,7 @@ compile_init(struct ir3_compiler *compiler,
 	ctx->block_ht = _mesa_hash_table_create(ctx,
 			_mesa_hash_pointer, _mesa_key_pointer_equal);
 
-	lowered_tokens = lower_tgsi(ctx, tokens, so);
-	if (!lowered_tokens)
-		lowered_tokens = tokens;
-	ctx->s = to_nir(ctx, lowered_tokens, so);
-
-	if (lowered_tokens != tokens)
-		free((void *)lowered_tokens);
+	ctx->s = to_nir(ctx, tokens, so);
 
 	so->first_driver_param = so->first_immediate = ctx->s->num_uniforms;
 
-- 
cgit v1.2.3


From c228514c72cb2fd5fb9e510808e29204fc9e7ae1 Mon Sep 17 00:00:00 2001
From: Marcin Ślusarz <marcin.slusarz@gmail.com>
Date: Sat, 19 Sep 2015 19:17:34 +0200
Subject: dri/common: use sysconfdir when looking for drirc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Useful when locally installed mesa has more quirks than the system one.

Signed-off-by: Marcin Ślusarz <marcin.slusarz@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 docs/autoconf.html                      | 9 ++++++++-
 src/mesa/drivers/dri/common/Makefile.am | 1 +
 src/mesa/drivers/dri/common/xmlconfig.c | 6 +++++-
 3 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/docs/autoconf.html b/docs/autoconf.html
index 2ef8c63dbee..5c29e5ee2c6 100644
--- a/docs/autoconf.html
+++ b/docs/autoconf.html
@@ -87,6 +87,13 @@ created in a <code>lib64</code> directory at the top of the Mesa source
 tree.</p>
 </dd>
 
+<dt><code>--sysconfdir=DIR</code></dt>
+<dd><p>This option specifies the directory where the configuration
+files will be installed. The default is <code>${prefix}/etc</code>.
+Currently there's only one config file provided when dri drivers are
+enabled - it's <code>drirc</code>.</p>
+</dd>
+
 <dt><code>--enable-static, --disable-shared</code></dt>
 <dd><p>By default, Mesa
 will build shared libraries. Either of these options will force static
@@ -217,7 +224,7 @@ GLX.
 <dt><code>--with-expat=DIR</code>
 <dd><p><strong>DEPRECATED</strong>, use <code>PKG_CONFIG_PATH</code> instead.</p>
 <p>The DRI-enabled libGL uses expat to
-parse the DRI configuration files in <code>/etc/drirc</code> and
+parse the DRI configuration files in <code>${sysconfdir}/drirc</code> and
 <code>~/.drirc</code>. This option allows a specific expat installation
 to be used. For example, <code>--with-expat=/usr/local</code> will
 search for expat headers and libraries in <code>/usr/local/include</code>
diff --git a/src/mesa/drivers/dri/common/Makefile.am b/src/mesa/drivers/dri/common/Makefile.am
index b307f10f56b..ea52cb5a0d8 100644
--- a/src/mesa/drivers/dri/common/Makefile.am
+++ b/src/mesa/drivers/dri/common/Makefile.am
@@ -34,6 +34,7 @@ AM_CFLAGS = \
 	-I$(top_srcdir)/src/gallium/auxiliary \
 	$(LIBDRM_CFLAGS) \
 	$(DEFINES) \
+	-DSYSCONFDIR=\"$(sysconfdir)\"
 	$(VISIBILITY_CFLAGS)
 
 noinst_LTLIBRARIES = \
diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c
index f17693e739f..b8ab480ddfe 100644
--- a/src/mesa/drivers/dri/common/xmlconfig.c
+++ b/src/mesa/drivers/dri/common/xmlconfig.c
@@ -935,9 +935,13 @@ static void parseOneConfigFile (XML_Parser p) {
 #undef BUF_SIZE
 }
 
+#ifndef SYSCONFDIR
+#define SYSCONFDIR "/etc"
+#endif
+
 void driParseConfigFiles (driOptionCache *cache, const driOptionCache *info,
 			  int screenNum, const char *driverName) {
-    char *filenames[2] = {"/etc/drirc", NULL};
+    char *filenames[2] = { SYSCONFDIR "/drirc", NULL};
     char *home;
     uint32_t i;
     struct OptConfData userData;
-- 
cgit v1.2.3


From 31a5135cd793951191c4a2fb86a50253a9931454 Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Wed, 26 Aug 2015 16:25:44 -0700
Subject: mesa/formats: add MESA_LAYOUT_LATC

This enables us to predicate statements on a compressed format being
a type of LATC format. Also, remove the comment that lists the enum
(it was getting a tad long).

Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
---
 src/mesa/main/format_info.py |  2 +-
 src/mesa/main/formats.c      | 11 -----------
 src/mesa/main/formats.csv    |  8 ++++----
 src/mesa/main/formats.h      |  1 +
 src/mesa/main/texcompress.c  |  1 +
 5 files changed, 7 insertions(+), 16 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/format_info.py b/src/mesa/main/format_info.py
index 22eb5a734a6..50626a858c4 100644
--- a/src/mesa/main/format_info.py
+++ b/src/mesa/main/format_info.py
@@ -111,7 +111,7 @@ def get_channel_bits(fmat, chan_name):
             return 1 if fmat.has_channel('a') else 0
          else:
             return 0
-      elif fmat.layout == 'rgtc':
+      elif fmat.layout in ('rgtc', 'latc'):
          return 8 if fmat.has_channel(chan_name) else 0
       elif fmat.layout in ('etc1', 'etc2'):
          if fmat.name.endswith('_ALPHA1') and chan_name == 'a':
diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c
index 982103f3798..9f920075026 100644
--- a/src/mesa/main/formats.c
+++ b/src/mesa/main/formats.c
@@ -188,17 +188,6 @@ _mesa_get_format_max_bits(mesa_format format)
 
 /**
  * Return the layout type of the given format.
- * The return value will be one of:
- *    MESA_FORMAT_LAYOUT_ARRAY
- *    MESA_FORMAT_LAYOUT_PACKED
- *    MESA_FORMAT_LAYOUT_S3TC
- *    MESA_FORMAT_LAYOUT_RGTC
- *    MESA_FORMAT_LAYOUT_FXT1
- *    MESA_FORMAT_LAYOUT_ETC1
- *    MESA_FORMAT_LAYOUT_ETC2
- *    MESA_FORMAT_LAYOUT_BPTC
- *    MESA_FORMAT_LAYOUT_ASTC
- *    MESA_FORMAT_LAYOUT_OTHER
  */
 extern enum mesa_format_layout
 _mesa_get_format_layout(mesa_format format)
diff --git a/src/mesa/main/formats.csv b/src/mesa/main/formats.csv
index 11228e0f3df..529de31212c 100644
--- a/src/mesa/main/formats.csv
+++ b/src/mesa/main/formats.csv
@@ -294,10 +294,10 @@ MESA_FORMAT_RG_RGTC2_UNORM                , rgtc  , 4, 4, x128,     ,     ,
 MESA_FORMAT_RG_RGTC2_SNORM                , rgtc  , 4, 4, x128,     ,     ,     , xy01, rgb
 
 # LATC1/2 compressed formats
-MESA_FORMAT_L_LATC1_UNORM                 , rgtc  , 4, 4, x64 ,     ,     ,     , xxx1, rgb
-MESA_FORMAT_L_LATC1_SNORM                 , rgtc  , 4, 4, x64 ,     ,     ,     , xxx1, rgb
-MESA_FORMAT_LA_LATC2_UNORM                , rgtc  , 4, 4, x128,     ,     ,     , xxxy, rgb
-MESA_FORMAT_LA_LATC2_SNORM                , rgtc  , 4, 4, x128,     ,     ,     , xxxy, rgb
+MESA_FORMAT_L_LATC1_UNORM                 , latc  , 4, 4, x64 ,     ,     ,     , xxx1, rgb
+MESA_FORMAT_L_LATC1_SNORM                 , latc  , 4, 4, x64 ,     ,     ,     , xxx1, rgb
+MESA_FORMAT_LA_LATC2_UNORM                , latc  , 4, 4, x128,     ,     ,     , xxxy, rgb
+MESA_FORMAT_LA_LATC2_SNORM                , latc  , 4, 4, x128,     ,     ,     , xxxy, rgb
 
 # ETC1/2 compressed formats
 MESA_FORMAT_ETC1_RGB8                     , etc1  , 4, 4, x64 ,     ,     ,     , xyz1, rgb
diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h
index 2b8146ce932..794d599db3b 100644
--- a/src/mesa/main/formats.h
+++ b/src/mesa/main/formats.h
@@ -66,6 +66,7 @@ enum mesa_format_layout {
    MESA_FORMAT_LAYOUT_PACKED,
    MESA_FORMAT_LAYOUT_S3TC,
    MESA_FORMAT_LAYOUT_RGTC,
+   MESA_FORMAT_LAYOUT_LATC,
    MESA_FORMAT_LAYOUT_FXT1,
    MESA_FORMAT_LAYOUT_ETC1,
    MESA_FORMAT_LAYOUT_ETC2,
diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index 394c8bab214..e0294a88bfa 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -728,6 +728,7 @@ _mesa_get_compressed_fetch_func(mesa_format format)
    case MESA_FORMAT_LAYOUT_FXT1:
       return _mesa_get_fxt_fetch_func(format);
    case MESA_FORMAT_LAYOUT_RGTC:
+   case MESA_FORMAT_LAYOUT_LATC:
       return _mesa_get_compressed_rgtc_func(format);
    case MESA_FORMAT_LAYOUT_ETC1:
       return _mesa_get_etc_fetch_func(format);
-- 
cgit v1.2.3


From 29835fe19e0ca73a8017427498056ff7eb713cdd Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Wed, 26 Aug 2015 16:36:11 -0700
Subject: mesa/glformats: refactor compressed format support function

Instead of case statements, use _mesa_get_format_layout() to
determine if a GL format is part of a family of compressed formats.

v2. restrict LATC formats to API_OPENGL_COMPAT (Ilia).
    rename the variable mFormat to m_format.

Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
---
 src/mesa/main/glformats.c | 119 ++++++++++++++++------------------------------
 1 file changed, 40 insertions(+), 79 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index eb4a02acd17..6cfffdb4d51 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -28,6 +28,7 @@
 #include "context.h"
 #include "glformats.h"
 #include "formats.h"
+#include "texcompress.h"
 #include "enums.h"
 
 enum {
@@ -1272,95 +1273,22 @@ _mesa_is_depth_or_stencil_format(GLenum format)
 GLboolean
 _mesa_is_compressed_format(const struct gl_context *ctx, GLenum format)
 {
+   mesa_format m_format = _mesa_glenum_to_compressed_format(format);
+
+   /* Some formats in this switch have an equivalent mesa_format_layout
+    * to the compressed formats in the layout switch below and thus
+    * must be handled first.
+    */
    switch (format) {
-   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
-   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
-   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
-   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
-      /* Assume that the ANGLE flag will always be set if the EXT flag is set.
-       */
-      return ctx->Extensions.ANGLE_texture_compression_dxt;
    case GL_RGB_S3TC:
    case GL_RGB4_S3TC:
    case GL_RGBA_S3TC:
    case GL_RGBA4_S3TC:
       return _mesa_is_desktop_gl(ctx) &&
          ctx->Extensions.ANGLE_texture_compression_dxt;
-   case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
-   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
-   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
-   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
-      return _mesa_is_desktop_gl(ctx)
-         && ctx->Extensions.EXT_texture_sRGB
-         && ctx->Extensions.EXT_texture_compression_s3tc;
-   case GL_COMPRESSED_RGB_FXT1_3DFX:
-   case GL_COMPRESSED_RGBA_FXT1_3DFX:
-      return _mesa_is_desktop_gl(ctx)
-         && ctx->Extensions.TDFX_texture_compression_FXT1;
-   case GL_COMPRESSED_RED_RGTC1:
-   case GL_COMPRESSED_SIGNED_RED_RGTC1:
-   case GL_COMPRESSED_RG_RGTC2:
-   case GL_COMPRESSED_SIGNED_RG_RGTC2:
-      return _mesa_is_desktop_gl(ctx)
-         && ctx->Extensions.ARB_texture_compression_rgtc;
-   case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
-   case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
-   case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT:
-   case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT:
-      return ctx->API == API_OPENGL_COMPAT
-         && ctx->Extensions.EXT_texture_compression_latc;
    case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
       return ctx->API == API_OPENGL_COMPAT
          && ctx->Extensions.ATI_texture_compression_3dc;
-   case GL_ETC1_RGB8_OES:
-      return _mesa_is_gles(ctx)
-         && ctx->Extensions.OES_compressed_ETC1_RGB8_texture;
-   case GL_COMPRESSED_RGB8_ETC2:
-   case GL_COMPRESSED_SRGB8_ETC2:
-   case GL_COMPRESSED_RGBA8_ETC2_EAC:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
-   case GL_COMPRESSED_R11_EAC:
-   case GL_COMPRESSED_RG11_EAC:
-   case GL_COMPRESSED_SIGNED_R11_EAC:
-   case GL_COMPRESSED_SIGNED_RG11_EAC:
-   case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
-   case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
-      return _mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility;
-   case GL_COMPRESSED_RGBA_BPTC_UNORM:
-   case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
-   case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
-   case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
-      return _mesa_is_desktop_gl(ctx) &&
-         ctx->Extensions.ARB_texture_compression_bptc;
-   case GL_COMPRESSED_RGBA_ASTC_4x4_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_5x4_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_5x5_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_6x5_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_6x6_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_8x5_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_8x6_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_8x8_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_10x5_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_10x6_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_10x8_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_10x10_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_12x10_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_12x12_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR:
-      return ctx->Extensions.KHR_texture_compression_astc_ldr;
    case GL_PALETTE4_RGB8_OES:
    case GL_PALETTE4_RGBA8_OES:
    case GL_PALETTE4_R5_G6_B5_OES:
@@ -1372,6 +1300,39 @@ _mesa_is_compressed_format(const struct gl_context *ctx, GLenum format)
    case GL_PALETTE8_RGBA4_OES:
    case GL_PALETTE8_RGB5_A1_OES:
       return ctx->API == API_OPENGLES;
+   }
+
+   switch (_mesa_get_format_layout(m_format)) {
+   case MESA_FORMAT_LAYOUT_S3TC:
+      if (_mesa_get_format_color_encoding(m_format) == GL_LINEAR) {
+         /* Assume that the ANGLE flag will always be set if the
+          * EXT flag is set.
+          */
+         return ctx->Extensions.ANGLE_texture_compression_dxt;
+      } else {
+         return _mesa_is_desktop_gl(ctx)
+            && ctx->Extensions.EXT_texture_sRGB
+            && ctx->Extensions.EXT_texture_compression_s3tc;
+      }
+   case MESA_FORMAT_LAYOUT_FXT1:
+      return _mesa_is_desktop_gl(ctx)
+         && ctx->Extensions.TDFX_texture_compression_FXT1;
+   case MESA_FORMAT_LAYOUT_RGTC:
+      return _mesa_is_desktop_gl(ctx)
+         && ctx->Extensions.ARB_texture_compression_rgtc;
+   case MESA_FORMAT_LAYOUT_LATC:
+      return ctx->API == API_OPENGL_COMPAT
+         && ctx->Extensions.EXT_texture_compression_latc;
+   case MESA_FORMAT_LAYOUT_ETC1:
+      return _mesa_is_gles(ctx)
+         && ctx->Extensions.OES_compressed_ETC1_RGB8_texture;
+   case MESA_FORMAT_LAYOUT_ETC2:
+      return _mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility;
+   case MESA_FORMAT_LAYOUT_BPTC:
+      return _mesa_is_desktop_gl(ctx) &&
+         ctx->Extensions.ARB_texture_compression_bptc;
+   case MESA_FORMAT_LAYOUT_ASTC:
+      return ctx->Extensions.KHR_texture_compression_astc_ldr;
    default:
       return GL_FALSE;
    }
-- 
cgit v1.2.3


From db2777091d70c7abd28e0a8aac16b4a416e42e81 Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Thu, 27 Aug 2015 16:25:48 -0700
Subject: mesa/texcompress: add compressed formats to base format utility
 function

Add S3TC and PALETTE formats.

Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
---
 src/mesa/main/texcompress.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'src')

diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index e0294a88bfa..84973d3fe5d 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -100,6 +100,12 @@ _mesa_gl_compressed_format_base_format(GLenum format)
    case GL_ETC1_RGB8_OES:
    case GL_COMPRESSED_RGB8_ETC2:
    case GL_COMPRESSED_SRGB8_ETC2:
+   case GL_RGB_S3TC:
+   case GL_RGB4_S3TC:
+   case GL_PALETTE4_RGB8_OES:
+   case GL_PALETTE4_R5_G6_B5_OES:
+   case GL_PALETTE8_RGB8_OES:
+   case GL_PALETTE8_R5_G6_B5_OES:
       return GL_RGB;
 
    case GL_COMPRESSED_RGBA:
@@ -117,6 +123,14 @@ _mesa_gl_compressed_format_base_format(GLenum format)
    case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
    case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
    case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
+   case GL_RGBA_S3TC:
+   case GL_RGBA4_S3TC:
+   case GL_PALETTE4_RGBA8_OES:
+   case GL_PALETTE8_RGB5_A1_OES:
+   case GL_PALETTE4_RGBA4_OES:
+   case GL_PALETTE4_RGB5_A1_OES:
+   case GL_PALETTE8_RGBA8_OES:
+   case GL_PALETTE8_RGBA4_OES:
       return GL_RGBA;
 
    case GL_COMPRESSED_ALPHA:
-- 
cgit v1.2.3


From 99b1f4751f97631011b64fabcb57acf6beae01ac Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Thu, 27 Aug 2015 16:29:06 -0700
Subject: mesa/teximage: reuse compressed format utility functions for
 base_format

Reuse utility functions instead of reimplementing the same logic.

* _mesa_is_compressed_format() performs the required checking to
  determine format support in the current context.
* _mesa_gl_compressed_format_base_format() returns the base format.

As a side effect, we now check that we're in a desktop context when
determining support for the FXT1 and RGTC formats. This is in agreement
with our extension table and the glext headers.

Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
---
 src/mesa/main/teximage.c | 150 ++---------------------------------------------
 1 file changed, 5 insertions(+), 145 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 2a4d29daf2f..8913a72ad03 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -252,44 +252,11 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat )
       ; /* fallthrough */
    }
 
-   if (ctx->Extensions.TDFX_texture_compression_FXT1) {
-      switch (internalFormat) {
-      case GL_COMPRESSED_RGB_FXT1_3DFX:
-         return GL_RGB;
-      case GL_COMPRESSED_RGBA_FXT1_3DFX:
-         return GL_RGBA;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   /* Assume that the ANGLE flag will always be set if the EXT flag is set.
-    */
-   if (ctx->Extensions.ANGLE_texture_compression_dxt) {
-      switch (internalFormat) {
-      case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
-         return GL_RGB;
-      case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
-      case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
-      case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
-         return GL_RGBA;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (_mesa_is_desktop_gl(ctx)
-       && ctx->Extensions.ANGLE_texture_compression_dxt) {
-      switch (internalFormat) {
-      case GL_RGB_S3TC:
-      case GL_RGB4_S3TC:
-         return GL_RGB;
-      case GL_RGBA_S3TC:
-      case GL_RGBA4_S3TC:
-         return GL_RGBA;
-      default:
-         ; /* fallthrough */
-      }
+   if (_mesa_is_compressed_format(ctx, internalFormat)) {
+      GLenum base_compressed =
+         _mesa_gl_compressed_format_base_format(internalFormat);
+      if (base_compressed)
+            return base_compressed;
    }
 
    if (ctx->Extensions.MESA_ycbcr_texture) {
@@ -367,16 +334,10 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat )
       case GL_SRGB8_EXT:
       case GL_COMPRESSED_SRGB_EXT:
          return GL_RGB;
-      case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
-         return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGB : -1;
       case GL_SRGB_ALPHA_EXT:
       case GL_SRGB8_ALPHA8_EXT:
       case GL_COMPRESSED_SRGB_ALPHA_EXT:
          return GL_RGBA;
-      case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
-      case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
-      case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
-         return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGBA : -1;
       case GL_SLUMINANCE_ALPHA_EXT:
       case GL_SLUMINANCE8_ALPHA8_EXT:
       case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
@@ -521,111 +482,10 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat )
       }
    }
 
-   if (ctx->Extensions.ARB_texture_compression_rgtc) {
-      switch (internalFormat) {
-      case GL_COMPRESSED_RED_RGTC1:
-      case GL_COMPRESSED_SIGNED_RED_RGTC1:
-         return GL_RED;
-      case GL_COMPRESSED_RG_RGTC2:
-      case GL_COMPRESSED_SIGNED_RG_RGTC2:
-         return GL_RG;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->API == API_OPENGL_COMPAT &&
-       ctx->Extensions.EXT_texture_compression_latc) {
-      switch (internalFormat) {
-      case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
-      case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
-         return GL_LUMINANCE;
-      case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT:
-      case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT:
-         return GL_LUMINANCE_ALPHA;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->API == API_OPENGL_COMPAT &&
-       ctx->Extensions.ATI_texture_compression_3dc) {
-      switch (internalFormat) {
-      case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
-         return GL_LUMINANCE_ALPHA;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (_mesa_is_gles(ctx) &&
-      ctx->Extensions.OES_compressed_ETC1_RGB8_texture) {
-      switch (internalFormat) {
-      case GL_ETC1_RGB8_OES:
-         return GL_RGB;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
    if (ctx->Extensions.KHR_texture_compression_astc_ldr &&
        _mesa_is_astc_format(internalFormat))
          return GL_RGBA;
 
-   if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) {
-      switch (internalFormat) {
-      case GL_COMPRESSED_RGB8_ETC2:
-      case GL_COMPRESSED_SRGB8_ETC2:
-         return GL_RGB;
-      case GL_COMPRESSED_RGBA8_ETC2_EAC:
-      case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
-      case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
-      case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
-         return GL_RGBA;
-      case GL_COMPRESSED_R11_EAC:
-      case GL_COMPRESSED_SIGNED_R11_EAC:
-         return GL_RED;
-      case GL_COMPRESSED_RG11_EAC:
-      case GL_COMPRESSED_SIGNED_RG11_EAC:
-         return GL_RG;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (_mesa_is_desktop_gl(ctx) &&
-       ctx->Extensions.ARB_texture_compression_bptc) {
-      switch (internalFormat) {
-      case GL_COMPRESSED_RGBA_BPTC_UNORM:
-      case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
-         return GL_RGBA;
-      case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
-      case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
-         return GL_RGB;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->API == API_OPENGLES) {
-      switch (internalFormat) {
-      case GL_PALETTE4_RGB8_OES:
-      case GL_PALETTE4_R5_G6_B5_OES:
-      case GL_PALETTE8_RGB8_OES:
-      case GL_PALETTE8_R5_G6_B5_OES:
-	 return GL_RGB;
-      case GL_PALETTE4_RGBA8_OES:
-      case GL_PALETTE8_RGB5_A1_OES:
-      case GL_PALETTE4_RGBA4_OES:
-      case GL_PALETTE4_RGB5_A1_OES:
-      case GL_PALETTE8_RGBA8_OES:
-      case GL_PALETTE8_RGBA4_OES:
-	 return GL_RGBA;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
    return -1; /* error */
 }
 
-- 
cgit v1.2.3


From 8f6fd57db2275df8f86a5a173575b7d807508625 Mon Sep 17 00:00:00 2001
From: Marcin Ślusarz <marcin.slusarz@gmail.com>
Date: Sun, 20 Sep 2015 13:40:10 +0200
Subject: dri/common: fix gbm-symbols-check regression
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Broken by commit c228514c72cb2fd5fb9e510808e29204fc9e7ae1
"dri/common: use sysconfdir when looking for drirc".

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92054
Signed-off-by: Marcin Ślusarz <marcin.slusarz@gmail.com>
---
 src/mesa/drivers/dri/common/Makefile.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/common/Makefile.am b/src/mesa/drivers/dri/common/Makefile.am
index ea52cb5a0d8..2973a061e99 100644
--- a/src/mesa/drivers/dri/common/Makefile.am
+++ b/src/mesa/drivers/dri/common/Makefile.am
@@ -34,7 +34,7 @@ AM_CFLAGS = \
 	-I$(top_srcdir)/src/gallium/auxiliary \
 	$(LIBDRM_CFLAGS) \
 	$(DEFINES) \
-	-DSYSCONFDIR=\"$(sysconfdir)\"
+	-DSYSCONFDIR=\"$(sysconfdir)\" \
 	$(VISIBILITY_CFLAGS)
 
 noinst_LTLIBRARIES = \
-- 
cgit v1.2.3


From 6ba291db4ba4f03ac94560eaae861bc162ac838e Mon Sep 17 00:00:00 2001
From: Eduardo Lima Mitev <elima@igalia.com>
Date: Fri, 18 Sep 2015 10:30:12 +0200
Subject: i965/vec4/nir: Remove all "this->" snippets

For consistency, either we have all class members dereferenced, or none.
In this case, very few are so let's get rid of them all.

Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 31 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 16 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 482fce20cd9..20c063d0010 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -58,25 +58,24 @@ vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
       unreachable("should be lowered by lower_vertex_id().");
 
    case nir_intrinsic_load_vertex_id_zero_base:
-      reg = &this->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
+      reg = &nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
       if (reg->file == BAD_FILE)
-         *reg =
-            *this->make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
-                                             glsl_type::int_type);
+         *reg = *make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
+                                           glsl_type::int_type);
       break;
 
    case nir_intrinsic_load_base_vertex:
-      reg = &this->nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
+      reg = &nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
       if (reg->file == BAD_FILE)
-         *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX,
-                                                 glsl_type::int_type);
+         *reg = *make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX,
+                                           glsl_type::int_type);
       break;
 
    case nir_intrinsic_load_instance_id:
-      reg = &this->nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
+      reg = &nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
       if (reg->file == BAD_FILE)
-         *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID,
-                                                 glsl_type::int_type);
+         *reg = *make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID,
+                                           glsl_type::int_type);
       break;
 
    default:
@@ -142,7 +141,7 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader)
          }
 
          assert(uniforms < uniform_array_size);
-         this->uniform_size[uniforms] = type_size_vec4(var->type);
+         uniform_size[uniforms] = type_size_vec4(var->type);
 
          if (strncmp(var->name, "gl_", 3) == 0)
             nir_setup_builtin_uniform(var);
@@ -158,7 +157,7 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader)
              strcmp(var->name, "parameters") == 0);
 
       assert(uniforms < uniform_array_size);
-      this->uniform_size[uniforms] = type_size_vec4(var->type);
+      uniform_size[uniforms] = type_size_vec4(var->type);
 
       struct gl_program_parameter_list *plist = prog->Parameters;
       for (unsigned p = 0; p < plist->NumParameters; p++) {
@@ -243,10 +242,10 @@ vec4_visitor::nir_setup_builtin_uniform(nir_variable *var)
        * ParameterValues directly, since unlike brw_fs.cpp, we never
        * add new state references during compile.
        */
-      int index = _mesa_add_state_reference(this->prog->Parameters,
+      int index = _mesa_add_state_reference(prog->Parameters,
 					    (gl_state_index *)slots[i].tokens);
       gl_constant_value *values =
-         &this->prog->Parameters->ParameterValues[index][0];
+         &prog->Parameters->ParameterValues[index][0];
 
       assert(uniforms < uniform_array_size);
 
@@ -254,7 +253,7 @@ vec4_visitor::nir_setup_builtin_uniform(nir_variable *var)
          stage_prog_data->param[uniforms * 4 + j] =
             &values[GET_SWZ(slots[i].swizzle, j)];
 
-      this->uniform_vector_size[uniforms] =
+      uniform_vector_size[uniforms] =
          (var->type->is_scalar() || var->type->is_vector() ||
           var->type->is_matrix() ? var->type->vector_elements : 4);
 
@@ -344,7 +343,7 @@ vec4_visitor::nir_emit_block(nir_block *block)
 void
 vec4_visitor::nir_emit_instr(nir_instr *instr)
 {
-   this->base_ir = instr;
+   base_ir = instr;
 
    switch (instr->type) {
    case nir_instr_type_load_const:
-- 
cgit v1.2.3


From b65f91dd3285ca0daee658cdf9ac41caaad2f1fb Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Sun, 20 Sep 2015 14:02:29 -0400
Subject: nir/print: fix coverity error

Not something actually hit in real life (state is now never null; the
only case where state->syms is null is the nir_print_instr() path).  But it
was something I overlooked the first time, so might as well fix it.

    *** CID 1324642:  Null pointer dereferences  (REVERSE_INULL)
    /src/glsl/nir/nir_print.c: 299 in print_var_decl()
    293
    294           fprintf(fp, " (%s, %u)", loc, var->data.driver_location);
    295        }
    296
    297        fprintf(fp, "\n");
    298
    >>>     CID 1324642:  Null pointer dereferences  (REVERSE_INULL)
    >>>     Null-checking "state" suggests that it may be null, but it has already been dereferenced on all paths leading to the check.
    299        if (state) {
    300           _mesa_set_add(state->syms, name);
    301           _mesa_hash_table_insert(state->ht, var, name);
    302        }
    303     }
    304

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/glsl/nir/nir_print.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index 6e86140ed9d..a19aa8b9132 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -296,7 +296,7 @@ print_var_decl(nir_variable *var, print_state *state)
 
    fprintf(fp, "\n");
 
-   if (state) {
+   if (state->syms) {
       _mesa_set_add(state->syms, name);
       _mesa_hash_table_insert(state->ht, var, name);
    }
-- 
cgit v1.2.3


From d48ac93066190077510d635e71631b6574261d08 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Fri, 18 Sep 2015 08:15:52 +0200
Subject: i965: Maximum allowed size of SEND messages is 15 (4 bits)

Until now we only used MRFs 1..15 for regular SEND messages, so the
message length could not possibly exceed the maximum size. Soon we'll
allow MRF registers 1..23 to be used in gen6, so we need to be careful
not to build messages that can go beyond the limit. That could occur,
specifically, when building URB write messages, which we may need to
split in chunks due to their size. Previously we would simply go and
create a new message when we reached MRF 13 (since 13..15 were
reserved for spilling), now we also want to check the size of the
message explicitly.

Besides adding that condition to split URB write messages properly,
this patch also adds asserts in the generator. Notice that
brw_inst_set_mlen already asserts for this, but asserting in the
generators is easy and can make debugging easier in some cases.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 2 ++
 src/mesa/drivers/dri/i965/brw_inst.h             | 3 +++
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 2 ++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   | 5 +++--
 4 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 90805e45ad7..688f431f5c6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1558,6 +1558,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
       brw_set_default_acc_write_control(p, inst->writes_accumulator);
       brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
 
+      assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
+
       switch (inst->exec_size) {
       case 1:
       case 2:
diff --git a/src/mesa/drivers/dri/i965/brw_inst.h b/src/mesa/drivers/dri/i965/brw_inst.h
index 46eff1dd381..c5132ba15ed 100644
--- a/src/mesa/drivers/dri/i965/brw_inst.h
+++ b/src/mesa/drivers/dri/i965/brw_inst.h
@@ -39,6 +39,9 @@
 extern "C" {
 #endif
 
+/** Maximum SEND message length */
+#define BRW_MAX_MSG_LENGTH 15
+
 /* brw_context.h has a forward declaration of brw_inst, so name the struct. */
 typedef struct brw_inst {
    uint64_t data[2];
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 195033358fb..f11d3c3615d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1134,6 +1134,8 @@ vec4_generator::generate_code(const cfg_t *cfg)
       brw_set_default_mask_control(p, inst->force_writemask_all);
       brw_set_default_acc_write_control(p, inst->writes_accumulator);
 
+      assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
+
       unsigned pre_emit_nr_insn = p->nr_insn;
 
       if (dst.width == BRW_WIDTH_4) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 04657704405..e210bb4ad2c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -3318,9 +3318,10 @@ vec4_visitor::emit_vertex()
                        prog_data->vue_map.slot_to_varying[slot]);
 
          /* If this was max_usable_mrf, we can't fit anything more into this
-          * URB WRITE.
+          * URB WRITE. Same thing if we reached the maximum length available.
           */
-         if (mrf > max_usable_mrf) {
+         if (mrf > max_usable_mrf ||
+             align_interleaved_urb_mlen(devinfo, mrf - base_mrf + 1) > BRW_MAX_MSG_LENGTH) {
             slot++;
             break;
          }
-- 
cgit v1.2.3


From 085861083638ec782c17d3aa72ab46f1a0099935 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Wed, 16 Sep 2015 09:08:19 +0200
Subject: i965: Move MRF register asserts out of brw_reg.h

In a later patch we will make BRW_MAX_MRF return a different value depending
on the hardware generation, but it is inconvenient to add a gen parameter
to the brw_reg functions only for the assertions, so move these to places where
we have the hardware generation available.

Ken suggested adding the asserts to brw_set_src0 and brw_set_dest since that
would make sure that we catch all uses of MRF registers, even those coming
from modules that generate native code directly, like blorp. Unfortunately,
this is very late in the process which can make things harder to debug, so add
asserts to the generator as well.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_eu_emit.c          | 9 ++++++---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 5 ++++-
 src/mesa/drivers/dri/i965/brw_reg.h              | 7 ++++---
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 2 ++
 4 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 0432efa7175..23a120ea72d 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -146,8 +146,9 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
 {
    const struct brw_device_info *devinfo = p->devinfo;
 
-   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
-       dest.file != BRW_MESSAGE_REGISTER_FILE)
+   if (dest.file == BRW_MESSAGE_REGISTER_FILE)
+      assert(dest.nr < BRW_MAX_MRF);
+   else if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(dest.nr < 128);
 
    gen7_convert_mrf_to_grf(p, &dest);
@@ -309,7 +310,9 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
 {
    const struct brw_device_info *devinfo = p->devinfo;
 
-   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
+   if (reg.file == BRW_MESSAGE_REGISTER_FILE)
+      assert(reg.nr < BRW_MAX_MRF);
+   else if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
 
    gen7_convert_mrf_to_grf(p, &reg);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 688f431f5c6..b974e9c4626 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -53,8 +53,10 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg)
    struct brw_reg brw_reg;
 
    switch (reg->file) {
-   case GRF:
    case MRF:
+      assert((reg->reg & ~(1 << 7)) < BRW_MAX_MRF);
+      /* Fallthrough */
+   case GRF:
       if (reg->stride == 0) {
          brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, 0);
       } else if (inst->exec_size < 8) {
@@ -1558,6 +1560,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
       brw_set_default_acc_write_control(p, inst->writes_accumulator);
       brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
 
+      assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF);
       assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
 
       switch (inst->exec_size) {
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index 31806f769bd..06d9269f4d8 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -344,10 +344,12 @@ brw_reg(unsigned file,
    struct brw_reg reg;
    if (file == BRW_GENERAL_REGISTER_FILE)
       assert(nr < BRW_MAX_GRF);
-   else if (file == BRW_MESSAGE_REGISTER_FILE)
-      assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
    else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
       assert(nr <= BRW_ARF_TIMESTAMP);
+   /* Asserting on the MRF register number requires to know the hardware gen
+    * (gen6 has 24 MRF registers), which we don't know here, so we assert
+    * for that in the generators and in brw_eu_emit.c
+    */
 
    reg.type = type;
    reg.file = file;
@@ -808,7 +810,6 @@ brw_mask_reg(unsigned subnr)
 static inline struct brw_reg
 brw_message_reg(unsigned nr)
 {
-   assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
    return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index f11d3c3615d..6618275e391 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -46,6 +46,7 @@ vec4_instruction::get_dst(void)
       break;
 
    case MRF:
+      assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF);
       brw_reg = brw_message_reg(dst.reg + dst.reg_offset);
       brw_reg = retype(brw_reg, dst.type);
       brw_reg.dw1.bits.writemask = dst.writemask;
@@ -1134,6 +1135,7 @@ vec4_generator::generate_code(const cfg_t *cfg)
       brw_set_default_mask_control(p, inst->force_writemask_all);
       brw_set_default_acc_write_control(p, inst->writes_accumulator);
 
+      assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF);
       assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
 
       unsigned pre_emit_nr_insn = p->nr_insn;
-- 
cgit v1.2.3


From f50645d05c6dffa6463856ded0b8461ac9d24535 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Tue, 15 Sep 2015 16:00:26 +0200
Subject: i965: Turn BRW_MAX_MRF into a macro that accepts a hardware
 generation

There are some bug reports about shaders failing to compile in gen6
because MRF 14 is used when we need to spill. For example:
https://bugs.freedesktop.org/show_bug.cgi?id=86469
https://bugs.freedesktop.org/show_bug.cgi?id=90631

Discussion in bugzilla pointed to the fact that gen6 might actually have
24 MRF registers available instead of 16, so we could use other MRF
registers and avoid these conflicts (we still need to investigate why
some shaders need up to MRF 14 anyway, since this is not expected).

Notice that the hardware docs are not clear about this fact:

SNB PRM Vol4 Part2's "Table 5-4. MRF Registers Available in Device
Hardware" says "Number per Thread" - "24 registers"

However, SNB PRM Vol4 Part1, 1.6.1 Message Register File (MRF) says:

"Normal threads should construct their messages in m1..m15. (...)
Regardless of actual hardware implementation, the thread should
not assume that MRF addresses above m15 wrap to legal MRF registers."

Therefore experimentation was necessary to evaluate if we had these extra
MRF registers available or not. This was tested in gen6 using MRF
registers 21..23 for spilling and doing a full piglit run (all.py) forcing
spilling of everything on the FS backend. It was also tested by doing
spilling of everything on both the FS and the VS backends with a piglit run
of shader.py. In both cases no regressions were observed. In fact, many of
these tests were helped in the cases where we forced spilling, since that
triggered the same underlying problem described in the bug reports. Here are
some results using INTEL_DEBUG=spill_fs,spill_vec4 for a shader.py run on
gen6 hardware:

Using MRFs 13..15 for spilling:
crash: 2, fail: 113, pass: 6621, skip: 5461

Using MRFs 21..23 for spilling:
crash: 2, fail: 12, pass: 6722, skip: 5461

This patch sets the ground for later patches to implement spilling
using MRF registers 21..23 in gen6.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_eu_emit.c                 |  6 +++---
 src/mesa/drivers/dri/i965/brw_fs.cpp                    |  4 ++--
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp          | 12 ++++++------
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp       | 16 ++++++++--------
 src/mesa/drivers/dri/i965/brw_ir_vec4.h                 |  2 +-
 src/mesa/drivers/dri/i965/brw_reg.h                     |  2 +-
 src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp |  4 ++--
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp        | 10 +++++-----
 8 files changed, 28 insertions(+), 28 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 23a120ea72d..6a4e316b43c 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -147,7 +147,7 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
    const struct brw_device_info *devinfo = p->devinfo;
 
    if (dest.file == BRW_MESSAGE_REGISTER_FILE)
-      assert(dest.nr < BRW_MAX_MRF);
+      assert(dest.nr < BRW_MAX_MRF(devinfo->gen));
    else if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(dest.nr < 128);
 
@@ -311,7 +311,7 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
    const struct brw_device_info *devinfo = p->devinfo;
 
    if (reg.file == BRW_MESSAGE_REGISTER_FILE)
-      assert(reg.nr < BRW_MAX_MRF);
+      assert(reg.nr < BRW_MAX_MRF(devinfo->gen));
    else if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
 
@@ -2485,7 +2485,7 @@ void brw_urb_WRITE(struct brw_codegen *p,
 
    insn = next_insn(p, BRW_OPCODE_SEND);
 
-   assert(msg_length < BRW_MAX_MRF);
+   assert(msg_length < BRW_MAX_MRF(devinfo->gen));
 
    brw_set_dest(p, insn, dest);
    brw_set_src0(p, insn, src0);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b4d05674260..225a3122c79 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2795,7 +2795,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
 {
    int write_len = inst->regs_written;
    int first_write_grf = inst->dst.reg;
-   bool needs_dep[BRW_MAX_MRF];
+   bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
    assert(write_len < (int)sizeof(needs_dep) - 1);
 
    memset(needs_dep, false, sizeof(needs_dep));
@@ -2866,7 +2866,7 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins
 {
    int write_len = inst->regs_written;
    int first_write_grf = inst->dst.reg;
-   bool needs_dep[BRW_MAX_MRF];
+   bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
    assert(write_len < (int)sizeof(needs_dep) - 1);
 
    memset(needs_dep, false, sizeof(needs_dep));
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index b974e9c4626..c65084d0608 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -48,13 +48,13 @@ static uint32_t brw_file_from_reg(fs_reg *reg)
 }
 
 static struct brw_reg
-brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg)
+brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
 {
    struct brw_reg brw_reg;
 
    switch (reg->file) {
    case MRF:
-      assert((reg->reg & ~(1 << 7)) < BRW_MAX_MRF);
+      assert((reg->reg & ~(1 << 7)) < BRW_MAX_MRF(gen));
       /* Fallthrough */
    case GRF:
       if (reg->stride == 0) {
@@ -420,7 +420,7 @@ fs_generator::generate_blorp_fb_write(fs_inst *inst)
    brw_fb_WRITE(p,
                 16 /* dispatch_width */,
                 brw_message_reg(inst->base_mrf),
-                brw_reg_from_fs_reg(inst, &inst->src[0]),
+                brw_reg_from_fs_reg(inst, &inst->src[0], devinfo->gen),
                 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE,
                 inst->target,
                 inst->mlen,
@@ -1538,7 +1538,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset);
 
       for (unsigned int i = 0; i < inst->sources; i++) {
-	 src[i] = brw_reg_from_fs_reg(inst, &inst->src[i]);
+	 src[i] = brw_reg_from_fs_reg(inst, &inst->src[i], devinfo->gen);
 
 	 /* The accumulator result appears to get used for the
 	  * conditional modifier generation.  When negating a UD
@@ -1550,7 +1550,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
 		inst->src[i].type != BRW_REGISTER_TYPE_UD ||
 		!inst->src[i].negate);
       }
-      dst = brw_reg_from_fs_reg(inst, &inst->dst);
+      dst = brw_reg_from_fs_reg(inst, &inst->dst, devinfo->gen);
 
       brw_set_default_predicate_control(p, inst->predicate);
       brw_set_default_predicate_inverse(p, inst->predicate_inverse);
@@ -1560,7 +1560,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
       brw_set_default_acc_write_control(p, inst->writes_accumulator);
       brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
 
-      assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF);
+      assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));
       assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
 
       switch (inst->exec_size) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 570b4fedffe..21fb3de104a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -478,7 +478,7 @@ get_used_mrfs(fs_visitor *v, bool *mrf_used)
 {
    int reg_width = v->dispatch_width / 8;
 
-   memset(mrf_used, 0, BRW_MAX_MRF * sizeof(bool));
+   memset(mrf_used, 0, BRW_MAX_MRF(v->devinfo->gen) * sizeof(bool));
 
    foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
       if (inst->dst.file == MRF) {
@@ -509,11 +509,11 @@ static void
 setup_mrf_hack_interference(fs_visitor *v, struct ra_graph *g,
                             int first_mrf_node, int *first_used_mrf)
 {
-   bool mrf_used[BRW_MAX_MRF];
+   bool mrf_used[BRW_MAX_MRF(v->devinfo->gen)];
    get_used_mrfs(v, mrf_used);
 
-   *first_used_mrf = BRW_MAX_MRF;
-   for (int i = 0; i < BRW_MAX_MRF; i++) {
+   *first_used_mrf = BRW_MAX_MRF(v->devinfo->gen);
+   for (int i = 0; i < BRW_MAX_MRF(v->devinfo->gen); i++) {
       /* Mark each MRF reg node as being allocated to its physical register.
        *
        * The alternative would be to have per-physical-register classes, which
@@ -593,7 +593,7 @@ fs_visitor::assign_regs(bool allow_spilling)
 
    setup_payload_interference(g, payload_node_count, first_payload_node);
    if (devinfo->gen >= 7) {
-      int first_used_mrf = BRW_MAX_MRF;
+      int first_used_mrf = BRW_MAX_MRF(devinfo->gen);
       setup_mrf_hack_interference(this, g, first_mrf_hack_node,
                                   &first_used_mrf);
 
@@ -616,7 +616,7 @@ fs_visitor::assign_regs(bool allow_spilling)
              * register early enough in the register file that we don't
              * conflict with any used MRF hack registers.
              */
-            reg -= BRW_MAX_MRF - first_used_mrf;
+            reg -= BRW_MAX_MRF(devinfo->gen) - first_used_mrf;
 
             ra_set_node_reg(g, inst->src[0].reg, reg);
             break;
@@ -853,10 +853,10 @@ fs_visitor::spill_reg(int spill_reg)
     * SIMD16 mode, because we'd stomp the FB writes.
     */
    if (!spilled_any_registers) {
-      bool mrf_used[BRW_MAX_MRF];
+      bool mrf_used[BRW_MAX_MRF(devinfo->gen)];
       get_used_mrfs(this, mrf_used);
 
-      for (int i = spill_base_mrf; i < BRW_MAX_MRF; i++) {
+      for (int i = spill_base_mrf; i < BRW_MAX_MRF(devinfo->gen); i++) {
          if (mrf_used[i]) {
             fail("Register spilling not supported with m%d used", i);
           return;
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 966a410a15d..6e8b16139d3 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -161,7 +161,7 @@ public:
                     const src_reg &src1 = src_reg(),
                     const src_reg &src2 = src_reg());
 
-   struct brw_reg get_dst(void);
+   struct brw_reg get_dst(unsigned gen);
    struct brw_reg get_src(const struct brw_vue_prog_data *prog_data, int i);
 
    dst_reg dst;
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index 06d9269f4d8..87e7e011541 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -70,7 +70,7 @@ struct brw_device_info;
 #define GEN7_MRF_HACK_START 112
 
 /** Number of message register file registers */
-#define BRW_MAX_MRF 16
+#define BRW_MAX_MRF(gen) (gen == 6 ? 24 : 16)
 
 #define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
 #define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index b49961fff68..4e43e5ccdbd 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -762,7 +762,7 @@ fs_instruction_scheduler::calculate_deps()
     * GRF registers.
     */
    schedule_node *last_grf_write[grf_count * 16];
-   schedule_node *last_mrf_write[BRW_MAX_MRF];
+   schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->gen)];
    schedule_node *last_conditional_mod[2] = { NULL, NULL };
    schedule_node *last_accumulator_write = NULL;
    /* Fixed HW registers are assumed to be separate from the virtual
@@ -1035,7 +1035,7 @@ void
 vec4_instruction_scheduler::calculate_deps()
 {
    schedule_node *last_grf_write[grf_count];
-   schedule_node *last_mrf_write[BRW_MAX_MRF];
+   schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->gen)];
    schedule_node *last_conditional_mod = NULL;
    schedule_node *last_accumulator_write = NULL;
    /* Fixed HW registers are assumed to be separate from the virtual
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 6618275e391..05f20441adb 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -34,7 +34,7 @@ extern "C" {
 namespace brw {
 
 struct brw_reg
-vec4_instruction::get_dst(void)
+vec4_instruction::get_dst(unsigned gen)
 {
    struct brw_reg brw_reg;
 
@@ -46,7 +46,7 @@ vec4_instruction::get_dst(void)
       break;
 
    case MRF:
-      assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF);
+      assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(gen));
       brw_reg = brw_message_reg(dst.reg + dst.reg_offset);
       brw_reg = retype(brw_reg, dst.type);
       brw_reg.dw1.bits.writemask = dst.writemask;
@@ -490,7 +490,7 @@ vec4_generator::generate_gs_urb_write_allocate(vec4_instruction *inst)
    brw_push_insn_state(p);
    brw_set_default_access_mode(p, BRW_ALIGN_1);
    brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_MOV(p, get_element_ud(inst->get_dst(), 0),
+   brw_MOV(p, get_element_ud(inst->get_dst(devinfo->gen), 0),
            get_element_ud(inst->get_src(this->prog_data, 0), 0));
    brw_set_default_access_mode(p, BRW_ALIGN_16);
    brw_pop_insn_state(p);
@@ -1126,7 +1126,7 @@ vec4_generator::generate_code(const cfg_t *cfg)
       for (unsigned int i = 0; i < 3; i++) {
 	 src[i] = inst->get_src(this->prog_data, i);
       }
-      dst = inst->get_dst();
+      dst = inst->get_dst(devinfo->gen);
 
       brw_set_default_predicate_control(p, inst->predicate);
       brw_set_default_predicate_inverse(p, inst->predicate_inverse);
@@ -1135,7 +1135,7 @@ vec4_generator::generate_code(const cfg_t *cfg)
       brw_set_default_mask_control(p, inst->force_writemask_all);
       brw_set_default_acc_write_control(p, inst->writes_accumulator);
 
-      assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF);
+      assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));
       assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
 
       unsigned pre_emit_nr_insn = p->nr_insn;
-- 
cgit v1.2.3


From 6789a32075774fc332eb7432910c7fbc21ee2026 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Tue, 15 Sep 2015 16:33:48 +0200
Subject: i965/fs: Use MRF registers 21-23 for spilling in gen6

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 21fb3de104a..6900cee86f4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -30,6 +30,8 @@
 #include "glsl/glsl_types.h"
 #include "glsl/ir_optimization.h"
 
+#define FIRST_SPILL_MRF(gen) (gen == 6 ? 21 : 13)
+
 using namespace brw;
 
 static void
@@ -727,7 +729,7 @@ fs_visitor::emit_unspill(bblock_t *block, fs_inst *inst, fs_reg dst,
       unspill_inst->regs_written = reg_size;
 
       if (!gen7_read) {
-         unspill_inst->base_mrf = 14;
+         unspill_inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
          unspill_inst->mlen = 1; /* header contains offset */
       }
 
@@ -741,9 +743,9 @@ fs_visitor::emit_spill(bblock_t *block, fs_inst *inst, fs_reg src,
                        uint32_t spill_offset, int count)
 {
    int reg_size = 1;
-   int spill_base_mrf = 14;
+   int spill_base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
    if (dispatch_width == 16 && count % 2 == 0) {
-      spill_base_mrf = 13;
+      spill_base_mrf = FIRST_SPILL_MRF(devinfo->gen);
       reg_size = 2;
    }
 
@@ -843,7 +845,8 @@ fs_visitor::spill_reg(int spill_reg)
    int size = alloc.sizes[spill_reg];
    unsigned int spill_offset = last_scratch;
    assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */
-   int spill_base_mrf = dispatch_width > 8 ? 13 : 14;
+   int spill_base_mrf = dispatch_width > 8 ? FIRST_SPILL_MRF(devinfo->gen) :
+                                             FIRST_SPILL_MRF(devinfo->gen) + 1;
 
    /* Spills may use MRFs 13-15 in the SIMD16 case.  Our texturing is done
     * using up to 11 MRFs starting from either m1 or m2, and fb writes can use
-- 
cgit v1.2.3


From 5d23ce2f15bda866990750b49d7860144dff2e68 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Thu, 17 Sep 2015 13:43:52 +0200
Subject: i965/vec4: Use MRF registers 21-23 for spilling in gen6

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index e210bb4ad2c..514ccd6daf2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -26,6 +26,8 @@
 #include "glsl/ir_uniform.h"
 #include "program/sampler.h"
 
+#define FIRST_SPILL_MRF(gen) (gen == 6 ? 21 : 13)
+
 namespace brw {
 
 vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst,
@@ -256,7 +258,7 @@ vec4_visitor::SCRATCH_READ(const dst_reg &dst, const src_reg &index)
 
    inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_READ,
 					dst, index);
-   inst->base_mrf = 14;
+   inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
    inst->mlen = 2;
 
    return inst;
@@ -270,7 +272,7 @@ vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
 
    inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_WRITE,
 					dst, src, index);
-   inst->base_mrf = 13;
+   inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen);
    inst->mlen = 3;
 
    return inst;
@@ -1384,7 +1386,7 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
                                            dst,
                                            surf_index,
                                            offset_reg);
-      pull->base_mrf = 14;
+      pull->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
       pull->mlen = 1;
    }
 
@@ -3284,7 +3286,7 @@ vec4_visitor::emit_vertex()
     * may need to unspill a register or load from an array.  Those
     * reads would use MRFs 14-15.
     */
-   int max_usable_mrf = 13;
+   int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen);
 
    /* The following assertion verifies that max_usable_mrf causes an
     * even-numbered amount of URB write data, which will meet gen6's
-- 
cgit v1.2.3


From 7d5162bdc0850c80f4b9427a2aac6b42c7dcceaa Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 19 Sep 2015 16:19:26 -0400
Subject: radeonsi: load fmask ptr relative to the resources array
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

res_ptr already contains the resource values. fmask_ptr needs to be
looked up relative to the start of the resource params.

Note that this only affects indirect loads of MS sampler arrays.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Cc: "11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/radeonsi/si_shader.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index b108664f808..e92a3d2a2ec 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2350,7 +2350,7 @@ static void tex_fetch_args(
 						 lp_build_const_int32(gallivm,
 								      SI_FMASK_TEX_OFFSET), "");
 			fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
-			fmask_ptr = build_indexed_load_const(si_shader_ctx, res_ptr, ind_index);
+			fmask_ptr = build_indexed_load_const(si_shader_ctx, fmask_ptr, ind_index);
 		}
 	} else {
 		res_ptr = si_shader_ctx->resources[sampler_index];
-- 
cgit v1.2.3


From 72ebd532a163fd92d96a94a4260da1bfb75a62c8 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 18 Sep 2015 19:08:35 -0400
Subject: radeonsi: implement TXQS support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Tested-by: Fredrik Bruhn <f@unibap.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 docs/GL3.txt                             |  2 +-
 docs/relnotes/11.1.0.html                |  2 +-
 src/gallium/drivers/radeonsi/si_pipe.c   |  2 +-
 src/gallium/drivers/radeonsi/si_shader.c | 92 +++++++++++++++++++++++---------
 4 files changed, 71 insertions(+), 27 deletions(-)

(limited to 'src')

diff --git a/docs/GL3.txt b/docs/GL3.txt
index bd44d1293f5..92941cf0d74 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -194,7 +194,7 @@ GL 4.5, GLSL 4.50:
   GL_ARB_derivative_control                            DONE (i965, nv50, nvc0, r600, radeonsi)
   GL_ARB_direct_state_access                           DONE (all drivers)
   GL_ARB_get_texture_sub_image                         DONE (all drivers)
-  GL_ARB_shader_texture_image_samples                  DONE (i965, nv50, nvc0, r600)
+  GL_ARB_shader_texture_image_samples                  DONE (i965, nv50, nvc0, r600, radeonsi)
   GL_ARB_texture_barrier                               DONE (nv50, nvc0, r600, radeonsi)
   GL_KHR_context_flush_control                         DONE (all - but needs GLX/EGL extension to be useful)
   GL_KHR_robust_buffer_access_behavior                 not started
diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 24fdf2e9683..89b9a191176 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -45,7 +45,7 @@ Note: some of the new features are only available with certain drivers.
 
 <ul>
 <li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
-<li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600</li>
+<li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li>
 <li>GL_ARB_texture_query_lod on softpipe</li>
 <li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
 </ul>
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index ae1ff7eef43..01fa5252f71 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -284,6 +284,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_DEPTH_BOUNDS_TEST:
 	case PIPE_CAP_TEXTURE_QUERY_LOD:
 	case PIPE_CAP_TEXTURE_GATHER_SM5:
+	case PIPE_CAP_TGSI_TXQS:
 		return 1;
 
 	case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
@@ -325,7 +326,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
 	case PIPE_CAP_SAMPLER_VIEW_TARGET:
 	case PIPE_CAP_VERTEXID_NOBASE:
-	case PIPE_CAP_TGSI_TXQS:
 		return 0;
 
 	case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index e92a3d2a2ec..2e49a215763 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2305,29 +2305,17 @@ static void set_tex_fetch_args(struct gallivm_state *gallivm,
 
 static const struct lp_build_tgsi_action tex_action;
 
-static void tex_fetch_args(
+static void tex_fetch_ptrs(
 	struct lp_build_tgsi_context * bld_base,
-	struct lp_build_emit_data * emit_data)
+	struct lp_build_emit_data * emit_data,
+	LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)
 {
 	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
-	LLVMBuilderRef builder = gallivm->builder;
 	const struct tgsi_full_instruction * inst = emit_data->inst;
-	unsigned opcode = inst->Instruction.Opcode;
 	unsigned target = inst->Texture.Texture;
-	LLVMValueRef coords[5], derivs[6];
-	LLVMValueRef address[16];
-	int ref_pos;
-	unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
-	unsigned count = 0;
-	unsigned chan;
 	unsigned sampler_src;
 	unsigned sampler_index;
-	unsigned num_deriv_channels = 0;
-	bool has_offset = inst->Texture.NumOffsets > 0;
-	LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
-	LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
-	unsigned dmask = 0xf;
 
 	sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
 	sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
@@ -2338,25 +2326,50 @@ static void tex_fetch_args(
 
 		ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect, reg->Register.Index);
 
-		res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
-		res_ptr = build_indexed_load_const(si_shader_ctx, res_ptr, ind_index);
+		*res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+		*res_ptr = build_indexed_load_const(si_shader_ctx, *res_ptr, ind_index);
 
-		samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
-		samp_ptr = build_indexed_load_const(si_shader_ctx, samp_ptr, ind_index);
+		*samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
+		*samp_ptr = build_indexed_load_const(si_shader_ctx, *samp_ptr, ind_index);
 
 		if (target == TGSI_TEXTURE_2D_MSAA ||
 		    target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
 			ind_index = LLVMBuildAdd(gallivm->builder, ind_index,
 						 lp_build_const_int32(gallivm,
 								      SI_FMASK_TEX_OFFSET), "");
-			fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
-			fmask_ptr = build_indexed_load_const(si_shader_ctx, fmask_ptr, ind_index);
+			*fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+			*fmask_ptr = build_indexed_load_const(si_shader_ctx, *fmask_ptr, ind_index);
 		}
 	} else {
-		res_ptr = si_shader_ctx->resources[sampler_index];
-		samp_ptr = si_shader_ctx->samplers[sampler_index];
-		fmask_ptr = si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index];
+		*res_ptr = si_shader_ctx->resources[sampler_index];
+		*samp_ptr = si_shader_ctx->samplers[sampler_index];
+		*fmask_ptr = si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index];
 	}
+}
+
+static void tex_fetch_args(
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
+	const struct tgsi_full_instruction * inst = emit_data->inst;
+	unsigned opcode = inst->Instruction.Opcode;
+	unsigned target = inst->Texture.Texture;
+	LLVMValueRef coords[5], derivs[6];
+	LLVMValueRef address[16];
+	int ref_pos;
+	unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
+	unsigned count = 0;
+	unsigned chan;
+	unsigned num_deriv_channels = 0;
+	bool has_offset = inst->Texture.NumOffsets > 0;
+	LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
+	LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
+	unsigned dmask = 0xf;
+
+	tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);
 
 	if (opcode == TGSI_OPCODE_TXQ) {
 		if (target == TGSI_TEXTURE_BUFFER) {
@@ -2800,6 +2813,36 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
 	}
 }
 
+static void si_llvm_emit_txqs(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
+	LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
+	LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);
+	LLVMValueRef res, samples;
+	LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
+
+	tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);
+
+
+	/* Read the samples from the descriptor directly. */
+	res = LLVMBuildBitCast(builder, res_ptr, v8i32, "");
+	samples = LLVMBuildExtractElement(
+		builder, res,
+		lp_build_const_int32(gallivm, 3), "");
+	samples = LLVMBuildLShr(builder, samples,
+				lp_build_const_int32(gallivm, 16), "");
+	samples = LLVMBuildAnd(builder, samples,
+			       lp_build_const_int32(gallivm, 0xf), "");
+	samples = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1),
+			       samples, "");
+
+	emit_data->output[emit_data->chan] = samples;
+}
+
 /*
  * SI implements derivatives using the local data store (LDS)
  * All writes to the LDS happen in all executing threads at
@@ -3975,6 +4018,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 	bld_base->op_actions[TGSI_OPCODE_TXQ] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TG4] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action;
+	bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs;
 
 	bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
 	bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
-- 
cgit v1.2.3


From c1070550c289d48ef389aeb8c564d1abd1123ad1 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 21 Sep 2015 07:42:27 -0700
Subject: i965: Fix MRF register number assertions for compr4.

compr4 is represented by setting the high bit on the MRF number.
We need to mask it out before sanity checking the register number.

Fixes ~8000 assert fails on Ironlake and G45.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92066
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_eu_emit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 6a4e316b43c..dc699bb6321 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -147,7 +147,7 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
    const struct brw_device_info *devinfo = p->devinfo;
 
    if (dest.file == BRW_MESSAGE_REGISTER_FILE)
-      assert(dest.nr < BRW_MAX_MRF(devinfo->gen));
+      assert((dest.nr & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
    else if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(dest.nr < 128);
 
@@ -311,7 +311,7 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
    const struct brw_device_info *devinfo = p->devinfo;
 
    if (reg.file == BRW_MESSAGE_REGISTER_FILE)
-      assert(reg.nr < BRW_MAX_MRF(devinfo->gen));
+      assert((reg.nr & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
    else if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
 
-- 
cgit v1.2.3


From bdb1faf44eac0128baaab62bd4c425c7c3424299 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Thu, 17 Sep 2015 16:03:48 +0100
Subject: nir: move stdio.h inclusion before extern C

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Tested-by: Gottfried Haider <gottfried.haider@gmail.com>
Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
---
 src/glsl/nir/nir_types.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h
index a8ff8f2c606..bf2faf496c4 100644
--- a/src/glsl/nir/nir_types.h
+++ b/src/glsl/nir/nir_types.h
@@ -27,6 +27,8 @@
 
 #pragma once
 
+#include <stdio.h>
+
 /* C wrapper around glsl_types.h */
 
 #include "../glsl_types.h"
@@ -37,8 +39,6 @@ extern "C" {
 struct glsl_type;
 #endif
 
-#include <stdio.h>
-
 void glsl_print_type(const struct glsl_type *type, FILE *fp);
 void glsl_print_struct(const struct glsl_type *type, FILE *fp);
 
-- 
cgit v1.2.3


From d130cda4531602185dcf7cafe217c9b7abdba954 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Thu, 17 Sep 2015 15:57:26 +0100
Subject: nir: add C wrapper around glsl_type::record_location_offset

This will allow us to convert nir_lower_sampler.cpp to C.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Tested-by: Gottfried Haider <gottfried.haider@gmail.com>
Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
---
 src/glsl/nir/nir_types.cpp | 7 +++++++
 src/glsl/nir/nir_types.h   | 2 ++
 2 files changed, 9 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp
index 940c676005a..da9807f0e62 100644
--- a/src/glsl/nir/nir_types.cpp
+++ b/src/glsl/nir/nir_types.cpp
@@ -112,6 +112,13 @@ glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index)
    return type->fields.structure[index].name;
 }
 
+unsigned
+glsl_get_record_location_offset(const struct glsl_type *type,
+                                unsigned length)
+{
+   return type->record_location_offset(length);
+}
+
 bool
 glsl_type_is_void(const glsl_type *type)
 {
diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h
index bf2faf496c4..49d6a65e7c4 100644
--- a/src/glsl/nir/nir_types.h
+++ b/src/glsl/nir/nir_types.h
@@ -62,6 +62,8 @@ unsigned glsl_get_length(const struct glsl_type *type);
 const char *glsl_get_struct_elem_name(const struct glsl_type *type,
                                       unsigned index);
 
+unsigned glsl_get_record_location_offset(const struct glsl_type *type,
+                                         unsigned length);
 
 bool glsl_type_is_void(const struct glsl_type *type);
 bool glsl_type_is_vector(const struct glsl_type *type);
-- 
cgit v1.2.3


From de7ffdb38343b7f9292184ca827987de9852c80c Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Thu, 17 Sep 2015 16:12:17 +0100
Subject: nir: rename nir_lower_samplers.c{pp,}

With the only C++ function having its own wrapper we can 'demote' this
file to a normal C one. This allows us to get rid of extern C { #include
<foo.h> } 'hacks'. Plus some of the headers may use C99 designated
initializers, which are not supported by the ISO C++ standard.

This may cause build issues on incremental builds. If so, run the
following:

sed -i -e 's|samplers\.cpp|samplers.c|' src/glsl/nir/.deps/nir_lower_samplers.Plo

Fixes: ef8eebc6ad5(nir: support indirect indexing samplers in struct arrays)
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Reported-by: Gottfried Haider <gottfried.haider@gmail.com>
Tested-by: Gottfried Haider <gottfried.haider@gmail.com>
Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
---
 src/glsl/Makefile.sources           |   2 +-
 src/glsl/nir/nir_lower_samplers.c   | 188 +++++++++++++++++++++++++++++++++++
 src/glsl/nir/nir_lower_samplers.cpp | 190 ------------------------------------
 3 files changed, 189 insertions(+), 191 deletions(-)
 create mode 100644 src/glsl/nir/nir_lower_samplers.c
 delete mode 100644 src/glsl/nir/nir_lower_samplers.cpp

(limited to 'src')

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 1aaa062adea..f7c69f4fe3f 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -43,7 +43,7 @@ NIR_FILES = \
 	nir/nir_lower_io.c \
 	nir/nir_lower_outputs_to_temporaries.c \
 	nir/nir_lower_phis_to_scalar.c \
-	nir/nir_lower_samplers.cpp \
+	nir/nir_lower_samplers.c \
 	nir/nir_lower_system_values.c \
 	nir/nir_lower_tex.c \
 	nir/nir_lower_to_source_mods.c \
diff --git a/src/glsl/nir/nir_lower_samplers.c b/src/glsl/nir/nir_lower_samplers.c
new file mode 100644
index 00000000000..58ea0db4e0f
--- /dev/null
+++ b/src/glsl/nir/nir_lower_samplers.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "../program.h"
+#include "program/hash_table.h"
+#include "ir_uniform.h"
+
+#include "main/compiler.h"
+#include "main/mtypes.h"
+#include "program/prog_parameter.h"
+#include "program/program.h"
+
+/* Calculate the sampler index based on array indicies and also
+ * calculate the base uniform location for struct members.
+ */
+static void
+calc_sampler_offsets(nir_deref *tail, nir_tex_instr *instr,
+                     unsigned *array_elements, nir_ssa_def **indirect,
+                     nir_builder *b, unsigned *location)
+{
+   if (tail->child == NULL)
+      return;
+
+   switch (tail->child->deref_type) {
+   case nir_deref_type_array: {
+      nir_deref_array *deref_array = nir_deref_as_array(tail->child);
+
+      assert(deref_array->deref_array_type != nir_deref_array_type_wildcard);
+
+      calc_sampler_offsets(tail->child, instr, array_elements,
+                           indirect, b, location);
+      instr->sampler_index += deref_array->base_offset * *array_elements;
+
+      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+         nir_ssa_def *mul =
+            nir_imul(b, nir_imm_int(b, *array_elements),
+                     nir_ssa_for_src(b, deref_array->indirect, 1));
+
+         nir_instr_rewrite_src(&instr->instr, &deref_array->indirect,
+                               NIR_SRC_INIT);
+
+         if (*indirect) {
+            *indirect = nir_iadd(b, *indirect, mul);
+         } else {
+            *indirect = mul;
+         }
+      }
+
+      *array_elements *= glsl_get_length(tail->type);
+       break;
+   }
+
+   case nir_deref_type_struct: {
+      nir_deref_struct *deref_struct = nir_deref_as_struct(tail->child);
+      *location += glsl_get_record_location_offset(tail->type, deref_struct->index);
+      calc_sampler_offsets(tail->child, instr, array_elements,
+                           indirect, b, location);
+      break;
+   }
+
+   default:
+      unreachable("Invalid deref type");
+      break;
+   }
+}
+
+static void
+lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program,
+              gl_shader_stage stage, nir_builder *builder)
+{
+   if (instr->sampler == NULL)
+      return;
+
+   instr->sampler_index = 0;
+   unsigned location = instr->sampler->var->data.location;
+   unsigned array_elements = 1;
+   nir_ssa_def *indirect = NULL;
+
+   builder->cursor = nir_before_instr(&instr->instr);
+   calc_sampler_offsets(&instr->sampler->deref, instr, &array_elements,
+                        &indirect, builder, &location);
+
+   if (indirect) {
+      /* First, we have to resize the array of texture sources */
+      nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src,
+                                            instr->num_srcs + 1);
+
+      for (unsigned i = 0; i < instr->num_srcs; i++) {
+         new_srcs[i].src_type = instr->src[i].src_type;
+         nir_instr_move_src(&instr->instr, &new_srcs[i].src,
+                            &instr->src[i].src);
+      }
+
+      ralloc_free(instr->src);
+      instr->src = new_srcs;
+
+      /* Now we can go ahead and move the source over to being a
+       * first-class texture source.
+       */
+      instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
+      instr->num_srcs++;
+      nir_instr_rewrite_src(&instr->instr,
+                            &instr->src[instr->num_srcs - 1].src,
+                            nir_src_for_ssa(indirect));
+
+      instr->sampler_array_size = array_elements;
+   }
+
+   if (location > shader_program->NumUniformStorage - 1 ||
+       !shader_program->UniformStorage[location].sampler[stage].active) {
+      assert(!"cannot return a sampler");
+      return;
+   }
+
+   instr->sampler_index +=
+      shader_program->UniformStorage[location].sampler[stage].index;
+
+   instr->sampler = NULL;
+}
+
+typedef struct {
+   nir_builder builder;
+   const struct gl_shader_program *shader_program;
+   gl_shader_stage stage;
+} lower_state;
+
+static bool
+lower_block_cb(nir_block *block, void *_state)
+{
+   lower_state *state = (lower_state *) _state;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type == nir_instr_type_tex) {
+         nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
+         lower_sampler(tex_instr, state->shader_program, state->stage,
+                       &state->builder);
+      }
+   }
+
+   return true;
+}
+
+static void
+lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program,
+           gl_shader_stage stage)
+{
+   lower_state state;
+
+   nir_builder_init(&state.builder, impl);
+   state.shader_program = shader_program;
+   state.stage = stage;
+
+   nir_foreach_block(impl, lower_block_cb, &state);
+}
+
+void
+nir_lower_samplers(nir_shader *shader,
+                   const struct gl_shader_program *shader_program)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         lower_impl(overload->impl, shader_program, shader->stage);
+   }
+}
diff --git a/src/glsl/nir/nir_lower_samplers.cpp b/src/glsl/nir/nir_lower_samplers.cpp
deleted file mode 100644
index c8e1faa0334..00000000000
--- a/src/glsl/nir/nir_lower_samplers.cpp
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
- * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include "nir.h"
-#include "nir_builder.h"
-#include "../program.h"
-#include "program/hash_table.h"
-#include "ir_uniform.h"
-
-extern "C" {
-#include "main/compiler.h"
-#include "main/mtypes.h"
-#include "program/prog_parameter.h"
-#include "program/program.h"
-}
-
-/* Calculate the sampler index based on array indicies and also
- * calculate the base uniform location for struct members.
- */
-static void
-calc_sampler_offsets(nir_deref *tail, nir_tex_instr *instr,
-                     unsigned *array_elements, nir_ssa_def **indirect,
-                     nir_builder *b, unsigned *location)
-{
-   if (tail->child == NULL)
-      return;
-
-   switch (tail->child->deref_type) {
-   case nir_deref_type_array: {
-      nir_deref_array *deref_array = nir_deref_as_array(tail->child);
-
-      assert(deref_array->deref_array_type != nir_deref_array_type_wildcard);
-
-      calc_sampler_offsets(tail->child, instr, array_elements,
-                           indirect, b, location);
-      instr->sampler_index += deref_array->base_offset * *array_elements;
-
-      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
-         nir_ssa_def *mul =
-            nir_imul(b, nir_imm_int(b, *array_elements),
-                     nir_ssa_for_src(b, deref_array->indirect, 1));
-
-         nir_instr_rewrite_src(&instr->instr, &deref_array->indirect,
-                               NIR_SRC_INIT);
-
-         if (*indirect) {
-            *indirect = nir_iadd(b, *indirect, mul);
-         } else {
-            *indirect = mul;
-         }
-      }
-
-      *array_elements *= glsl_get_length(tail->type);
-       break;
-   }
-
-   case nir_deref_type_struct: {
-      nir_deref_struct *deref_struct = nir_deref_as_struct(tail->child);
-      *location += tail->type->record_location_offset(deref_struct->index);
-      calc_sampler_offsets(tail->child, instr, array_elements,
-                           indirect, b, location);
-      break;
-   }
-
-   default:
-      unreachable("Invalid deref type");
-      break;
-   }
-}
-
-static void
-lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program,
-              gl_shader_stage stage, nir_builder *builder)
-{
-   if (instr->sampler == NULL)
-      return;
-
-   instr->sampler_index = 0;
-   unsigned location = instr->sampler->var->data.location;
-   unsigned array_elements = 1;
-   nir_ssa_def *indirect = NULL;
-
-   builder->cursor = nir_before_instr(&instr->instr);
-   calc_sampler_offsets(&instr->sampler->deref, instr, &array_elements,
-                        &indirect, builder, &location);
-
-   if (indirect) {
-      /* First, we have to resize the array of texture sources */
-      nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src,
-                                            instr->num_srcs + 1);
-
-      for (unsigned i = 0; i < instr->num_srcs; i++) {
-         new_srcs[i].src_type = instr->src[i].src_type;
-         nir_instr_move_src(&instr->instr, &new_srcs[i].src,
-                            &instr->src[i].src);
-      }
-
-      ralloc_free(instr->src);
-      instr->src = new_srcs;
-
-      /* Now we can go ahead and move the source over to being a
-       * first-class texture source.
-       */
-      instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
-      instr->num_srcs++;
-      nir_instr_rewrite_src(&instr->instr,
-                            &instr->src[instr->num_srcs - 1].src,
-                            nir_src_for_ssa(indirect));
-
-      instr->sampler_array_size = array_elements;
-   }
-
-   if (location > shader_program->NumUniformStorage - 1 ||
-       !shader_program->UniformStorage[location].sampler[stage].active) {
-      assert(!"cannot return a sampler");
-      return;
-   }
-
-   instr->sampler_index +=
-      shader_program->UniformStorage[location].sampler[stage].index;
-
-   instr->sampler = NULL;
-}
-
-typedef struct {
-   nir_builder builder;
-   const struct gl_shader_program *shader_program;
-   gl_shader_stage stage;
-} lower_state;
-
-static bool
-lower_block_cb(nir_block *block, void *_state)
-{
-   lower_state *state = (lower_state *) _state;
-
-   nir_foreach_instr(block, instr) {
-      if (instr->type == nir_instr_type_tex) {
-         nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
-         lower_sampler(tex_instr, state->shader_program, state->stage,
-                       &state->builder);
-      }
-   }
-
-   return true;
-}
-
-static void
-lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program,
-           gl_shader_stage stage)
-{
-   lower_state state;
-
-   nir_builder_init(&state.builder, impl);
-   state.shader_program = shader_program;
-   state.stage = stage;
-
-   nir_foreach_block(impl, lower_block_cb, &state);
-}
-
-extern "C" void
-nir_lower_samplers(nir_shader *shader,
-                   const struct gl_shader_program *shader_program)
-{
-   nir_foreach_overload(shader, overload) {
-      if (overload->impl)
-         lower_impl(overload->impl, shader_program, shader->stage);
-   }
-}
-- 
cgit v1.2.3


From d513388c8aa1ef4edb937e97a75b953f1abe16f3 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Thu, 10 Sep 2015 16:53:08 -0700
Subject: nir: Move system value -> intrinsic mapping into nir.c

This way they're right next to the map going the other direction.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/nir.c                     | 38 +++++++++++++++++++++++++++++++
 src/glsl/nir/nir.h                     |  1 +
 src/glsl/nir/nir_lower_system_values.c | 41 +---------------------------------
 3 files changed, 40 insertions(+), 40 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 23071eff65b..1206bb4dfcb 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1461,6 +1461,44 @@ nir_index_instrs(nir_function_impl *impl)
    return index;
 }
 
+nir_intrinsic_op
+nir_intrinsic_from_system_value(gl_system_value val)
+{
+   switch (val) {
+   case SYSTEM_VALUE_VERTEX_ID:
+      return nir_intrinsic_load_vertex_id;
+   case SYSTEM_VALUE_INSTANCE_ID:
+      return nir_intrinsic_load_instance_id;
+   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
+      return nir_intrinsic_load_vertex_id_zero_base;
+   case SYSTEM_VALUE_BASE_VERTEX:
+      return nir_intrinsic_load_base_vertex;
+   case SYSTEM_VALUE_INVOCATION_ID:
+      return nir_intrinsic_load_invocation_id;
+   case SYSTEM_VALUE_FRONT_FACE:
+      return nir_intrinsic_load_front_face;
+   case SYSTEM_VALUE_SAMPLE_ID:
+      return nir_intrinsic_load_sample_id;
+   case SYSTEM_VALUE_SAMPLE_POS:
+      return nir_intrinsic_load_sample_pos;
+   case SYSTEM_VALUE_SAMPLE_MASK_IN:
+      return nir_intrinsic_load_sample_mask_in;
+   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
+      return nir_intrinsic_load_local_invocation_id;
+   case SYSTEM_VALUE_WORK_GROUP_ID:
+      return nir_intrinsic_load_work_group_id;
+   /* FINISHME: Add tessellation intrinsics.
+   case SYSTEM_VALUE_TESS_COORD:
+   case SYSTEM_VALUE_VERTICES_IN:
+   case SYSTEM_VALUE_PRIMITIVE_ID:
+   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
+   case SYSTEM_VALUE_TESS_LEVEL_INNER:
+    */
+   default:
+      unreachable("system value does not directly correspond to intrinsic");
+   }
+}
+
 gl_system_value
 nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
 {
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 63b0b514c50..666a70fe5c7 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1924,6 +1924,7 @@ bool nir_opt_undef(nir_shader *shader);
 
 void nir_sweep(nir_shader *shader);
 
+nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
 gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);
 
 #ifdef __cplusplus
diff --git a/src/glsl/nir/nir_lower_system_values.c b/src/glsl/nir/nir_lower_system_values.c
index a656b27a72a..06ee3e68c8d 100644
--- a/src/glsl/nir/nir_lower_system_values.c
+++ b/src/glsl/nir/nir_lower_system_values.c
@@ -40,46 +40,7 @@ convert_instr(nir_intrinsic_instr *instr)
 
    void *mem_ctx = ralloc_parent(instr);
 
-   nir_intrinsic_op op;
-
-   switch (var->data.location) {
-   case SYSTEM_VALUE_FRONT_FACE:
-      op = nir_intrinsic_load_front_face;
-      break;
-   case SYSTEM_VALUE_VERTEX_ID:
-      op = nir_intrinsic_load_vertex_id;
-      break;
-   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
-      op = nir_intrinsic_load_vertex_id_zero_base;
-      break;
-   case SYSTEM_VALUE_BASE_VERTEX:
-      op = nir_intrinsic_load_base_vertex;
-      break;
-   case SYSTEM_VALUE_INSTANCE_ID:
-      op = nir_intrinsic_load_instance_id;
-      break;
-   case SYSTEM_VALUE_SAMPLE_ID:
-      op = nir_intrinsic_load_sample_id;
-      break;
-   case SYSTEM_VALUE_SAMPLE_POS:
-      op = nir_intrinsic_load_sample_pos;
-      break;
-   case SYSTEM_VALUE_SAMPLE_MASK_IN:
-      op = nir_intrinsic_load_sample_mask_in;
-      break;
-   case SYSTEM_VALUE_INVOCATION_ID:
-      op = nir_intrinsic_load_invocation_id;
-      break;
-   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
-      op = nir_intrinsic_load_local_invocation_id;
-      break;
-   case SYSTEM_VALUE_WORK_GROUP_ID:
-      op = nir_intrinsic_load_work_group_id;
-      break;
-   default:
-      unreachable("not reached");
-   }
-
+   nir_intrinsic_op op = nir_intrinsic_from_system_value(var->data.location);
    nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
 
    if (instr->dest.is_ssa) {
-- 
cgit v1.2.3


From 46362db4a6bb6db64727d3adcb16ca8f32aa70fb Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Mon, 21 Sep 2015 08:22:12 -0700
Subject: nir/builder: Don't use designated initializers

Designated initializers are not allowed in C++ (not even C++11).  Since
nir_lower_samplers is now using nir_builder, and nir_lower_samplers is in
C++, this breaks the build on some compilers.  Apparently, GCC 5 allows it
to some limited extent because mesa still builds on my system without this
patch.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92052
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/nir_builder.h | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h
index 8db5fcf039d..624329d0a8a 100644
--- a/src/glsl/nir/nir_builder.h
+++ b/src/glsl/nir/nir_builder.h
@@ -76,21 +76,36 @@ nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value
 static inline nir_ssa_def *
 nir_imm_float(nir_builder *build, float x)
 {
-   nir_const_value v = { { .f = {x, 0, 0, 0} } };
+   nir_const_value v;
+
+   memset(&v, 0, sizeof(v));
+   v.f[0] = x;
+
    return nir_build_imm(build, 1, v);
 }
 
 static inline nir_ssa_def *
 nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
 {
-   nir_const_value v = { { .f = {x, y, z, w} } };
+   nir_const_value v;
+
+   memset(&v, 0, sizeof(v));
+   v.f[0] = x;
+   v.f[1] = y;
+   v.f[2] = z;
+   v.f[3] = w;
+
    return nir_build_imm(build, 4, v);
 }
 
 static inline nir_ssa_def *
 nir_imm_int(nir_builder *build, int x)
 {
-   nir_const_value v = { { .i = {x, 0, 0, 0} } };
+   nir_const_value v;
+
+   memset(&v, 0, sizeof(v));
+   v.i[0] = x;
+
    return nir_build_imm(build, 1, v);
 }
 
-- 
cgit v1.2.3


From c1e38ad37042b0ec261eb0ba5631b7ff0ee7a9da Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Thu, 10 Sep 2015 16:59:12 -0700
Subject: i965/skl: Use larger URB size where available.

All SKL SKUs except the lowest one, which has half the L3 size, actually have
384K of URB per slice.

For once, I can explain how this mistake was made and how it was missed in
review...  Historically when we enable a platform and put the production sizes,
you can simply look at the "smallest" SKU and see what its URB size is (and we
assumed it was the 1 slice variant). Since on newer platforms the URB sizes are
scaled automatically by HW, this was sufficient. On SKL, this is a bit different
as the lowest SKU actually has half of the L3 fused off. GT2 is the 1 slice (not
GT1) variant and it has 384K.

There are no Jenkins tests fixed (or regressions) and we don't expect any fixes
here because you can always run with less URB size.

Thanks to Sarah for bringing this to my attention.

Cc: Sarah Sharp <sarah.a.sharp@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
---
 src/mesa/drivers/dri/i965/brw_device_info.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c
index 7ad3a2fb7b4..a6a3bb670ca 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -314,7 +314,7 @@ static const struct brw_device_info brw_device_info_chv = {
    .max_wm_threads = 64 * 6,                        \
    .max_cs_threads = 56,                            \
    .urb = {                                         \
-      .size = 192,                                  \
+      .size = 384,                                  \
       .min_vs_entries = 64,                         \
       .max_vs_entries = 1856,                       \
       .max_hs_entries = 672,                        \
@@ -324,6 +324,7 @@ static const struct brw_device_info brw_device_info_chv = {
 
 static const struct brw_device_info brw_device_info_skl_gt1 = {
    GEN9_FEATURES, .gt = 1,
+   .urb.size = 192,
 };
 
 static const struct brw_device_info brw_device_info_skl_gt2 = {
-- 
cgit v1.2.3


From 60befc63470a9cf1ac59ca74cd0a8164a25db09e Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Mon, 21 Sep 2015 11:18:23 -0700
Subject: i965: Clean up GLSL compiler option setup

The only functional change here is that we now set EmitNoIndirectOutput and
EmitNoIndirectTemp for compute shaders.  Compute shaders don't have outputs
per-se and we should have been setting EmitNoIndirectTemp all along.

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_shader.cpp | 46 ++++++++++++++------------------
 1 file changed, 20 insertions(+), 26 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index eed73fbadc3..c311a038e4f 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -115,41 +115,35 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
       compiler->glsl_compiler_options[i].EmitNoNoise = true;
       compiler->glsl_compiler_options[i].EmitNoMainReturn = true;
       compiler->glsl_compiler_options[i].EmitNoIndirectInput = true;
-      compiler->glsl_compiler_options[i].EmitNoIndirectOutput =
-	 (i == MESA_SHADER_FRAGMENT);
-      compiler->glsl_compiler_options[i].EmitNoIndirectTemp =
-	 (i == MESA_SHADER_FRAGMENT);
       compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
       compiler->glsl_compiler_options[i].LowerClipDistance = true;
 
+      bool is_scalar;
+      switch (i) {
+      case MESA_SHADER_FRAGMENT:
+      case MESA_SHADER_COMPUTE:
+         is_scalar = true;
+         break;
+      case MESA_SHADER_VERTEX:
+         is_scalar = compiler->scalar_vs;
+         break;
+      default:
+         is_scalar = false;
+         break;
+      }
+
+      compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar;
+      compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar;
+      compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar;
+
       /* !ARB_gpu_shader5 */
       if (devinfo->gen < 7)
          compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
-   }
 
-   compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = true;
-   compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
-
-   if (compiler->scalar_vs || brw_env_var_as_boolean("INTEL_USE_NIR", true)) {
-      if (compiler->scalar_vs) {
-         /* If we're using the scalar backend for vertex shaders, we need to
-          * configure these accordingly.
-          */
-         compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
-         compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
-         compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = false;
-      }
-
-      compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions = nir_options;
-   }
-
-   if (brw_env_var_as_boolean("INTEL_USE_NIR", true)) {
-      compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].NirOptions = nir_options;
+      if (is_scalar || brw_env_var_as_boolean("INTEL_USE_NIR", true))
+         compiler->glsl_compiler_options[i].NirOptions = nir_options;
    }
 
-   compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions = nir_options;
-   compiler->glsl_compiler_options[MESA_SHADER_COMPUTE].NirOptions = nir_options;
-
    return compiler;
 }
 
-- 
cgit v1.2.3


From 967a5ddb88d248aa317964c9b0951332b2494aab Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 17 Sep 2015 08:38:10 -0700
Subject: nir: Report progress from nir_lower_global_vars_to_local().

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir.h                            | 2 +-
 src/glsl/nir/nir_lower_global_vars_to_local.c | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 666a70fe5c7..3d071b17ce3 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1809,7 +1809,7 @@ void nir_split_var_copies(nir_shader *shader);
 void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx);
 void nir_lower_var_copies(nir_shader *shader);
 
-void nir_lower_global_vars_to_local(nir_shader *shader);
+bool nir_lower_global_vars_to_local(nir_shader *shader);
 
 void nir_lower_locals_to_regs(nir_shader *shader);
 
diff --git a/src/glsl/nir/nir_lower_global_vars_to_local.c b/src/glsl/nir/nir_lower_global_vars_to_local.c
index 0cd8740cc12..fab236611a5 100644
--- a/src/glsl/nir/nir_lower_global_vars_to_local.c
+++ b/src/glsl/nir/nir_lower_global_vars_to_local.c
@@ -73,10 +73,11 @@ mark_global_var_uses_block(nir_block *block, void *void_state)
    return true;
 }
 
-void
+bool
 nir_lower_global_vars_to_local(nir_shader *shader)
 {
    struct global_to_local_state state;
+   bool progress = false;
 
    state.var_func_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                   _mesa_key_pointer_equal);
@@ -99,8 +100,11 @@ nir_lower_global_vars_to_local(nir_shader *shader)
          exec_node_remove(&var->node);
          var->data.mode = nir_var_local;
          exec_list_push_tail(&impl->locals, &var->node);
+         progress = true;
       }
    }
 
    _mesa_hash_table_destroy(state.var_func_table, NULL);
+
+   return progress;
 }
-- 
cgit v1.2.3


From 9f5e7ae9d83ce6de761936b95cd0b7ba4c1219c4 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason@jlekstrand.net>
Date: Wed, 9 Sep 2015 17:50:09 -0700
Subject: nir: Report progress from lower_vec_to_movs().

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir.h                   |  2 +-
 src/glsl/nir/nir_lower_vec_to_movs.c | 27 +++++++++++++++++++++------
 2 files changed, 22 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 3d071b17ce3..4b05807e1d0 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1826,7 +1826,7 @@ void nir_lower_vars_to_ssa(nir_shader *shader);
 void nir_remove_dead_variables(nir_shader *shader);
 
 void nir_move_vec_src_uses_to_dest(nir_shader *shader);
-void nir_lower_vec_to_movs(nir_shader *shader);
+bool nir_lower_vec_to_movs(nir_shader *shader);
 void nir_lower_alu_to_scalar(nir_shader *shader);
 void nir_lower_load_const_to_scalar(nir_shader *shader);
 
diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c b/src/glsl/nir/nir_lower_vec_to_movs.c
index 2cb0457f9ba..b7ee4e8d33c 100644
--- a/src/glsl/nir/nir_lower_vec_to_movs.c
+++ b/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -32,6 +32,11 @@
  * moves with partial writes.
  */
 
+struct vec_to_movs_state {
+   nir_function_impl *impl;
+   bool progress;
+};
+
 static bool
 src_matches_dest_reg(nir_dest *dest, nir_src *src)
 {
@@ -185,9 +190,10 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
 }
 
 static bool
-lower_vec_to_movs_block(nir_block *block, void *void_impl)
+lower_vec_to_movs_block(nir_block *block, void *void_state)
 {
-   nir_function_impl *impl = void_impl;
+   struct vec_to_movs_state *state = void_state;
+   nir_function_impl *impl = state->impl;
    nir_shader *shader = impl->overload->function->shader;
 
    nir_foreach_instr_safe(block, instr) {
@@ -246,22 +252,31 @@ lower_vec_to_movs_block(nir_block *block, void *void_impl)
 
       nir_instr_remove(&vec->instr);
       ralloc_free(vec);
+      state->progress = true;
    }
 
    return true;
 }
 
-static void
+static bool
 nir_lower_vec_to_movs_impl(nir_function_impl *impl)
 {
-   nir_foreach_block(impl, lower_vec_to_movs_block, impl);
+   struct vec_to_movs_state state = { impl, false };
+
+   nir_foreach_block(impl, lower_vec_to_movs_block, &state);
+
+   return state.progress;
 }
 
-void
+bool
 nir_lower_vec_to_movs(nir_shader *shader)
 {
+   bool progress = false;
+
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         nir_lower_vec_to_movs_impl(overload->impl);
+         progress = nir_lower_vec_to_movs_impl(overload->impl) || progress;
    }
+
+   return progress;
 }
-- 
cgit v1.2.3


From 1adde5b87e43b1512c0744c412d51cbc0078329b Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 17 Sep 2015 10:57:14 -0700
Subject: nir: Report progress from nir_remove_dead_variables().

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir.h                       |  2 +-
 src/glsl/nir/nir_remove_dead_variables.c | 15 +++++++++++----
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 4b05807e1d0..5be5bfa2ab8 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1823,7 +1823,7 @@ void nir_lower_io(nir_shader *shader,
                   int (*type_size)(const struct glsl_type *));
 void nir_lower_vars_to_ssa(nir_shader *shader);
 
-void nir_remove_dead_variables(nir_shader *shader);
+bool nir_remove_dead_variables(nir_shader *shader);
 
 void nir_move_vec_src_uses_to_dest(nir_shader *shader);
 bool nir_lower_vec_to_movs(nir_shader *shader);
diff --git a/src/glsl/nir/nir_remove_dead_variables.c b/src/glsl/nir/nir_remove_dead_variables.c
index 4417e2a4883..d6783e78803 100644
--- a/src/glsl/nir/nir_remove_dead_variables.c
+++ b/src/glsl/nir/nir_remove_dead_variables.c
@@ -97,32 +97,39 @@ add_var_use_shader(nir_shader *shader, struct set *live)
    }
 }
 
-static void
+static bool
 remove_dead_vars(struct exec_list *var_list, struct set *live)
 {
+   bool progress = false;
+
    foreach_list_typed_safe(nir_variable, var, node, var_list) {
       struct set_entry *entry = _mesa_set_search(live, var);
       if (entry == NULL) {
          exec_node_remove(&var->node);
          ralloc_free(var);
+         progress = true;
       }
    }
+
+   return progress;
 }
 
-void
+bool
 nir_remove_dead_variables(nir_shader *shader)
 {
+   bool progress = false;
    struct set *live =
       _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
 
    add_var_use_shader(shader, live);
 
-   remove_dead_vars(&shader->globals, live);
+   progress = remove_dead_vars(&shader->globals, live) || progress;
 
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         remove_dead_vars(&overload->impl->locals, live);
+         progress = remove_dead_vars(&overload->impl->locals, live) || progress;
    }
 
    _mesa_set_destroy(live, NULL);
+   return progress;
 }
-- 
cgit v1.2.3


From cfae0f8a3ae98ae10bcf9e4f1429f66165a17d95 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 17 Sep 2015 12:29:49 -0700
Subject: nir: Report progress from nir_lower_locals_to_regs().

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir.h                      |  2 +-
 src/glsl/nir/nir_lower_locals_to_regs.c | 18 +++++++++++++++---
 2 files changed, 16 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 5be5bfa2ab8..26b3c75c140 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1811,7 +1811,7 @@ void nir_lower_var_copies(nir_shader *shader);
 
 bool nir_lower_global_vars_to_local(nir_shader *shader);
 
-void nir_lower_locals_to_regs(nir_shader *shader);
+bool nir_lower_locals_to_regs(nir_shader *shader);
 
 void nir_lower_outputs_to_temporaries(nir_shader *shader);
 
diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c b/src/glsl/nir/nir_lower_locals_to_regs.c
index 87d2498dd79..17b53ca36f3 100644
--- a/src/glsl/nir/nir_lower_locals_to_regs.c
+++ b/src/glsl/nir/nir_lower_locals_to_regs.c
@@ -40,6 +40,8 @@ struct locals_to_regs_state {
     * used to make adding register initialization code deterministic.
     */
    nir_array derefs_array;
+
+   bool progress;
 };
 
 /* The following two functions implement a hash and equality check for
@@ -228,6 +230,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
          nir_instr_insert_before(&intrin->instr, &mov->instr);
 
          nir_instr_remove(&intrin->instr);
+         state->progress = true;
          break;
       }
 
@@ -249,6 +252,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
          nir_instr_insert_before(&intrin->instr, &mov->instr);
 
          nir_instr_remove(&intrin->instr);
+         state->progress = true;
          break;
       }
 
@@ -336,15 +340,17 @@ insert_constant_initializer(nir_deref_var *deref_head, nir_deref *deref_tail,
    mov->dest.dest.reg.indirect = reg_src.reg.indirect;
 
    nir_instr_insert_after(&load->instr, &mov->instr);
+   state->progress = true;
 }
 
-static void
+static bool
 nir_lower_locals_to_regs_impl(nir_function_impl *impl)
 {
    struct locals_to_regs_state state;
 
    state.shader = impl->overload->function->shader;
    state.impl = impl;
+   state.progress = false;
    state.regs_table = _mesa_hash_table_create(NULL, hash_deref, derefs_equal);
    nir_array_init(&state.derefs_array, NULL);
 
@@ -372,13 +378,19 @@ nir_lower_locals_to_regs_impl(nir_function_impl *impl)
 
    nir_array_fini(&state.derefs_array);
    _mesa_hash_table_destroy(state.regs_table, NULL);
+
+   return state.progress;
 }
 
-void
+bool
 nir_lower_locals_to_regs(nir_shader *shader)
 {
+   bool progress = false;
+
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         nir_lower_locals_to_regs_impl(overload->impl);
+         progress = nir_lower_locals_to_regs_impl(overload->impl) || progress;
    }
+
+   return progress;
 }
-- 
cgit v1.2.3


From dc18b9357b553a972ea439facfbc55e376f1179f Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 17 Sep 2015 12:33:36 -0700
Subject: nir: Report progress from nir_split_var_copies().

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir.h                  |  2 +-
 src/glsl/nir/nir_split_var_copies.c | 15 ++++++++++++---
 2 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 26b3c75c140..bccaf58c03c 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1804,7 +1804,7 @@ void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);
 void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
 void nir_dump_cfg(nir_shader *shader, FILE *fp);
 
-void nir_split_var_copies(nir_shader *shader);
+bool nir_split_var_copies(nir_shader *shader);
 
 void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx);
 void nir_lower_var_copies(nir_shader *shader);
diff --git a/src/glsl/nir/nir_split_var_copies.c b/src/glsl/nir/nir_split_var_copies.c
index fc72c078c77..d2ea58a8b7c 100644
--- a/src/glsl/nir/nir_split_var_copies.c
+++ b/src/glsl/nir/nir_split_var_copies.c
@@ -64,6 +64,7 @@
 struct split_var_copies_state {
    void *mem_ctx;
    void *dead_ctx;
+   bool progress;
 };
 
 static nir_deref *
@@ -198,6 +199,7 @@ split_var_copy_instr(nir_intrinsic_instr *old_copy,
           * remove the old one later.
           */
          nir_instr_insert_after(&old_copy->instr, &new_copy->instr);
+         state->progress = true;
       }
       break;
 
@@ -256,24 +258,31 @@ split_var_copies_block(nir_block *block, void *void_state)
    return true;
 }
 
-static void
+static bool
 split_var_copies_impl(nir_function_impl *impl)
 {
    struct split_var_copies_state state;
 
    state.mem_ctx = ralloc_parent(impl);
    state.dead_ctx = ralloc_context(NULL);
+   state.progress = false;
 
    nir_foreach_block(impl, split_var_copies_block, &state);
 
    ralloc_free(state.dead_ctx);
+
+   return state.progress;
 }
 
-void
+bool
 nir_split_var_copies(nir_shader *shader)
 {
+   bool progress = false;
+
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         split_var_copies_impl(overload->impl);
+         progress = split_var_copies_impl(overload->impl) || progress;
    }
+
+   return progress;
 }
-- 
cgit v1.2.3


From 0a1adaf11d051b71b4c46aabee2e5342f2d6aef3 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 17 Sep 2015 13:00:58 -0700
Subject: nir: Report progress from nir_lower_system_values().

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir.h                     |  2 +-
 src/glsl/nir/nir_lower_system_values.c | 27 ++++++++++++++++++---------
 2 files changed, 19 insertions(+), 10 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index bccaf58c03c..99cfe9dfb27 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1835,7 +1835,7 @@ void nir_lower_phis_to_scalar(nir_shader *shader);
 void nir_lower_samplers(nir_shader *shader,
                         const struct gl_shader_program *shader_program);
 
-void nir_lower_system_values(nir_shader *shader);
+bool nir_lower_system_values(nir_shader *shader);
 
 typedef struct nir_lower_tex_options {
    /**
diff --git a/src/glsl/nir/nir_lower_system_values.c b/src/glsl/nir/nir_lower_system_values.c
index 06ee3e68c8d..d77bb2f8213 100644
--- a/src/glsl/nir/nir_lower_system_values.c
+++ b/src/glsl/nir/nir_lower_system_values.c
@@ -28,15 +28,15 @@
 #include "nir.h"
 #include "main/mtypes.h"
 
-static void
+static bool
 convert_instr(nir_intrinsic_instr *instr)
 {
    if (instr->intrinsic != nir_intrinsic_load_var)
-      return;
+      return false;
 
    nir_variable *var = instr->variables[0]->var;
    if (var->data.mode != nir_var_system_value)
-      return;
+      return false;
 
    void *mem_ctx = ralloc_parent(instr);
 
@@ -54,36 +54,45 @@ convert_instr(nir_intrinsic_instr *instr)
 
    nir_instr_insert_before(&instr->instr, &new_instr->instr);
    nir_instr_remove(&instr->instr);
+
+   return true;
 }
 
 static bool
 convert_block(nir_block *block, void *state)
 {
-   (void) state;
+   bool *progress = state;
 
    nir_foreach_instr_safe(block, instr) {
       if (instr->type == nir_instr_type_intrinsic)
-         convert_instr(nir_instr_as_intrinsic(instr));
+         *progress = convert_instr(nir_instr_as_intrinsic(instr)) || *progress;
    }
 
    return true;
 }
 
-static void
+static bool
 convert_impl(nir_function_impl *impl)
 {
-   nir_foreach_block(impl, convert_block, NULL);
+   bool progress = false; /* convert_block() reads and ORs into this */
+
+   nir_foreach_block(impl, convert_block, &progress);
    nir_metadata_preserve(impl, nir_metadata_block_index |
                                nir_metadata_dominance);
+   return progress;
 }
 
-void
+bool
 nir_lower_system_values(nir_shader *shader)
 {
+   bool progress = false;
+
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         convert_impl(overload->impl);
+         progress = convert_impl(overload->impl) || progress;
    }
 
    exec_list_make_empty(&shader->system_values);
+
+   return progress;
 }
-- 
cgit v1.2.3


From d7ffd90ecb1e91fc3e0c816c06c71a2743d48276 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 17 Sep 2015 13:08:03 -0700
Subject: nir: Add braces around multi-line loop.

This was correct but not our usual style.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_normalize_cubemap_coords.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_normalize_cubemap_coords.c b/src/glsl/nir/nir_normalize_cubemap_coords.c
index ca68bd7a94c..dd6de40c46f 100644
--- a/src/glsl/nir/nir_normalize_cubemap_coords.c
+++ b/src/glsl/nir/nir_normalize_cubemap_coords.c
@@ -98,7 +98,8 @@ normalize_cubemap_coords_impl(nir_function_impl *impl)
 void
 nir_normalize_cubemap_coords(nir_shader *shader)
 {
-   nir_foreach_overload(shader, overload)
+   nir_foreach_overload(shader, overload) {
       if (overload->impl)
          normalize_cubemap_coords_impl(overload->impl);
+   }
 }
-- 
cgit v1.2.3


From 5cede90f6269d517a420856cced705c2d2cc7ef6 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 17 Sep 2015 13:18:41 -0700
Subject: nir: Report progress from nir_normalize_cubemap_coords().

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir.h                          |  2 +-
 src/glsl/nir/nir_normalize_cubemap_coords.c | 29 ++++++++++++++++++++++-------
 2 files changed, 23 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 99cfe9dfb27..619a363b540 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1883,7 +1883,7 @@ void nir_lower_two_sided_color(nir_shader *shader);
 void nir_lower_atomics(nir_shader *shader);
 void nir_lower_to_source_mods(nir_shader *shader);
 
-void nir_normalize_cubemap_coords(nir_shader *shader);
+bool nir_normalize_cubemap_coords(nir_shader *shader);
 
 void nir_live_variables_impl(nir_function_impl *impl);
 bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
diff --git a/src/glsl/nir/nir_normalize_cubemap_coords.c b/src/glsl/nir/nir_normalize_cubemap_coords.c
index dd6de40c46f..7385576a223 100644
--- a/src/glsl/nir/nir_normalize_cubemap_coords.c
+++ b/src/glsl/nir/nir_normalize_cubemap_coords.c
@@ -33,10 +33,16 @@
  * or 1.0.  This is based on the old GLSL IR based pass by Eric.
  */
 
+struct normalize_cubemap_state {
+   nir_builder b;
+   bool progress;
+};
+
 static bool
 normalize_cubemap_coords_block(nir_block *block, void *void_state)
 {
-   nir_builder *b = void_state;
+   struct normalize_cubemap_state *state = void_state;
+   nir_builder *b = &state->b;
 
    nir_foreach_instr(block, instr) {
       if (instr->type != nir_instr_type_tex)
@@ -77,29 +83,38 @@ normalize_cubemap_coords_block(nir_block *block, void *void_state)
          nir_instr_rewrite_src(&tex->instr,
                                &tex->src[i].src,
                                nir_src_for_ssa(normalized));
+
+         state->progress = true;
       }
    }
 
    return true;
 }
 
-static void
+static bool
 normalize_cubemap_coords_impl(nir_function_impl *impl)
 {
-   nir_builder b;
-   nir_builder_init(&b, impl);
+   struct normalize_cubemap_state state;
+   nir_builder_init(&state.b, impl);
+   state.progress = false;
 
-   nir_foreach_block(impl, normalize_cubemap_coords_block, &b);
+   nir_foreach_block(impl, normalize_cubemap_coords_block, &state);
 
    nir_metadata_preserve(impl, nir_metadata_block_index |
                                nir_metadata_dominance);
+
+   return state.progress;
 }
 
-void
+bool
 nir_normalize_cubemap_coords(nir_shader *shader)
 {
+   bool progress = false;
+
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         normalize_cubemap_coords_impl(overload->impl);
+         progress = normalize_cubemap_coords_impl(overload->impl) || progress;
    }
+
+   return progress;
 }
-- 
cgit v1.2.3


From 7f8815bcb9af9b4b374ad7bd6e7cfa7529a6c980 Mon Sep 17 00:00:00 2001
From: Tapani Pälli <tapani.palli@intel.com>
Date: Mon, 7 Sep 2015 15:08:13 +0300
Subject: i965: fix textureGrad for cubemaps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes bugs exposed by commit
2b1cdb0eddb73f62e4848d4b64840067f1f70865 in:
   ES3-CTS.gtf.GL3Tests.shadow.shadow_execution_frag

No regressions observed in deqp, CTS or Piglit.

v2: address review feedback from Iago Toral:
   - move rho calculation to else branch
   - optimize dx and dy calculation
   - fix documentation inconsistencies

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Signed-off-by: Kevin Rogovin <kevin.rogovin@intel.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91114
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
---
 .../dri/i965/brw_lower_texture_gradients.cpp       | 201 +++++++++++++++++++--
 1 file changed, 182 insertions(+), 19 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
index 7a5f9834423..d571ecd4394 100644
--- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
+++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
@@ -48,6 +48,7 @@ public:
 
 private:
    void emit(ir_variable *, ir_rvalue *);
+   ir_variable *temp(void *ctx, const glsl_type *type, const char *name);
 };
 
 /**
@@ -60,6 +61,17 @@ lower_texture_grad_visitor::emit(ir_variable *var, ir_rvalue *value)
    base_ir->insert_before(assign(var, value));
 }
 
+/**
+ * Emit a temporary variable declaration
+ */
+ir_variable *
+lower_texture_grad_visitor::temp(void *ctx, const glsl_type *type, const char *name)
+{
+   ir_variable *var = new(ctx) ir_variable(type, name, ir_var_temporary);
+   base_ir->insert_before(var);
+   return var;
+}
+
 static const glsl_type *
 txs_type(const glsl_type *type)
 {
@@ -144,28 +156,179 @@ lower_texture_grad_visitor::visit_leave(ir_texture *ir)
       new(mem_ctx) ir_variable(grad_type, "dPdy", ir_var_temporary);
    emit(dPdy, mul(size, ir->lod_info.grad.dPdy));
 
-   /* Calculate rho from equation 3.20 of the GL 3.0 specification. */
-   ir_rvalue *rho;
-   if (dPdx->type->is_scalar()) {
-      rho = expr(ir_binop_max, expr(ir_unop_abs, dPdx),
-			       expr(ir_unop_abs, dPdy));
-   } else {
-      rho = expr(ir_binop_max, expr(ir_unop_sqrt, dot(dPdx, dPdx)),
-			       expr(ir_unop_sqrt, dot(dPdy, dPdy)));
-   }
-
-   /* lambda_base = log2(rho).  We're ignoring GL state biases for now.
-    *
-    * For cube maps the result of these formulas is giving us a value of rho
-    * that is twice the value we should use, so divide it by 2 or,
-    * alternatively, remove one unit from the result of the log2 computation.
-    */
    ir->op = ir_txl;
    if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
-      ir->lod_info.lod = expr(ir_binop_add,
-                              expr(ir_unop_log2, rho),
-                              new(mem_ctx) ir_constant(-1.0f));
+      /* Cubemap texture lookups first generate a texture coordinate normalized
+       * to [-1, 1] on the appropriate face. The appropriate face is determined
+       * by which component has largest magnitude and its sign. The texture
+       * coordinate is the quotient of the remaining texture coordinates against
+       * that absolute value of the component of largest magnitude. This
+       * division requires that the computing of the derivative of the texel
+       * coordinate must use the quotient rule. The high level GLSL code is as
+       * follows:
+       *
+       * Step 1: selection
+       *
+       * vec3 abs_p, Q, dQdx, dQdy;
+       * abs_p = abs(ir->coordinate);
+       * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
+       *    Q = ir->coordinate.yzx;
+       *    dQdx = ir->lod_info.grad.dPdx.yzx;
+       *    dQdy = ir->lod_info.grad.dPdy.yzx;
+       * }
+       * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
+       *    Q = ir->coordinate.xzy;
+       *    dQdx = ir->lod_info.grad.dPdx.xzy;
+       *    dQdy = ir->lod_info.grad.dPdy.xzy;
+       * }
+       * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
+       *    Q = ir->coordinate;
+       *    dQdx = ir->lod_info.grad.dPdx;
+       *    dQdy = ir->lod_info.grad.dPdy;
+       * }
+       *
+       * Step 2: use quotient rule to compute derivative. The normalized to
+       * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
+       * only concerned with the magnitudes of the derivatives whose values are
+       * not affected by the sign. We drop the sign from the computation.
+       *
+       * vec2 dx, dy;
+       * float recip;
+       *
+       * recip = 1.0 / Q.z;
+       * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
+       * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
+       *
+       * Step 3: compute LOD. At this point we have the derivatives of the
+       * texture coordinates normalized to [-1,1]. We take the LOD to be
+       *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
+       *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
+       *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
+       *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
+       *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
+       * where L is the dimension of the cubemap. The code is:
+       *
+       * float M, result;
+       * M = max(dot(dx, dx), dot(dy, dy));
+       * L = textureSize(sampler, 0).x;
+       * result = -1.0 + 0.5 * log2(L * L * M);
+       */
+
+/* Helpers to make code more human readable. */
+#define EMIT(instr) base_ir->insert_before(instr)
+#define THEN(irif, instr) irif->then_instructions.push_tail(instr)
+#define CLONE(x) x->clone(mem_ctx, NULL)
+
+      ir_variable *abs_p = temp(mem_ctx, glsl_type::vec3_type, "abs_p");
+
+      EMIT(assign(abs_p, swizzle_for_size(abs(CLONE(ir->coordinate)), 3)));
+
+      ir_variable *Q = temp(mem_ctx, glsl_type::vec3_type, "Q");
+      ir_variable *dQdx = temp(mem_ctx, glsl_type::vec3_type, "dQdx");
+      ir_variable *dQdy = temp(mem_ctx, glsl_type::vec3_type, "dQdy");
+
+      /* unmodified dPdx, dPdy values */
+      ir_rvalue *dPdx = ir->lod_info.grad.dPdx;
+      ir_rvalue *dPdy = ir->lod_info.grad.dPdy;
+
+      /* 1. compute selector */
+
+      /* if (abs_p.x >= max(abs_p.y, abs_p.z))  ... */
+      ir_if *branch_x =
+         new(mem_ctx) ir_if(gequal(swizzle_x(abs_p),
+                                   max2(swizzle_y(abs_p), swizzle_z(abs_p))));
+
+      /* Q = p.yzx;
+       * dQdx = dPdx.yzx;
+       * dQdy = dPdy.yzx;
+       */
+      int yzx = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, 0);
+      THEN(branch_x, assign(Q, swizzle(CLONE(ir->coordinate), yzx, 3)));
+      THEN(branch_x, assign(dQdx, swizzle(CLONE(dPdx), yzx, 3)));
+      THEN(branch_x, assign(dQdy, swizzle(CLONE(dPdy), yzx, 3)));
+      EMIT(branch_x);
+
+      /* if (abs_p.y >= max(abs_p.x, abs_p.z)) */
+      ir_if *branch_y =
+         new(mem_ctx) ir_if(gequal(swizzle_y(abs_p),
+                                   max2(swizzle_x(abs_p), swizzle_z(abs_p))));
+
+      /* Q = p.xzy;
+       * dQdx = dPdx.xzy;
+       * dQdy = dPdy.xzy;
+       */
+      int xzy = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Z, SWIZZLE_Y, 0);
+      THEN(branch_y, assign(Q, swizzle(CLONE(ir->coordinate), xzy, 3)));
+      THEN(branch_y, assign(dQdx, swizzle(CLONE(dPdx), xzy, 3)));
+      THEN(branch_y, assign(dQdy, swizzle(CLONE(dPdy), xzy, 3)));
+      EMIT(branch_y);
+
+      /* if (abs_p.z >= max(abs_p.x, abs_p.y)) */
+      ir_if *branch_z =
+         new(mem_ctx) ir_if(gequal(swizzle_z(abs_p),
+                            max2(swizzle_x(abs_p), swizzle_y(abs_p))));
+
+      /* Q = p;
+       * dQdx = dPdx;
+       * dQdy = dPdy;
+       */
+      THEN(branch_z, assign(Q, swizzle_for_size(CLONE(ir->coordinate), 3)));
+      THEN(branch_z, assign(dQdx, CLONE(dPdx)));
+      THEN(branch_z, assign(dQdy, CLONE(dPdy)));
+      EMIT(branch_z);
+
+      /* 2. quotient rule */
+      ir_variable *recip = temp(mem_ctx, glsl_type::float_type, "recip");
+      EMIT(assign(recip, div(new(mem_ctx) ir_constant(1.0f), swizzle_z(Q))));
+
+      ir_variable *dx = temp(mem_ctx, glsl_type::vec2_type, "dx");
+      ir_variable *dy = temp(mem_ctx, glsl_type::vec2_type, "dy");
+
+      /* tmp = Q.xy * recip;
+       * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
+       * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
+       */
+      ir_variable *tmp = temp(mem_ctx, glsl_type::vec2_type, "tmp");
+      EMIT(assign(tmp, mul(swizzle_xy(Q), recip)));
+      EMIT(assign(dx, mul(recip, sub(swizzle_xy(dQdx),
+                                     mul(tmp, swizzle_z(dQdx))))));
+      EMIT(assign(dy, mul(recip, sub(swizzle_xy(dQdy),
+                                     mul(tmp, swizzle_z(dQdy))))));
+
+      /* M = max(dot(dx, dx), dot(dy, dy)); */
+      ir_variable *M = temp(mem_ctx, glsl_type::float_type, "M");
+      EMIT(assign(M, max2(dot(dx, dx), dot(dy, dy))));
+
+      /* size has textureSize() of LOD 0 */
+      ir_variable *L = temp(mem_ctx, glsl_type::float_type, "L");
+      EMIT(assign(L, swizzle_x(size)));
+
+      ir_variable *result = temp(mem_ctx, glsl_type::float_type, "result");
+
+      /* result = -1.0 + 0.5 * log2(L * L * M); */
+      EMIT(assign(result,
+                  add(new(mem_ctx)ir_constant(-1.0f),
+                      mul(new(mem_ctx)ir_constant(0.5f),
+                          expr(ir_unop_log2, mul(mul(L, L), M))))));
+
+      /* 3. final assignment of parameters to textureLod call */
+      ir->lod_info.lod = new (mem_ctx) ir_dereference_variable(result);
+
+#undef THEN
+#undef EMIT
+
    } else {
+      /* Calculate rho from equation 3.20 of the GL 3.0 specification. */
+      ir_rvalue *rho;
+      if (dPdx->type->is_scalar()) {
+         rho = expr(ir_binop_max, expr(ir_unop_abs, dPdx),
+                    expr(ir_unop_abs, dPdy));
+      } else {
+         rho = expr(ir_binop_max, expr(ir_unop_sqrt, dot(dPdx, dPdx)),
+                    expr(ir_unop_sqrt, dot(dPdy, dPdy)));
+      }
+
+      /* lambda_base = log2(rho).  We're ignoring GL state biases for now. */
       ir->lod_info.lod = expr(ir_unop_log2, rho);
    }
 
-- 
cgit v1.2.3


From 6c3de8996fbe9447e092cc75ccdd6f720fabaf4d Mon Sep 17 00:00:00 2001
From: Marta Lofstedt <marta.lofstedt@intel.com>
Date: Wed, 19 Aug 2015 20:25:22 +0200
Subject: mesa/es3.1 : Align OpenGL ES 3.1 glBindVertexBuffer error handling
 with OpenGL Core

According to OpenGL ES 3.1 specification 10.3.1:
"An INVALID_OPERATION error is generated if buffer is not zero
or a name returned from a previous call to GenBuffers,
or if such a name has since been deleted with DeleteBuffers."
This error check was previously limited to OpenGL Core.

Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
---
 src/mesa/main/bufferobj.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 3b87654c5d2..23da83e8b80 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -916,7 +916,7 @@ _mesa_handle_bind_buffer_gen(struct gl_context *ctx,
 {
    struct gl_buffer_object *buf = *buf_handle;
 
-   if (!buf && ctx->API == API_OPENGL_CORE) {
+   if (!buf && (ctx->API == API_OPENGL_CORE || _mesa_is_gles31(ctx))) {
       _mesa_error(ctx, GL_INVALID_OPERATION, "%s(non-gen name)", caller);
       return false;
    }
-- 
cgit v1.2.3


From cf293e518ebd847cb28e03d4378679c47548206d Mon Sep 17 00:00:00 2001
From: Marta Lofstedt <marta.lofstedt@intel.com>
Date: Wed, 19 Aug 2015 20:25:23 +0200
Subject: mesa/es3.1: Allow query of Vertex bindings for GLES 3.1

Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
---
 src/mesa/main/get.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index d5df53057d7..926caef7031 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -1969,7 +1969,8 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
       return TYPE_INT64;
 
    case GL_VERTEX_BINDING_DIVISOR:
-      if (!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_instanced_arrays)
+      if ((!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_instanced_arrays) &&
+          !_mesa_is_gles31(ctx))
           goto invalid_enum;
       if (index >= ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs)
           goto invalid_value;
@@ -1977,7 +1978,7 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
       return TYPE_INT;
 
    case GL_VERTEX_BINDING_OFFSET:
-      if (!_mesa_is_desktop_gl(ctx))
+      if (!_mesa_is_desktop_gl(ctx) && !_mesa_is_gles31(ctx))
           goto invalid_enum;
       if (index >= ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs)
           goto invalid_value;
@@ -1985,7 +1986,7 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
       return TYPE_INT;
 
    case GL_VERTEX_BINDING_STRIDE:
-      if (!_mesa_is_desktop_gl(ctx))
+      if (!_mesa_is_desktop_gl(ctx) && !_mesa_is_gles31(ctx))
           goto invalid_enum;
       if (index >= ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs)
           goto invalid_value;
-- 
cgit v1.2.3


From 419210005a84f1f26da353b945b3f783d53fa56a Mon Sep 17 00:00:00 2001
From: Marta Lofstedt <marta.lofstedt@intel.com>
Date: Wed, 19 Aug 2015 20:25:24 +0200
Subject: mesa/es3.1: Enable GL_ARB_vertex_attrib_binding functionality for
 GLES 3.1

Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
---
 src/mesa/main/varray.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c
index c3c2424574c..4df57c148c7 100644
--- a/src/mesa/main/varray.c
+++ b/src/mesa/main/varray.c
@@ -899,12 +899,12 @@ get_vertex_array_attrib(struct gl_context *ctx,
       }
       goto error;
    case GL_VERTEX_ATTRIB_BINDING:
-      if (_mesa_is_desktop_gl(ctx)) {
+      if (_mesa_is_desktop_gl(ctx) || _mesa_is_gles31(ctx)) {
          return array->VertexBinding - VERT_ATTRIB_GENERIC0;
       }
       goto error;
    case GL_VERTEX_ATTRIB_RELATIVE_OFFSET:
-      if (_mesa_is_desktop_gl(ctx)) {
+      if (_mesa_is_desktop_gl(ctx) || _mesa_is_gles31(ctx)) {
          return array->RelativeOffset;
       }
       goto error;
@@ -1682,7 +1682,7 @@ vertex_array_vertex_buffer(struct gl_context *ctx,
       return;
    }
 
-   if (ctx->API == API_OPENGL_CORE && ctx->Version >= 44 &&
+   if (((ctx->API == API_OPENGL_CORE && ctx->Version >= 44) || _mesa_is_gles31(ctx)) &&
        stride > ctx->Const.MaxVertexAttribStride) {
       _mesa_error(ctx, GL_INVALID_VALUE, "%s(stride=%d > "
                   "GL_MAX_VERTEX_ATTRIB_STRIDE)", func, stride);
@@ -1732,7 +1732,7 @@ _mesa_BindVertexBuffer(GLuint bindingIndex, GLuint buffer, GLintptr offset,
     *    "An INVALID_OPERATION error is generated if no vertex array object
     *     is bound."
     */
-   if (ctx->API == API_OPENGL_CORE &&
+   if ((ctx->API == API_OPENGL_CORE || _mesa_is_gles31(ctx)) &&
        ctx->Array.VAO == ctx->Array.DefaultVAO) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glBindVertexBuffer(No array object bound)");
@@ -1954,7 +1954,7 @@ vertex_attrib_format(GLuint attribIndex, GLint size, GLenum type,
     * is an oversight.  In the OpenGL 4.3 (Core Profile) spec, it applies
     * to all three functions.
     */
-   if (ctx->API == API_OPENGL_CORE &&
+   if ((ctx->API == API_OPENGL_CORE || _mesa_is_gles31(ctx)) &&
        ctx->Array.VAO == ctx->Array.DefaultVAO) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "%s(No array object bound)", func);
@@ -2142,7 +2142,7 @@ _mesa_VertexAttribBinding(GLuint attribIndex, GLuint bindingIndex)
     *    "An INVALID_OPERATION error is generated if no vertex array object
     *     is bound."
     */
-   if (ctx->API == API_OPENGL_CORE &&
+   if ((ctx->API == API_OPENGL_CORE || _mesa_is_gles31(ctx)) &&
        ctx->Array.VAO == ctx->Array.DefaultVAO) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glVertexAttribBinding(No array object bound)");
@@ -2216,7 +2216,7 @@ _mesa_VertexBindingDivisor(GLuint bindingIndex, GLuint divisor)
     *    "An INVALID_OPERATION error is generated if no vertex array object
     *     is bound."
     */
-   if (ctx->API == API_OPENGL_CORE &&
+   if ((ctx->API == API_OPENGL_CORE || _mesa_is_gles31(ctx)) &&
        ctx->Array.VAO == ctx->Array.DefaultVAO) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glVertexBindingDivisor(No array object bound)");
-- 
cgit v1.2.3


From 76dbab0a694d221ce0ea51d8c0b83a4529c42d29 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 17 Sep 2015 09:45:20 -0600
Subject: mesa: whitespace, comment fixes in texstorage.c

---
 src/mesa/main/texstorage.c | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/texstorage.c b/src/mesa/main/texstorage.c
index c53bb295c66..a29175e6cc6 100644
--- a/src/mesa/main/texstorage.c
+++ b/src/mesa/main/texstorage.c
@@ -22,14 +22,11 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-
 /**
  * \file texstorage.c
  * GL_ARB_texture_storage functions
  */
 
-
-
 #include "glheader.h"
 #include "context.h"
 #include "enums.h"
@@ -110,7 +107,7 @@ legal_texobj_target(struct gl_context *ctx, GLuint dims, GLenum target)
 
 /** Helper to get a particular texture image in a texture object */
 static struct gl_texture_image *
-get_tex_image(struct gl_context *ctx, 
+get_tex_image(struct gl_context *ctx,
               struct gl_texture_object *texObj,
               GLuint face, GLuint level)
 {
@@ -151,7 +148,8 @@ initialize_texture_fields(struct gl_context *ctx,
                                     0, internalFormat, texFormat);
       }
 
-      _mesa_next_mipmap_level_size(target, 0, levelWidth, levelHeight, levelDepth,
+      _mesa_next_mipmap_level_size(target, 0,
+                                   levelWidth, levelHeight, levelDepth,
                                    &levelWidth, &levelHeight, &levelDepth);
    }
    return GL_TRUE;
@@ -246,6 +244,7 @@ _mesa_is_legal_tex_storage_format(struct gl_context *ctx, GLenum internalformat)
    }
 }
 
+
 /**
  * Default ctx->Driver.AllocTextureStorage() handler.
  *
@@ -306,7 +305,7 @@ tex_storage_error_check(struct gl_context *ctx,
                   "glTex%sStorage%uD(width, height or depth < 1)",
                   suffix, dims);
       return GL_TRUE;
-   }  
+   }
 
    if (_mesa_is_compressed_format(ctx, internalformat)) {
       GLenum err;
@@ -323,7 +322,7 @@ tex_storage_error_check(struct gl_context *ctx,
       _mesa_error(ctx, GL_INVALID_VALUE, "glTex%sStorage%uD(levels < 1)",
                   suffix, dims);
       return GL_TRUE;
-   }  
+   }
 
    /* check levels against maximum (note different error than above) */
    if (levels > (GLint) _mesa_max_texture_levels(ctx, target)) {
@@ -390,7 +389,6 @@ _mesa_texture_storage(struct gl_context *ctx, GLuint dims,
       return; /* error was recorded */
    }
 
-
    texFormat = _mesa_choose_texture_format(ctx, texObj, target, 0,
                                            internalformat, GL_NONE, GL_NONE);
    assert(texFormat != MESA_FORMAT_NONE);
@@ -456,6 +454,7 @@ _mesa_texture_storage(struct gl_context *ctx, GLuint dims,
    }
 }
 
+
 /**
  * Helper used by _mesa_TexStorage1/2/3D().
  */
@@ -466,9 +465,9 @@ texstorage(GLuint dims, GLenum target, GLsizei levels, GLenum internalformat,
    struct gl_texture_object *texObj;
    GET_CURRENT_CONTEXT(ctx);
 
-   /* target check */
-   /* This is done here so that _mesa_texture_storage can receive unsized
-    * formats. */
+   /* Check target.  This is done here so that _mesa_texture_storage
+    * can receive unsized formats.
+    */
    if (!legal_texobj_target(ctx, dims, target)) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glTexStorage%uD(illegal target=%s)",
@@ -482,6 +481,7 @@ texstorage(GLuint dims, GLenum target, GLsizei levels, GLenum internalformat,
                   _mesa_enum_to_string(target), levels,
                   _mesa_enum_to_string(internalformat),
                   width, height, depth);
+
    /* Check the format to make sure it is sized. */
    if (!_mesa_is_legal_tex_storage_format(ctx, internalformat)) {
       _mesa_error(ctx, GL_INVALID_ENUM,
@@ -498,6 +498,7 @@ texstorage(GLuint dims, GLenum target, GLsizei levels, GLenum internalformat,
                          internalformat, width, height, depth, false);
 }
 
+
 /**
  * Helper used by _mesa_TextureStorage1/2/3D().
  */
@@ -531,9 +532,9 @@ texturestorage(GLuint dims, GLuint texture, GLsizei levels,
       return;
    }
 
-   /* target check */
-   /* This is done here so that _mesa_texture_storage can receive unsized
-    * formats. */
+   /* Check target.  This is done here so that _mesa_texture_storage
+    * can receive unsized formats.
+    */
    if (!legal_texobj_target(ctx, dims, texObj->Target)) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glTextureStorage%uD(illegal target=%s)",
@@ -545,6 +546,7 @@ texturestorage(GLuint dims, GLuint texture, GLsizei levels,
                          levels, internalformat, width, height, depth, true);
 }
 
+
 void GLAPIENTRY
 _mesa_TexStorage1D(GLenum target, GLsizei levels, GLenum internalformat,
                    GLsizei width)
@@ -568,6 +570,7 @@ _mesa_TexStorage3D(GLenum target, GLsizei levels, GLenum internalformat,
    texstorage(3, target, levels, internalformat, width, height, depth);
 }
 
+
 void GLAPIENTRY
 _mesa_TextureStorage1D(GLuint texture, GLsizei levels, GLenum internalformat,
                        GLsizei width)
@@ -584,6 +587,7 @@ _mesa_TextureStorage2D(GLuint texture, GLsizei levels,
    texturestorage(2, texture, levels, internalformat, width, height, 1);
 }
 
+
 void GLAPIENTRY
 _mesa_TextureStorage3D(GLuint texture, GLsizei levels, GLenum internalformat,
                        GLsizei width, GLsizei height, GLsizei depth)
@@ -637,7 +641,6 @@ _mesa_TextureStorage2DEXT(GLuint texture, GLenum target, GLsizei levels,
 }
 
 
-
 void GLAPIENTRY
 _mesa_TextureStorage3DEXT(GLuint texture, GLenum target, GLsizei levels,
                           GLenum internalformat,
-- 
cgit v1.2.3


From 4879b766015eebd01911b40df8bef10081b8fce3 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 17 Sep 2015 09:45:42 -0600
Subject: mesa: const-qualify buffer_object_subdata_range_good() bufObj
 parameter

Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/main/bufferobj.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 23da83e8b80..f87cea97557 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -246,7 +246,7 @@ bufferobj_range_mapped(const struct gl_buffer_object *obj,
  */
 static bool
 buffer_object_subdata_range_good(struct gl_context *ctx,
-                                 struct gl_buffer_object *bufObj,
+                                 const struct gl_buffer_object *bufObj,
                                  GLintptr offset, GLsizeiptr size,
                                  bool mappedRange, const char *caller)
 {
-- 
cgit v1.2.3


From acee1a322d0a483aa155ff05cfaa124e84865656 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 17 Sep 2015 09:47:36 -0600
Subject: mesa: const-qualify _mesa_base_tex_format() ctx param

Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/main/teximage.c | 2 +-
 src/mesa/main/teximage.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 8913a72ad03..9bc176acf04 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -130,7 +130,7 @@ adjust_for_oes_float_texture(GLenum format, GLenum type)
  * texture format and env mode determine the arithmetic used.
  */
 GLint
-_mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat )
+_mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat)
 {
    switch (internalFormat) {
    case GL_ALPHA:
diff --git a/src/mesa/main/teximage.h b/src/mesa/main/teximage.h
index a4736b5a29f..a4347204962 100644
--- a/src/mesa/main/teximage.h
+++ b/src/mesa/main/teximage.h
@@ -60,7 +60,7 @@ _mesa_is_zero_size_texture(const struct gl_texture_image *texImage)
 /*@{*/
 
 extern GLint
-_mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat );
+_mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat);
 
 
 extern GLboolean
-- 
cgit v1.2.3


From b590ffd0f95bfe5ff7d44412e4916bba40630be9 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 17 Sep 2015 09:49:07 -0600
Subject: mesa: const-qualify _mesa_is_legal_tex_storage_format ctx param

Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/main/texstorage.c | 3 ++-
 src/mesa/main/texstorage.h | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/texstorage.c b/src/mesa/main/texstorage.c
index a29175e6cc6..9fd969fbc53 100644
--- a/src/mesa/main/texstorage.c
+++ b/src/mesa/main/texstorage.c
@@ -202,7 +202,8 @@ update_fbo_texture(struct gl_context *ctx, struct gl_texture_object *texObj)
 
 
 GLboolean
-_mesa_is_legal_tex_storage_format(struct gl_context *ctx, GLenum internalformat)
+_mesa_is_legal_tex_storage_format(const struct gl_context *ctx,
+                                  GLenum internalformat)
 {
    /* check internal format - note that only sized formats are allowed */
    switch (internalformat) {
diff --git a/src/mesa/main/texstorage.h b/src/mesa/main/texstorage.h
index 033ecb7edaa..e80a9ff5b99 100644
--- a/src/mesa/main/texstorage.h
+++ b/src/mesa/main/texstorage.h
@@ -111,7 +111,8 @@ _mesa_TextureStorage3DEXT(GLuint texture, GLenum target, GLsizei levels,
                           GLsizei width, GLsizei height, GLsizei depth);
 
 extern GLboolean
-_mesa_is_legal_tex_storage_format(struct gl_context *ctx, GLenum internalformat);
+_mesa_is_legal_tex_storage_format(const struct gl_context *ctx,
+                                  GLenum internalformat);
 
 extern GLboolean
 _mesa_AllocTextureStorage_sw(struct gl_context *ctx,
-- 
cgit v1.2.3


From 4a03066e5a23058d8cf5a3c34b889ec484c36034 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 21 Sep 2015 09:03:45 -0600
Subject: st/mesa: remove st_bind_framebuffer()

The function was a no-op and if the ctx->Driver.BindFramebuffer pointer
is null, Mesa won't try to use it.

Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/state_tracker/st_cb_fbo.c | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'src')

diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index 57075904450..9d06a232bfa 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -387,17 +387,6 @@ st_new_renderbuffer_fb(enum pipe_format format, int samples, boolean sw)
 }
 
 
-/**
- * Called via ctx->Driver.BindFramebufferEXT().
- */
-static void
-st_bind_framebuffer(struct gl_context *ctx, GLenum target,
-                    struct gl_framebuffer *fb, struct gl_framebuffer *fbread)
-{
-   /* no-op */
-}
-
-
 /**
  * Create or update the pipe_surface of a FBO renderbuffer.
  * This is usually called after st_finalize_texture.
@@ -839,7 +828,6 @@ void st_init_fbo_functions(struct dd_function_table *functions)
 {
    functions->NewFramebuffer = st_new_framebuffer;
    functions->NewRenderbuffer = st_new_renderbuffer;
-   functions->BindFramebuffer = st_bind_framebuffer;
    functions->FramebufferRenderbuffer = _mesa_FramebufferRenderbuffer_sw;
    functions->RenderTexture = st_render_texture;
    functions->FinishRenderTexture = st_finish_render_texture;
-- 
cgit v1.2.3


From 1bd89db921105dbe76047144b4719d4617aee1d8 Mon Sep 17 00:00:00 2001
From: Alejandro Piñeiro <apinheiro@igalia.com>
Date: Wed, 16 Sep 2015 17:19:50 +0200
Subject: i965/vec4: refactor brw_vec4_copy_propagation.

Now it is more similar to brw_fs_copy_propagation, with three
clear stages:

1) Build up the value we are propagating as if it were the source of a
single MOV
2) Check that we can propagate that value
3) Build the final value

Previously everything was somewhat tangled, which made some specific
cases even messier to implement, such as knowing whether you can
propagate from a previous instruction even with type mismatches (for
example, it required maintaining more than one has_source_modifiers).
The refactoring cleans things up and supports the mentioned use case
without doing anything extra (for example, only one
has_source_modifiers is used).

Shader-db results for vec4 programs on Haswell:
total instructions in shared programs: 1683842 -> 1669037 (-0.88%)
instructions in affected programs:     739837 -> 725032 (-2.00%)
helped:                                6237
HURT:                                  0

v2: using 'arg' index to get the from inst was wrong
v3: rebased against last change on the previous patch of the series
v4: don't need to track instructions on struct copy_entry, as we
    only set the source on a direct copy
v5: change the approach for a refactoring
v6: tweaked comments

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 .../drivers/dri/i965/brw_vec4_copy_propagation.cpp | 32 ++++++++++++----------
 1 file changed, 18 insertions(+), 14 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 1522eeabb1c..d3f0ddde258 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -265,6 +265,9 @@ try_copy_propagate(const struct brw_device_info *devinfo,
                    vec4_instruction *inst,
                    int arg, struct copy_entry *entry)
 {
+   /* Build up the value we are propagating as if it were the source of a
+    * single MOV
+    */
    /* For constant propagation, we only handle the same constant
     * across all 4 channels.  Some day, we should handle the 8-bit
     * float vector format, which would let us constant propagate
@@ -291,9 +294,9 @@ try_copy_propagate(const struct brw_device_info *devinfo,
    for (int i = 0; i < 4; i++) {
       s[i] = BRW_GET_SWZ(entry->value[i]->swizzle, i);
    }
-   value.swizzle = brw_compose_swizzle(inst->src[arg].swizzle,
-                                       BRW_SWIZZLE4(s[0], s[1], s[2], s[3]));
+   value.swizzle = BRW_SWIZZLE4(s[0], s[1], s[2], s[3]);
 
+   /* Check that we can propagate that value */
    if (value.file != UNIFORM &&
        value.file != GRF &&
        value.file != ATTR)
@@ -304,13 +307,6 @@ try_copy_propagate(const struct brw_device_info *devinfo,
       return false;
    }
 
-   if (inst->src[arg].abs) {
-      value.negate = false;
-      value.abs = true;
-   }
-   if (inst->src[arg].negate)
-      value.negate = !value.negate;
-
    bool has_source_modifiers = value.negate || value.abs;
 
    /* gen6 math and gen7+ SENDs from GRFs ignore source modifiers on
@@ -376,19 +372,27 @@ try_copy_propagate(const struct brw_device_info *devinfo,
       }
    }
 
+   /* Build the final value */
+   if (inst->src[arg].abs) {
+      value.negate = false;
+      value.abs = true;
+   }
+   if (inst->src[arg].negate)
+      value.negate = !value.negate;
+
+   value.swizzle = brw_compose_swizzle(inst->src[arg].swizzle,
+                                       value.swizzle);
    if (has_source_modifiers &&
        value.type != inst->src[arg].type) {
-      /* We are propagating source modifiers from a MOV with a different
-       * type.  If we got here, then we can just change the source and
-       * destination types of the instruction and keep going.
-       */
       assert(can_change_source_types(inst));
       for (int i = 0; i < 3; i++) {
          inst->src[i].type = value.type;
       }
       inst->dst.type = value.type;
-   } else
+   } else {
       value.type = inst->src[arg].type;
+   }
+
    inst->src[arg] = value;
    return true;
 }
-- 
cgit v1.2.3


From 8ae8feca844c953510e94f116be2fc37fb14ffe0 Mon Sep 17 00:00:00 2001
From: Grazvydas Ignotas <notasas@gmail.com>
Date: Wed, 23 Sep 2015 01:50:31 +0300
Subject: r600g: update num_dw in scissor_enable workaround

"r600g: apply disable workaround on all scissors" forgot to update
num_dw, fix it.

Fixes: fbb423b433 "r600g: apply disable workaround on all scissors"
Reported-and-tested-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Signed-off-by: Grazvydas Ignotas <notasas@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r600/r600_state_common.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 21c89dc0b61..efce852eafa 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -367,6 +367,7 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state)
 	    rs->scissor_enable != rctx->scissor.enable) {
 		rctx->scissor.enable = rs->scissor_enable;
 		rctx->scissor.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
+		rctx->scissor.atom.num_dw = R600_MAX_VIEWPORTS * 4;
 		r600_mark_atom_dirty(rctx, &rctx->scissor.atom);
 	}
 
-- 
cgit v1.2.3


From f5991ebf3418c48486bda072ad7aba247bc21923 Mon Sep 17 00:00:00 2001
From: Chris Forbes <chrisf@ijw.co.nz>
Date: Tue, 9 Sep 2014 21:15:09 +1200
Subject: i965: Add defines for tessellation stages

v2 (Ken):
- Squash together commits for HS, DS, and TE, as well as fixes.
- Add INTEL_MASK variants so we can use SET_FIELD if we want.
- Rename GEN7_HS_INSTANCE_CONTROL to GEN7_HS_INSTANCE_COUNT to match
  the documentation.
- Add some more fields from the PRMs.
- Add Broadwell variants.

Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_defines.h | 72 +++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 8fc8cebf11e..f9d8d1b98f2 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1790,6 +1790,8 @@ enum brw_message_target {
 /* DW3: PS */
 
 #define _3DSTATE_SAMPLER_STATE_POINTERS_VS	0x782B /* GEN7+ */
+#define _3DSTATE_SAMPLER_STATE_POINTERS_HS	0x782C /* GEN7+ */
+#define _3DSTATE_SAMPLER_STATE_POINTERS_DS	0x782D /* GEN7+ */
 #define _3DSTATE_SAMPLER_STATE_POINTERS_GS	0x782E /* GEN7+ */
 #define _3DSTATE_SAMPLER_STATE_POINTERS_PS	0x782F /* GEN7+ */
 
@@ -1873,6 +1875,8 @@ enum brw_message_target {
 #define GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES                (5*128)
 
 #define _3DSTATE_PUSH_CONSTANT_ALLOC_VS         0x7912 /* GEN7+ */
+#define _3DSTATE_PUSH_CONSTANT_ALLOC_HS         0x7913 /* GEN7+ */
+#define _3DSTATE_PUSH_CONSTANT_ALLOC_DS         0x7914 /* GEN7+ */
 #define _3DSTATE_PUSH_CONSTANT_ALLOC_GS         0x7915 /* GEN7+ */
 #define _3DSTATE_PUSH_CONSTANT_ALLOC_PS         0x7916 /* GEN7+ */
 # define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT         16
@@ -1975,8 +1979,76 @@ enum brw_message_target {
 #define GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES		(62*16)
 
 #define _3DSTATE_HS                             0x781B /* GEN7+ */
+/* DW1 */
+# define GEN7_HS_SAMPLER_COUNT_MASK                     INTEL_MASK(29, 27)
+# define GEN7_HS_SAMPLER_COUNT_SHIFT                    27
+# define GEN7_HS_BINDING_TABLE_ENTRY_COUNT_MASK         INTEL_MASK(25, 18)
+# define GEN7_HS_BINDING_TABLE_ENTRY_COUNT_SHIFT        18
+# define GEN7_HS_FLOATING_POINT_MODE_IEEE_754           (0 << 16)
+# define GEN7_HS_FLOATING_POINT_MODE_ALT                (1 << 16)
+# define GEN7_HS_MAX_THREADS_SHIFT                      0
+/* DW2 */
+# define GEN7_HS_ENABLE                                 (1 << 31)
+# define GEN7_HS_STATISTICS_ENABLE                      (1 << 29)
+# define GEN8_HS_MAX_THREADS_SHIFT                      8
+# define GEN7_HS_INSTANCE_COUNT_MASK                    INTEL_MASK(3, 0)
+# define GEN7_HS_INSTANCE_COUNT_SHIFT                   0
+/* DW5 */
+# define GEN7_HS_SINGLE_PROGRAM_FLOW                    (1 << 27)
+# define GEN7_HS_VECTOR_MASK_ENABLE                     (1 << 26)
+# define HSW_HS_ACCESSES_UAV                            (1 << 25)
+# define GEN7_HS_INCLUDE_VERTEX_HANDLES                 (1 << 24)
+# define GEN7_HS_DISPATCH_START_GRF_MASK                INTEL_MASK(23, 19)
+# define GEN7_HS_DISPATCH_START_GRF_SHIFT               19
+# define GEN7_HS_URB_READ_LENGTH_MASK                   INTEL_MASK(16, 11)
+# define GEN7_HS_URB_READ_LENGTH_SHIFT                  11
+# define GEN7_HS_URB_ENTRY_READ_OFFSET_MASK             INTEL_MASK(9, 4)
+# define GEN7_HS_URB_ENTRY_READ_OFFSET_SHIFT            4
+
 #define _3DSTATE_TE                             0x781C /* GEN7+ */
+/* DW1 */
+# define GEN7_TE_PARTITIONING_SHIFT                     12
+# define GEN7_TE_OUTPUT_TOPOLOGY_SHIFT                  8
+# define GEN7_TE_DOMAIN_SHIFT                           4
+//# define GEN7_TE_MODE_SW                                (1 << 1)
+# define GEN7_TE_ENABLE                                 (1 << 0)
+
 #define _3DSTATE_DS                             0x781D /* GEN7+ */
+/* DW2 */
+# define GEN7_DS_SINGLE_DOMAIN_POINT_DISPATCH           (1 << 31)
+# define GEN7_DS_VECTOR_MASK_ENABLE                     (1 << 30)
+# define GEN7_DS_SAMPLER_COUNT_MASK                     INTEL_MASK(29, 27)
+# define GEN7_DS_SAMPLER_COUNT_SHIFT                    27
+# define GEN7_DS_BINDING_TABLE_ENTRY_COUNT_MASK         INTEL_MASK(25, 18)
+# define GEN7_DS_BINDING_TABLE_ENTRY_COUNT_SHIFT        18
+# define GEN7_DS_FLOATING_POINT_MODE_IEEE_754           (0 << 16)
+# define GEN7_DS_FLOATING_POINT_MODE_ALT                (1 << 16)
+# define HSW_DS_ACCESSES_UAV                            (1 << 14)
+/* DW4 */
+# define GEN7_DS_DISPATCH_START_GRF_MASK                INTEL_MASK(24, 20)
+# define GEN7_DS_DISPATCH_START_GRF_SHIFT               20
+# define GEN7_DS_URB_READ_LENGTH_MASK                   INTEL_MASK(17, 11)
+# define GEN7_DS_URB_READ_LENGTH_SHIFT                  11
+# define GEN7_DS_URB_ENTRY_READ_OFFSET_MASK             INTEL_MASK(9, 4)
+# define GEN7_DS_URB_ENTRY_READ_OFFSET_SHIFT            4
+/* DW5 */
+# define GEN7_DS_MAX_THREADS_SHIFT                      25
+# define HSW_DS_MAX_THREADS_SHIFT                       21
+# define GEN7_DS_STATISTICS_ENABLE                      (1 << 10)
+# define GEN7_DS_SIMD8_DISPATCH_ENABLE                  (1 << 3)
+# define GEN7_DS_COMPUTE_W_COORDINATE_ENABLE            (1 << 2)
+# define GEN7_DS_CACHE_DISABLE                          (1 << 1)
+# define GEN7_DS_ENABLE                                 (1 << 0)
+/* Gen8+ DW8 */
+# define GEN8_DS_URB_ENTRY_OUTPUT_OFFSET_MASK           INTEL_MASK(26, 21)
+# define GEN8_DS_URB_ENTRY_OUTPUT_OFFSET_SHIFT          21
+# define GEN8_DS_URB_OUTPUT_LENGTH_MASK                 INTEL_MASK(20, 16)
+# define GEN8_DS_URB_OUTPUT_LENGTH_SHIFT                16
+# define GEN8_DS_USER_CLIP_DISTANCE_MASK                INTEL_MASK(15, 8)
+# define GEN8_DS_USER_CLIP_DISTANCE_SHIFT               8
+# define GEN8_DS_USER_CULL_DISTANCE_MASK                INTEL_MASK(7, 0)
+# define GEN8_DS_USER_CULL_DISTANCE_SHIFT               0
+
 
 #define _3DSTATE_CLIP				0x7812 /* GEN6+ */
 /* DW1 */
-- 
cgit v1.2.3


From 2b79db2c0216075f5047fe3723d100fd70fc59c7 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Tue, 22 Sep 2015 17:14:45 -0700
Subject: nir/lower_alu_to_scalar: Use the builder

Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/nir_lower_alu_to_scalar.c | 47 ++++++++++++++++------------------
 1 file changed, 22 insertions(+), 25 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_lower_alu_to_scalar.c b/src/glsl/nir/nir_lower_alu_to_scalar.c
index 710bb37409f..5ef5ec2b316 100644
--- a/src/glsl/nir/nir_lower_alu_to_scalar.c
+++ b/src/glsl/nir/nir_lower_alu_to_scalar.c
@@ -22,6 +22,7 @@
  */
 
 #include "nir.h"
+#include "nir_builder.h"
 
 /** @file nir_lower_alu_to_scalar.c
  *
@@ -38,13 +39,13 @@ nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components)
 
 static void
 lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
-                void *mem_ctx)
+                nir_builder *builder)
 {
    unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
 
    nir_ssa_def *last = NULL;
    for (unsigned i = 0; i < num_components; i++) {
-      nir_alu_instr *chan = nir_alu_instr_create(mem_ctx, chan_op);
+      nir_alu_instr *chan = nir_alu_instr_create(builder->shader, chan_op);
       nir_alu_ssa_dest_init(chan, 1);
       nir_alu_src_copy(&chan->src[0], &instr->src[0], chan);
       chan->src[0].swizzle[0] = chan->src[0].swizzle[i];
@@ -54,18 +55,13 @@ lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
          chan->src[1].swizzle[0] = chan->src[1].swizzle[i];
       }
 
-      nir_instr_insert_before(&instr->instr, &chan->instr);
+      nir_builder_instr_insert(builder, &chan->instr);
 
       if (i == 0) {
          last = &chan->dest.dest.ssa;
       } else {
-         nir_alu_instr *merge = nir_alu_instr_create(mem_ctx, merge_op);
-         nir_alu_ssa_dest_init(merge, 1);
-         merge->dest.write_mask = 1;
-         merge->src[0].src = nir_src_for_ssa(last);
-         merge->src[1].src = nir_src_for_ssa(&chan->dest.dest.ssa);
-         nir_instr_insert_before(&instr->instr, &merge->instr);
-         last = &merge->dest.dest.ssa;
+         last = nir_build_alu(builder, merge_op,
+                              last, &chan->dest.dest.ssa, NULL, NULL);
       }
    }
 
@@ -75,7 +71,7 @@ lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
 }
 
 static void
-lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx)
+lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
 {
    unsigned num_src = nir_op_infos[instr->op].num_inputs;
    unsigned i, chan;
@@ -83,11 +79,13 @@ lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx)
    assert(instr->dest.dest.is_ssa);
    assert(instr->dest.write_mask != 0);
 
+   b->cursor = nir_before_instr(&instr->instr);
+
 #define LOWER_REDUCTION(name, chan, merge) \
    case name##2: \
    case name##3: \
    case name##4: \
-      lower_reduction(instr, chan, merge, mem_ctx); \
+      lower_reduction(instr, chan, merge, b); \
       break;
 
    switch (instr->op) {
@@ -134,16 +132,13 @@ lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx)
       return;
 
    unsigned num_components = instr->dest.dest.ssa.num_components;
-   static const nir_op nir_op_map[] = {nir_op_vec2, nir_op_vec3, nir_op_vec4};
-   nir_alu_instr *vec_instr =
-      nir_alu_instr_create(mem_ctx, nir_op_map[num_components - 2]);
-   nir_alu_ssa_dest_init(vec_instr, num_components);
+   nir_ssa_def *comps[] = { NULL, NULL, NULL, NULL };
 
    for (chan = 0; chan < 4; chan++) {
       if (!(instr->dest.write_mask & (1 << chan)))
          continue;
 
-      nir_alu_instr *lower = nir_alu_instr_create(mem_ctx, instr->op);
+      nir_alu_instr *lower = nir_alu_instr_create(b->shader, instr->op);
       for (i = 0; i < num_src; i++) {
          /* We only handle same-size-as-dest (input_sizes[] == 0) or scalar
           * args (input_sizes[] == 1).
@@ -159,25 +154,24 @@ lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx)
 
       nir_alu_ssa_dest_init(lower, 1);
       lower->dest.saturate = instr->dest.saturate;
-      vec_instr->src[chan].src = nir_src_for_ssa(&lower->dest.dest.ssa);
+      comps[chan] = &lower->dest.dest.ssa;
 
-      nir_instr_insert_before(&instr->instr, &lower->instr);
+      nir_builder_instr_insert(b, &lower->instr);
    }
 
-   nir_instr_insert_before(&instr->instr, &vec_instr->instr);
+   nir_ssa_def *vec = nir_vec(b, comps, num_components);
 
-   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
-                            nir_src_for_ssa(&vec_instr->dest.dest.ssa));
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec));
 
    nir_instr_remove(&instr->instr);
 }
 
 static bool
-lower_alu_to_scalar_block(nir_block *block, void *data)
+lower_alu_to_scalar_block(nir_block *block, void *builder)
 {
    nir_foreach_instr_safe(block, instr) {
       if (instr->type == nir_instr_type_alu)
-         lower_alu_instr_scalar(nir_instr_as_alu(instr), data);
+         lower_alu_instr_scalar(nir_instr_as_alu(instr), builder);
    }
 
    return true;
@@ -186,7 +180,10 @@ lower_alu_to_scalar_block(nir_block *block, void *data)
 static void
 nir_lower_alu_to_scalar_impl(nir_function_impl *impl)
 {
-   nir_foreach_block(impl, lower_alu_to_scalar_block, ralloc_parent(impl));
+   nir_builder builder;
+   nir_builder_init(&builder, impl);
+
+   nir_foreach_block(impl, lower_alu_to_scalar_block, &builder);
 }
 
 void
-- 
cgit v1.2.3


From 0f9bf647705db48a88c700bfa7ca3d4a9f6b4f56 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Tue, 22 Sep 2015 17:16:59 -0700
Subject: nir/lower_alu_to_scalar: Return after lower_reduction

We don't use any of the code after the switch anyway.  Since we check for
num_components == 1 and early-return, it doesn't get executed so
everything's ok.  However, it makes it much clearer what's going on if we
simply do an early return.

Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/nir_lower_alu_to_scalar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_lower_alu_to_scalar.c b/src/glsl/nir/nir_lower_alu_to_scalar.c
index 5ef5ec2b316..84d4943befc 100644
--- a/src/glsl/nir/nir_lower_alu_to_scalar.c
+++ b/src/glsl/nir/nir_lower_alu_to_scalar.c
@@ -86,7 +86,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
    case name##3: \
    case name##4: \
       lower_reduction(instr, chan, merge, b); \
-      break;
+      return;
 
    switch (instr->op) {
    case nir_op_vec4:
-- 
cgit v1.2.3


From e5a9346d0046b0a78620687e75239b30e3876662 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Tue, 22 Sep 2015 16:54:27 -0700
Subject: nir: Add fdph and fdph_replicated opcodes

Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/nir_lower_vec_to_movs.c | 3 ++-
 src/glsl/nir/nir_opcodes.py          | 5 +++++
 src/glsl/nir/nir_opt_algebraic.py    | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c b/src/glsl/nir/nir_lower_vec_to_movs.c
index b7ee4e8d33c..622e59c688e 100644
--- a/src/glsl/nir/nir_lower_vec_to_movs.c
+++ b/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -89,7 +89,8 @@ has_replicated_dest(nir_alu_instr *alu)
 {
    return alu->op == nir_op_fdot_replicated2 ||
           alu->op == nir_op_fdot_replicated3 ||
-          alu->op == nir_op_fdot_replicated4;
+          alu->op == nir_op_fdot_replicated4 ||
+          alu->op == nir_op_fdph_replicated;
 }
 
 /* Attempts to coalesce the "move" from the given source of the vec to the
diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
index 495d109375b..f2d584fe484 100644
--- a/src/glsl/nir/nir_opcodes.py
+++ b/src/glsl/nir/nir_opcodes.py
@@ -456,6 +456,11 @@ binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
 binop_reduce("fdot_replicated", 4, tfloat, tfloat,
              "{src0} * {src1}", "{src0} + {src1}", "{src}")
 
+opcode("fdph", 1, tfloat, [3, 4], [tfloat, tfloat], "",
+       "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
+opcode("fdph_replicated", 4, tfloat, [3, 4], [tfloat, tfloat], "",
+       "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
+
 binop("fmin", tfloat, "", "fminf(src0, src1)")
 binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
 binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : src1")
diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py
index 43558a547b4..585e5e0ae98 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -244,6 +244,7 @@ late_optimizations = [
    (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
    (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
    (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
+   (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),
 ]
 
 print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
-- 
cgit v1.2.3


From 2e5423ad6345e027bb40c75ffc0e9e64843b9c05 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Tue, 22 Sep 2015 16:55:42 -0700
Subject: i965/vec4: Add support for fdph_replicated

Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 20c063d0010..c681ae438ef 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1268,6 +1268,11 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       inst->saturate = instr->dest.saturate;
       break;
 
+   case nir_op_fdph_replicated:
+      inst = emit(BRW_OPCODE_DPH, dst, op[0], op[1]);
+      inst->saturate = instr->dest.saturate;
+      break;
+
    case nir_op_bany2:
    case nir_op_bany3:
    case nir_op_bany4: {
-- 
cgit v1.2.3


From 999ff3c77d5d978224ee01ca19121e15698c2c6a Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Tue, 22 Sep 2015 17:29:49 -0700
Subject: nir/lower_alu_to_scalar: Add support for nir_op_fdph

Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/nir_lower_alu_to_scalar.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/nir_lower_alu_to_scalar.c b/src/glsl/nir/nir_lower_alu_to_scalar.c
index 84d4943befc..9313fc0f97e 100644
--- a/src/glsl/nir/nir_lower_alu_to_scalar.c
+++ b/src/glsl/nir/nir_lower_alu_to_scalar.c
@@ -112,6 +112,24 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
        */
       return;
 
+   case nir_op_fdph: {
+      nir_ssa_def *sum[4];
+      for (unsigned i = 0; i < 3; i++) {
+         sum[i] = nir_fmul(b, nir_channel(b, instr->src[0].src.ssa,
+                                          instr->src[0].swizzle[i]),
+                              nir_channel(b, instr->src[1].src.ssa,
+                                          instr->src[1].swizzle[i]));
+      }
+      sum[3] = nir_channel(b, instr->src[1].src.ssa, instr->src[1].swizzle[3]);
+
+      nir_ssa_def *val = nir_fadd(b, nir_fadd(b, sum[0], sum[1]),
+                                     nir_fadd(b, sum[2], sum[3]));
+
+      nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
+      nir_instr_remove(&instr->instr);
+      return;
+   }
+
       LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
       LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand);
       LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand);
-- 
cgit v1.2.3


From e7496fed2a2428b704b8aeab5f42df8e6a2f7ae1 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Tue, 22 Sep 2015 16:57:03 -0700
Subject: prog_to_nir: Use nir_op_fdph

Shader-db results on HSW:

   instructions in affected programs:     72 -> 56 (-22.22%)

Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/program/prog_to_nir.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c
index ec61100356a..1bd735a47bb 100644
--- a/src/mesa/program/prog_to_nir.c
+++ b/src/mesa/program/prog_to_nir.c
@@ -527,8 +527,7 @@ ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 static void
 ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 {
-   nir_ssa_def *dp3 = nir_fdot3(b, src[0], src[1]);
-   ptn_move_dest(b, dest, nir_fadd(b, dp3, ptn_channel(b, src[1], W)));
+   ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
 }
 
 static void
-- 
cgit v1.2.3


From 10da96887c785930c2553b2d5bde91e52b8b034a Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 21 Sep 2015 13:58:19 -0700
Subject: i965/vec4: Detect and delete useless MOVs.

With NIR:

instructions in affected programs:     111508 -> 109193 (-2.08%)
helped:                                507

Without NIR:

instructions in affected programs:     28763 -> 28474 (-1.00%)
helped:                                186

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index ed49cd38987..6e52d79e053 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1021,6 +1021,28 @@ vec4_visitor::opt_register_coalesce()
 	  inst->src[0].abs || inst->src[0].negate || inst->src[0].reladdr)
 	 continue;
 
+      /* Remove no-op MOVs */
+      if (inst->dst.file == inst->src[0].file &&
+          inst->dst.reg == inst->src[0].reg &&
+          inst->dst.reg_offset == inst->src[0].reg_offset) {
+         bool is_nop_mov = true;
+
+         for (unsigned c = 0; c < 4; c++) {
+            if ((inst->dst.writemask & (1 << c)) == 0)
+               continue;
+
+            if (BRW_GET_SWZ(inst->src[0].swizzle, c) != c) {
+               is_nop_mov = false;
+               break;
+            }
+         }
+
+         if (is_nop_mov) {
+            inst->remove(block);
+            continue;
+         }
+      }
+
       bool to_mrf = (inst->dst.file == MRF);
 
       /* Can't coalesce this GRF if someone else was going to
-- 
cgit v1.2.3


From 89524e717147053fe9da6e30daa0aaffcd29d26d Mon Sep 17 00:00:00 2001
From: Tapani Pälli <tapani.palli@intel.com>
Date: Tue, 22 Sep 2015 14:34:11 +0300
Subject: glsl: bail out early in _mesa_ShaderSource if no shaderobj
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes a crash in a conformance test that tries out different
invalid arguments for glShaderSource and glGetShaderSource:

   ES2-CTS.gtf.GL.glGetShaderSource.getshadersource_programhandle

This is a regression from commit:
   04e201d0c02cd30ace5c6fe80e9f021ebb733682

Additions in v2 also fix the following failing dEQP test:
   dEQP-GLES[2|3].functional.negative_api.shader.shader_source

v2: cleanup function, do check earlier (Iago Toral)

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/mesa/main/shaderapi.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index f31980b40d9..edc23bcefe3 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -931,13 +931,9 @@ get_shader_source(struct gl_context *ctx, GLuint shader, GLsizei maxLength,
  * glShaderSource[ARB].
  */
 static void
-shader_source(struct gl_context *ctx, GLuint shader, const GLchar *source)
+shader_source(struct gl_shader *sh, const GLchar *source)
 {
-   struct gl_shader *sh;
-
-   sh = _mesa_lookup_shader_err(ctx, shader, "glShaderSource");
-   if (!sh)
-      return;
+   assert(sh);
 
    /* free old shader source string and install new one */
    free((void *)sh->Source);
@@ -1639,13 +1635,17 @@ _mesa_ShaderSource(GLhandleARB shaderObj, GLsizei count,
    GLint *offsets;
    GLsizei i, totalLength;
    GLcharARB *source;
+   struct gl_shader *sh;
 
 #if defined(HAVE_SHA1)
    GLcharARB *replacement;
-   struct gl_shader *sh;
 #endif /* HAVE_SHA1 */
 
-   if (!shaderObj || string == NULL) {
+   sh = _mesa_lookup_shader_err(ctx, shaderObj, "glShaderSourceARB");
+   if (!sh)
+      return;
+
+   if (string == NULL) {
       _mesa_error(ctx, GL_INVALID_VALUE, "glShaderSourceARB");
       return;
    }
@@ -1697,8 +1697,6 @@ _mesa_ShaderSource(GLhandleARB shaderObj, GLsizei count,
    source[totalLength - 2] = '\0';
 
 #if defined(HAVE_SHA1)
-   sh = _mesa_lookup_shader(ctx, shaderObj);
-
    /* Dump original shader source to MESA_SHADER_DUMP_PATH and replace
     * if corresponding entry found from MESA_SHADER_READ_PATH.
     */
@@ -1711,7 +1709,7 @@ _mesa_ShaderSource(GLhandleARB shaderObj, GLsizei count,
    }
 #endif /* HAVE_SHA1 */
 
-   shader_source(ctx, shaderObj, source);
+   shader_source(sh, source);
 
    free(offsets);
 }
-- 
cgit v1.2.3


From cf439951b791827677e96d29e209b5fc08d07a2e Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Tue, 24 Feb 2015 19:02:56 +0100
Subject: mesa: Fix GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE for default
 framebuffer.

From section 9.2. Binding and Managing Framebuffer Objects:

"Upon successful return from Get*FramebufferAttachmentParameteriv, if
pname is FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE, then params will contain
one of NONE, FRAMEBUFFER_DEFAULT, TEXTURE, or RENDERBUFFER, identifying
the type of object which contains the attached image."

And then it clarifies further:

"If the value of FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE is NONE, then
either no framebuffer is bound to target; or the default framebuffer is
bound, attachment is DEPTH or STENCIL, and the number of depth or stencil
bits, respectively, is zero"

Currently, if the default framebuffer is bound, we always return
GL_FRAMEBUFFER_DEFAULT for FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE, but
according to the spec, when GL_DEPTH or GL_STENCIL attachments are
the ones being queried, we should return GL_NONE if they don't exist.

Fixes the following dEQP test:
dEQP-GLES3.functional.state_query.fbo.framebuffer_attachment_x_size_initial

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Cc: "10.6" <mesa-stable@lists.freedesktop.org>
---
 src/mesa/main/fbobject.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 6b6ebb78f75..fe6bdc2b4d1 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -3611,7 +3611,16 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx,
 
    switch (pname) {
    case GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE_EXT:
-      *params = _mesa_is_winsys_fbo(buffer)
+      /* From the OpenGL spec, 9.2. Binding and Managing Framebuffer Objects:
+       *
+       * "If the value of FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE is NONE, then
+       *  either no framebuffer is bound to target; or the default framebuffer
+       *  is bound, attachment is DEPTH or STENCIL, and the number of depth or
+       *  stencil bits, respectively, is zero."
+       */
+      *params = (_mesa_is_winsys_fbo(buffer) &&
+                 ((attachment != GL_DEPTH && attachment != GL_STENCIL) ||
+                  (att->Type != GL_NONE)))
          ? GL_FRAMEBUFFER_DEFAULT : att->Type;
       return;
    case GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME_EXT:
-- 
cgit v1.2.3


From f2e75ac88a92ab2180de576aca298929cfce03f2 Mon Sep 17 00:00:00 2001
From: Antia Puentes <apuentes@igalia.com>
Date: Tue, 22 Sep 2015 18:17:45 +0200
Subject: i965/vec4: Don't coalesce regs in Gen6 MATH ops if
 reswizzle/writemask needed

Gen6 MATH instructions can not execute in align16 mode, so swizzles or
writemasking are not allowed.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92033
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_ir_vec4.h |  3 ++-
 src/mesa/drivers/dri/i965/brw_vec4.cpp  | 12 ++++++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 6e8b16139d3..96dd633e117 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -175,7 +175,8 @@ public:
 
    bool is_send_from_grf();
    unsigned regs_read(unsigned arg) const;
-   bool can_reswizzle(int dst_writemask, int swizzle, int swizzle_mask);
+   bool can_reswizzle(const struct brw_device_info *devinfo, int dst_writemask,
+                      int swizzle, int swizzle_mask);
    void reswizzle(int dst_writemask, int swizzle);
    bool can_do_source_mods(const struct brw_device_info *devinfo);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 6e52d79e053..c6510453479 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -941,10 +941,18 @@ vec4_visitor::opt_set_dependency_control()
 }
 
 bool
-vec4_instruction::can_reswizzle(int dst_writemask,
+vec4_instruction::can_reswizzle(const struct brw_device_info *devinfo,
+                                int dst_writemask,
                                 int swizzle,
                                 int swizzle_mask)
 {
+   /* Gen6 MATH instructions can not execute in align16 mode, so swizzles
+    * or writemasking are not allowed.
+    */
+   if (devinfo->gen == 6 && is_math() &&
+       (swizzle != BRW_SWIZZLE_XYZW || dst_writemask != WRITEMASK_XYZW))
+      return false;
+
    /* If this instruction sets anything not referenced by swizzle, then we'd
     * totally break it when we reswizzle.
     */
@@ -1099,7 +1107,7 @@ vec4_visitor::opt_register_coalesce()
                break;
 
             /* If we can't handle the swizzle, bail. */
-            if (!scan_inst->can_reswizzle(inst->dst.writemask,
+            if (!scan_inst->can_reswizzle(devinfo, inst->dst.writemask,
                                           inst->src[0].swizzle,
                                           chans_needed)) {
                break;
-- 
cgit v1.2.3


From d7bf7969b90f66ee614f2d2225f3a821d5396a89 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 11:37:12 -0700
Subject: t_dd_dmatmp: Make "count" actually be the count

The value passed in count previously was "vertex after the last vertex
to be processed."  Calling that "count" was misleading and kind of mean.
Looking at the code, many functions immediately do "count-start" to get
back the true count.  That's just silly.

If it is better for the loops to be 'for (j = start; j < (start +
count); j++)', GCC will do that transformation.

NOTE: There is some strange formatting left by this patch.  That was
done to make it more obvious that the before and after code is
equivalent.  These will be fixed in the next patch.

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

v2: Fix a remaining (count-start) in render_quad_strip_verts.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com> [v1]
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
---
 src/mesa/drivers/dri/i915/intel_render.c   |   2 +-
 src/mesa/drivers/dri/radeon/radeon_swtcl.c |   2 +-
 src/mesa/tnl_dd/t_dd_dmatmp.h              | 146 ++++++++++++++---------------
 3 files changed, 75 insertions(+), 75 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c
index 5962dad7d11..df21c6651df 100644
--- a/src/mesa/drivers/dri/i915/intel_render.c
+++ b/src/mesa/drivers/dri/i915/intel_render.c
@@ -251,7 +251,7 @@ intel_run_render(struct gl_context * ctx, struct tnl_pipeline_stage *stage)
          continue;
 
       intel_render_tab_verts[prim & PRIM_MODE_MASK] (ctx, start,
-                                                     start + length, prim);
+                                                     length, prim);
    }
 
    tnl->Driver.Render.Finish(ctx);
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
index 2fbd353297b..7938c5301da 100644
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -446,7 +446,7 @@ static GLboolean radeon_run_render( struct gl_context *ctx,
 		 start, start+length);
 
       if (length)
-	 tab[prim & PRIM_MODE_MASK]( ctx, start, start + length, prim );
+         tab[prim & PRIM_MODE_MASK](ctx, start, length, prim);
    }
 
    tnl->Driver.Render.Finish( ctx );
diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index 7be39541e43..443ada413a9 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -121,9 +121,9 @@ static void TAG(render_points_verts)( struct gl_context *ctx,
       if (currentsz < 8)
 	 currentsz = dmasz;
 
-      for (j = start; j < count; j += nr ) {
+      for (j = 0; j < count; j += nr) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
       }
 
@@ -148,7 +148,7 @@ static void TAG(render_lines_verts)( struct gl_context *ctx,
 
       /* Emit whole number of lines in total and in each buffer:
        */
-      count -= (count-start) & 1;
+      count -= count & 1;
       currentsz = GET_CURRENT_VB_MAX_VERTS();
       currentsz -= currentsz & 1;
       dmasz -= dmasz & 1;
@@ -156,9 +156,9 @@ static void TAG(render_lines_verts)( struct gl_context *ctx,
       if (currentsz < 8)
 	 currentsz = dmasz;
 
-      for (j = start; j < count; j += nr ) {
+      for (j = 0; j < count; j += nr) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
       }
 
@@ -186,9 +186,9 @@ static void TAG(render_line_strip_verts)( struct gl_context *ctx,
       if (currentsz < 8)
 	 currentsz = dmasz;
 
-      for (j = start; j + 1 < count; j += nr - 1 ) {
+      for (j = 0; j + 1 < count; j += nr - 1 ) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
       }
  
@@ -215,9 +215,9 @@ static void TAG(render_line_loop_verts)( struct gl_context *ctx,
       INIT( GL_LINE_STRIP );
 
       if (flags & PRIM_BEGIN)
-	 j = start;
+	 j = 0;
       else
-	 j = start + 1;
+	 j = 1;
 
       /* Ensure last vertex won't wrap buffers:
        */
@@ -234,23 +234,23 @@ static void TAG(render_line_loop_verts)( struct gl_context *ctx,
 	    nr = MIN2( currentsz, count - j );
 
 	    if (j + nr >= count &&
-		start < count - 1 && 
+		0 < count - 1 &&
 		(flags & PRIM_END)) 
 	    {
 	       void *tmp;
 	       tmp = ALLOC_VERTS(nr+1);
-	       tmp = TAG(emit_verts)( ctx, j, nr, tmp );
+               tmp = TAG(emit_verts)(ctx, start + j, nr, tmp);
 	       tmp = TAG(emit_verts)( ctx, start, 1, tmp );
 	       (void) tmp;
 	    }
 	    else {
-	       TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+               TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	       currentsz = dmasz;
 	    }
 	 }
 
       }
-      else if (start + 1 < count && (flags & PRIM_END)) {
+      else if (1 < count && (flags & PRIM_END)) {
 	 void *tmp;
 	 tmp = ALLOC_VERTS(2);
 	 tmp = TAG(emit_verts)( ctx, start+1, 1, tmp );
@@ -284,14 +284,14 @@ static void TAG(render_triangles_verts)( struct gl_context *ctx,
    /* Emit whole number of tris in total.  dmasz is already a multiple
     * of 3.
     */
-   count -= (count-start)%3;
+   count -= count % 3;
 
    if (currentsz < 8)
       currentsz = dmasz;
 
-   for (j = start; j < count; j += nr) {
+   for (j = 0; j < count; j += nr) {
       nr = MIN2( currentsz, count - j );
-      TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+      TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
       currentsz = dmasz;
    }
 }
@@ -322,9 +322,9 @@ static void TAG(render_tri_strip_verts)( struct gl_context *ctx,
       dmasz -= (dmasz & 1);
       currentsz -= (currentsz & 1);
 
-      for (j = start ; j + 2 < count; j += nr - 2 ) {
+      for (j = 0; j + 2 < count; j += nr - 2) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
       }
 
@@ -354,12 +354,12 @@ static void TAG(render_tri_fan_verts)( struct gl_context *ctx,
 	 currentsz = dmasz;
       }
 
-      for (j = start + 1 ; j + 1 < count; j += nr - 2 ) {
+      for (j = 1; j + 1 < count; j += nr - 2) {
 	 void *tmp;
 	 nr = MIN2( currentsz, count - j + 1 );
 	 tmp = ALLOC_VERTS( nr );
 	 tmp = TAG(emit_verts)( ctx, start, 1, tmp );
-	 tmp = TAG(emit_verts)( ctx, j, nr - 1, tmp );
+         tmp = TAG(emit_verts)( ctx, start + j, nr - 1, tmp );
 	 (void) tmp;
 	 currentsz = dmasz;
       }
@@ -394,12 +394,12 @@ static void TAG(render_poly_verts)( struct gl_context *ctx,
 	 currentsz = dmasz;
       }
 
-      for (j = start + 1 ; j + 1 < count ; j += nr - 2 ) {
+      for (j = 1 ; j + 1 < count ; j += nr - 2 ) {
 	 void *tmp;
 	 nr = MIN2( currentsz, count - j + 1 );
 	 tmp = ALLOC_VERTS( nr );
 	 tmp = TAG(emit_verts)( ctx, start, 1, tmp );
-	 tmp = TAG(emit_verts)( ctx, j, nr - 1, tmp );
+         tmp = TAG(emit_verts)(ctx, start + j, nr - 1, tmp);
 	 (void) tmp;
 	 currentsz = dmasz;
       }
@@ -437,9 +437,9 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
       dmasz -= (dmasz & 2);
       currentsz -= (currentsz & 2);
 
-      for (j = start ; j + 3 < count; j += nr - 2 ) {
+      for (j = 0; j + 3 < count; j += nr - 2 ) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
       }
 
@@ -465,7 +465,7 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
 	 /* Emit whole number of quads in total, and in each buffer.
 	  */
 	 dmasz -= dmasz & 1;
-	 count -= (count-start) & 1;
+	 count -= count & 1;
 	 currentsz -= currentsz & 1;
 
 	 if (currentsz < 12)
@@ -474,14 +474,14 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
 	 currentsz = currentsz/6*2;
 	 dmasz = dmasz/6*2;
 
-	 for (j = start; j + 3 < count; j += nr - 2 ) {
+	 for (j = 0; j + 3 < count; j += nr - 2) {
 	    nr = MIN2( currentsz, count - j );
 	    if (nr >= 4) {
 	       GLint quads = (nr/2)-1;
 	       GLint i;
 	       ELTS_VARS( ALLOC_ELTS( quads*6 ) );
 
-	       for ( i = j-start ; i < j-start+quads*2 ; i+=2 ) {
+               for (i = j; i < j + quads * 2; i += 2) {
 		  EMIT_TWO_ELTS( 0, (i+0), (i+1) );
 		  EMIT_TWO_ELTS( 2, (i+2), (i+1) );
 		  EMIT_TWO_ELTS( 4, (i+3), (i+2) );
@@ -519,15 +519,15 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
       dmasz -= dmasz & 1;
       currentsz = GET_CURRENT_VB_MAX_VERTS();
       currentsz -= currentsz & 1;
-      count -= (count-start) & 1;
+      count -= count & 1;
 
       if (currentsz < 8) {
 	 currentsz = dmasz;
       }
 
-      for (j = start; j + 3 < count; j += nr - 2 ) {
+      for (j = 0; j + 3 < count; j += nr - 2) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
       }
 
@@ -556,15 +556,15 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
       /* Emit whole number of quads in total.  dmasz is already a multiple
        * of 4.
        */
-      count -= (count-start)%4;
+      count -= count % 4;
 
       currentsz = (GET_CURRENT_VB_MAX_VERTS()/4) * 4;
       if (currentsz < 8)
          currentsz = dmasz;
 
-      for (j = start; j < count; j += nr) {
+      for (j = 0; j < count; j += nr) {
          nr = MIN2( currentsz, count - j );
-         TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
          currentsz = dmasz;
       }
    }
@@ -587,7 +587,7 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
       /* Emit whole number of quads in total, and in each buffer.
        */
       dmasz -= dmasz & 3;
-      count -= (count-start) & 3;
+      count -= count & 3;
       currentsz -= currentsz & 3;
 
       /* Adjust for rendering as triangles:
@@ -598,14 +598,14 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
       if (currentsz < 8)
 	 currentsz = dmasz;
 
-      for (j = start; j < count; j += nr ) {
+      for (j = 0; j < count; j += nr ) {
 	 nr = MIN2( currentsz, count - j );
 	 if (nr >= 4) {
 	    GLint quads = nr/4;
 	    GLint i;
 	    ELTS_VARS( ALLOC_ELTS( quads*6 ) );
 
-	    for ( i = j-start ; i < j-start+quads*4 ; i+=4 ) {
+            for (i = j; i < j + quads * 4; i += 4) {
 	       EMIT_TWO_ELTS( 0, (i+0), (i+1) );
 	       EMIT_TWO_ELTS( 2, (i+3), (i+1) );
 	       EMIT_TWO_ELTS( 4, (i+2), (i+3) );
@@ -629,15 +629,15 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
 
       INIT(GL_TRIANGLES);
 
-      for (j = start; j < count-3; j += 4) {
+      for (j = 0; j < count-3; j += 4) {
 	 void *tmp = ALLOC_VERTS( 6 );
 	 /* Send v0, v1, v3
 	  */
-	 tmp = EMIT_VERTS(ctx, j,     2, tmp);
-	 tmp = EMIT_VERTS(ctx, j + 3, 1, tmp);
+	 tmp = EMIT_VERTS(ctx, start + j,     2, tmp);
+	 tmp = EMIT_VERTS(ctx, start + j + 3, 1, tmp);
 	 /* Send v1, v2, v3
 	  */
-	 tmp = EMIT_VERTS(ctx, j + 1, 3, tmp);
+	 tmp = EMIT_VERTS(ctx, start + j + 1, 3, tmp);
 	 (void) tmp;
       }
    }
@@ -698,9 +698,9 @@ static void TAG(render_points_elts)( struct gl_context *ctx,
       if (currentsz < 8)
 	 currentsz = dmasz;
 
-      for (j = start; j < count; j += nr ) {
+      for (j = 0; j < count; j += nr ) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+         TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
 	 FLUSH();
 	 currentsz = dmasz;
       }
@@ -728,7 +728,7 @@ static void TAG(render_lines_elts)( struct gl_context *ctx,
 
       /* Emit whole number of lines in total and in each buffer:
        */
-      count -= (count-start) & 1;
+      count -= count & 1;
       currentsz -= currentsz & 1;
       dmasz -= dmasz & 1;
 
@@ -736,9 +736,9 @@ static void TAG(render_lines_elts)( struct gl_context *ctx,
       if (currentsz < 8)
 	 currentsz = dmasz;
 
-      for (j = start; j < count; j += nr ) {
+      for (j = 0; j < count; j += nr ) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+         TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
 	 FLUSH();
 	 currentsz = dmasz;
       }
@@ -768,9 +768,9 @@ static void TAG(render_line_strip_elts)( struct gl_context *ctx,
       if (currentsz < 8)
 	 currentsz = dmasz;
 
-      for (j = start; j + 1 < count; j += nr - 1 ) {
+      for (j = 0; j + 1 < count; j += nr - 1) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+         TAG(emit_elts)( ctx, elts + start + j, nr, ALLOC_ELTS(nr));
 	 FLUSH();
 	 currentsz = dmasz;
       }
@@ -799,9 +799,9 @@ static void TAG(render_line_loop_elts)( struct gl_context *ctx,
       ELT_INIT( GL_LINE_STRIP );
 
       if (flags & PRIM_BEGIN)
-	 j = start;
+	 j = 0;
       else
-	 j = start + 1;
+	 j = 1;
 
       currentsz = GET_CURRENT_VB_MAX_ELTS();
       if (currentsz < 8) {
@@ -818,23 +818,23 @@ static void TAG(render_line_loop_elts)( struct gl_context *ctx,
 	    nr = MIN2( currentsz, count - j );
 
 	    if (j + nr >= count &&
-		start < count - 1 && 
+		0 < count - 1 &&
 		(flags & PRIM_END)) 
 	    {
 	       void *tmp;
 	       tmp = ALLOC_ELTS(nr+1);
-	       tmp = TAG(emit_elts)( ctx, elts+j, nr, tmp );
+               tmp = TAG(emit_elts)(ctx, elts + start + j, nr, tmp);
 	       tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
 	       (void) tmp;
 	    }
 	    else {
-	       TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+               TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
 	       currentsz = dmasz;
 	    }
 	 }
 
       }
-      else if (start + 1 < count && (flags & PRIM_END)) {
+      else if (1 < count && (flags & PRIM_END)) {
 	 void *tmp;
 	 tmp = ALLOC_ELTS(2);
 	 tmp = TAG(emit_elts)( ctx, elts+start+1, 1, tmp );
@@ -874,14 +874,14 @@ static void TAG(render_triangles_elts)( struct gl_context *ctx,
    /* Emit whole number of tris in total.  dmasz is already a multiple
     * of 3.
     */
-   count -= (count-start)%3;
+   count -= count % 3;
    currentsz -= currentsz%3;
    if (currentsz < 8)
       currentsz = dmasz;
 
-   for (j = start; j < count; j += nr) {
+   for (j = 0; j < count; j += nr) {
       nr = MIN2( currentsz, count - j );
-      TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+      TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
       FLUSH();
       currentsz = dmasz;
    }
@@ -914,9 +914,9 @@ static void TAG(render_tri_strip_elts)( struct gl_context *ctx,
       dmasz -= (dmasz & 1);
       currentsz -= (currentsz & 1);
 
-      for (j = start ; j + 2 < count; j += nr - 2 ) {
+      for (j = 0; j + 2 < count; j += nr - 2) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+	 TAG(emit_elts)( ctx, elts + start + j, nr, ALLOC_ELTS(nr) );
 	 FLUSH();
 	 currentsz = dmasz;
       }
@@ -947,12 +947,12 @@ static void TAG(render_tri_fan_elts)( struct gl_context *ctx,
 	 currentsz = dmasz;
       }
 
-      for (j = start + 1 ; j + 1 < count; j += nr - 2 ) {
+      for (j = 1; j + 1 < count; j += nr - 2) {
 	 void *tmp;
 	 nr = MIN2( currentsz, count - j + 1 );
 	 tmp = ALLOC_ELTS( nr );
 	 tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
-	 tmp = TAG(emit_elts)( ctx, elts+j, nr - 1, tmp );
+         tmp = TAG(emit_elts)(ctx, elts + start + j, nr - 1, tmp);
 	 (void) tmp;
 	 FLUSH();
 	 currentsz = dmasz;
@@ -985,12 +985,12 @@ static void TAG(render_poly_elts)( struct gl_context *ctx,
 	 currentsz = dmasz;
       }
 
-      for (j = start + 1 ; j + 1 < count; j += nr - 2 ) {
+      for (j = 1 ; j + 1 < count; j += nr - 2) {
 	 void *tmp;
 	 nr = MIN2( currentsz, count - j + 1 );
 	 tmp = ALLOC_ELTS( nr );
 	 tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
-	 tmp = TAG(emit_elts)( ctx, elts+j, nr - 1, tmp );
+         tmp = TAG(emit_elts)(ctx, elts + start + j, nr - 1, tmp);
 	 (void) tmp;
 	 FLUSH();
 	 currentsz = dmasz;
@@ -1023,7 +1023,7 @@ static void TAG(render_quad_strip_elts)( struct gl_context *ctx,
       /* Emit whole number of quads in total, and in each buffer.
        */
       dmasz -= dmasz & 1;
-      count -= (count-start) & 1;
+      count -= count & 1;
       currentsz -= currentsz & 1;
 
       if (currentsz < 12)
@@ -1035,7 +1035,7 @@ static void TAG(render_quad_strip_elts)( struct gl_context *ctx,
 	 currentsz = currentsz/6*2;
 	 dmasz = dmasz/6*2;
 
-	 for (j = start; j + 3 < count; j += nr - 2 ) {
+         for (j = 0; j + 3 < count; j += nr - 2) {
 	    nr = MIN2( currentsz, count - j );
 
 	    if (nr >= 4)
@@ -1044,7 +1044,7 @@ static void TAG(render_quad_strip_elts)( struct gl_context *ctx,
 	       GLint quads = (nr/2)-1;
 	       ELTS_VARS( ALLOC_ELTS( quads*6 ) );
 
-	       for ( i = j-start ; i < j-start+quads ; i++, elts += 2 ) {
+               for (i = j; i < j + quads; i++, elts += 2) {
 		  EMIT_TWO_ELTS( 0, elts[0], elts[1] );
 		  EMIT_TWO_ELTS( 2, elts[2], elts[1] );
 		  EMIT_TWO_ELTS( 4, elts[3], elts[2] );
@@ -1060,9 +1060,9 @@ static void TAG(render_quad_strip_elts)( struct gl_context *ctx,
       else {
 	 ELT_INIT( GL_TRIANGLE_STRIP );
 
-	 for (j = start; j + 3 < count; j += nr - 2 ) {
+         for (j = 0; j + 3 < count; j += nr - 2) {
 	    nr = MIN2( currentsz, count - j );
-	    TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+            TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
 	    FLUSH();
 	    currentsz = dmasz;
 	 }
@@ -1088,14 +1088,14 @@ static void TAG(render_quads_elts)( struct gl_context *ctx,
 
       currentsz = GET_CURRENT_VB_MAX_ELTS()/4*4;
 
-      count -= (count-start)%4;
+      count -= count % 4;
 
       if (currentsz < 8)
 	 currentsz = dmasz;
 
-      for (j = start; j < count; j += nr) {
+      for (j = 0; j < count; j += nr) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+         TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
 	 FLUSH();
 	 currentsz = dmasz;
       }
@@ -1112,7 +1112,7 @@ static void TAG(render_quads_elts)( struct gl_context *ctx,
       /* Emit whole number of quads in total, and in each buffer.
        */
       dmasz -= dmasz & 3;
-      count -= (count-start) & 3;
+      count -= count & 3;
       currentsz -= currentsz & 3;
 
       /* Adjust for rendering as triangles:
@@ -1123,7 +1123,7 @@ static void TAG(render_quads_elts)( struct gl_context *ctx,
       if (currentsz < 8)
 	 currentsz = dmasz;
 
-      for (j = start; j + 3 < count; j += nr - 2 ) {
+      for (j = 0; j + 3 < count; j += nr - 2) {
 	 nr = MIN2( currentsz, count - j );
 
 	 if (nr >= 4)
@@ -1132,7 +1132,7 @@ static void TAG(render_quads_elts)( struct gl_context *ctx,
 	    GLint i;
 	    ELTS_VARS( ALLOC_ELTS( quads * 6 ) );
 
-	    for ( i = j-start ; i < j-start+quads ; i++, elts += 4 ) {
+	    for (i = j; i < j + quads; i++, elts += 4) {
 	       EMIT_TWO_ELTS( 0, elts[0], elts[1] );
 	       EMIT_TWO_ELTS( 2, elts[3], elts[1] );
 	       EMIT_TWO_ELTS( 4, elts[2], elts[3] );
-- 
cgit v1.2.3


From fad8d54de7e7f908cb0d06f0b54af8440e689928 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 11:46:50 -0700
Subject: t_dd_dmatmp: Clean up improper code formatting from previous patch

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index 443ada413a9..b6f6e84e540 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -214,10 +214,7 @@ static void TAG(render_line_loop_verts)( struct gl_context *ctx,
 
       INIT( GL_LINE_STRIP );
 
-      if (flags & PRIM_BEGIN)
-	 j = 0;
-      else
-	 j = 1;
+      j = (flags & PRIM_BEGIN) ? 0 : 1;
 
       /* Ensure last vertex won't wrap buffers:
        */
@@ -234,7 +231,7 @@ static void TAG(render_line_loop_verts)( struct gl_context *ctx,
 	    nr = MIN2( currentsz, count - j );
 
 	    if (j + nr >= count &&
-		0 < count - 1 &&
+		count > 1 &&
 		(flags & PRIM_END)) 
 	    {
 	       void *tmp;
@@ -250,7 +247,7 @@ static void TAG(render_line_loop_verts)( struct gl_context *ctx,
 	 }
 
       }
-      else if (1 < count && (flags & PRIM_END)) {
+      else if (count > 1 && (flags & PRIM_END)) {
 	 void *tmp;
 	 tmp = ALLOC_VERTS(2);
 	 tmp = TAG(emit_verts)( ctx, start+1, 1, tmp );
@@ -798,10 +795,7 @@ static void TAG(render_line_loop_elts)( struct gl_context *ctx,
       FLUSH();
       ELT_INIT( GL_LINE_STRIP );
 
-      if (flags & PRIM_BEGIN)
-	 j = 0;
-      else
-	 j = 1;
+      j = (flags & PRIM_BEGIN) ? 0 : 1;
 
       currentsz = GET_CURRENT_VB_MAX_ELTS();
       if (currentsz < 8) {
@@ -818,7 +812,7 @@ static void TAG(render_line_loop_elts)( struct gl_context *ctx,
 	    nr = MIN2( currentsz, count - j );
 
 	    if (j + nr >= count &&
-		0 < count - 1 &&
+		count > 1 &&
 		(flags & PRIM_END)) 
 	    {
 	       void *tmp;
@@ -834,7 +828,7 @@ static void TAG(render_line_loop_elts)( struct gl_context *ctx,
 	 }
 
       }
-      else if (1 < count && (flags & PRIM_END)) {
+      else if (count > 1 && (flags & PRIM_END)) {
 	 void *tmp;
 	 tmp = ALLOC_ELTS(2);
 	 tmp = TAG(emit_elts)( ctx, elts+start+1, 1, tmp );
-- 
cgit v1.2.3


From 0d475ee2b989ac1697720ca391913e9158156bdc Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 11:50:28 -0700
Subject: t_dd_dmatmp: Use '& 3' instead of '% 4' everywhere

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index b6f6e84e540..a7cabc24977 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -553,7 +553,7 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
       /* Emit whole number of quads in total.  dmasz is already a multiple
        * of 4.
        */
-      count -= count % 4;
+      count -= count & 3;
 
       currentsz = (GET_CURRENT_VB_MAX_VERTS()/4) * 4;
       if (currentsz < 8)
@@ -1082,7 +1082,7 @@ static void TAG(render_quads_elts)( struct gl_context *ctx,
 
       currentsz = GET_CURRENT_VB_MAX_ELTS()/4*4;
 
-      count -= count % 4;
+      count -= count & 3;
 
       if (currentsz < 8)
 	 currentsz = dmasz;
-- 
cgit v1.2.3


From c0b3b2f7603eab210acdb2e654e5411fe912ca34 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 11:56:20 -0700
Subject: t_dd_dmatmp: Pull out common 'count -= count & 3' code

This was missing in the HAVE_TRIANGLES path, and that could cause
incorrect rendering.

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109
Reviewed-by: Brian Paul <brianp@vmware.com>
Cc: Marius Predut <marius.predut@intel.com>
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index a7cabc24977..6e60acd50c9 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -542,6 +542,9 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
 				     GLuint count,
 				     GLuint flags )
 {
+   /* Emit whole number of quads in total. */
+   count -= count & 3;
+
    if (HAVE_QUADS) {
       LOCAL_VARS;
       int dmasz = (GET_SUBSEQUENT_VB_MAX_VERTS()/4) * 4;
@@ -550,11 +553,6 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
 
       INIT(GL_QUADS);
 
-      /* Emit whole number of quads in total.  dmasz is already a multiple
-       * of 4.
-       */
-      count -= count & 3;
-
       currentsz = (GET_CURRENT_VB_MAX_VERTS()/4) * 4;
       if (currentsz < 8)
          currentsz = dmasz;
@@ -584,7 +582,6 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
       /* Emit whole number of quads in total, and in each buffer.
        */
       dmasz -= dmasz & 3;
-      count -= count & 3;
       currentsz -= currentsz & 3;
 
       /* Adjust for rendering as triangles:
@@ -1070,6 +1067,9 @@ static void TAG(render_quads_elts)( struct gl_context *ctx,
 				    GLuint count,
 				    GLuint flags )
 {
+   /* Emit whole number of quads in total. */
+   count -= count & 3;
+
    if (HAVE_QUADS) {
       LOCAL_VARS;
       GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
@@ -1082,8 +1082,6 @@ static void TAG(render_quads_elts)( struct gl_context *ctx,
 
       currentsz = GET_CURRENT_VB_MAX_ELTS()/4*4;
 
-      count -= count & 3;
-
       if (currentsz < 8)
 	 currentsz = dmasz;
 
@@ -1106,7 +1104,6 @@ static void TAG(render_quads_elts)( struct gl_context *ctx,
       /* Emit whole number of quads in total, and in each buffer.
        */
       dmasz -= dmasz & 3;
-      count -= count & 3;
       currentsz -= currentsz & 3;
 
       /* Adjust for rendering as triangles:
-- 
cgit v1.2.3


From 25543d8ec506ef32599af6f5e0dd735e01b39909 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 11:59:22 -0700
Subject: t_dd_dmatmp: Use addition instead of subtraction in loop bounds

This is used everywhere else in this file because it avoids problems
when count is zero (due to trimming).

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109
Reviewed-by: Brian Paul <brianp@vmware.com>
Cc: Marius Predut <marius.predut@intel.com>
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index 6e60acd50c9..b127605c4cc 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -623,7 +623,7 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
 
       INIT(GL_TRIANGLES);
 
-      for (j = 0; j < count-3; j += 4) {
+      for (j = 0; j + 3 < count; j += 4) {
 	 void *tmp = ALLOC_VERTS( 6 );
 	 /* Send v0, v1, v3
 	  */
-- 
cgit v1.2.3


From 249ba09f59af870dc55eacede5b1e164873c397d Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 12:36:33 -0700
Subject: t_dd_dmatmp: Remove HAVE_QUAD_STRIPS support

Two drivers use this file, and neither supports quad strips.

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/drivers/dri/i915/intel_render.c   |  1 -
 src/mesa/drivers/dri/radeon/radeon_swtcl.c |  1 -
 src/mesa/tnl_dd/t_dd_dmatmp.h              | 41 ++++++------------------------
 3 files changed, 8 insertions(+), 35 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c
index df21c6651df..2b3cf48b413 100644
--- a/src/mesa/drivers/dri/i915/intel_render.c
+++ b/src/mesa/drivers/dri/i915/intel_render.c
@@ -65,7 +65,6 @@
 #define HAVE_TRI_FANS    1
 #define HAVE_POLYGONS    1
 #define HAVE_QUADS       0
-#define HAVE_QUAD_STRIPS 0
 
 #define HAVE_ELTS        0
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
index 7938c5301da..c01a985e355 100644
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -356,7 +356,6 @@ void r100_swtcl_flush(struct gl_context *ctx, uint32_t current_offset)
 #define HAVE_TRI_STRIP_1 0
 #define HAVE_TRI_FANS    1
 #define HAVE_QUADS       0
-#define HAVE_QUAD_STRIPS 0
 #define HAVE_POLYGONS    0
 /* \todo: is it possible to make "ELTS" work with t_vertex code ? */
 #define HAVE_ELTS        0
diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index b127605c4cc..5da0eae37cc 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -43,6 +43,10 @@
 #error "must have at least triangles to use render template"
 #endif
 
+#if HAVE_QUAD_STRIPS
+#error "quad strips not supported by render template"
+#endif
+
 #if !HAVE_ELTS
 #define ELTS_VARS(buf)
 #define ALLOC_ELTS(nr) 0
@@ -418,33 +422,9 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
 {
    GLuint j, nr;
 
-   if (HAVE_QUAD_STRIPS) {
-      LOCAL_VARS;
-      GLuint j, nr;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-      int currentsz;
-
-      INIT(GL_QUAD_STRIP);
-
-      currentsz = GET_CURRENT_VB_MAX_VERTS();
-      if (currentsz < 8) {
-	 currentsz = dmasz;
-      }
-
-      dmasz -= (dmasz & 2);
-      currentsz -= (currentsz & 2);
-
-      for (j = 0; j + 3 < count; j += nr - 2 ) {
-	 nr = MIN2( currentsz, count - j );
-         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
-	 currentsz = dmasz;
-      }
-
-      FLUSH();
-
-   } else if (HAVE_TRI_STRIPS && 
-	      ctx->Light.ShadeModel == GL_FLAT &&
-	      TNL_CONTEXT(ctx)->vb.AttribPtr[_TNL_ATTRIB_COLOR0]->stride) {
+   if (HAVE_TRI_STRIPS &&
+       ctx->Light.ShadeModel == GL_FLAT &&
+       TNL_CONTEXT(ctx)->vb.AttribPtr[_TNL_ATTRIB_COLOR0]->stride) {
       if (HAVE_ELTS) {
 	 LOCAL_VARS;
 	 int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
@@ -999,9 +979,7 @@ static void TAG(render_quad_strip_elts)( struct gl_context *ctx,
 					 GLuint count,
 					 GLuint flags )
 {
-   if (HAVE_QUAD_STRIPS && 0) {
-   }
-   else if (HAVE_TRI_STRIPS) {
+   if (HAVE_TRI_STRIPS) {
       LOCAL_VARS;
       GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
       int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
@@ -1216,9 +1194,6 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx,
       case GL_QUAD_STRIP:
 	 if (VB->Elts) {
 	    ok = HAVE_TRI_STRIPS;
-	 }
-	 else if (HAVE_QUAD_STRIPS) {
-	    ok = GL_TRUE;
 	 } else if (HAVE_TRI_STRIPS && 
 		    ctx->Light.ShadeModel == GL_FLAT &&
 		    VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride != 0) {
-- 
cgit v1.2.3


From 4ecc387a935d2364c5be02cbf7a72b0a05fa419a Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 12:38:19 -0700
Subject: t_dd_dmatmp: Remove HAVE_QUADS support

Two drivers use this file, and neither supports quads.

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/drivers/dri/i915/intel_render.c   |  1 -
 src/mesa/drivers/dri/radeon/radeon_swtcl.c |  1 -
 src/mesa/tnl_dd/t_dd_dmatmp.h              | 51 +++---------------------------
 3 files changed, 5 insertions(+), 48 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c
index 2b3cf48b413..990f65c188c 100644
--- a/src/mesa/drivers/dri/i915/intel_render.c
+++ b/src/mesa/drivers/dri/i915/intel_render.c
@@ -64,7 +64,6 @@
 #define HAVE_TRI_STRIP_1 0      /* has it, template can't use it yet */
 #define HAVE_TRI_FANS    1
 #define HAVE_POLYGONS    1
-#define HAVE_QUADS       0
 
 #define HAVE_ELTS        0
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
index c01a985e355..dce8aea218e 100644
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -355,7 +355,6 @@ void r100_swtcl_flush(struct gl_context *ctx, uint32_t current_offset)
 #define HAVE_TRI_STRIPS  1
 #define HAVE_TRI_STRIP_1 0
 #define HAVE_TRI_FANS    1
-#define HAVE_QUADS       0
 #define HAVE_POLYGONS    0
 /* \todo: is it possible to make "ELTS" work with t_vertex code ? */
 #define HAVE_ELTS        0
diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index 5da0eae37cc..33a0256496c 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -43,8 +43,8 @@
 #error "must have at least triangles to use render template"
 #endif
 
-#if HAVE_QUAD_STRIPS
-#error "quad strips not supported by render template"
+#if HAVE_QUAD_STRIPS || HAVE_QUADS
+#error "quads and quad strips not supported by render template"
 #endif
 
 #if !HAVE_ELTS
@@ -525,25 +525,7 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
    /* Emit whole number of quads in total. */
    count -= count & 3;
 
-   if (HAVE_QUADS) {
-      LOCAL_VARS;
-      int dmasz = (GET_SUBSEQUENT_VB_MAX_VERTS()/4) * 4;
-      int currentsz;
-      GLuint j, nr;
-
-      INIT(GL_QUADS);
-
-      currentsz = (GET_CURRENT_VB_MAX_VERTS()/4) * 4;
-      if (currentsz < 8)
-         currentsz = dmasz;
-
-      for (j = 0; j < count; j += nr) {
-         nr = MIN2( currentsz, count - j );
-         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
-         currentsz = dmasz;
-      }
-   }
-   else if (HAVE_ELTS) {
+   if (HAVE_ELTS) {
       /* Hardware doesn't have a quad primitive type -- try to
        * simulate it using indexed vertices and the triangle
        * primitive:
@@ -1048,28 +1030,7 @@ static void TAG(render_quads_elts)( struct gl_context *ctx,
    /* Emit whole number of quads in total. */
    count -= count & 3;
 
-   if (HAVE_QUADS) {
-      LOCAL_VARS;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS()/4*4;
-      int currentsz;
-      GLuint j, nr;
-
-      FLUSH();
-      ELT_INIT( GL_TRIANGLES );
-
-      currentsz = GET_CURRENT_VB_MAX_ELTS()/4*4;
-
-      if (currentsz < 8)
-	 currentsz = dmasz;
-
-      for (j = 0; j < count; j += nr) {
-	 nr = MIN2( currentsz, count - j );
-         TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
-	 FLUSH();
-	 currentsz = dmasz;
-      }
-   } else {
+   {
       LOCAL_VARS;
       GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
       int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
@@ -1208,9 +1169,7 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx,
 	    ok = HAVE_TRI_STRIPS;
 	 break;
       case GL_QUADS:
-	 if (HAVE_QUADS) {
-	    ok = GL_TRUE;
-	 } else if (HAVE_ELTS) {
+         if (HAVE_ELTS) {
 	    ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS();
 	 }
 	 else {
-- 
cgit v1.2.3


From 265624c5af6f7eaa18bc81cc8aa8aeb0f5801861 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 12:41:33 -0700
Subject: t_dd_dmatmp: Indentation and formatting fixes after HAVE_QUADS change

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 87 ++++++++++++++++++++-----------------------
 1 file changed, 41 insertions(+), 46 deletions(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index 33a0256496c..dadbf21d54a 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -517,10 +517,10 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
 }
 
 
-static void TAG(render_quads_verts)( struct gl_context *ctx,
-				     GLuint start,
-				     GLuint count,
-				     GLuint flags )
+static void TAG(render_quads_verts)(struct gl_context *ctx,
+                                    GLuint start,
+                                    GLuint count,
+                                    GLuint flags)
 {
    /* Emit whole number of quads in total. */
    count -= count & 3;
@@ -1022,58 +1022,53 @@ static void TAG(render_quad_strip_elts)( struct gl_context *ctx,
 }
 
 
-static void TAG(render_quads_elts)( struct gl_context *ctx,
-				    GLuint start,
-				    GLuint count,
-				    GLuint flags )
+static void TAG(render_quads_elts)(struct gl_context *ctx,
+                                   GLuint start,
+                                   GLuint count,
+                                   GLuint flags)
 {
-   /* Emit whole number of quads in total. */
-   count -= count & 3;
-
-   {
-      LOCAL_VARS;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
-      GLuint j, nr;
-
-      ELT_INIT( GL_TRIANGLES );
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
+   LOCAL_VARS;
+   GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
+   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+   int currentsz;
+   GLuint j, nr;
 
-      /* Emit whole number of quads in total, and in each buffer.
-       */
-      dmasz -= dmasz & 3;
-      currentsz -= currentsz & 3;
+   ELT_INIT(GL_TRIANGLES);
+   currentsz = GET_CURRENT_VB_MAX_ELTS();
 
-      /* Adjust for rendering as triangles:
-       */
-      currentsz = currentsz/6*4;
-      dmasz = dmasz/6*4;
+   /* Emit whole number of quads in total, and in each buffer.
+    */
+   count -= count & 3;
+   dmasz -= dmasz & 3;
+   currentsz -= currentsz & 3;
 
-      if (currentsz < 8)
-	 currentsz = dmasz;
+   /* Adjust for rendering as triangles:
+    */
+   currentsz = currentsz / 6 * 4;
+   dmasz = dmasz / 6 * 4;
 
-      for (j = 0; j + 3 < count; j += nr - 2) {
-	 nr = MIN2( currentsz, count - j );
+   if (currentsz < 8)
+      currentsz = dmasz;
 
-	 if (nr >= 4)
-	 {
-	    GLint quads = nr/4;
-	    GLint i;
-	    ELTS_VARS( ALLOC_ELTS( quads * 6 ) );
+   for (j = 0; j + 3 < count; j += nr - 2) {
+      nr = MIN2(currentsz, count - j);
 
-	    for (i = j; i < j + quads; i++, elts += 4) {
-	       EMIT_TWO_ELTS( 0, elts[0], elts[1] );
-	       EMIT_TWO_ELTS( 2, elts[3], elts[1] );
-	       EMIT_TWO_ELTS( 4, elts[2], elts[3] );
-	       INCR_ELTS( 6 );
-	    }
+      if (nr >= 4) {
+         GLint quads = nr / 4;
+         GLint i;
+         ELTS_VARS(ALLOC_ELTS(quads * 6));
 
-	    FLUSH();
-	 }
+         for (i = j; i < j + quads; i++, elts += 4) {
+            EMIT_TWO_ELTS(0, elts[0], elts[1]);
+            EMIT_TWO_ELTS(2, elts[3], elts[1]);
+            EMIT_TWO_ELTS(4, elts[2], elts[3]);
+            INCR_ELTS(6);
+         }
 
-	 currentsz = dmasz;
+         FLUSH();
       }
+
+      currentsz = dmasz;
    }
 }
 
-- 
cgit v1.2.3


From b8461e03f01167a8bafe7fa9f12ff0ec246533b4 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 12:46:21 -0700
Subject: t_dd_dmatmp: Require HAVE_LINES

Two drivers use this file, and both support lines.

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index dadbf21d54a..14dfa1d5659 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -39,8 +39,8 @@
  * tristrips, lineloops to linestrips), or to indexed vertices.
  */
 
-#if !defined(HAVE_TRIANGLES)
-#error "must have at least triangles to use render template"
+#if !defined(HAVE_TRIANGLES) || !HAVE_LINES
+#error "must have lines and triangles to use render template"
 #endif
 
 #if HAVE_QUAD_STRIPS || HAVE_QUADS
@@ -142,7 +142,6 @@ static void TAG(render_lines_verts)( struct gl_context *ctx,
 				     GLuint count,
 				     GLuint flags )
 {
-   if (HAVE_LINES) {
       LOCAL_VARS;
       int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
       int currentsz;
@@ -165,11 +164,6 @@ static void TAG(render_lines_verts)( struct gl_context *ctx,
          TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
       }
-
-   } else {
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
 }
 
 
@@ -673,7 +667,6 @@ static void TAG(render_lines_elts)( struct gl_context *ctx,
 				    GLuint count,
 				    GLuint flags )
 {
-   if (HAVE_LINES) {
       LOCAL_VARS;
       int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
       int currentsz;
@@ -698,10 +691,6 @@ static void TAG(render_lines_elts)( struct gl_context *ctx,
 	 FLUSH();
 	 currentsz = dmasz;
       }
-   } else {
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
 }
 
 
@@ -1122,7 +1111,7 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx,
 	 ok = HAVE_POINTS;
 	 break;
       case GL_LINES:
-	 ok = HAVE_LINES && !ctx->Line.StippleFlag;
+	 ok = !ctx->Line.StippleFlag;
 	 break;
       case GL_LINE_STRIP:
 	 ok = HAVE_LINE_STRIPS && !ctx->Line.StippleFlag;
-- 
cgit v1.2.3


From 1ab8a69a3b7e2846a602f14aa6e4dd5994be1615 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 14:51:46 -0700
Subject: t_dd_dmatmp: Indentation and formatting fixes after HAVE_LINES change

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 92 +++++++++++++++++++++----------------------
 1 file changed, 46 insertions(+), 46 deletions(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index 14dfa1d5659..477fe36c9e9 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -137,33 +137,33 @@ static void TAG(render_points_verts)( struct gl_context *ctx,
    }
 }
 
-static void TAG(render_lines_verts)( struct gl_context *ctx,
-				     GLuint start,
-				     GLuint count,
-				     GLuint flags )
+static void TAG(render_lines_verts)(struct gl_context *ctx,
+                                    GLuint start,
+                                    GLuint count,
+                                    GLuint flags)
 {
-      LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-      int currentsz;
-      GLuint j, nr;
+   LOCAL_VARS;
+   int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+   int currentsz;
+   GLuint j, nr;
 
-      INIT( GL_LINES );
+   INIT(GL_LINES);
 
-      /* Emit whole number of lines in total and in each buffer:
-       */
-      count -= count & 1;
-      currentsz = GET_CURRENT_VB_MAX_VERTS();
-      currentsz -= currentsz & 1;
-      dmasz -= dmasz & 1;
+   /* Emit whole number of lines in total and in each buffer:
+    */
+   count -= count & 1;
+   currentsz = GET_CURRENT_VB_MAX_VERTS();
+   currentsz -= currentsz & 1;
+   dmasz -= dmasz & 1;
 
-      if (currentsz < 8)
-	 currentsz = dmasz;
+   if (currentsz < 8)
+      currentsz = dmasz;
 
-      for (j = 0; j < count; j += nr) {
-	 nr = MIN2( currentsz, count - j );
-         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
-	 currentsz = dmasz;
-      }
+   for (j = 0; j < count; j += nr) {
+      nr = MIN2(currentsz, count - j);
+      TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
+      currentsz = dmasz;
+   }
 }
 
 
@@ -662,35 +662,35 @@ static void TAG(render_points_elts)( struct gl_context *ctx,
 
 
-static void TAG(render_lines_elts)( struct gl_context *ctx,
-				    GLuint start,
-				    GLuint count,
-				    GLuint flags )
+static void TAG(render_lines_elts)(struct gl_context *ctx,
+                                   GLuint start,
+                                   GLuint count,
+                                   GLuint flags)
 {
-      LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      GLuint j, nr;
+   LOCAL_VARS;
+   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+   int currentsz;
+   GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
+   GLuint j, nr;
 
-      ELT_INIT( GL_LINES );
+   ELT_INIT(GL_LINES);
 
-      /* Emit whole number of lines in total and in each buffer:
-       */
-      count -= count & 1;
-      currentsz -= currentsz & 1;
-      dmasz -= dmasz & 1;
+   /* Emit whole number of lines in total and in each buffer:
+    */
+   count -= count & 1;
+   currentsz -= currentsz & 1;
+   dmasz -= dmasz & 1;
 
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
-      if (currentsz < 8)
-	 currentsz = dmasz;
+   currentsz = GET_CURRENT_VB_MAX_ELTS();
+   if (currentsz < 8)
+      currentsz = dmasz;
 
-      for (j = 0; j < count; j += nr ) {
-	 nr = MIN2( currentsz, count - j );
-         TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
-	 FLUSH();
-	 currentsz = dmasz;
-      }
+   for (j = 0; j < count; j += nr ) {
+      nr = MIN2(currentsz, count - j);
+      TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
+      FLUSH();
+      currentsz = dmasz;
+   }
 }
 
 
-- 
cgit v1.2.3


From 1ecdf956ac699aee5e8c62887a40608accb0ac94 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 14:08:40 -0700
Subject: t_dd_dmatmp: Require HAVE_LINE_STRIPS

Two drivers use this file, and both support line strips.

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 33 ++++-----------------------------
 1 file changed, 4 insertions(+), 29 deletions(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index 477fe36c9e9..c0fe5d09d30 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -39,8 +39,8 @@
  * tristrips, lineloops to linestrips), or to indexed vertices.
  */
 
-#if !defined(HAVE_TRIANGLES) || !HAVE_LINES
-#error "must have lines and triangles to use render template"
+#if !defined(HAVE_TRIANGLES) || !HAVE_LINES || !HAVE_LINE_STRIPS
+#error "must have lines, line strips, and triangles to use render template"
 #endif
 
 #if HAVE_QUAD_STRIPS || HAVE_QUADS
@@ -172,7 +172,6 @@ static void TAG(render_line_strip_verts)( struct gl_context *ctx,
 					  GLuint count,
 					  GLuint flags )
 {
-   if (HAVE_LINE_STRIPS) {
       LOCAL_VARS;
       int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
       int currentsz;
@@ -191,11 +190,6 @@ static void TAG(render_line_strip_verts)( struct gl_context *ctx,
       }
  
       FLUSH();
-
-   } else {
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
 }
 
 
@@ -204,7 +198,6 @@ static void TAG(render_line_loop_verts)( struct gl_context *ctx,
 					 GLuint count,
 					 GLuint flags )
 {
-   if (HAVE_LINE_STRIPS) {
       LOCAL_VARS;
       int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
       int currentsz;
@@ -254,11 +247,6 @@ static void TAG(render_line_loop_verts)( struct gl_context *ctx,
       }
 
       FLUSH();
-
-   } else {
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
 }
 
 
@@ -699,7 +687,6 @@ static void TAG(render_line_strip_elts)( struct gl_context *ctx,
 					 GLuint count,
 					 GLuint flags )
 {
-   if (HAVE_LINE_STRIPS) {
       LOCAL_VARS;
       int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
       int currentsz;
@@ -719,12 +706,6 @@ static void TAG(render_line_strip_elts)( struct gl_context *ctx,
 	 FLUSH();
 	 currentsz = dmasz;
       }
-   } else {
-      /* TODO: Try to emit as indexed lines.
-       */
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
 }
 
 
@@ -733,7 +714,6 @@ static void TAG(render_line_loop_elts)( struct gl_context *ctx,
 					GLuint count,
 					GLuint flags )
 {
-   if (HAVE_LINE_STRIPS) {
       LOCAL_VARS;
       int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
       int currentsz;
@@ -785,11 +765,6 @@ static void TAG(render_line_loop_elts)( struct gl_context *ctx,
       }
 
       FLUSH();
-   } else {
-      /* TODO: Try to emit as indexed lines */
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
 }
 
 
@@ -1114,10 +1089,10 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx,
 	 ok = !ctx->Line.StippleFlag;
 	 break;
       case GL_LINE_STRIP:
-	 ok = HAVE_LINE_STRIPS && !ctx->Line.StippleFlag;
+	 ok = !ctx->Line.StippleFlag;
 	 break;
       case GL_LINE_LOOP:
-	 ok = HAVE_LINE_STRIPS && !ctx->Line.StippleFlag;
+	 ok = !ctx->Line.StippleFlag;
 	 break;
       case GL_TRIANGLES:
 	 ok = HAVE_TRIANGLES;
-- 
cgit v1.2.3


From dcd8e49962b550f8854fff20e0369dae5550f640 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 14:14:08 -0700
Subject: t_dd_dmatmp: Indentation and formatting fixes after HAVE_LINE_STRIPS
 change

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 260 ++++++++++++++++++++----------------------
 1 file changed, 123 insertions(+), 137 deletions(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index c0fe5d09d30..bfdae5a428a 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -167,86 +167,81 @@ static void TAG(render_lines_verts)(struct gl_context *ctx,
 }
 
 
-static void TAG(render_line_strip_verts)( struct gl_context *ctx,
-					  GLuint start,
-					  GLuint count,
-					  GLuint flags )
+static void TAG(render_line_strip_verts)(struct gl_context *ctx,
+                                         GLuint start,
+                                         GLuint count,
+                                         GLuint flags)
 {
-      LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-      int currentsz;
-      GLuint j, nr;
+   LOCAL_VARS;
+   int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+   int currentsz;
+   GLuint j, nr;
 
-      INIT( GL_LINE_STRIP );
+   INIT(GL_LINE_STRIP);
 
-      currentsz = GET_CURRENT_VB_MAX_VERTS();
-      if (currentsz < 8)
-	 currentsz = dmasz;
+   currentsz = GET_CURRENT_VB_MAX_VERTS();
+   if (currentsz < 8)
+      currentsz = dmasz;
 
-      for (j = 0; j + 1 < count; j += nr - 1 ) {
-	 nr = MIN2( currentsz, count - j );
-         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
-	 currentsz = dmasz;
-      }
+   for (j = 0; j + 1 < count; j += nr - 1) {
+      nr = MIN2(currentsz, count - j);
+      TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
+      currentsz = dmasz;
+   }
  
-      FLUSH();
+   FLUSH();
 }
 
 
-static void TAG(render_line_loop_verts)( struct gl_context *ctx,
-					 GLuint start,
-					 GLuint count,
-					 GLuint flags )
+static void TAG(render_line_loop_verts)(struct gl_context *ctx,
+                                        GLuint start,
+                                        GLuint count,
+                                        GLuint flags)
 {
-      LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-      int currentsz;
-      GLuint j, nr;
-
-      INIT( GL_LINE_STRIP );
+   LOCAL_VARS;
+   int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+   int currentsz;
+   GLuint j, nr;
 
-      j = (flags & PRIM_BEGIN) ? 0 : 1;
+   INIT(GL_LINE_STRIP);
 
-      /* Ensure last vertex won't wrap buffers:
-       */
-      currentsz = GET_CURRENT_VB_MAX_VERTS();
-      currentsz--;
-      dmasz--;
+   j = (flags & PRIM_BEGIN) ? 0 : 1;
 
-      if (currentsz < 8) {
-	 currentsz = dmasz;
-      }
-
-      if (j + 1 < count) {
-	 for ( ; j + 1 < count; j += nr - 1 ) {
-	    nr = MIN2( currentsz, count - j );
+   /* Ensure last vertex won't wrap buffers:
+    */
+   currentsz = GET_CURRENT_VB_MAX_VERTS();
+   currentsz--;
+   dmasz--;
 
-	    if (j + nr >= count &&
-		count > 1 &&
-		(flags & PRIM_END)) 
-	    {
-	       void *tmp;
-	       tmp = ALLOC_VERTS(nr+1);
-               tmp = TAG(emit_verts)(ctx, start + j, nr, tmp);
-	       tmp = TAG(emit_verts)( ctx, start, 1, tmp );
-	       (void) tmp;
-	    }
-	    else {
-               TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
-	       currentsz = dmasz;
-	    }
-	 }
+   if (currentsz < 8)
+      currentsz = dmasz;
 
+   if (j + 1 < count) {
+      for (/* empty */; j + 1 < count; j += nr - 1) {
+         nr = MIN2(currentsz, count - j);
+
+         if (j + nr >= count &&
+             count > 1 &&
+             (flags & PRIM_END)) {
+            void *tmp;
+            tmp = ALLOC_VERTS(nr+1);
+            tmp = TAG(emit_verts)(ctx, start + j, nr, tmp);
+            tmp = TAG(emit_verts)( ctx, start, 1, tmp );
+            (void) tmp;
+         } else {
+            TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
+            currentsz = dmasz;
+         }
       }
-      else if (count > 1 && (flags & PRIM_END)) {
-	 void *tmp;
-	 tmp = ALLOC_VERTS(2);
-	 tmp = TAG(emit_verts)( ctx, start+1, 1, tmp );
-	 tmp = TAG(emit_verts)( ctx, start, 1, tmp );
-	 (void) tmp;
-      }
+   } else if (count > 1 && (flags & PRIM_END)) {
+      void *tmp;
+      tmp = ALLOC_VERTS(2);
+      tmp = TAG(emit_verts)( ctx, start+1, 1, tmp );
+      tmp = TAG(emit_verts)( ctx, start, 1, tmp );
+      (void) tmp;
+   }
 
-      FLUSH();
+   FLUSH();
 }
 
 
@@ -682,89 +677,84 @@ static void TAG(render_lines_elts)(struct gl_context *ctx,
 }
 
 
-static void TAG(render_line_strip_elts)( struct gl_context *ctx,
-					 GLuint start,
-					 GLuint count,
-					 GLuint flags )
+static void TAG(render_line_strip_elts)(struct gl_context *ctx,
+                                        GLuint start,
+                                        GLuint count,
+                                        GLuint flags)
 {
-      LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      GLuint j, nr;
+   LOCAL_VARS;
+   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+   int currentsz;
+   GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
+   GLuint j, nr;
 
-      FLUSH(); /* always a new primitive */
-      ELT_INIT( GL_LINE_STRIP );
+   FLUSH(); /* always a new primitive */
+   ELT_INIT(GL_LINE_STRIP);
 
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
-      if (currentsz < 8)
-	 currentsz = dmasz;
+   currentsz = GET_CURRENT_VB_MAX_ELTS();
+   if (currentsz < 8)
+      currentsz = dmasz;
 
-      for (j = 0; j + 1 < count; j += nr - 1) {
-	 nr = MIN2( currentsz, count - j );
-         TAG(emit_elts)( ctx, elts + start + j, nr, ALLOC_ELTS(nr));
-	 FLUSH();
-	 currentsz = dmasz;
-      }
+   for (j = 0; j + 1 < count; j += nr - 1) {
+      nr = MIN2(currentsz, count - j);
+      TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
+      FLUSH();
+      currentsz = dmasz;
+   }
 }
 
 
-static void TAG(render_line_loop_elts)( struct gl_context *ctx,
-					GLuint start,
-					GLuint count,
-					GLuint flags )
+static void TAG(render_line_loop_elts)(struct gl_context *ctx,
+                                       GLuint start,
+                                       GLuint count,
+                                       GLuint flags)
 {
-      LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      GLuint j, nr;
-
-      FLUSH();
-      ELT_INIT( GL_LINE_STRIP );
-
-      j = (flags & PRIM_BEGIN) ? 0 : 1;
-
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
-      if (currentsz < 8) {
-	 currentsz = dmasz;
-      }
+   LOCAL_VARS;
+   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+   int currentsz;
+   GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
+   GLuint j, nr;
 
-      /* Ensure last vertex doesn't wrap:
-       */
-      currentsz--;
-      dmasz--;
+   FLUSH();
+   ELT_INIT(GL_LINE_STRIP);
 
-      if (j + 1 < count) {
-	 for ( ; j + 1 < count; j += nr - 1 ) {
-	    nr = MIN2( currentsz, count - j );
+   j = (flags & PRIM_BEGIN) ? 0 : 1;
 
-	    if (j + nr >= count &&
-		count > 1 &&
-		(flags & PRIM_END)) 
-	    {
-	       void *tmp;
-	       tmp = ALLOC_ELTS(nr+1);
-               tmp = TAG(emit_elts)(ctx, elts + start + j, nr, tmp);
-	       tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
-	       (void) tmp;
-	    }
-	    else {
-               TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
-	       currentsz = dmasz;
-	    }
-	 }
+   currentsz = GET_CURRENT_VB_MAX_ELTS();
+   if (currentsz < 8)
+      currentsz = dmasz;
 
+   /* Ensure last vertex doesn't wrap:
+    */
+   currentsz--;
+   dmasz--;
+
+   if (j + 1 < count) {
+      for (/* empty */; j + 1 < count; j += nr - 1) {
+         nr = MIN2(currentsz, count - j);
+
+         if (j + nr >= count &&
+             count > 1 &&
+             (flags & PRIM_END)) {
+            void *tmp;
+            tmp = ALLOC_ELTS(nr+1);
+            tmp = TAG(emit_elts)(ctx, elts + start + j, nr, tmp);
+            tmp = TAG(emit_elts)(ctx, elts + start, 1, tmp);
+            (void) tmp;
+         } else {
+            TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
+            currentsz = dmasz;
+         }
       }
-      else if (count > 1 && (flags & PRIM_END)) {
-	 void *tmp;
-	 tmp = ALLOC_ELTS(2);
-	 tmp = TAG(emit_elts)( ctx, elts+start+1, 1, tmp );
-	 tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
-	 (void) tmp;
-      }
+   } else if (count > 1 && (flags & PRIM_END)) {
+      void *tmp;
+      tmp = ALLOC_ELTS(2);
+      tmp = TAG(emit_elts)( ctx, elts+start+1, 1, tmp );
+      tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
+      (void) tmp;
+   }
 
-      FLUSH();
+   FLUSH();
 }
 
 
@@ -1086,11 +1076,7 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx,
 	 ok = HAVE_POINTS;
 	 break;
       case GL_LINES:
-	 ok = !ctx->Line.StippleFlag;
-	 break;
       case GL_LINE_STRIP:
-	 ok = !ctx->Line.StippleFlag;
-	 break;
       case GL_LINE_LOOP:
 	 ok = !ctx->Line.StippleFlag;
 	 break;
-- 
cgit v1.2.3


From 22b73f3c2a09375c49bd6202dabdbff368351315 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 14:19:44 -0700
Subject: t_dd_dmatmp: Require HAVE_TRIANGLES

Two drivers use this file, and both support triangles.

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index bfdae5a428a..563875cd75c 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -39,7 +39,7 @@
  * tristrips, lineloops to linestrips), or to indexed vertices.
  */
 
-#if !defined(HAVE_TRIANGLES) || !HAVE_LINES || !HAVE_LINE_STRIPS
+#if !HAVE_TRIANGLES || !HAVE_LINES || !HAVE_LINE_STRIPS
 #error "must have lines, line strips, and triangles to use render template"
 #endif
 
@@ -552,7 +552,7 @@ static void TAG(render_quads_verts)(struct gl_context *ctx,
 
       RELEASE_ELT_VERTS();
    }
-   else if (HAVE_TRIANGLES) {
+   else {
       /* Hardware doesn't have a quad primitive type -- try to
        * simulate it using triangle primitive.  This is a win for
        * gears, but is it useful in the broader world?
@@ -574,12 +574,6 @@ static void TAG(render_quads_verts)(struct gl_context *ctx,
 	 (void) tmp;
       }
    }
-   else {
-      /* Vertices won't fit in a single buffer, should never happen.
-       */
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
 }
 
 static void TAG(render_noop)( struct gl_context *ctx,
@@ -1081,7 +1075,7 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx,
 	 ok = !ctx->Line.StippleFlag;
 	 break;
       case GL_TRIANGLES:
-	 ok = HAVE_TRIANGLES;
+	 ok = GL_TRUE;
 	 break;
       case GL_TRIANGLE_STRIP:
 	 ok = HAVE_TRI_STRIPS;
@@ -1118,7 +1112,7 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx,
 	    ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS();
 	 }
 	 else {
-	    ok = HAVE_TRIANGLES; /* flatshading is ok. */
+	    ok = GL_TRUE; /* flatshading is ok. */
 	 }
 	 break;
       default:
-- 
cgit v1.2.3


From fd97a055083a8927d1f6458b9015a202f43f8f01 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 14:23:44 -0700
Subject: t_dd_dmatmp: Require HAVE_TRI_STRIPS

Two drivers use this file, and both support triangle strips.

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 36 ++++++++----------------------------
 1 file changed, 8 insertions(+), 28 deletions(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index 563875cd75c..42dfd0a57c6 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -39,8 +39,8 @@
  * tristrips, lineloops to linestrips), or to indexed vertices.
  */
 
-#if !HAVE_TRIANGLES || !HAVE_LINES || !HAVE_LINE_STRIPS
-#error "must have lines, line strips, and triangles to use render template"
+#if !HAVE_TRIANGLES || !HAVE_LINES || !HAVE_LINE_STRIPS || !HAVE_TRI_STRIPS
+#error "must have lines, line strips, triangles, and triangle strips to use render template"
 #endif
 
 #if HAVE_QUAD_STRIPS || HAVE_QUADS
@@ -281,7 +281,6 @@ static void TAG(render_tri_strip_verts)( struct gl_context *ctx,
 					 GLuint count,
 					 GLuint flags )
 {
-   if (HAVE_TRI_STRIPS) {
       LOCAL_VARS;
       GLuint j, nr;
       int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
@@ -307,11 +306,6 @@ static void TAG(render_tri_strip_verts)( struct gl_context *ctx,
       }
 
       FLUSH();
-
-   } else {
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
 }
 
 static void TAG(render_tri_fan_verts)( struct gl_context *ctx,
@@ -399,8 +393,7 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
 {
    GLuint j, nr;
 
-   if (HAVE_TRI_STRIPS &&
-       ctx->Light.ShadeModel == GL_FLAT &&
+   if (ctx->Light.ShadeModel == GL_FLAT &&
        TNL_CONTEXT(ctx)->vb.AttribPtr[_TNL_ATTRIB_COLOR0]->stride) {
       if (HAVE_ELTS) {
 	 LOCAL_VARS;
@@ -458,7 +451,7 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
 	 return;
       }
    }
-   else if (HAVE_TRI_STRIPS) {
+   else {
       LOCAL_VARS;
       int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
       int currentsz;
@@ -486,10 +479,6 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
       }
 
       FLUSH();
-
-   } else {
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
    }
 }
 
@@ -795,7 +784,6 @@ static void TAG(render_tri_strip_elts)( struct gl_context *ctx,
 					GLuint count,
 					GLuint flags )
 {
-   if (HAVE_TRI_STRIPS) {
       LOCAL_VARS;
       GLuint j, nr;
       GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
@@ -821,11 +809,6 @@ static void TAG(render_tri_strip_elts)( struct gl_context *ctx,
 	 FLUSH();
 	 currentsz = dmasz;
       }
-   } else {
-      /* TODO: try to emit as indexed triangles */
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
 }
 
 static void TAG(render_tri_fan_elts)( struct gl_context *ctx,
@@ -909,7 +892,6 @@ static void TAG(render_quad_strip_elts)( struct gl_context *ctx,
 					 GLuint count,
 					 GLuint flags )
 {
-   if (HAVE_TRI_STRIPS) {
       LOCAL_VARS;
       GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
       int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
@@ -966,7 +948,6 @@ static void TAG(render_quad_strip_elts)( struct gl_context *ctx,
 	    currentsz = dmasz;
 	 }
       }
-   }
 }
 
 
@@ -1078,7 +1059,7 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx,
 	 ok = GL_TRUE;
 	 break;
       case GL_TRIANGLE_STRIP:
-	 ok = HAVE_TRI_STRIPS;
+	 ok = GL_TRUE;
 	 break;
       case GL_TRIANGLE_FAN:
 	 ok = HAVE_TRI_FANS;
@@ -1093,9 +1074,8 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx,
 	 break;
       case GL_QUAD_STRIP:
 	 if (VB->Elts) {
-	    ok = HAVE_TRI_STRIPS;
-	 } else if (HAVE_TRI_STRIPS && 
-		    ctx->Light.ShadeModel == GL_FLAT &&
+	    ok = GL_TRUE;
+	 } else if (ctx->Light.ShadeModel == GL_FLAT &&
 		    VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride != 0) {
 	    if (HAVE_ELTS) {
 	       ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS();
@@ -1105,7 +1085,7 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx,
 	    }
 	 }
 	 else 
-	    ok = HAVE_TRI_STRIPS;
+	    ok = GL_TRUE;
 	 break;
       case GL_QUADS:
          if (HAVE_ELTS) {
-- 
cgit v1.2.3


From 2e19ed3cb54cb33e8d772bfde78cc1d33dc0853b Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 14:29:31 -0700
Subject: t_dd_dmatmp: Indentation and formatting fixes after HAVE_TRI_STRIPS
 change

v2: Fix '- nr' typo noticed by Marius.

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com> [v1]
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 198 ++++++++++++++++++++----------------------
 1 file changed, 95 insertions(+), 103 deletions(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index 42dfd0a57c6..0c63284d2cc 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -276,36 +276,35 @@ static void TAG(render_triangles_verts)( struct gl_context *ctx,
 
 
-static void TAG(render_tri_strip_verts)( struct gl_context *ctx,
-					 GLuint start,
-					 GLuint count,
-					 GLuint flags )
+static void TAG(render_tri_strip_verts)(struct gl_context *ctx,
+                                        GLuint start,
+                                        GLuint count,
+                                        GLuint flags)
 {
-      LOCAL_VARS;
-      GLuint j, nr;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-      int currentsz;
+   LOCAL_VARS;
+   GLuint j, nr;
+   int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+   int currentsz;
 
-      INIT(GL_TRIANGLE_STRIP);
+   INIT(GL_TRIANGLE_STRIP);
 
-      currentsz = GET_CURRENT_VB_MAX_VERTS();
+   currentsz = GET_CURRENT_VB_MAX_VERTS();
 
-      if (currentsz < 8) {
-	 currentsz = dmasz;
-      }
+   if (currentsz < 8)
+      currentsz = dmasz;
 
-      /* From here on emit even numbers of tris when wrapping over buffers:
-       */
-      dmasz -= (dmasz & 1);
-      currentsz -= (currentsz & 1);
+   /* From here on emit even numbers of tris when wrapping over buffers:
+    */
+   dmasz -= (dmasz & 1);
+   currentsz -= (currentsz & 1);
 
-      for (j = 0; j + 2 < count; j += nr - 2) {
-	 nr = MIN2( currentsz, count - j );
-         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
-	 currentsz = dmasz;
-      }
+   for (j = 0; j + 2 < count; j += nr - 2) {
+      nr = MIN2(currentsz, count - j);
+      TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
+      currentsz = dmasz;
+   }
 
-      FLUSH();
+   FLUSH();
 }
 
 static void TAG(render_tri_fan_verts)( struct gl_context *ctx,
@@ -450,8 +449,7 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
         fprintf(stderr, "%s - cannot draw primitive\n", __func__);
 	 return;
       }
-   }
-   else {
+   } else {
       LOCAL_VARS;
       int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
       int currentsz;
@@ -459,7 +457,7 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
       /* Emit smooth-shaded quadstrips as tristrips:
        */
       FLUSH();
-      INIT( GL_TRIANGLE_STRIP );
+      INIT(GL_TRIANGLE_STRIP);
 
       /* Emit whole number of quads in total, and in each buffer.
        */
@@ -468,12 +466,11 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
       currentsz -= currentsz & 1;
       count -= count & 1;
 
-      if (currentsz < 8) {
+      if (currentsz < 8)
 	 currentsz = dmasz;
-      }
 
       for (j = 0; j + 3 < count; j += nr - 2) {
-	 nr = MIN2( currentsz, count - j );
+         nr = MIN2(currentsz, count - j);
          TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
       }
@@ -779,36 +776,35 @@ static void TAG(render_triangles_elts)( struct gl_context *ctx,
 
 
-static void TAG(render_tri_strip_elts)( struct gl_context *ctx,
-					GLuint start,
-					GLuint count,
-					GLuint flags )
+static void TAG(render_tri_strip_elts)(struct gl_context *ctx,
+                                       GLuint start,
+                                       GLuint count,
+                                       GLuint flags)
 {
-      LOCAL_VARS;
-      GLuint j, nr;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
+   LOCAL_VARS;
+   GLuint j, nr;
+   GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
+   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+   int currentsz;
 
-      FLUSH();
-      ELT_INIT( GL_TRIANGLE_STRIP );
+   FLUSH();
+   ELT_INIT(GL_TRIANGLE_STRIP);
 
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
-      if (currentsz < 8) {
-	 currentsz = dmasz;
-      }
+   currentsz = GET_CURRENT_VB_MAX_ELTS();
+   if (currentsz < 8)
+      currentsz = dmasz;
 
-      /* Keep the same winding over multiple buffers:
-       */
-      dmasz -= (dmasz & 1);
-      currentsz -= (currentsz & 1);
+   /* Keep the same winding over multiple buffers:
+    */
+   dmasz -= (dmasz & 1);
+   currentsz -= (currentsz & 1);
 
-      for (j = 0; j + 2 < count; j += nr - 2) {
-	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_elts)( ctx, elts + start + j, nr, ALLOC_ELTS(nr) );
-	 FLUSH();
-	 currentsz = dmasz;
-      }
+   for (j = 0; j + 2 < count; j += nr - 2) {
+      nr = MIN2(currentsz, count - j);
+      TAG(emit_elts)( ctx, elts + start + j, nr, ALLOC_ELTS(nr) );
+      FLUSH();
+      currentsz = dmasz;
+   }
 }
 
 static void TAG(render_tri_fan_elts)( struct gl_context *ctx,
@@ -887,67 +883,65 @@ static void TAG(render_poly_elts)( struct gl_context *ctx,
    }
 }
 
-static void TAG(render_quad_strip_elts)( struct gl_context *ctx,
-					 GLuint start,
-					 GLuint count,
-					 GLuint flags )
+static void TAG(render_quad_strip_elts)(struct gl_context *ctx,
+                                        GLuint start,
+                                        GLuint count,
+                                        GLuint flags)
 {
-      LOCAL_VARS;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
-      GLuint j, nr;
+   LOCAL_VARS;
+   GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
+   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+   int currentsz;
+   GLuint j, nr;
 
-      FLUSH();
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
+   FLUSH();
+   currentsz = GET_CURRENT_VB_MAX_ELTS();
 
-      /* Emit whole number of quads in total, and in each buffer.
-       */
-      dmasz -= dmasz & 1;
-      count -= count & 1;
-      currentsz -= currentsz & 1;
+   /* Emit whole number of quads in total, and in each buffer.
+    */
+   dmasz -= dmasz & 1;
+   count -= count & 1;
+   currentsz -= currentsz & 1;
 
-      if (currentsz < 12)
-	 currentsz = dmasz;
+   if (currentsz < 12)
+      currentsz = dmasz;
 
-      if (ctx->Light.ShadeModel == GL_FLAT) {
-	 ELT_INIT( GL_TRIANGLES );
+   if (ctx->Light.ShadeModel == GL_FLAT) {
+      ELT_INIT(GL_TRIANGLES);
 
-	 currentsz = currentsz/6*2;
-	 dmasz = dmasz/6*2;
+      currentsz = currentsz / 6 * 2;
+      dmasz = dmasz / 6 * 2;
 
-         for (j = 0; j + 3 < count; j += nr - 2) {
-	    nr = MIN2( currentsz, count - j );
+      for (j = 0; j + 3 < count; j += nr - 2) {
+         nr = MIN2(currentsz, count - j);
 
-	    if (nr >= 4)
-	    {
-	       GLint i;
-	       GLint quads = (nr/2)-1;
-	       ELTS_VARS( ALLOC_ELTS( quads*6 ) );
+         if (nr >= 4) {
+            GLint i;
+            GLint quads = (nr / 2) - 1;
+            ELTS_VARS(ALLOC_ELTS( quads * 6));
 
-               for (i = j; i < j + quads; i++, elts += 2) {
-		  EMIT_TWO_ELTS( 0, elts[0], elts[1] );
-		  EMIT_TWO_ELTS( 2, elts[2], elts[1] );
-		  EMIT_TWO_ELTS( 4, elts[3], elts[2] );
-		  INCR_ELTS( 6 );
-	       }
+            for (i = j; i < j + quads; i++, elts += 2) {
+               EMIT_TWO_ELTS(0, elts[0], elts[1]);
+               EMIT_TWO_ELTS(2, elts[2], elts[1]);
+               EMIT_TWO_ELTS(4, elts[3], elts[2]);
+               INCR_ELTS(6);
+            }
 
-	       FLUSH();
-	    }
+            FLUSH();
+         }
 
-	    currentsz = dmasz;
-	 }
+         currentsz = dmasz;
       }
-      else {
-	 ELT_INIT( GL_TRIANGLE_STRIP );
+   } else {
+      ELT_INIT(GL_TRIANGLE_STRIP);
 
-         for (j = 0; j + 3 < count; j += nr - 2) {
-	    nr = MIN2( currentsz, count - j );
-            TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
-	    FLUSH();
-	    currentsz = dmasz;
-	 }
+      for (j = 0; j + 3 < count; j += nr - 2) {
+         nr = MIN2(currentsz, count - j);
+         TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
+         FLUSH();
+         currentsz = dmasz;
       }
+   }
 }
 
 
@@ -1056,8 +1050,6 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx,
 	 ok = !ctx->Line.StippleFlag;
 	 break;
       case GL_TRIANGLES:
-	 ok = GL_TRUE;
-	 break;
       case GL_TRIANGLE_STRIP:
 	 ok = GL_TRUE;
 	 break;
-- 
cgit v1.2.3


From 03c3208c185c29dd26446ffa5bae8f05f51d3f1b Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 14:31:09 -0700
Subject: t_dd_dmatmp: Require HAVE_TRI_FANS

Two drivers use this file, and both support triangle fans.

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 27 ++++++---------------------
 1 file changed, 6 insertions(+), 21 deletions(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index 0c63284d2cc..07e7ccc6b74 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -39,8 +39,8 @@
  * tristrips, lineloops to linestrips), or to indexed vertices.
  */
 
-#if !HAVE_TRIANGLES || !HAVE_LINES || !HAVE_LINE_STRIPS || !HAVE_TRI_STRIPS
-#error "must have lines, line strips, triangles, and triangle strips to use render template"
+#if !HAVE_TRIANGLES || !HAVE_LINES || !HAVE_LINE_STRIPS || !HAVE_TRI_STRIPS || !HAVE_TRI_FANS
+#error "must have lines, line strips, triangles, triangle fans, and triangle strips to use render template"
 #endif
 
 #if HAVE_QUAD_STRIPS || HAVE_QUADS
@@ -312,7 +312,6 @@ static void TAG(render_tri_fan_verts)( struct gl_context *ctx,
 				       GLuint count,
 				       GLuint flags )
 {
-   if (HAVE_TRI_FANS) {
       LOCAL_VARS;
       GLuint j, nr;
       int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
@@ -336,14 +335,6 @@ static void TAG(render_tri_fan_verts)( struct gl_context *ctx,
       }
 
       FLUSH();
-   }
-   else {
-      /* Could write code to emit these as indexed vertices (for the
-       * g400, for instance).
-       */
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
 }
 
 
@@ -377,7 +368,7 @@ static void TAG(render_poly_verts)( struct gl_context *ctx,
 
       FLUSH();
    }
-   else if (HAVE_TRI_FANS && ctx->Light.ShadeModel == GL_SMOOTH) {
+   else if (ctx->Light.ShadeModel == GL_SMOOTH) {
       TAG(render_tri_fan_verts)( ctx, start, count, flags );
    } else {
       fprintf(stderr, "%s - cannot draw primitive\n", __func__);
@@ -812,7 +803,6 @@ static void TAG(render_tri_fan_elts)( struct gl_context *ctx,
 				      GLuint count,
 				      GLuint flags )
 {
-   if (HAVE_TRI_FANS) {
       LOCAL_VARS;
       GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
       GLuint j, nr;
@@ -837,11 +827,6 @@ static void TAG(render_tri_fan_elts)( struct gl_context *ctx,
 	 FLUSH();
 	 currentsz = dmasz;
       }
-   } else {
-      /* TODO: try to emit as indexed triangles */
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
 }
 
 
@@ -875,7 +860,7 @@ static void TAG(render_poly_elts)( struct gl_context *ctx,
 	 FLUSH();
 	 currentsz = dmasz;
       }
-   } else if (HAVE_TRI_FANS && ctx->Light.ShadeModel == GL_SMOOTH) {
+   } else if (ctx->Light.ShadeModel == GL_SMOOTH) {
       TAG(render_tri_fan_verts)( ctx, start, count, flags );
    } else {
       fprintf(stderr, "%s - cannot draw primitive\n", __func__);
@@ -1054,14 +1039,14 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx,
 	 ok = GL_TRUE;
 	 break;
       case GL_TRIANGLE_FAN:
-	 ok = HAVE_TRI_FANS;
+	 ok = GL_TRUE;
 	 break;
       case GL_POLYGON:
 	 if (HAVE_POLYGONS) {
 	    ok = GL_TRUE;
 	 }
 	 else {
-	    ok = (HAVE_TRI_FANS && ctx->Light.ShadeModel == GL_SMOOTH);
+	    ok = (ctx->Light.ShadeModel == GL_SMOOTH);
          }
 	 break;
       case GL_QUAD_STRIP:
-- 
cgit v1.2.3


From 1f374958fdc02050e9d3c0f2dfd0b87702470fb7 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 14:35:51 -0700
Subject: t_dd_dmatmp: Indentation and formatting fixes after HAVE_TRI_FANS
 change

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 103 +++++++++++++++++++-----------------------
 1 file changed, 47 insertions(+), 56 deletions(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index 07e7ccc6b74..f4940e8c039 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -307,34 +307,33 @@ static void TAG(render_tri_strip_verts)(struct gl_context *ctx,
    FLUSH();
 }
 
-static void TAG(render_tri_fan_verts)( struct gl_context *ctx,
-				       GLuint start,
-				       GLuint count,
-				       GLuint flags )
+static void TAG(render_tri_fan_verts)(struct gl_context *ctx,
+                                      GLuint start,
+                                      GLuint count,
+                                      GLuint flags)
 {
-      LOCAL_VARS;
-      GLuint j, nr;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-      int currentsz;
+   LOCAL_VARS;
+   GLuint j, nr;
+   int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+   int currentsz;
 
-      INIT(GL_TRIANGLE_FAN);
+   INIT(GL_TRIANGLE_FAN);
 
-      currentsz = GET_CURRENT_VB_MAX_VERTS();
-      if (currentsz < 8) {
-	 currentsz = dmasz;
-      }
+   currentsz = GET_CURRENT_VB_MAX_VERTS();
+   if (currentsz < 8)
+      currentsz = dmasz;
 
-      for (j = 1; j + 1 < count; j += nr - 2) {
-	 void *tmp;
-	 nr = MIN2( currentsz, count - j + 1 );
-	 tmp = ALLOC_VERTS( nr );
-	 tmp = TAG(emit_verts)( ctx, start, 1, tmp );
-         tmp = TAG(emit_verts)( ctx, start + j, nr - 1, tmp );
-	 (void) tmp;
-	 currentsz = dmasz;
-      }
+   for (j = 1; j + 1 < count; j += nr - 2) {
+      void *tmp;
+      nr = MIN2(currentsz, count - j + 1);
+      tmp = ALLOC_VERTS(nr);
+      tmp = TAG(emit_verts)(ctx, start, 1, tmp);
+      tmp = TAG(emit_verts)(ctx, start + j, nr - 1, tmp);
+      (void) tmp;
+      currentsz = dmasz;
+   }
 
-      FLUSH();
+   FLUSH();
 }
 
 
@@ -798,35 +797,34 @@ static void TAG(render_tri_strip_elts)(struct gl_context *ctx,
    }
 }
 
-static void TAG(render_tri_fan_elts)( struct gl_context *ctx,
-				      GLuint start,
-				      GLuint count,
-				      GLuint flags )
+static void TAG(render_tri_fan_elts)(struct gl_context *ctx,
+                                     GLuint start,
+                                     GLuint count,
+                                     GLuint flags)
 {
-      LOCAL_VARS;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      GLuint j, nr;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
+   LOCAL_VARS;
+   GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
+   GLuint j, nr;
+   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
+   int currentsz;
 
-      FLUSH();
-      ELT_INIT( GL_TRIANGLE_FAN );
+   FLUSH();
+   ELT_INIT(GL_TRIANGLE_FAN);
 
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
-      if (currentsz < 8) {
-	 currentsz = dmasz;
-      }
+   currentsz = GET_CURRENT_VB_MAX_ELTS();
+   if (currentsz < 8)
+      currentsz = dmasz;
 
-      for (j = 1; j + 1 < count; j += nr - 2) {
-	 void *tmp;
-	 nr = MIN2( currentsz, count - j + 1 );
-	 tmp = ALLOC_ELTS( nr );
-	 tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
-         tmp = TAG(emit_elts)(ctx, elts + start + j, nr - 1, tmp);
-	 (void) tmp;
-	 FLUSH();
-	 currentsz = dmasz;
-      }
+   for (j = 1; j + 1 < count; j += nr - 2) {
+      void *tmp;
+      nr = MIN2(currentsz, count - j + 1);
+      tmp = ALLOC_ELTS(nr);
+      tmp = TAG(emit_elts)(ctx, elts + start, 1, tmp);
+      tmp = TAG(emit_elts)(ctx, elts + start + j, nr - 1, tmp);
+      (void) tmp;
+      FLUSH();
+      currentsz = dmasz;
+   }
 }
 
 
@@ -1036,18 +1034,11 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx,
 	 break;
       case GL_TRIANGLES:
       case GL_TRIANGLE_STRIP:
-	 ok = GL_TRUE;
-	 break;
       case GL_TRIANGLE_FAN:
 	 ok = GL_TRUE;
 	 break;
       case GL_POLYGON:
-	 if (HAVE_POLYGONS) {
-	    ok = GL_TRUE;
-	 }
-	 else {
-	    ok = (ctx->Light.ShadeModel == GL_SMOOTH);
-         }
+         ok = (HAVE_POLYGONS) || ctx->Light.ShadeModel == GL_SMOOTH;
 	 break;
       case GL_QUAD_STRIP:
 	 if (VB->Elts) {
-- 
cgit v1.2.3


From 25b42f13bd762119eb1dae565e999c1bd52f7c2d Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 16:57:32 -0700
Subject: t_dd_dmatmp: Remove HAVE_ELTS support

Two drivers use this file, and neither supports ELTs.

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 588 +-----------------------------------------
 1 file changed, 4 insertions(+), 584 deletions(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index f4940e8c039..f55cfe87bd6 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -43,29 +43,8 @@
 #error "must have lines, line strips, triangles, triangle fans, and triangle strips to use render template"
 #endif
 
-#if HAVE_QUAD_STRIPS || HAVE_QUADS
-#error "quads and quad strips not supported by render template"
-#endif
-
-#if !HAVE_ELTS
-#define ELTS_VARS(buf)
-#define ALLOC_ELTS(nr) 0
-#define EMIT_ELT( offset, elt )
-#define EMIT_TWO_ELTS( offset, elt0, elt1 )
-#define INCR_ELTS( nr )
-#define ELT_INIT(prim)
-#define GET_CURRENT_VB_MAX_ELTS() 0
-#define GET_SUBSEQUENT_VB_MAX_ELTS() 0
-#define RELEASE_ELT_VERTS()
-#define EMIT_INDEXED_VERTS( ctx, start, count )
-#endif
-
-#ifndef EMIT_TWO_ELTS
-#define EMIT_TWO_ELTS( offset, elt0, elt1 )	\
-do { 						\
-   EMIT_ELT( offset, elt0 ); 			\
-   EMIT_ELT( offset+1, elt1 ); 			\
-} while (0)
+#if HAVE_QUAD_STRIPS || HAVE_QUADS || HAVE_ELTS
+#error "ELTs, quads, and quad strips not supported by render template"
 #endif
 
 
@@ -73,31 +52,6 @@ do { 						\
 /*                  Render whole begin/end objects                    */
 /**********************************************************************/
 
-
-
-
-#if (HAVE_ELTS)
-static void *TAG(emit_elts)( struct gl_context *ctx, GLuint *elts, GLuint nr,
-			     void *buf)
-{
-   GLint i;
-   LOCAL_VARS;
-   ELTS_VARS(buf);
-
-   for ( i = 0 ; i+1 < nr ; i+=2, elts += 2 ) {
-      EMIT_TWO_ELTS( 0, elts[0], elts[1] );
-      INCR_ELTS( 2 );
-   }
-   
-   if (i < nr) {
-      EMIT_ELT( 0, elts[0] );
-      INCR_ELTS( 1 );
-   }
-
-   return (void *)ELTPTR;
-}
-#endif
-
 static __inline void *TAG(emit_verts)( struct gl_context *ctx, GLuint start, 
 				     GLuint count, void *buf )
 {
@@ -384,61 +338,11 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
 
    if (ctx->Light.ShadeModel == GL_FLAT &&
        TNL_CONTEXT(ctx)->vb.AttribPtr[_TNL_ATTRIB_COLOR0]->stride) {
-      if (HAVE_ELTS) {
-	 LOCAL_VARS;
-	 int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-	 int currentsz;
-	 GLuint j, nr;
-
-         EMIT_INDEXED_VERTS( ctx, start, count );
-
-	 /* Simulate flat-shaded quadstrips using indexed vertices:
-	  */
-	 ELT_INIT( GL_TRIANGLES );
-
-	 currentsz = GET_CURRENT_VB_MAX_ELTS();
-
-	 /* Emit whole number of quads in total, and in each buffer.
-	  */
-	 dmasz -= dmasz & 1;
-	 count -= count & 1;
-	 currentsz -= currentsz & 1;
-
-	 if (currentsz < 12)
-	    currentsz = dmasz;
-
-	 currentsz = currentsz/6*2;
-	 dmasz = dmasz/6*2;
-
-	 for (j = 0; j + 3 < count; j += nr - 2) {
-	    nr = MIN2( currentsz, count - j );
-	    if (nr >= 4) {
-	       GLint quads = (nr/2)-1;
-	       GLint i;
-	       ELTS_VARS( ALLOC_ELTS( quads*6 ) );
-
-               for (i = j; i < j + quads * 2; i += 2) {
-		  EMIT_TWO_ELTS( 0, (i+0), (i+1) );
-		  EMIT_TWO_ELTS( 2, (i+2), (i+1) );
-		  EMIT_TWO_ELTS( 4, (i+3), (i+2) );
-		  INCR_ELTS( 6 );
-	       }
-
-	       FLUSH();
-	    }
-	    currentsz = dmasz;
-	 }
-
-	 RELEASE_ELT_VERTS();
-	 FLUSH();
-      }
-      else {
 	 /* Vertices won't fit in a single buffer or elts not
 	  * available - should never happen.
 	  */
         fprintf(stderr, "%s - cannot draw primitive\n", __func__);
 	 return;
-      }
    } else {
       LOCAL_VARS;
       int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
@@ -478,57 +382,7 @@ static void TAG(render_quads_verts)(struct gl_context *ctx,
    /* Emit whole number of quads in total. */
    count -= count & 3;
 
-   if (HAVE_ELTS) {
-      /* Hardware doesn't have a quad primitive type -- try to
-       * simulate it using indexed vertices and the triangle
-       * primitive:
-       */
-      LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
-      GLuint j, nr;
-
-      EMIT_INDEXED_VERTS( ctx, start, count );
-
-      FLUSH();
-      ELT_INIT( GL_TRIANGLES );
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
-
-      /* Emit whole number of quads in total, and in each buffer.
-       */
-      dmasz -= dmasz & 3;
-      currentsz -= currentsz & 3;
-
-      /* Adjust for rendering as triangles:
-       */
-      currentsz = currentsz/6*4;
-      dmasz = dmasz/6*4;
-
-      if (currentsz < 8)
-	 currentsz = dmasz;
-
-      for (j = 0; j < count; j += nr ) {
-	 nr = MIN2( currentsz, count - j );
-	 if (nr >= 4) {
-	    GLint quads = nr/4;
-	    GLint i;
-	    ELTS_VARS( ALLOC_ELTS( quads*6 ) );
-
-            for (i = j; i < j + quads * 4; i += 4) {
-	       EMIT_TWO_ELTS( 0, (i+0), (i+1) );
-	       EMIT_TWO_ELTS( 2, (i+3), (i+1) );
-	       EMIT_TWO_ELTS( 4, (i+2), (i+3) );
-	       INCR_ELTS( 6 );
-	    }
-
-	    FLUSH();
-	 }
-	 currentsz = dmasz;
-      }
-
-      RELEASE_ELT_VERTS();
-   }
-   else {
+   {
       /* Hardware doesn't have a quad primitive type -- try to
        * simulate it using triangle primitive.  This is a win for
        * gears, but is it useful in the broader world?
@@ -577,430 +431,6 @@ static tnl_render_func TAG(render_tab_verts)[GL_POLYGON+2] =
    TAG(render_noop),
 };
 
-
-/****************************************************************************
- *                 Render elts using hardware indexed verts                 *
- ****************************************************************************/
-
-#if (HAVE_ELTS)
-static void TAG(render_points_elts)( struct gl_context *ctx,
-				     GLuint start,
-				     GLuint count,
-				     GLuint flags )
-{
-   if (HAVE_POINTS) {
-      LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      GLuint j, nr;
-
-      ELT_INIT( GL_POINTS );
-
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
-      if (currentsz < 8)
-	 currentsz = dmasz;
-
-      for (j = 0; j < count; j += nr ) {
-	 nr = MIN2( currentsz, count - j );
-         TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
-	 FLUSH();
-	 currentsz = dmasz;
-      }
-   } else {
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
-}
-
-
-
-static void TAG(render_lines_elts)(struct gl_context *ctx,
-                                   GLuint start,
-                                   GLuint count,
-                                   GLuint flags)
-{
-   LOCAL_VARS;
-   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-   int currentsz;
-   GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-   GLuint j, nr;
-
-   ELT_INIT(GL_LINES);
-
-   /* Emit whole number of lines in total and in each buffer:
-    */
-   count -= count & 1;
-   currentsz -= currentsz & 1;
-   dmasz -= dmasz & 1;
-
-   currentsz = GET_CURRENT_VB_MAX_ELTS();
-   if (currentsz < 8)
-      currentsz = dmasz;
-
-   for (j = 0; j < count; j += nr ) {
-      nr = MIN2(currentsz, count - j);
-      TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
-      FLUSH();
-      currentsz = dmasz;
-   }
-}
-
-
-static void TAG(render_line_strip_elts)(struct gl_context *ctx,
-                                        GLuint start,
-                                        GLuint count,
-                                        GLuint flags)
-{
-   LOCAL_VARS;
-   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-   int currentsz;
-   GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-   GLuint j, nr;
-
-   FLUSH(); /* always a new primitive */
-   ELT_INIT(GL_LINE_STRIP);
-
-   currentsz = GET_CURRENT_VB_MAX_ELTS();
-   if (currentsz < 8)
-      currentsz = dmasz;
-
-   for (j = 0; j + 1 < count; j += nr - 1) {
-      nr = MIN2(currentsz, count - j);
-      TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
-      FLUSH();
-      currentsz = dmasz;
-   }
-}
-
-
-static void TAG(render_line_loop_elts)(struct gl_context *ctx,
-                                       GLuint start,
-                                       GLuint count,
-                                       GLuint flags)
-{
-   LOCAL_VARS;
-   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-   int currentsz;
-   GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-   GLuint j, nr;
-
-   FLUSH();
-   ELT_INIT(GL_LINE_STRIP);
-
-   j = (flags & PRIM_BEGIN) ? 0 : 1;
-
-   currentsz = GET_CURRENT_VB_MAX_ELTS();
-   if (currentsz < 8)
-      currentsz = dmasz;
-
-   /* Ensure last vertex doesn't wrap:
-    */
-   currentsz--;
-   dmasz--;
-
-   if (j + 1 < count) {
-      for (/* empty */; j + 1 < count; j += nr - 1) {
-         nr = MIN2(currentsz, count - j);
-
-         if (j + nr >= count &&
-             count > 1 &&
-             (flags & PRIM_END)) {
-            void *tmp;
-            tmp = ALLOC_ELTS(nr+1);
-            tmp = TAG(emit_elts)(ctx, elts + start + j, nr, tmp);
-            tmp = TAG(emit_elts)(ctx, elts + start, 1, tmp);
-            (void) tmp;
-         } else {
-            TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
-            currentsz = dmasz;
-         }
-      }
-   } else if (count > 1 && (flags & PRIM_END)) {
-      void *tmp;
-      tmp = ALLOC_ELTS(2);
-      tmp = TAG(emit_elts)( ctx, elts+start+1, 1, tmp );
-      tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
-      (void) tmp;
-   }
-
-   FLUSH();
-}
-
-
-/* For verts, we still eliminate the copy from main memory to dma
- * buffers.  For elts, this is probably no better (worse?) than the
- * standard path.
- */
-static void TAG(render_triangles_elts)( struct gl_context *ctx,
-					GLuint start,
-					GLuint count,
-					GLuint flags )
-{
-   LOCAL_VARS;
-   GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS()/3*3;
-   int currentsz;
-   GLuint j, nr;
-
-   FLUSH();
-   ELT_INIT( GL_TRIANGLES );
-
-   currentsz = GET_CURRENT_VB_MAX_ELTS();
-
-   /* Emit whole number of tris in total.  dmasz is already a multiple
-    * of 3.
-    */
-   count -= count % 3;
-   currentsz -= currentsz%3;
-   if (currentsz < 8)
-      currentsz = dmasz;
-
-   for (j = 0; j < count; j += nr) {
-      nr = MIN2( currentsz, count - j );
-      TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
-      FLUSH();
-      currentsz = dmasz;
-   }
-}
-
-
-
-static void TAG(render_tri_strip_elts)(struct gl_context *ctx,
-                                       GLuint start,
-                                       GLuint count,
-                                       GLuint flags)
-{
-   LOCAL_VARS;
-   GLuint j, nr;
-   GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-   int currentsz;
-
-   FLUSH();
-   ELT_INIT(GL_TRIANGLE_STRIP);
-
-   currentsz = GET_CURRENT_VB_MAX_ELTS();
-   if (currentsz < 8)
-      currentsz = dmasz;
-
-   /* Keep the same winding over multiple buffers:
-    */
-   dmasz -= (dmasz & 1);
-   currentsz -= (currentsz & 1);
-
-   for (j = 0; j + 2 < count; j += nr - 2) {
-      nr = MIN2(currentsz, count - j);
-      TAG(emit_elts)( ctx, elts + start + j, nr, ALLOC_ELTS(nr) );
-      FLUSH();
-      currentsz = dmasz;
-   }
-}
-
-static void TAG(render_tri_fan_elts)(struct gl_context *ctx,
-                                     GLuint start,
-                                     GLuint count,
-                                     GLuint flags)
-{
-   LOCAL_VARS;
-   GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-   GLuint j, nr;
-   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-   int currentsz;
-
-   FLUSH();
-   ELT_INIT(GL_TRIANGLE_FAN);
-
-   currentsz = GET_CURRENT_VB_MAX_ELTS();
-   if (currentsz < 8)
-      currentsz = dmasz;
-
-   for (j = 1; j + 1 < count; j += nr - 2) {
-      void *tmp;
-      nr = MIN2(currentsz, count - j + 1);
-      tmp = ALLOC_ELTS(nr);
-      tmp = TAG(emit_elts)(ctx, elts + start, 1, tmp);
-      tmp = TAG(emit_elts)(ctx, elts + start + j, nr - 1, tmp);
-      (void) tmp;
-      FLUSH();
-      currentsz = dmasz;
-   }
-}
-
-
-static void TAG(render_poly_elts)( struct gl_context *ctx,
-				   GLuint start,
-				   GLuint count,
-				   GLuint flags )
-{
-   if (HAVE_POLYGONS) {
-      LOCAL_VARS;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      GLuint j, nr;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
-
-      FLUSH();
-      ELT_INIT( GL_POLYGON );
-
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
-      if (currentsz < 8) {
-	 currentsz = dmasz;
-      }
-
-      for (j = 1 ; j + 1 < count; j += nr - 2) {
-	 void *tmp;
-	 nr = MIN2( currentsz, count - j + 1 );
-	 tmp = ALLOC_ELTS( nr );
-	 tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
-         tmp = TAG(emit_elts)(ctx, elts + start + j, nr - 1, tmp);
-	 (void) tmp;
-	 FLUSH();
-	 currentsz = dmasz;
-      }
-   } else if (ctx->Light.ShadeModel == GL_SMOOTH) {
-      TAG(render_tri_fan_verts)( ctx, start, count, flags );
-   } else {
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
-}
-
-static void TAG(render_quad_strip_elts)(struct gl_context *ctx,
-                                        GLuint start,
-                                        GLuint count,
-                                        GLuint flags)
-{
-   LOCAL_VARS;
-   GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-   int currentsz;
-   GLuint j, nr;
-
-   FLUSH();
-   currentsz = GET_CURRENT_VB_MAX_ELTS();
-
-   /* Emit whole number of quads in total, and in each buffer.
-    */
-   dmasz -= dmasz & 1;
-   count -= count & 1;
-   currentsz -= currentsz & 1;
-
-   if (currentsz < 12)
-      currentsz = dmasz;
-
-   if (ctx->Light.ShadeModel == GL_FLAT) {
-      ELT_INIT(GL_TRIANGLES);
-
-      currentsz = currentsz / 6 * 2;
-      dmasz = dmasz / 6 * 2;
-
-      for (j = 0; j + 3 < count; j += nr - 2) {
-         nr = MIN2(currentsz, count - j);
-
-         if (nr >= 4) {
-            GLint i;
-            GLint quads = (nr / 2) - 1;
-            ELTS_VARS(ALLOC_ELTS( quads * 6));
-
-            for (i = j; i < j + quads; i++, elts += 2) {
-               EMIT_TWO_ELTS(0, elts[0], elts[1]);
-               EMIT_TWO_ELTS(2, elts[2], elts[1]);
-               EMIT_TWO_ELTS(4, elts[3], elts[2]);
-               INCR_ELTS(6);
-            }
-
-            FLUSH();
-         }
-
-         currentsz = dmasz;
-      }
-   } else {
-      ELT_INIT(GL_TRIANGLE_STRIP);
-
-      for (j = 0; j + 3 < count; j += nr - 2) {
-         nr = MIN2(currentsz, count - j);
-         TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
-         FLUSH();
-         currentsz = dmasz;
-      }
-   }
-}
-
-
-static void TAG(render_quads_elts)(struct gl_context *ctx,
-                                   GLuint start,
-                                   GLuint count,
-                                   GLuint flags)
-{
-   LOCAL_VARS;
-   GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-   int currentsz;
-   GLuint j, nr;
-
-   ELT_INIT(GL_TRIANGLES);
-   currentsz = GET_CURRENT_VB_MAX_ELTS();
-
-   /* Emit whole number of quads in total, and in each buffer.
-    */
-   count -= count & 3;
-   dmasz -= dmasz & 3;
-   currentsz -= currentsz & 3;
-
-   /* Adjust for rendering as triangles:
-    */
-   currentsz = currentsz / 6 * 4;
-   dmasz = dmasz / 6 * 4;
-
-   if (currentsz < 8)
-      currentsz = dmasz;
-
-   for (j = 0; j + 3 < count; j += nr - 2) {
-      nr = MIN2(currentsz, count - j);
-
-      if (nr >= 4) {
-         GLint quads = nr / 4;
-         GLint i;
-         ELTS_VARS(ALLOC_ELTS(quads * 6));
-
-         for (i = j; i < j + quads; i++, elts += 4) {
-            EMIT_TWO_ELTS(0, elts[0], elts[1]);
-            EMIT_TWO_ELTS(2, elts[3], elts[1]);
-            EMIT_TWO_ELTS(4, elts[2], elts[3]);
-            INCR_ELTS(6);
-         }
-
-         FLUSH();
-      }
-
-      currentsz = dmasz;
-   }
-}
-
-
-
-static tnl_render_func TAG(render_tab_elts)[GL_POLYGON+2] =
-{
-   TAG(render_points_elts),
-   TAG(render_lines_elts),
-   TAG(render_line_loop_elts),
-   TAG(render_line_strip_elts),
-   TAG(render_triangles_elts),
-   TAG(render_tri_strip_elts),
-   TAG(render_tri_fan_elts),
-   TAG(render_quads_elts),
-   TAG(render_quad_strip_elts),
-   TAG(render_poly_elts),
-   TAG(render_noop),
-};
-
-
-
-#endif
-
-
-
 /* Pre-check the primitives in the VB to prevent the need for
  * fallbacks later on.
  */
@@ -1012,7 +442,7 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx,
    if (VB->ClipOrMask & ~CLIP_CULL_BIT)
       return GL_FALSE;
 
-   if (VB->Elts && !HAVE_ELTS)
+   if (VB->Elts)
       return GL_FALSE;
 
    for (i = 0 ; i < VB->PrimitiveCount ; i++) {
@@ -1045,23 +475,13 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx,
 	    ok = GL_TRUE;
 	 } else if (ctx->Light.ShadeModel == GL_FLAT &&
 		    VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride != 0) {
-	    if (HAVE_ELTS) {
-	       ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS();
-	    }
-	    else {
 	       ok = GL_FALSE;
-	    }
 	 }
 	 else 
 	    ok = GL_TRUE;
 	 break;
       case GL_QUADS:
-         if (HAVE_ELTS) {
-	    ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS();
-	 }
-	 else {
 	    ok = GL_TRUE; /* flatshading is ok. */
-	 }
 	 break;
       default:
 	 break;
-- 
cgit v1.2.3


From 57ae5c237d65b7283964f61779fcb07096da8b5b Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 17:04:33 -0700
Subject: t_dd_dmatmp: Indentation and formatting fixes after HAVE_ELTS change

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 69 +++++++++++++++++++------------------------
 1 file changed, 31 insertions(+), 38 deletions(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index f55cfe87bd6..b33db7085c4 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -329,20 +329,20 @@ static void TAG(render_poly_verts)( struct gl_context *ctx,
    }
 }
 
-static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
-					  GLuint start,
-					  GLuint count,
-					  GLuint flags )
+static void TAG(render_quad_strip_verts)(struct gl_context *ctx,
+                                         GLuint start,
+                                         GLuint count,
+                                         GLuint flags)
 {
    GLuint j, nr;
 
    if (ctx->Light.ShadeModel == GL_FLAT &&
        TNL_CONTEXT(ctx)->vb.AttribPtr[_TNL_ATTRIB_COLOR0]->stride) {
-	 /* Vertices won't fit in a single buffer or elts not
-	  * available - should never happen.
-	  */
-        fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-	 return;
+      /* Vertices won't fit in a single buffer or elts not available - should
+       * never happen.
+       */
+      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
+      return;
    } else {
       LOCAL_VARS;
       int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
@@ -379,30 +379,28 @@ static void TAG(render_quads_verts)(struct gl_context *ctx,
                                     GLuint count,
                                     GLuint flags)
 {
+   LOCAL_VARS;
+   GLuint j;
+
    /* Emit whole number of quads in total. */
    count -= count & 3;
 
-   {
-      /* Hardware doesn't have a quad primitive type -- try to
-       * simulate it using triangle primitive.  This is a win for
-       * gears, but is it useful in the broader world?
+   /* Hardware doesn't have a quad primitive type -- try to simulate it using
+    * triangle primitive.  This is a win for gears, but is it useful in the
+    * broader world?
+    */
+   INIT(GL_TRIANGLES);
+
+   for (j = 0; j + 3 < count; j += 4) {
+      void *tmp = ALLOC_VERTS(6);
+      /* Send v0, v1, v3
        */
-      LOCAL_VARS;
-      GLuint j;
-
-      INIT(GL_TRIANGLES);
-
-      for (j = 0; j + 3 < count; j += 4) {
-	 void *tmp = ALLOC_VERTS( 6 );
-	 /* Send v0, v1, v3
-	  */
-	 tmp = EMIT_VERTS(ctx, start + j,     2, tmp);
-	 tmp = EMIT_VERTS(ctx, start + j + 3, 1, tmp);
-	 /* Send v1, v2, v3
-	  */
-	 tmp = EMIT_VERTS(ctx, start + j + 1, 3, tmp);
-	 (void) tmp;
-      }
+      tmp = EMIT_VERTS(ctx, start + j,     2, tmp);
+      tmp = EMIT_VERTS(ctx, start + j + 3, 1, tmp);
+      /* Send v1, v2, v3
+       */
+      tmp = EMIT_VERTS(ctx, start + j + 1, 3, tmp);
+      (void) tmp;
    }
 }
 
@@ -471,17 +469,12 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx,
          ok = (HAVE_POLYGONS) || ctx->Light.ShadeModel == GL_SMOOTH;
 	 break;
       case GL_QUAD_STRIP:
-	 if (VB->Elts) {
-	    ok = GL_TRUE;
-	 } else if (ctx->Light.ShadeModel == GL_FLAT &&
-		    VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride != 0) {
-	       ok = GL_FALSE;
-	 }
-	 else 
-	    ok = GL_TRUE;
+         ok = VB->Elts ||
+              (ctx->Light.ShadeModel != GL_FLAT ||
+               VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride == 0);
 	 break;
       case GL_QUADS:
-	    ok = GL_TRUE; /* flatshading is ok. */
+         ok = GL_TRUE; /* flatshading is ok. */
 	 break;
       default:
 	 break;
-- 
cgit v1.2.3


From b7259fc6b0c3a94d2ddbea74470e551c877fc324 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 17:10:05 -0700
Subject: t_dd_dmatmp: General indentation and formatting fixes

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 111 +++++++++++++++++++++---------------------
 1 file changed, 55 insertions(+), 56 deletions(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index b33db7085c4..036995c8ca5 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -52,8 +52,8 @@
 /*                  Render whole begin/end objects                    */
 /**********************************************************************/
 
-static __inline void *TAG(emit_verts)( struct gl_context *ctx, GLuint start, 
-				     GLuint count, void *buf )
+static inline void *TAG(emit_verts)(struct gl_context *ctx, GLuint start,
+                                    GLuint count, void *buf)
 {
    return EMIT_VERTS(ctx, start, count, buf);
 }
@@ -62,10 +62,10 @@ static __inline void *TAG(emit_verts)( struct gl_context *ctx, GLuint start,
  *                    Render non-indexed primitives.
  ***********************************************************************/
 
-static void TAG(render_points_verts)( struct gl_context *ctx,
-				      GLuint start,
-				      GLuint count,
-				      GLuint flags )
+static void TAG(render_points_verts)(struct gl_context *ctx,
+                                     GLuint start,
+                                     GLuint count,
+                                     GLuint flags)
 {
    if (HAVE_POINTS) {
       LOCAL_VARS;
@@ -73,18 +73,17 @@ static void TAG(render_points_verts)( struct gl_context *ctx,
       int currentsz;
       GLuint j, nr;
 
-      INIT( GL_POINTS );
+      INIT(GL_POINTS);
 
       currentsz = GET_CURRENT_VB_MAX_VERTS();
       if (currentsz < 8)
-	 currentsz = dmasz;
+         currentsz = dmasz;
 
       for (j = 0; j < count; j += nr) {
-	 nr = MIN2( currentsz, count - j );
+         nr = MIN2(currentsz, count - j);
          TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
-	 currentsz = dmasz;
+         currentsz = dmasz;
       }
-
    } else {
       fprintf(stderr, "%s - cannot draw primitive\n", __func__);
       return;
@@ -199,19 +198,19 @@ static void TAG(render_line_loop_verts)(struct gl_context *ctx,
 }
 
 
-static void TAG(render_triangles_verts)( struct gl_context *ctx,
-					 GLuint start,
-					 GLuint count,
-					 GLuint flags )
+static void TAG(render_triangles_verts)(struct gl_context *ctx,
+                                        GLuint start,
+                                        GLuint count,
+                                        GLuint flags)
 {
    LOCAL_VARS;
-   int dmasz = (GET_SUBSEQUENT_VB_MAX_VERTS()/3) * 3;
+   int dmasz = (GET_SUBSEQUENT_VB_MAX_VERTS() / 3) * 3;
    int currentsz;
    GLuint j, nr;
 
    INIT(GL_TRIANGLES);
 
-   currentsz = (GET_CURRENT_VB_MAX_VERTS()/3) * 3;
+   currentsz = (GET_CURRENT_VB_MAX_VERTS() / 3) * 3;
 
    /* Emit whole number of tris in total.  dmasz is already a multiple
     * of 3.
@@ -222,7 +221,7 @@ static void TAG(render_triangles_verts)( struct gl_context *ctx,
       currentsz = dmasz;
 
    for (j = 0; j < count; j += nr) {
-      nr = MIN2( currentsz, count - j );
+      nr = MIN2(currentsz, count - j);
       TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
       currentsz = dmasz;
    }
@@ -291,10 +290,10 @@ static void TAG(render_tri_fan_verts)(struct gl_context *ctx,
 }
 
 
-static void TAG(render_poly_verts)( struct gl_context *ctx,
-				    GLuint start,
-				    GLuint count,
-				    GLuint flags )
+static void TAG(render_poly_verts)(struct gl_context *ctx,
+                                   GLuint start,
+                                   GLuint count,
+                                   GLuint flags)
 {
    if (HAVE_POLYGONS) {
       LOCAL_VARS;
@@ -306,22 +305,21 @@ static void TAG(render_poly_verts)( struct gl_context *ctx,
 
       currentsz = GET_CURRENT_VB_MAX_VERTS();
       if (currentsz < 8) {
-	 currentsz = dmasz;
+         currentsz = dmasz;
       }
 
-      for (j = 1 ; j + 1 < count ; j += nr - 2 ) {
-	 void *tmp;
-	 nr = MIN2( currentsz, count - j + 1 );
-	 tmp = ALLOC_VERTS( nr );
-	 tmp = TAG(emit_verts)( ctx, start, 1, tmp );
+      for (j = 1; j + 1 < count; j += nr - 2) {
+         void *tmp;
+         nr = MIN2(currentsz, count - j + 1);
+         tmp = ALLOC_VERTS(nr);
+         tmp = TAG(emit_verts)(ctx, start, 1, tmp);
          tmp = TAG(emit_verts)(ctx, start + j, nr - 1, tmp);
-	 (void) tmp;
-	 currentsz = dmasz;
+         (void) tmp;
+         currentsz = dmasz;
       }
 
       FLUSH();
-   }
-   else if (ctx->Light.ShadeModel == GL_SMOOTH) {
+   } else if (ctx->Light.ShadeModel == GL_SMOOTH) {
       TAG(render_tri_fan_verts)( ctx, start, count, flags );
    } else {
       fprintf(stderr, "%s - cannot draw primitive\n", __func__);
@@ -361,12 +359,12 @@ static void TAG(render_quad_strip_verts)(struct gl_context *ctx,
       count -= count & 1;
 
       if (currentsz < 8)
-	 currentsz = dmasz;
+         currentsz = dmasz;
 
       for (j = 0; j + 3 < count; j += nr - 2) {
          nr = MIN2(currentsz, count - j);
          TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
-	 currentsz = dmasz;
+         currentsz = dmasz;
       }
 
       FLUSH();
@@ -404,16 +402,17 @@ static void TAG(render_quads_verts)(struct gl_context *ctx,
    }
 }
 
-static void TAG(render_noop)( struct gl_context *ctx,
-			      GLuint start,
-			      GLuint count,
-			      GLuint flags )
+static void TAG(render_noop)(struct gl_context *ctx,
+                             GLuint start,
+                             GLuint count,
+                             GLuint flags)
 {
+   (void) ctx;
+   (void) start;
+   (void) count;
+   (void) flags;
 }
 
-
-
-
 static tnl_render_func TAG(render_tab_verts)[GL_POLYGON+2] =
 {
    TAG(render_points_verts),
@@ -432,8 +431,8 @@ static tnl_render_func TAG(render_tab_verts)[GL_POLYGON+2] =
 /* Pre-check the primitives in the VB to prevent the need for
  * fallbacks later on.
  */
-static GLboolean TAG(validate_render)( struct gl_context *ctx,
-				       struct vertex_buffer *VB )
+static GLboolean TAG(validate_render)(struct gl_context *ctx,
+                                      struct vertex_buffer *VB)
 {
    GLint i;
 
@@ -449,40 +448,40 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx,
       GLboolean ok = GL_FALSE;
 
       if (!count)
-	 continue;
+         continue;
 
       switch (prim & PRIM_MODE_MASK) {
       case GL_POINTS:
-	 ok = HAVE_POINTS;
-	 break;
+         ok = HAVE_POINTS;
+         break;
       case GL_LINES:
       case GL_LINE_STRIP:
       case GL_LINE_LOOP:
-	 ok = !ctx->Line.StippleFlag;
-	 break;
+         ok = !ctx->Line.StippleFlag;
+         break;
       case GL_TRIANGLES:
       case GL_TRIANGLE_STRIP:
       case GL_TRIANGLE_FAN:
-	 ok = GL_TRUE;
-	 break;
+         ok = GL_TRUE;
+         break;
       case GL_POLYGON:
          ok = (HAVE_POLYGONS) || ctx->Light.ShadeModel == GL_SMOOTH;
-	 break;
+         break;
       case GL_QUAD_STRIP:
          ok = VB->Elts ||
               (ctx->Light.ShadeModel != GL_FLAT ||
                VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride == 0);
-	 break;
+         break;
       case GL_QUADS:
          ok = GL_TRUE; /* flatshading is ok. */
-	 break;
+         break;
       default:
-	 break;
+         break;
       }
       
       if (!ok) {
-/* 	 fprintf(stderr, "not ok %s\n", _mesa_enum_to_string(prim & PRIM_MODE_MASK)); */
-	 return GL_FALSE;
+/*          fprintf(stderr, "not ok %s\n", _mesa_enum_to_string(prim & PRIM_MODE_MASK)); */
+         return GL_FALSE;
       }
    }
 
-- 
cgit v1.2.3


From d663d8f5d4088407af982577be075fe7e79d4e3d Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 17:12:06 -0700
Subject: t_dd_dmatmp: Use stdbool.h

No piglit regressions on i915 (G33) or radeon (Radeon 7500).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index 036995c8ca5..a07393da303 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -24,7 +24,7 @@
  * Authors:
  *    Keith Whitwell <keithw@vmware.com>
  */
-
+#include <stdbool.h>
 
 /**
  * \file t_dd_dmatmp.h
@@ -431,21 +431,21 @@ static tnl_render_func TAG(render_tab_verts)[GL_POLYGON+2] =
 /* Pre-check the primitives in the VB to prevent the need for
  * fallbacks later on.
  */
-static GLboolean TAG(validate_render)(struct gl_context *ctx,
-                                      struct vertex_buffer *VB)
+static bool TAG(validate_render)(struct gl_context *ctx,
+                                 struct vertex_buffer *VB)
 {
    GLint i;
 
    if (VB->ClipOrMask & ~CLIP_CULL_BIT)
-      return GL_FALSE;
+      return false;
 
    if (VB->Elts)
-      return GL_FALSE;
+      return false;
 
    for (i = 0 ; i < VB->PrimitiveCount ; i++) {
       GLuint prim = VB->Primitive[i].mode;
       GLuint count = VB->Primitive[i].count;
-      GLboolean ok = GL_FALSE;
+      bool ok = false;
 
       if (!count)
          continue;
@@ -462,7 +462,7 @@ static GLboolean TAG(validate_render)(struct gl_context *ctx,
       case GL_TRIANGLES:
       case GL_TRIANGLE_STRIP:
       case GL_TRIANGLE_FAN:
-         ok = GL_TRUE;
+         ok = true;
          break;
       case GL_POLYGON:
          ok = (HAVE_POLYGONS) || ctx->Light.ShadeModel == GL_SMOOTH;
@@ -473,7 +473,7 @@ static GLboolean TAG(validate_render)(struct gl_context *ctx,
                VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride == 0);
          break;
       case GL_QUADS:
-         ok = GL_TRUE; /* flatshading is ok. */
+         ok = true; /* flatshading is ok. */
          break;
       default:
          break;
@@ -481,10 +481,10 @@ static GLboolean TAG(validate_render)(struct gl_context *ctx,
       
       if (!ok) {
 /*          fprintf(stderr, "not ok %s\n", _mesa_enum_to_string(prim & PRIM_MODE_MASK)); */
-         return GL_FALSE;
+         return false;
       }
    }
 
-   return GL_TRUE;
+   return true;
 }
 
-- 
cgit v1.2.3


From 8e9968f184dd1f9727d1095aa45936d5c6ec9f16 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 17:26:10 -0700
Subject: t_dd_dmatmp: Silence comparison between signed and unsigned integer
 expression warnings

../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:83:28: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
          nr = MIN2(currentsz, count - j);
                            ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:83:55: warning: signed and unsigned type in conditional expression [-Wsign-compare]
          nr = MIN2(currentsz, count - j);
                                                       ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:116:25: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       nr = MIN2(currentsz, count - j);
                         ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:116:52: warning: signed and unsigned type in conditional expression [-Wsign-compare]
       nr = MIN2(currentsz, count - j);
                                                    ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:140:25: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       nr = MIN2(currentsz, count - j);
                         ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:140:52: warning: signed and unsigned type in conditional expression [-Wsign-compare]
       nr = MIN2(currentsz, count - j);
                                                    ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h: In function 'intel_render_line_loop_verts':
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:174:28: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
          nr = MIN2(currentsz, count - j);
                            ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:174:55: warning: signed and unsigned type in conditional expression [-Wsign-compare]
          nr = MIN2(currentsz, count - j);
                                                       ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:224:25: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       nr = MIN2(currentsz, count - j);
                         ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:224:52: warning: signed and unsigned type in conditional expression [-Wsign-compare]
       nr = MIN2(currentsz, count - j);
                                                    ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:255:25: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       nr = MIN2(currentsz, count - j);
                         ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:255:52: warning: signed and unsigned type in conditional expression [-Wsign-compare]
       nr = MIN2(currentsz, count - j);
                                                    ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:281:25: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       nr = MIN2(currentsz, count - j + 1);
                         ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:281:56: warning: signed and unsigned type in conditional expression [-Wsign-compare]
       nr = MIN2(currentsz, count - j + 1);
                                                        ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h: In function 'intel_render_poly_verts':
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:313:28: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
          nr = MIN2(currentsz, count - j + 1);
                            ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:313:59: warning: signed and unsigned type in conditional expression [-Wsign-compare]
          nr = MIN2(currentsz, count - j + 1);
                                                           ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:365:28: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
          nr = MIN2(currentsz, count - nr);
                            ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:365:56: warning: signed and unsigned type in conditional expression [-Wsign-compare]
          nr = MIN2(currentsz, count - nr);
                                                        ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:83:28: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
          nr = MIN2(currentsz, count - j);
                            ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:83:55: warning: signed and unsigned type in conditional expression [-Wsign-compare]
          nr = MIN2(currentsz, count - j);
                                                       ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:116:25: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       nr = MIN2(currentsz, count - j);
                         ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:116:52: warning: signed and unsigned type in conditional expression [-Wsign-compare]
       nr = MIN2(currentsz, count - j);
                                                    ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:140:25: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       nr = MIN2(currentsz, count - j);
                         ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:140:52: warning: signed and unsigned type in conditional expression [-Wsign-compare]
       nr = MIN2(currentsz, count - j);
                                                    ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h: In function 'radeon_dma_render_line_loop_verts':
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:174:28: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
          nr = MIN2(currentsz, count - j);
                            ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:174:55: warning: signed and unsigned type in conditional expression [-Wsign-compare]
          nr = MIN2(currentsz, count - j);
                                                       ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:224:25: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       nr = MIN2(currentsz, count - j);
                         ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:224:52: warning: signed and unsigned type in conditional expression [-Wsign-compare]
       nr = MIN2(currentsz, count - j);
                                                    ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:255:25: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       nr = MIN2(currentsz, count - j);
                         ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:255:52: warning: signed and unsigned type in conditional expression [-Wsign-compare]
       nr = MIN2(currentsz, count - j);
                                                    ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:281:25: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       nr = MIN2(currentsz, count - j + 1);
                         ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:281:56: warning: signed and unsigned type in conditional expression [-Wsign-compare]
       nr = MIN2(currentsz, count - j + 1);
                                                        ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h: In function 'radeon_dma_render_poly_verts':
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:313:28: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
          nr = MIN2(currentsz, count - j + 1);
                            ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:313:59: warning: signed and unsigned type in conditional expression [-Wsign-compare]
          nr = MIN2(currentsz, count - j + 1);
                                                           ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:365:28: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
          nr = MIN2(currentsz, count - nr);
                            ^
../../../../../src/mesa/tnl_dd/t_dd_dmatmp.h:365:56: warning: signed and unsigned type in conditional expression [-Wsign-compare]
          nr = MIN2(currentsz, count - nr);
                                                        ^

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index a07393da303..8f25797bc46 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -69,8 +69,8 @@ static void TAG(render_points_verts)(struct gl_context *ctx,
 {
    if (HAVE_POINTS) {
       LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-      int currentsz;
+      unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+      unsigned currentsz;
       GLuint j, nr;
 
       INIT(GL_POINTS);
@@ -96,8 +96,8 @@ static void TAG(render_lines_verts)(struct gl_context *ctx,
                                     GLuint flags)
 {
    LOCAL_VARS;
-   int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-   int currentsz;
+   unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+   unsigned currentsz;
    GLuint j, nr;
 
    INIT(GL_LINES);
@@ -126,8 +126,8 @@ static void TAG(render_line_strip_verts)(struct gl_context *ctx,
                                          GLuint flags)
 {
    LOCAL_VARS;
-   int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-   int currentsz;
+   unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+   unsigned currentsz;
    GLuint j, nr;
 
    INIT(GL_LINE_STRIP);
@@ -152,8 +152,8 @@ static void TAG(render_line_loop_verts)(struct gl_context *ctx,
                                         GLuint flags)
 {
    LOCAL_VARS;
-   int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-   int currentsz;
+   unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+   unsigned currentsz;
    GLuint j, nr;
 
    INIT(GL_LINE_STRIP);
@@ -204,8 +204,8 @@ static void TAG(render_triangles_verts)(struct gl_context *ctx,
                                         GLuint flags)
 {
    LOCAL_VARS;
-   int dmasz = (GET_SUBSEQUENT_VB_MAX_VERTS() / 3) * 3;
-   int currentsz;
+   unsigned dmasz = (GET_SUBSEQUENT_VB_MAX_VERTS() / 3) * 3;
+   unsigned currentsz;
    GLuint j, nr;
 
    INIT(GL_TRIANGLES);
@@ -236,8 +236,8 @@ static void TAG(render_tri_strip_verts)(struct gl_context *ctx,
 {
    LOCAL_VARS;
    GLuint j, nr;
-   int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-   int currentsz;
+   unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+   unsigned currentsz;
 
    INIT(GL_TRIANGLE_STRIP);
 
@@ -267,8 +267,8 @@ static void TAG(render_tri_fan_verts)(struct gl_context *ctx,
 {
    LOCAL_VARS;
    GLuint j, nr;
-   int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-   int currentsz;
+   unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+   unsigned currentsz;
 
    INIT(GL_TRIANGLE_FAN);
 
@@ -298,8 +298,8 @@ static void TAG(render_poly_verts)(struct gl_context *ctx,
    if (HAVE_POLYGONS) {
       LOCAL_VARS;
       GLuint j, nr;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-      int currentsz;
+      unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+      unsigned currentsz;
 
       INIT(GL_POLYGON);
 
@@ -343,8 +343,8 @@ static void TAG(render_quad_strip_verts)(struct gl_context *ctx,
       return;
    } else {
       LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-      int currentsz;
+      unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+      unsigned currentsz;
 
       /* Emit smooth-shaded quadstrips as tristrips:
        */
-- 
cgit v1.2.3


From d830965057bc01734dcca415969ebb6950f2e58c Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 17:29:50 -0700
Subject: t_dd_dmatmp: Constify dmasz

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index 8f25797bc46..b229f42c164 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -69,7 +69,7 @@ static void TAG(render_points_verts)(struct gl_context *ctx,
 {
    if (HAVE_POINTS) {
       LOCAL_VARS;
-      unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+      const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
       unsigned currentsz;
       GLuint j, nr;
 
@@ -96,7 +96,7 @@ static void TAG(render_lines_verts)(struct gl_context *ctx,
                                     GLuint flags)
 {
    LOCAL_VARS;
-   unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+   const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS() & ~1;
    unsigned currentsz;
    GLuint j, nr;
 
@@ -107,7 +107,6 @@ static void TAG(render_lines_verts)(struct gl_context *ctx,
    count -= count & 1;
    currentsz = GET_CURRENT_VB_MAX_VERTS();
    currentsz -= currentsz & 1;
-   dmasz -= dmasz & 1;
 
    if (currentsz < 8)
       currentsz = dmasz;
@@ -126,7 +125,7 @@ static void TAG(render_line_strip_verts)(struct gl_context *ctx,
                                          GLuint flags)
 {
    LOCAL_VARS;
-   unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+   const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
    unsigned currentsz;
    GLuint j, nr;
 
@@ -152,7 +151,7 @@ static void TAG(render_line_loop_verts)(struct gl_context *ctx,
                                         GLuint flags)
 {
    LOCAL_VARS;
-   unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+   const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS() - 1;
    unsigned currentsz;
    GLuint j, nr;
 
@@ -164,7 +163,6 @@ static void TAG(render_line_loop_verts)(struct gl_context *ctx,
     */
    currentsz = GET_CURRENT_VB_MAX_VERTS();
    currentsz--;
-   dmasz--;
 
    if (currentsz < 8)
       currentsz = dmasz;
@@ -204,7 +202,7 @@ static void TAG(render_triangles_verts)(struct gl_context *ctx,
                                         GLuint flags)
 {
    LOCAL_VARS;
-   unsigned dmasz = (GET_SUBSEQUENT_VB_MAX_VERTS() / 3) * 3;
+   const unsigned dmasz = (GET_SUBSEQUENT_VB_MAX_VERTS() / 3) * 3;
    unsigned currentsz;
    GLuint j, nr;
 
@@ -236,7 +234,7 @@ static void TAG(render_tri_strip_verts)(struct gl_context *ctx,
 {
    LOCAL_VARS;
    GLuint j, nr;
-   unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+   const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS() & ~1;
    unsigned currentsz;
 
    INIT(GL_TRIANGLE_STRIP);
@@ -248,7 +246,6 @@ static void TAG(render_tri_strip_verts)(struct gl_context *ctx,
 
    /* From here on emit even numbers of tris when wrapping over buffers:
     */
-   dmasz -= (dmasz & 1);
    currentsz -= (currentsz & 1);
 
    for (j = 0; j + 2 < count; j += nr - 2) {
@@ -267,7 +264,7 @@ static void TAG(render_tri_fan_verts)(struct gl_context *ctx,
 {
    LOCAL_VARS;
    GLuint j, nr;
-   unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+   const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
    unsigned currentsz;
 
    INIT(GL_TRIANGLE_FAN);
@@ -298,7 +295,7 @@ static void TAG(render_poly_verts)(struct gl_context *ctx,
    if (HAVE_POLYGONS) {
       LOCAL_VARS;
       GLuint j, nr;
-      unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+      const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
       unsigned currentsz;
 
       INIT(GL_POLYGON);
@@ -343,7 +340,7 @@ static void TAG(render_quad_strip_verts)(struct gl_context *ctx,
       return;
    } else {
       LOCAL_VARS;
-      unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+      const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS() & ~1;
       unsigned currentsz;
 
       /* Emit smooth-shaded quadstrips as tristrips:
@@ -353,7 +350,6 @@ static void TAG(render_quad_strip_verts)(struct gl_context *ctx,
 
       /* Emit whole number of quads in total, and in each buffer.
        */
-      dmasz -= dmasz & 1;
       currentsz = GET_CURRENT_VB_MAX_VERTS();
       currentsz -= currentsz & 1;
       count -= count & 1;
-- 
cgit v1.2.3


From abbaf3301f2405adf61d2804706c00a391a1336e Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 14 Sep 2015 17:57:15 -0700
Subject: mesa: Remove unused HAVE_TRI_STRIP_1 defines

Defined to 0 in a few places, but it's not used anywhere.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/drivers/dri/i915/intel_render.c   | 1 -
 src/mesa/drivers/dri/r200/r200_swtcl.c     | 1 -
 src/mesa/drivers/dri/r200/r200_tcl.c       | 1 -
 src/mesa/drivers/dri/radeon/radeon_swtcl.c | 1 -
 src/mesa/drivers/dri/radeon/radeon_tcl.c   | 1 -
 5 files changed, 5 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c
index 990f65c188c..cf4827e238e 100644
--- a/src/mesa/drivers/dri/i915/intel_render.c
+++ b/src/mesa/drivers/dri/i915/intel_render.c
@@ -61,7 +61,6 @@
 #define HAVE_LINE_STRIPS 1
 #define HAVE_TRIANGLES   1
 #define HAVE_TRI_STRIPS  1
-#define HAVE_TRI_STRIP_1 0      /* has it, template can't use it yet */
 #define HAVE_TRI_FANS    1
 #define HAVE_POLYGONS    1
 
diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c
index c39b71dece1..bb9be210567 100644
--- a/src/mesa/drivers/dri/r200/r200_swtcl.c
+++ b/src/mesa/drivers/dri/r200/r200_swtcl.c
@@ -350,7 +350,6 @@ static void r200ResetLineStipple( struct gl_context *ctx );
 #define HAVE_LINE_STRIPS 1
 #define HAVE_TRIANGLES   1
 #define HAVE_TRI_STRIPS  1
-#define HAVE_TRI_STRIP_1 0
 #define HAVE_TRI_FANS    1
 #define HAVE_QUADS       0
 #define HAVE_QUAD_STRIPS 0
diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c
index 3eccaa7b7ad..747275334b6 100644
--- a/src/mesa/drivers/dri/r200/r200_tcl.c
+++ b/src/mesa/drivers/dri/r200/r200_tcl.c
@@ -61,7 +61,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define HAVE_LINE_STRIPS 1
 #define HAVE_TRIANGLES   1
 #define HAVE_TRI_STRIPS  1
-#define HAVE_TRI_STRIP_1 0
 #define HAVE_TRI_FANS    1
 #define HAVE_QUADS       1
 #define HAVE_QUAD_STRIPS 1
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
index dce8aea218e..b671a3be143 100644
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -353,7 +353,6 @@ void r100_swtcl_flush(struct gl_context *ctx, uint32_t current_offset)
 #define HAVE_LINE_STRIPS 1
 #define HAVE_TRIANGLES   1
 #define HAVE_TRI_STRIPS  1
-#define HAVE_TRI_STRIP_1 0
 #define HAVE_TRI_FANS    1
 #define HAVE_POLYGONS    0
 /* \todo: is it possible to make "ELTS" work with t_vertex code ? */
diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c
index 1d07459a62a..3e2f4261600 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c
@@ -65,7 +65,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define HAVE_LINE_STRIPS 1
 #define HAVE_TRIANGLES   1
 #define HAVE_TRI_STRIPS  1
-#define HAVE_TRI_STRIP_1 0
 #define HAVE_TRI_FANS    1
 #define HAVE_QUADS       0
 #define HAVE_QUAD_STRIPS 0
-- 
cgit v1.2.3


From 84fec757de2f8c6ba47f2f56077993d62538711a Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Mon, 23 Mar 2015 14:47:23 +0200
Subject: t_dd_dmatmp: Make the render_tab[]s const
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These tables hold function pointers and they never change so
make them const.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index b229f42c164..e7e19a03597 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -409,7 +409,7 @@ static void TAG(render_noop)(struct gl_context *ctx,
    (void) flags;
 }
 
-static tnl_render_func TAG(render_tab_verts)[GL_POLYGON+2] =
+static const tnl_render_func TAG(render_tab_verts)[GL_POLYGON+2] =
 {
    TAG(render_points_verts),
    TAG(render_lines_verts),
-- 
cgit v1.2.3


From aae0c88797e7e44c55873b3e97cceed5c6e6cded Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Mon, 23 Mar 2015 14:47:28 +0200
Subject: i915: Make hw_prim[] const
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The table used to map the GL primitive to the hw primitive never
changes so make it const.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/drivers/dri/i915/intel_render.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c
index cf4827e238e..6c2ad6c6c95 100644
--- a/src/mesa/drivers/dri/i915/intel_render.c
+++ b/src/mesa/drivers/dri/i915/intel_render.c
@@ -66,7 +66,7 @@
 
 #define HAVE_ELTS        0
 
-static uint32_t hw_prim[GL_POLYGON + 1] = {
+static const uint32_t hw_prim[GL_POLYGON + 1] = {
    0,
    PRIM3D_LINELIST,
    PRIM3D_LINESTRIP,
-- 
cgit v1.2.3


From 65e80ce5b565953ffb07586790eeea51199ca9cf Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Tue, 22 Sep 2015 21:11:23 -0700
Subject: nir/lower_vec_to_movs: Properly handle source modifiers on vecN ops

I don't know of any piglit tests that are currently broken.  However, there
is nothing stopping a vecN instruction from getting source modifiers and
lower_vec_to_movs is run after we lower to source modifiers.

Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/glsl/nir/nir_lower_vec_to_movs.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c b/src/glsl/nir/nir_lower_vec_to_movs.c
index 622e59c688e..287f2bf3d8b 100644
--- a/src/glsl/nir/nir_lower_vec_to_movs.c
+++ b/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -68,12 +68,16 @@ insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
 
    mov->dest.write_mask = (1u << start_idx);
    mov->src[0].swizzle[start_idx] = vec->src[start_idx].swizzle[0];
+   mov->src[0].negate = vec->src[start_idx].negate;
+   mov->src[0].abs = vec->src[start_idx].abs;
 
    for (unsigned i = start_idx + 1; i < 4; i++) {
       if (!(vec->dest.write_mask & (1 << i)))
          continue;
 
-      if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src)) {
+      if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) &&
+          vec->src[i].negate == vec->src[start_idx].negate &&
+          vec->src[i].abs == vec->src[start_idx].abs) {
          mov->dest.write_mask |= (1 << i);
          mov->src[0].swizzle[i] = vec->src[i].swizzle[0];
       }
-- 
cgit v1.2.3


From 8dcbca59572a856ca554af9ab3f52120b6f2a929 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Tue, 22 Sep 2015 21:32:06 -0700
Subject: nir/lower_vec_to_movs: Don't emit unneeded movs

It's possible, if a vecN operation is involved in a phi node, that we
could end up moving from a register to itself.  If swizzling is involved,
we still need to emit the move.  However, if there is no swizzling, then the
mov is a no-op and we might as well not bother emitting it.

Shader-db results on Haswell:

   total instructions in shared programs: 6262536 -> 6259558 (-0.05%)
   instructions in affected programs:     184780 -> 181802 (-1.61%)
   helped:                                838
   HURT:                                  0

Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/glsl/nir/nir_lower_vec_to_movs.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c b/src/glsl/nir/nir_lower_vec_to_movs.c
index 287f2bf3d8b..c08b721dae4 100644
--- a/src/glsl/nir/nir_lower_vec_to_movs.c
+++ b/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -83,7 +83,25 @@ insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
       }
    }
 
-   nir_instr_insert_before(&vec->instr, &mov->instr);
+   /* In some situations (if the vecN is involved in a phi-web), we can end
+    * up with a mov from a register to itself.  Some of those channels may end
+    * up doing nothing and there's no reason to have them as part of the mov.
+    */
+   if (src_matches_dest_reg(&mov->dest.dest, &mov->src[0].src) &&
+       !mov->src[0].abs && !mov->src[0].negate) {
+      for (unsigned i = 0; i < 4; i++) {
+         if (mov->src[0].swizzle[i] == i) {
+            mov->dest.write_mask &= ~(1 << i);
+         }
+      }
+   }
+
+   /* Only emit the instruction if it actually does something */
+   if (mov->dest.write_mask) {
+      nir_instr_insert_before(&vec->instr, &mov->instr);
+   } else {
+      ralloc_free(mov);
+   }
 
    return mov->dest.write_mask;
 }
-- 
cgit v1.2.3


From 6a67ede6b33cfb3bc35d09dcd25a731cba3b7780 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 3 Sep 2015 01:29:38 -0700
Subject: nir: Validate that a block doesn't have two identical successors.

This is invalid, and causes disasters if we try to unlink successors:
removing the first will work, but removing the second copy will fail
because the block isn't in the successor's predecessor set any longer.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_validate.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c
index 9938c0ef8b1..1c9993a9c80 100644
--- a/src/glsl/nir/nir_validate.c
+++ b/src/glsl/nir/nir_validate.c
@@ -586,6 +586,7 @@ validate_block(nir_block *block, validate_state *state)
    }
 
    assert(block->successors[0] != NULL);
+   assert(block->successors[0] != block->successors[1]);
 
    for (unsigned i = 0; i < 2; i++) {
       if (block->successors[i] != NULL) {
-- 
cgit v1.2.3


From e2637db618b868682e1c996b3c6394c2e82963f1 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 3 Sep 2015 00:31:19 -0700
Subject: nir/cf: Make a helper function for removing a predecessor.

I need to do this in a second place, and I'd rather make a helper
function than cut and paste the code.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_control_flow.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_control_flow.c b/src/glsl/nir/nir_control_flow.c
index 768dfd2aff3..43e4e43aede 100644
--- a/src/glsl/nir/nir_control_flow.c
+++ b/src/glsl/nir/nir_control_flow.c
@@ -60,6 +60,16 @@ block_add_pred(nir_block *block, nir_block *pred)
    _mesa_set_add(block->predecessors, pred);
 }
 
+static inline void
+block_remove_pred(nir_block *block, nir_block *pred)
+{
+   struct set_entry *entry = _mesa_set_search(block->predecessors, pred);
+
+   assert(entry);
+
+   _mesa_set_remove(block->predecessors, entry);
+}
+
 static void
 link_blocks(nir_block *pred, nir_block *succ1, nir_block *succ2)
 {
@@ -83,11 +93,7 @@ unlink_blocks(nir_block *pred, nir_block *succ)
       pred->successors[1] = NULL;
    }
 
-   struct set_entry *entry = _mesa_set_search(succ->predecessors, pred);
-
-   assert(entry);
-
-   _mesa_set_remove(succ->predecessors, entry);
+   block_remove_pred(succ, pred);
 }
 
 static void
-- 
cgit v1.2.3


From 9674c76c0e473a3edbc45f935ea88afd64024325 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 3 Sep 2015 00:33:50 -0700
Subject: nir/cf: Don't break outer-block successors in
 split_block_beginning().

Consider the following NIR:

   block block_0;
   /* succs: block_1 block_2 */
   if (...) {
      block block_1;
      ...
   } else {
      block block_2;
   }

Calling split_block_beginning() on block_1 would break block_0's
successors:  link_blocks() sets both successors of a block, so calling
link_blocks(block_0, new_block, NULL) would throw away the second
successor, leaving only /* succs: new_block */.  This is invalid: the
block before an if statement must have two successors.

Changing the call to link_blocks(pred, new_block, pred->successors[0])
would correctly leave both successors in place, but because unlink_blocks
may shift successors[1] to successors[0], it may not preserve the original
order.  NIR maintains a convention that successors[0] must point to the
"then" block, while successors[1] points to the "else" block, so we need
to take care to preserve this ordering.

This patch creates a new function that swaps out one successor for
another, preserving the ordering.  It then uses this to fix the issue.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_control_flow.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_control_flow.c b/src/glsl/nir/nir_control_flow.c
index 43e4e43aede..e2a151dafac 100644
--- a/src/glsl/nir/nir_control_flow.c
+++ b/src/glsl/nir/nir_control_flow.c
@@ -199,6 +199,23 @@ link_block_to_non_block(nir_block *block, nir_cf_node *node)
 
 }
 
+/**
+ * Replace a block's successor with a different one.
+ */
+static void
+replace_successor(nir_block *block, nir_block *old_succ, nir_block *new_succ)
+{
+   if (block->successors[0] == old_succ) {
+      block->successors[0] = new_succ;
+   } else {
+      assert(block->successors[1] == old_succ);
+      block->successors[1] = new_succ;
+   }
+
+   block_remove_pred(old_succ, block);
+   block_add_pred(new_succ, block);
+}
+
 /**
  * Takes a basic block and inserts a new empty basic block before it, making its
  * predecessors point to the new block. This essentially splits the block into
@@ -217,9 +234,7 @@ split_block_beginning(nir_block *block)
    struct set_entry *entry;
    set_foreach(block->predecessors, entry) {
       nir_block *pred = (nir_block *) entry->key;
-
-      unlink_blocks(pred, block);
-      link_blocks(pred, new_block, NULL);
+      replace_successor(pred, block, new_block);
    }
 
    /* Any phi nodes must stay part of the new block, or else their
-- 
cgit v1.2.3


From 0991b2eb3535f9af289149c9e63c38b56cb4b549 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 22 Sep 2015 18:04:14 -0700
Subject: nir/cf: Conditionally do block_add_normal_succs() in unlink_jump();

There is a bug where we mess up predecessors/successors due to the
ordering of unlinking/recreating edges/adding fake edges.  In order to
fix that, I need everything in one routine.

However, calling block_add_normal_succs() isn't safe from
cleanup_cf_node() - it would crash trying to insert phi undefs.
So unfortunately I need to add a parameter.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_control_flow.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_control_flow.c b/src/glsl/nir/nir_control_flow.c
index e2a151dafac..2b23f38a6a2 100644
--- a/src/glsl/nir/nir_control_flow.c
+++ b/src/glsl/nir/nir_control_flow.c
@@ -548,8 +548,8 @@ remove_phi_src(nir_block *block, nir_block *pred)
  * infinite loops. Note that the jump to be eliminated may be free-floating.
  */
 
-static
-void unlink_jump(nir_block *block, nir_jump_type type)
+static void
+unlink_jump(nir_block *block, nir_jump_type type, bool add_normal_successors)
 {
    if (block->successors[0])
       remove_phi_src(block->successors[0], block);
@@ -574,14 +574,14 @@ void unlink_jump(nir_block *block, nir_jump_type type)
    }
 
    unlink_block_successors(block);
+   if (add_normal_successors)
+      block_add_normal_succs(block);
 }
 
 void
 nir_handle_remove_jump(nir_block *block, nir_jump_type type)
 {
-   unlink_jump(block, type);
-
-   block_add_normal_succs(block);
+   unlink_jump(block, type, true);
 
    nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
    nir_metadata_preserve(impl, nir_metadata_none);
@@ -689,7 +689,7 @@ cleanup_cf_node(nir_cf_node *node, nir_function_impl *impl)
       nir_foreach_instr_safe(block, instr) {
          if (instr->type == nir_instr_type_jump) {
             nir_jump_type jump_type = nir_instr_as_jump(instr)->type;
-            unlink_jump(block, jump_type);
+            unlink_jump(block, jump_type, false);
          } else {
             nir_foreach_ssa_def(instr, replace_ssa_def_uses, impl);
             nir_instr_remove(instr);
-- 
cgit v1.2.3


From 024e5ec9777c38f8c05be6678a9f51b145a00236 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 18 Sep 2015 13:11:56 -0700
Subject: nir/cf: Alter block successors before adding a fake link.

Consider the case of "while (...) { break }".  Or in NIR:

        block block_0 (0x7ab640):
        ...
        /* succs: block_1 */
        loop {
                block block_1:
                /* preds: block_0 */
                break
                /* succs: block_2 */
        }
        block block_2:

Calling nir_handle_remove_jump(block_1, nir_jump_break) will remove the break.
Unfortunately, it would mangle the predecessors and successors.

Here, block_2->predecessors->entries == 1, so we would create a fake
link, setting block_1->successors[1] = block_2, and adding block_1 to
block_2's predecessor set.  This is illegal: a block cannot specify the
same successor twice.  In particular, adding the predecessor would have
no effect, as it was already present in the set.

We'd then call unlink_block_successors(), which would delete the fake
link and remove block_1 from block_2's predecessor set.  It would then
delete successors[0], and attempt to remove block_1 from block_2's
predecessor set a second time...except that it wouldn't be present,
triggering an assertion failure.

The fix appears to be simple: simply unlink the block's successors and
recreate them to point at the correct blocks first.  Then, add the fake
link.  In the above example, removing the break would cause block_1 to
have itself as a successor (as it becomes an infinite loop), so adding
the fake link won't cause a duplicate successor.

v2: Add comments (requested by Connor Abbott) and fix commit message.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_control_flow.c | 44 ++++++++++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 16 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_control_flow.c b/src/glsl/nir/nir_control_flow.c
index 2b23f38a6a2..87bc7163efd 100644
--- a/src/glsl/nir/nir_control_flow.c
+++ b/src/glsl/nir/nir_control_flow.c
@@ -551,31 +551,43 @@ remove_phi_src(nir_block *block, nir_block *pred)
 static void
 unlink_jump(nir_block *block, nir_jump_type type, bool add_normal_successors)
 {
+   nir_block *next = block->successors[0];
+
    if (block->successors[0])
       remove_phi_src(block->successors[0], block);
    if (block->successors[1])
       remove_phi_src(block->successors[1], block);
 
-   if (type == nir_jump_break) {
-      nir_block *next = block->successors[0];
+   unlink_block_successors(block);
+   if (add_normal_successors)
+      block_add_normal_succs(block);
 
-      if (next->predecessors->entries == 1) {
-         nir_loop *loop =
-            nir_cf_node_as_loop(nir_cf_node_prev(&next->cf_node));
+   /* If we've just removed a break, and the block we were jumping to (after
+    * the loop) now has zero predecessors, we've created a new infinite loop.
+    *
+    * NIR doesn't allow blocks (other than the start block) to have zero
+    * predecessors.  In particular, dominance assumes all blocks are reachable.
+    * So, we insert a "fake link" by making successors[1] point after the loop.
+    *
+    * Note that we have to do this after unlinking/recreating the block's
+    * successors.  If we removed a "break" at the end of the loop, then
+    * block == last_block, so block->successors[0] would already be "next",
+    * and adding a fake link would create two identical successors.  Doing
+    * this afterward works, as we'll have changed block->successors[0] to
+    * be the top of the loop.
+    */
+   if (type == nir_jump_break && next->predecessors->entries == 0) {
+      nir_loop *loop =
+         nir_cf_node_as_loop(nir_cf_node_prev(&next->cf_node));
 
-         /* insert fake link */
-         nir_cf_node *last = nir_loop_last_cf_node(loop);
-         assert(last->type == nir_cf_node_block);
-         nir_block *last_block = nir_cf_node_as_block(last);
+      /* insert fake link */
+      nir_cf_node *last = nir_loop_last_cf_node(loop);
+      assert(last->type == nir_cf_node_block);
+      nir_block *last_block = nir_cf_node_as_block(last);
 
-         last_block->successors[1] = next;
-         block_add_pred(next, last_block);
-      }
+      last_block->successors[1] = next;
+      block_add_pred(next, last_block);
    }
-
-   unlink_block_successors(block);
-   if (add_normal_successors)
-      block_add_normal_succs(block);
 }
 
 void
-- 
cgit v1.2.3


From 6560838703431f89c47d68822758bc76fd34c355 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 1 Sep 2015 22:56:29 -0700
Subject: nir/cf: Fix unlink_block_successors to actually unlink the second
 one.

Calling unlink_blocks(block, block->successors[0]) will successfully
unlink the first successor, but then will shift block->successors[1]
down to block->successors[0].  So the successors[1] != NULL check will
always fail.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_control_flow.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_control_flow.c b/src/glsl/nir/nir_control_flow.c
index 87bc7163efd..55d0689c45e 100644
--- a/src/glsl/nir/nir_control_flow.c
+++ b/src/glsl/nir/nir_control_flow.c
@@ -99,10 +99,10 @@ unlink_blocks(nir_block *pred, nir_block *succ)
 static void
 unlink_block_successors(nir_block *block)
 {
-   if (block->successors[0] != NULL)
-      unlink_blocks(block, block->successors[0]);
    if (block->successors[1] != NULL)
       unlink_blocks(block, block->successors[1]);
+   if (block->successors[0] != NULL)
+      unlink_blocks(block, block->successors[0]);
 }
 
 static void
-- 
cgit v1.2.3


From fbaa1b19d7accc5de95d6804525aad5b95abba72 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Sat, 19 Sep 2015 04:40:07 -0700
Subject: nir/cf: Fix dominance metadata in the dead control flow pass.

The NIR control flow modification API churns the block structure,
splitting blocks, stitching them back together, and so on.  Preserving
information about block dominance is hard (and probably not worthwhile).

This patch makes nir_cf_extract() throw away all metadata, like we do
when adding/removing jumps.

We then make the dead control flow pass compute dominance information
right before it uses it.  This is necessary because earlier work by the
pass may have invalidated it.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_control_flow.c | 3 +++
 src/glsl/nir/nir_opt_dead_cf.c  | 7 ++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/nir_control_flow.c b/src/glsl/nir/nir_control_flow.c
index 55d0689c45e..7f51c4faf49 100644
--- a/src/glsl/nir/nir_control_flow.c
+++ b/src/glsl/nir/nir_control_flow.c
@@ -756,6 +756,9 @@ nir_cf_extract(nir_cf_list *extracted, nir_cursor begin, nir_cursor end)
    extracted->impl = nir_cf_node_get_function(&block_begin->cf_node);
    exec_list_make_empty(&extracted->list);
 
+   /* Dominance and other block-related information is toast. */
+   nir_metadata_preserve(extracted->impl, nir_metadata_none);
+
    nir_cf_node *cf_node = &block_begin->cf_node;
    nir_cf_node *cf_node_end = &block_end->cf_node;
    while (true) {
diff --git a/src/glsl/nir/nir_opt_dead_cf.c b/src/glsl/nir/nir_opt_dead_cf.c
index 317bbc5ba63..0d4819b5158 100644
--- a/src/glsl/nir/nir_opt_dead_cf.c
+++ b/src/glsl/nir/nir_opt_dead_cf.c
@@ -203,6 +203,10 @@ loop_is_dead(nir_loop *loop)
                                      NULL))
       return false;
 
+   nir_function_impl *impl = nir_cf_node_get_function(&loop->cf_node);
+   nir_metadata_require(impl, nir_metadata_live_variables |
+                              nir_metadata_dominance);
+
    for (nir_block *cur = after->imm_dom; cur != before; cur = cur->imm_dom) {
       nir_foreach_instr(cur, instr) {
          if (!nir_foreach_ssa_def(instr, def_not_live_out, after))
@@ -332,9 +336,6 @@ dead_cf_list(struct exec_list *list, bool *list_ends_in_jump)
 static bool
 opt_dead_cf_impl(nir_function_impl *impl)
 {
-   nir_metadata_require(impl, nir_metadata_live_variables |
-                              nir_metadata_dominance);
-
    bool dummy;
    bool progress = dead_cf_list(&impl->body, &dummy);
 
-- 
cgit v1.2.3


From 0a040975ec1d27b62c6f8bf16093df3de898e9b3 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 21 Sep 2015 13:21:10 -0700
Subject: nir: Add unit tests for control flow graphs.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Acked-by: Jason Ekstrand <jason.ekstrand@intel.com>
Acked-by: Connor Abbott <cwabbott0@gmail.com>
---
 src/glsl/Makefile.am                      |  14 +++
 src/glsl/nir/tests/control_flow_tests.cpp | 155 ++++++++++++++++++++++++++++++
 2 files changed, 169 insertions(+)
 create mode 100644 src/glsl/nir/tests/control_flow_tests.cpp

(limited to 'src')

diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am
index 1aa9caa3b5a..32653911f6c 100644
--- a/src/glsl/Makefile.am
+++ b/src/glsl/Makefile.am
@@ -50,12 +50,14 @@ EXTRA_DIST = tests glcpp/tests README TODO glcpp/README	\
 	nir/nir_opcodes_c.py				\
 	nir/nir_opcodes_h.py				\
 	nir/nir_opt_algebraic.py			\
+	nir/tests					\
 	SConscript
 
 include Makefile.sources
 
 TESTS = glcpp/tests/glcpp-test				\
 	glcpp/tests/glcpp-test-cr-lf			\
+        nir/tests/control_flow_tests			\
 	tests/blob-test					\
 	tests/general-ir-test				\
 	tests/optimization-test				\
@@ -70,6 +72,7 @@ noinst_LTLIBRARIES = libnir.la libglsl.la libglcpp.la
 check_PROGRAMS =					\
 	glcpp/glcpp					\
 	glsl_test					\
+	nir/tests/control_flow_tests			\
 	tests/blob-test					\
 	tests/general-ir-test				\
 	tests/sampler-types-test			\
@@ -263,3 +266,14 @@ nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
 nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py
 	$(MKDIR_GEN)
 	$(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@
+
+nir_tests_control_flow_tests_SOURCES =			\
+	nir/tests/control_flow_tests.cpp
+nir_tests_control_flow_tests_CFLAGS =			\
+	$(PTHREAD_CFLAGS)
+nir_tests_control_flow_tests_LDADD =			\
+	$(top_builddir)/src/gtest/libgtest.la		\
+	$(top_builddir)/src/glsl/libnir.la		\
+	$(top_builddir)/src/libglsl_util.la		\
+	$(top_builddir)/src/util/libmesautil.la		\
+	$(PTHREAD_LIBS)
diff --git a/src/glsl/nir/tests/control_flow_tests.cpp b/src/glsl/nir/tests/control_flow_tests.cpp
new file mode 100644
index 00000000000..b9f90e66d1d
--- /dev/null
+++ b/src/glsl/nir/tests/control_flow_tests.cpp
@@ -0,0 +1,155 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <gtest/gtest.h>
+#include "nir.h"
+#include "nir_builder.h"
+
+class nir_cf_test : public ::testing::Test {
+protected:
+   nir_cf_test();
+   ~nir_cf_test();
+
+   nir_builder b;
+   nir_shader *shader;
+   nir_function_impl *impl;
+};
+
+nir_cf_test::nir_cf_test()
+{
+   static const nir_shader_compiler_options options = { };
+   shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, &options);
+   nir_function *func = nir_function_create(shader, "main");
+   nir_function_overload *overload = nir_function_overload_create(func);
+   impl = nir_function_impl_create(overload);
+
+   nir_builder_init(&b, impl);
+}
+
+nir_cf_test::~nir_cf_test()
+{
+   ralloc_free(shader);
+}
+
+TEST_F(nir_cf_test, delete_break_in_loop)
+{
+   /* Create IR:
+    *
+    * while (...) { break; }
+    */
+   nir_loop *loop = nir_loop_create(shader);
+   nir_cf_node_insert(nir_after_cf_list(&impl->body), &loop->cf_node);
+
+   b.cursor = nir_after_cf_list(&loop->body);
+
+   nir_jump_instr *jump = nir_jump_instr_create(shader, nir_jump_break);
+   nir_builder_instr_insert(&b, &jump->instr);
+
+   /* At this point, we should have:
+    *
+    * impl main {
+    *         block block_0:
+    *         // preds:
+    *         // succs: block_1
+    *         loop {
+    *                 block block_1:
+    *                 // preds: block_0
+    *                 break
+    *                 // succs: block_2
+    *         }
+    *         block block_2:
+    *         // preds: block_1
+    *         // succs: block_3
+    *         block block_3:
+    * }
+    */
+   nir_block *block_0 = nir_start_block(impl);
+   nir_block *block_1 = nir_cf_node_as_block(nir_loop_first_cf_node(loop));
+   nir_block *block_2 = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
+   nir_block *block_3 = impl->end_block;
+   ASSERT_EQ(nir_cf_node_block, block_0->cf_node.type);
+   ASSERT_EQ(nir_cf_node_block, block_1->cf_node.type);
+   ASSERT_EQ(nir_cf_node_block, block_2->cf_node.type);
+   ASSERT_EQ(nir_cf_node_block, block_3->cf_node.type);
+
+   /* Verify the successors and predecessors. */
+   EXPECT_EQ(block_1, block_0->successors[0]);
+   EXPECT_EQ(NULL,    block_0->successors[1]);
+   EXPECT_EQ(block_2, block_1->successors[0]);
+   EXPECT_EQ(NULL,    block_1->successors[1]);
+   EXPECT_EQ(block_3, block_2->successors[0]);
+   EXPECT_EQ(NULL,    block_2->successors[1]);
+   EXPECT_EQ(NULL,    block_3->successors[0]);
+   EXPECT_EQ(NULL,    block_3->successors[1]);
+   EXPECT_EQ(0,       block_0->predecessors->entries);
+   EXPECT_EQ(1,       block_1->predecessors->entries);
+   EXPECT_EQ(1,       block_2->predecessors->entries);
+   EXPECT_EQ(1,       block_3->predecessors->entries);
+   EXPECT_TRUE(_mesa_set_search(block_1->predecessors, block_0));
+   EXPECT_TRUE(_mesa_set_search(block_2->predecessors, block_1));
+   EXPECT_TRUE(_mesa_set_search(block_3->predecessors, block_2));
+
+   nir_print_shader(shader, stderr);
+
+   /* Now remove the break. */
+   nir_instr_remove(&jump->instr);
+
+   nir_print_shader(shader, stderr);
+
+   /* At this point, we should have:
+    *
+    * impl main {
+    *         block block_0:
+    *         // preds:
+    *         // succs: block_1
+    *         loop {
+    *                 block block_1:
+    *                 // preds: block_0 block_1
+    *                 // succs: block_1 block_2
+    *         }
+    *         block block_2:
+    *         // preds: block_1
+    *         // succs: block_3
+    *         block block_3:
+    * }
+    *
+    * Re-verify the predecessors and successors.
+    */
+   EXPECT_EQ(block_1, block_0->successors[0]);
+   EXPECT_EQ(NULL,    block_0->successors[1]);
+   EXPECT_EQ(block_1, block_1->successors[0]); /* back to itself */
+   EXPECT_EQ(block_2, block_1->successors[1]); /* fake successor */
+   EXPECT_EQ(block_3, block_2->successors[0]);
+   EXPECT_EQ(NULL,    block_2->successors[1]);
+   EXPECT_EQ(NULL,    block_3->successors[0]);
+   EXPECT_EQ(NULL,    block_3->successors[1]);
+   EXPECT_EQ(0,       block_0->predecessors->entries);
+   EXPECT_EQ(2,       block_1->predecessors->entries);
+   EXPECT_EQ(1,       block_2->predecessors->entries);
+   EXPECT_EQ(1,       block_3->predecessors->entries);
+   EXPECT_TRUE(_mesa_set_search(block_1->predecessors, block_0));
+   EXPECT_TRUE(_mesa_set_search(block_1->predecessors, block_1));
+   EXPECT_TRUE(_mesa_set_search(block_2->predecessors, block_1));
+   EXPECT_TRUE(_mesa_set_search(block_3->predecessors, block_2));
+
+   nir_metadata_require(impl, nir_metadata_dominance);
+}
-- 
cgit v1.2.3


From 542d40d698a698dc656c7a64ddcea07060707555 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 12 May 2015 01:05:29 -0700
Subject: nir: Add new GS intrinsics that maintain a count of emitted vertices.

This patch also introduces a lowering pass to convert the simple GS
intrinsics to the new ones.  See the comments above that for the
rationale behind the new intrinsics.

This should be useful for i965; it's a generic enough mechanism that I
could see other drivers potentially using it as well, so I don't feel
too bad about putting it in the generic code.

v2:
- Use nir_after_block_before_jump for the cursor (caught by Jason
  Ekstrand - I'd mistakenly used nir_after_block when rebasing this
  code onto the new NIR control flow API).
- Remove the old emit_vertex intrinsic at the end, rather than in
  the middle (requested by Jason).
- Use state->... directly rather than locals (requested by Jason).
- Report progress from nir_lower_gs_intrinsics() (requested by me).
- Remove "Authors:" section from file comment (requested by
  Michael Schellenberger Costa).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/Makefile.sources              |   1 +
 src/glsl/nir/nir.h                     |   2 +
 src/glsl/nir/nir_intrinsics.h          |  21 ++++
 src/glsl/nir/nir_lower_gs_intrinsics.c | 218 +++++++++++++++++++++++++++++++++
 4 files changed, 242 insertions(+)
 create mode 100644 src/glsl/nir/nir_lower_gs_intrinsics.c

(limited to 'src')

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index f7c69f4fe3f..a8f4994cf34 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -37,6 +37,7 @@ NIR_FILES = \
 	nir/nir_lower_atomics.c \
 	nir/nir_lower_clip.c \
 	nir/nir_lower_global_vars_to_local.c \
+	nir/nir_lower_gs_intrinsics.c \
 	nir/nir_lower_load_const_to_scalar.c \
 	nir/nir_lower_locals_to_regs.c \
 	nir/nir_lower_idiv.c \
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 619a363b540..4f45770e02d 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1883,6 +1883,8 @@ void nir_lower_two_sided_color(nir_shader *shader);
 void nir_lower_atomics(nir_shader *shader);
 void nir_lower_to_source_mods(nir_shader *shader);
 
+bool nir_lower_gs_intrinsics(nir_shader *shader);
+
 bool nir_normalize_cubemap_coords(nir_shader *shader);
 
 void nir_live_variables_impl(nir_function_impl *impl);
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index 70cae4256a3..b21460da5c0 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -79,9 +79,30 @@ BARRIER(memory_barrier)
 /** A conditional discard, with a single boolean source. */
 INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
 
+/**
+ * Basic Geometry Shader intrinsics.
+ *
+ * emit_vertex implements GLSL's EmitStreamVertex() built-in.  It takes a single
+ * index, which is the stream ID to write to.
+ *
+ * end_primitive implements GLSL's EndPrimitive() built-in.
+ */
 INTRINSIC(emit_vertex,   0, ARR(), false, 0, 0, 1, 0)
 INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)
 
+/**
+ * Geometry Shader intrinsics with a vertex count.
+ *
+ * Alternatively, drivers may implement these intrinsics, and use
+ * nir_lower_gs_intrinsics() to convert from the basic intrinsics.
+ *
+ * These maintain a count of the number of vertices emitted, as an additional
+ * unsigned integer source.
+ */
+INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0)
+
 /*
  * Atomic counters
  *
diff --git a/src/glsl/nir/nir_lower_gs_intrinsics.c b/src/glsl/nir/nir_lower_gs_intrinsics.c
new file mode 100644
index 00000000000..2ee4e5c45d6
--- /dev/null
+++ b/src/glsl/nir/nir_lower_gs_intrinsics.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/**
+ * \file nir_lower_gs_intrinsics.c
+ *
+ * Geometry Shaders can call EmitVertex()/EmitStreamVertex() to output an
+ * arbitrary number of vertices.  However, the shader must declare the maximum
+ * number of vertices that it will ever output - further attempts to emit
+ * vertices result in undefined behavior according to the GLSL specification.
+ *
+ * Drivers might use this maximum number of vertices to allocate enough space
+ * to hold the geometry shader's output.  Some drivers (such as i965) need to
+ * implement "safety checks" which ensure that the shader hasn't emitted too
+ * many vertices, to avoid overflowing that space and trashing other memory.
+ *
+ * The count of emitted vertices can also be useful in buffer offset
+ * calculations, so drivers know where to write the GS output.
+ *
+ * However, for simple geometry shaders that emit a statically determinable
+ * number of vertices, this extra bookkeeping is unnecessary and inefficient.
+ * By tracking the vertex count in NIR, we allow constant folding/propagation
+ * and dead control flow optimizations to eliminate most of it where possible.
+ *
+ * This pass introduces a new global variable which stores the current vertex
+ * count (initialized to 0), and converts emit_vertex/end_primitive intrinsics
+ * to their *_with_counter variants.  emit_vertex is also wrapped in a safety
+ * check to avoid buffer overflows.  Finally, it adds a set_vertex_count
+ * intrinsic at the end of the program, informing the driver of the final
+ * vertex count.
+ */
+
+struct state {
+   nir_builder *builder;
+   nir_variable *vertex_count_var;
+   bool progress;
+};
+
+/**
+ * Replace emit_vertex intrinsics with:
+ *
+ * if (vertex_count < max_vertices) {
+ *    emit_vertex_with_counter vertex_count ...
+ *    vertex_count += 1
+ * }
+ */
+static void
+rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state)
+{
+   nir_builder *b = state->builder;
+
+   /* Load the vertex count */
+   b->cursor = nir_before_instr(&intrin->instr);
+   nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
+
+   nir_ssa_def *max_vertices = nir_imm_int(b, b->shader->gs.vertices_out);
+
+   /* Create: if (vertex_count < max_vertices) and insert it.
+    *
+    * The new if statement needs to be hooked up to the control flow graph
+    * before we start inserting instructions into it.
+    */
+   nir_if *if_stmt = nir_if_create(b->shader);
+   if_stmt->condition = nir_src_for_ssa(nir_ilt(b, count, max_vertices));
+   nir_builder_cf_insert(b, &if_stmt->cf_node);
+
+   /* Fill out the new then-block */
+   b->cursor = nir_after_cf_list(&if_stmt->then_list);
+
+   nir_intrinsic_instr *lowered =
+      nir_intrinsic_instr_create(b->shader,
+                                 nir_intrinsic_emit_vertex_with_counter);
+   lowered->const_index[0] = intrin->const_index[0];
+   lowered->src[0] = nir_src_for_ssa(count);
+   nir_builder_instr_insert(b, &lowered->instr);
+
+   /* Increment the vertex count by 1 */
+   nir_store_var(b, state->vertex_count_var,
+                 nir_iadd(b, count, nir_imm_int(b, 1)));
+
+   nir_instr_remove(&intrin->instr);
+
+   state->progress = true;
+}
+
+/**
+ * Replace end_primitive with end_primitive_with_counter.
+ */
+static void
+rewrite_end_primitive(nir_intrinsic_instr *intrin, struct state *state)
+{
+   nir_builder *b = state->builder;
+
+   b->cursor = nir_before_instr(&intrin->instr);
+   nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
+
+   nir_intrinsic_instr *lowered =
+      nir_intrinsic_instr_create(b->shader,
+                                 nir_intrinsic_end_primitive_with_counter);
+   lowered->const_index[0] = intrin->const_index[0];
+   lowered->src[0] = nir_src_for_ssa(count);
+   nir_builder_instr_insert(b, &lowered->instr);
+
+   nir_instr_remove(&intrin->instr);
+
+   state->progress = true;
+}
+
+static bool
+rewrite_intrinsics(nir_block *block, void *closure)
+{
+   struct state *state = closure;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_emit_vertex:
+         rewrite_emit_vertex(intrin, state);
+         break;
+      case nir_intrinsic_end_primitive:
+         rewrite_end_primitive(intrin, state);
+         break;
+      default:
+         /* not interesting; skip this */
+         break;
+      }
+   }
+
+   return true;
+}
+
+/**
+ * Add a set_vertex_count intrinsic at the end of the program
+ * (representing the final vertex count).
+ */
+static void
+append_set_vertex_count(nir_block *end_block, struct state *state)
+{
+   nir_builder *b = state->builder;
+   nir_shader *shader = state->builder->shader;
+
+   /* Insert the new intrinsic in all of the predecessors of the end block,
+    * but before any jump instructions (return).
+    */
+   struct set_entry *entry;
+   set_foreach(end_block->predecessors, entry) {
+      nir_block *pred = (nir_block *) entry->key;
+      b->cursor = nir_after_block_before_jump(pred);
+
+      nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
+
+      nir_intrinsic_instr *set_vertex_count =
+         nir_intrinsic_instr_create(shader, nir_intrinsic_set_vertex_count);
+      set_vertex_count->src[0] = nir_src_for_ssa(count);
+
+      nir_builder_instr_insert(b, &set_vertex_count->instr);
+   }
+}
+
+bool
+nir_lower_gs_intrinsics(nir_shader *shader)
+{
+   struct state state;
+   state.progress = false;
+
+   /* Create the counter variable */
+   nir_variable *var = rzalloc(shader, nir_variable);
+   var->data.mode = nir_var_global;
+   var->type = glsl_uint_type();
+   var->name = "vertex_count";
+   var->constant_initializer = rzalloc(shader, nir_constant); /* initialize to 0 */
+
+   exec_list_push_tail(&shader->globals, &var->node);
+   state.vertex_count_var = var;
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl) {
+         nir_builder b;
+         nir_builder_init(&b, overload->impl);
+         state.builder = &b;
+
+         nir_foreach_block(overload->impl, rewrite_intrinsics, &state);
+
+         /* This only works because we have a single main() function. */
+         append_set_vertex_count(overload->impl->end_block, &state);
+
+         nir_metadata_preserve(overload->impl, 0);
+      }
+   }
+
+   return state.progress;
+}
-- 
cgit v1.2.3


From df31c1850d14729e27513ae733110a668f6b6e95 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Wed, 5 Aug 2015 09:16:59 -0700
Subject: i965/gs: Use new NIR intrinsics.

By performing the vertex counting in NIR, we're able to elide a ton of
useless safety checks around every EmitVertex() call:

total instructions in shared programs: 3952 -> 3720 (-5.87%)
instructions in affected programs:     3491 -> 3259 (-6.65%)
helped:                                11
HURT:                                  0

Improves performance in Gl32GSCloth by 0.671742% +/- 0.142202% (n=621)
on Haswell GT3e at 1024x768.

This should also make it easier to implement Broadwell's "Static Vertex
Count" feature someday.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/mesa/drivers/dri/i965/brw_nir.c               |  5 ++++
 src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp     | 13 +++++++++--
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 28 ++++++++++++-----------
 src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp     | 28 ++++++++++++++---------
 4 files changed, 48 insertions(+), 26 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index b47b87e07dd..1d4f6ab2ccd 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -96,6 +96,11 @@ brw_create_nir(struct brw_context *brw,
    }
    nir_validate_shader(nir);
 
+   if (stage == MESA_SHADER_GEOMETRY) {
+      nir_lower_gs_intrinsics(nir);
+      nir_validate_shader(nir);
+   }
+
    nir_lower_global_vars_to_local(nir);
    nir_validate_shader(nir);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
index 8a8dd571e74..4f4e1e12fab 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
@@ -92,16 +92,25 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    src_reg src;
 
    switch (instr->intrinsic) {
-   case nir_intrinsic_emit_vertex: {
+   case nir_intrinsic_emit_vertex_with_counter: {
+      this->vertex_count =
+         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
       int stream_id = instr->const_index[0];
       gs_emit_vertex(stream_id);
       break;
    }
 
-   case nir_intrinsic_end_primitive:
+   case nir_intrinsic_end_primitive_with_counter:
+      this->vertex_count =
+         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
       gs_end_primitive();
       break;
 
+   case nir_intrinsic_set_vertex_count:
+      this->vertex_count =
+         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
+      break;
+
    case nir_intrinsic_load_invocation_id: {
       src_reg invocation_id =
          src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index b9694f67787..7a5b945650c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -484,14 +484,6 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
    if (stream_id > 0 && shader_prog->TransformFeedback.NumVarying == 0)
       return;
 
-   /* To ensure that we don't output more vertices than the shader specified
-    * using max_vertices, do the logic inside a conditional of the form "if
-    * (vertex_count < MAX)"
-    */
-   unsigned num_output_vertices = c->gp->program.VerticesOut;
-   emit(CMP(dst_null_d(), this->vertex_count,
-            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
-   emit(IF(BRW_PREDICATE_NORMAL));
    {
       /* If we're outputting 32 control data bits or less, then we can wait
        * until the shader is over to output them all.  Otherwise we need to
@@ -562,12 +554,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
           this->current_annotation = "emit vertex: Stream control data bits";
           set_stream_control_data_bits(stream_id);
       }
-
-      this->current_annotation = "emit vertex: increment vertex count";
-      emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
-               src_reg(1u)));
    }
-   emit(BRW_OPCODE_ENDIF);
 
    this->current_annotation = NULL;
 }
@@ -575,7 +562,22 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
 void
 vec4_gs_visitor::visit(ir_emit_vertex *ir)
 {
+   /* To ensure that we don't output more vertices than the shader specified
+    * using max_vertices, do the logic inside a conditional of the form "if
+    * (vertex_count < MAX)"
+    */
+   unsigned num_output_vertices = c->gp->program.VerticesOut;
+   emit(CMP(dst_null_d(), this->vertex_count,
+            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
+   emit(IF(BRW_PREDICATE_NORMAL));
+
    gs_emit_vertex(ir->stream_id());
+
+   this->current_annotation = "emit vertex: increment vertex count";
+   emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
+            src_reg(1u)));
+
+   emit(BRW_OPCODE_ENDIF);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
index 68e443d38a5..5cfff7b62ba 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
@@ -149,19 +149,29 @@ gen6_gs_visitor::emit_prolog()
 void
 gen6_gs_visitor::visit(ir_emit_vertex *ir)
 {
+   /* To ensure that we don't output more vertices than the shader specified
+    * using max_vertices, do the logic inside a conditional of the form "if
+    * (vertex_count < MAX)"
+    */
+   unsigned num_output_vertices = c->gp->program.VerticesOut;
+   emit(CMP(dst_null_d(), this->vertex_count,
+            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
+   emit(IF(BRW_PREDICATE_NORMAL));
+
    gs_emit_vertex(ir->stream_id());
+
+   this->current_annotation = "emit vertex: increment vertex count";
+   emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
+            src_reg(1u)));
+
+   emit(BRW_OPCODE_ENDIF);
 }
+
 void
 gen6_gs_visitor::gs_emit_vertex(int stream_id)
 {
    this->current_annotation = "gen6 emit vertex";
-   /* Honor max_vertex layout indication in geometry shader by ignoring any
-    * vertices coming after c->gp->program.VerticesOut.
-    */
-   unsigned num_output_vertices = c->gp->program.VerticesOut;
-   emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices),
-            BRW_CONDITIONAL_L));
-   emit(IF(BRW_PREDICATE_NORMAL));
+
    {
       /* Buffer all output slots for this vertex in vertex_output */
       for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
@@ -219,11 +229,7 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
       }
       emit(ADD(dst_reg(this->vertex_output_offset),
                this->vertex_output_offset, 1u));
-
-      /* Update vertex count */
-      emit(ADD(dst_reg(this->vertex_count), this->vertex_count, 1u));
    }
-   emit(BRW_OPCODE_ENDIF);
 }
 
 void
-- 
cgit v1.2.3


From 31a36ffbc81a4dd79b91bf0fc59f0e5f8d44dbd7 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 3 Sep 2015 01:01:29 -0700
Subject: i965/gs: Fix extra level of indentation left by the previous commit.

I left a bunch of code indented a level in the previous patch to make
the diff easier to read.  But now we should fix that.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 124 +++++++++++-----------
 src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp     | 102 +++++++++---------
 2 files changed, 111 insertions(+), 115 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 7a5b945650c..3cb1b4c8793 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -484,76 +484,74 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
    if (stream_id > 0 && shader_prog->TransformFeedback.NumVarying == 0)
       return;
 
-   {
-      /* If we're outputting 32 control data bits or less, then we can wait
-       * until the shader is over to output them all.  Otherwise we need to
-       * output them as we go.  Now is the time to do it, since we're about to
-       * output the vertex_count'th vertex, so it's guaranteed that the
-       * control data bits associated with the (vertex_count - 1)th vertex are
-       * correct.
+   /* If we're outputting 32 control data bits or less, then we can wait
+    * until the shader is over to output them all.  Otherwise we need to
+    * output them as we go.  Now is the time to do it, since we're about to
+    * output the vertex_count'th vertex, so it's guaranteed that the
+    * control data bits associated with the (vertex_count - 1)th vertex are
+    * correct.
+    */
+   if (c->control_data_header_size_bits > 32) {
+      this->current_annotation = "emit vertex: emit control data bits";
+      /* Only emit control data bits if we've finished accumulating a batch
+       * of 32 bits.  This is the case when:
+       *
+       *     (vertex_count * bits_per_vertex) % 32 == 0
+       *
+       * (in other words, when the last 5 bits of vertex_count *
+       * bits_per_vertex are 0).  Assuming bits_per_vertex == 2^n for some
+       * integer n (which is always the case, since bits_per_vertex is
+       * always 1 or 2), this is equivalent to requiring that the last 5-n
+       * bits of vertex_count are 0:
+       *
+       *     vertex_count & (2^(5-n) - 1) == 0
+       *
+       * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is
+       * equivalent to:
+       *
+       *     vertex_count & (32 / bits_per_vertex - 1) == 0
        */
-      if (c->control_data_header_size_bits > 32) {
-         this->current_annotation = "emit vertex: emit control data bits";
-         /* Only emit control data bits if we've finished accumulating a batch
-          * of 32 bits.  This is the case when:
-          *
-          *     (vertex_count * bits_per_vertex) % 32 == 0
-          *
-          * (in other words, when the last 5 bits of vertex_count *
-          * bits_per_vertex are 0).  Assuming bits_per_vertex == 2^n for some
-          * integer n (which is always the case, since bits_per_vertex is
-          * always 1 or 2), this is equivalent to requiring that the last 5-n
-          * bits of vertex_count are 0:
-          *
-          *     vertex_count & (2^(5-n) - 1) == 0
-          *
-          * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is
-          * equivalent to:
-          *
-          *     vertex_count & (32 / bits_per_vertex - 1) == 0
+      vec4_instruction *inst =
+         emit(AND(dst_null_d(), this->vertex_count,
+                  (uint32_t) (32 / c->control_data_bits_per_vertex - 1)));
+      inst->conditional_mod = BRW_CONDITIONAL_Z;
+
+      emit(IF(BRW_PREDICATE_NORMAL));
+      {
+         /* If vertex_count is 0, then no control data bits have been
+          * accumulated yet, so we skip emitting them.
           */
-         vec4_instruction *inst =
-            emit(AND(dst_null_d(), this->vertex_count,
-                     (uint32_t) (32 / c->control_data_bits_per_vertex - 1)));
-         inst->conditional_mod = BRW_CONDITIONAL_Z;
-
+         emit(CMP(dst_null_d(), this->vertex_count, 0u,
+                  BRW_CONDITIONAL_NEQ));
          emit(IF(BRW_PREDICATE_NORMAL));
-         {
-            /* If vertex_count is 0, then no control data bits have been
-             * accumulated yet, so we skip emitting them.
-             */
-            emit(CMP(dst_null_d(), this->vertex_count, 0u,
-                     BRW_CONDITIONAL_NEQ));
-            emit(IF(BRW_PREDICATE_NORMAL));
-            emit_control_data_bits();
-            emit(BRW_OPCODE_ENDIF);
-
-            /* Reset control_data_bits to 0 so we can start accumulating a new
-             * batch.
-             *
-             * Note: in the case where vertex_count == 0, this neutralizes the
-             * effect of any call to EndPrimitive() that the shader may have
-             * made before outputting its first vertex.
-             */
-            inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
-            inst->force_writemask_all = true;
-         }
+         emit_control_data_bits();
          emit(BRW_OPCODE_ENDIF);
+
+         /* Reset control_data_bits to 0 so we can start accumulating a new
+          * batch.
+          *
+          * Note: in the case where vertex_count == 0, this neutralizes the
+          * effect of any call to EndPrimitive() that the shader may have
+          * made before outputting its first vertex.
+          */
+         inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
+         inst->force_writemask_all = true;
       }
+      emit(BRW_OPCODE_ENDIF);
+   }
 
-      this->current_annotation = "emit vertex: vertex data";
-      emit_vertex();
+   this->current_annotation = "emit vertex: vertex data";
+   emit_vertex();
 
-      /* In stream mode we have to set control data bits for all vertices
-       * unless we have disabled control data bits completely (which we do
-       * do for GL_POINTS outputs that don't use streams).
-       */
-      if (c->control_data_header_size_bits > 0 &&
-          c->prog_data.control_data_format ==
-             GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
-          this->current_annotation = "emit vertex: Stream control data bits";
-          set_stream_control_data_bits(stream_id);
-      }
+   /* In stream mode we have to set control data bits for all vertices
+    * unless we have disabled control data bits completely (which we do
+    * for GL_POINTS outputs that don't use streams).
+    */
+   if (c->control_data_header_size_bits > 0 &&
+       c->prog_data.control_data_format ==
+          GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
+       this->current_annotation = "emit vertex: Stream control data bits";
+       set_stream_control_data_bits(stream_id);
    }
 
    this->current_annotation = NULL;
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
index 5cfff7b62ba..4c9c96028f8 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
@@ -172,64 +172,62 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
 {
    this->current_annotation = "gen6 emit vertex";
 
-   {
-      /* Buffer all output slots for this vertex in vertex_output */
-      for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
-         int varying = prog_data->vue_map.slot_to_varying[slot];
-         if (varying != VARYING_SLOT_PSIZ) {
-            dst_reg dst(this->vertex_output);
-            dst.reladdr = ralloc(mem_ctx, src_reg);
-            memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
-            emit_urb_slot(dst, varying);
-         } else {
-            /* The PSIZ slot can pack multiple varyings in different channels
-             * and emit_urb_slot() will produce a MOV instruction for each of
-             * them. Since we are writing to an array, that will translate to
-             * possibly multiple MOV instructions with an array destination and
-             * each will generate a scratch write with the same offset into
-             * scratch space (thus, each one overwriting the previous). This is
-             * not what we want. What we will do instead is emit PSIZ to a
-             * a regular temporary register, then move that resgister into the
-             * array. This way we only have one instruction with an array
-             * destination and we only produce a single scratch write.
-             */
-            dst_reg tmp = dst_reg(src_reg(this, glsl_type::uvec4_type));
-            emit_urb_slot(tmp, varying);
-            dst_reg dst(this->vertex_output);
-            dst.reladdr = ralloc(mem_ctx, src_reg);
-            memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
-            vec4_instruction *inst = emit(MOV(dst, src_reg(tmp)));
-            inst->force_writemask_all = true;
-         }
-
-         emit(ADD(dst_reg(this->vertex_output_offset),
-                  this->vertex_output_offset, 1u));
-      }
-
-      /* Now buffer flags for this vertex */
-      dst_reg dst(this->vertex_output);
-      dst.reladdr = ralloc(mem_ctx, src_reg);
-      memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
-      if (c->gp->program.OutputType == GL_POINTS) {
-         /* If we are outputting points, then every vertex has PrimStart and
-          * PrimEnd set.
-          */
-         emit(MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
-                  URB_WRITE_PRIM_START | URB_WRITE_PRIM_END));
-         emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));
+   /* Buffer all output slots for this vertex in vertex_output */
+   for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
+      int varying = prog_data->vue_map.slot_to_varying[slot];
+      if (varying != VARYING_SLOT_PSIZ) {
+         dst_reg dst(this->vertex_output);
+         dst.reladdr = ralloc(mem_ctx, src_reg);
+         memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
+         emit_urb_slot(dst, varying);
       } else {
-         /* Otherwise, we can only set the PrimStart flag, which we have stored
-          * in the first_vertex register. We will have to wait until we execute
-          * EndPrimitive() or we end the thread to set the PrimEnd flag on a
-          * vertex.
+         /* The PSIZ slot can pack multiple varyings in different channels
+          * and emit_urb_slot() will produce a MOV instruction for each of
+          * them. Since we are writing to an array, that will translate to
+          * possibly multiple MOV instructions with an array destination and
+          * each will generate a scratch write with the same offset into
+          * scratch space (thus, each one overwriting the previous). This is
+          * not what we want. What we will do instead is emit PSIZ to a
+          * regular temporary register, then move that register into the
+          * array. This way we only have one instruction with an array
+          * destination and we only produce a single scratch write.
           */
-         emit(OR(dst, this->first_vertex,
-                 (c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
-         emit(MOV(dst_reg(this->first_vertex), 0u));
+         dst_reg tmp = dst_reg(src_reg(this, glsl_type::uvec4_type));
+         emit_urb_slot(tmp, varying);
+         dst_reg dst(this->vertex_output);
+         dst.reladdr = ralloc(mem_ctx, src_reg);
+         memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
+         vec4_instruction *inst = emit(MOV(dst, src_reg(tmp)));
+         inst->force_writemask_all = true;
       }
+
       emit(ADD(dst_reg(this->vertex_output_offset),
                this->vertex_output_offset, 1u));
    }
+
+   /* Now buffer flags for this vertex */
+   dst_reg dst(this->vertex_output);
+   dst.reladdr = ralloc(mem_ctx, src_reg);
+   memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
+   if (c->gp->program.OutputType == GL_POINTS) {
+      /* If we are outputting points, then every vertex has PrimStart and
+       * PrimEnd set.
+       */
+      emit(MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
+               URB_WRITE_PRIM_START | URB_WRITE_PRIM_END));
+      emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));
+   } else {
+      /* Otherwise, we can only set the PrimStart flag, which we have stored
+       * in the first_vertex register. We will have to wait until we execute
+       * EndPrimitive() or we end the thread to set the PrimEnd flag on a
+       * vertex.
+       */
+      emit(OR(dst, this->first_vertex,
+              (c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
+      emit(MOV(dst_reg(this->first_vertex), 0u));
+   }
+   emit(ADD(dst_reg(this->vertex_output_offset),
+            this->vertex_output_offset, 1u));
 }
 
 void
-- 
cgit v1.2.3


From 71e187430cdd1d82972554d7f9e994a41847bea1 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 13 Aug 2015 15:39:40 -0400
Subject: i965: add ARB_texture_barrier support

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 docs/GL3.txt                                 | 2 +-
 docs/relnotes/11.1.0.html                    | 1 +
 src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
 src/mesa/drivers/dri/i965/intel_tex.c        | 9 +++++++++
 4 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 92941cf0d74..b0805f00ef5 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -195,7 +195,7 @@ GL 4.5, GLSL 4.50:
   GL_ARB_direct_state_access                           DONE (all drivers)
   GL_ARB_get_texture_sub_image                         DONE (all drivers)
   GL_ARB_shader_texture_image_samples                  DONE (i965, nv50, nvc0, r600, radeonsi)
-  GL_ARB_texture_barrier                               DONE (nv50, nvc0, r600, radeonsi)
+  GL_ARB_texture_barrier                               DONE (i965, nv50, nvc0, r600, radeonsi)
   GL_KHR_context_flush_control                         DONE (all - but needs GLX/EGL extension to be useful)
   GL_KHR_robust_buffer_access_behavior                 not started
   GL_KHR_robustness                                    90% done (the ARB variant)
diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 89b9a191176..e28fab6060c 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -46,6 +46,7 @@ Note: some of the new features are only available with certain drivers.
 <ul>
 <li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
 <li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li>
+<li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
 <li>GL_ARB_texture_query_lod on softpipe</li>
 <li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
 </ul>
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index e6d39e00788..1873827cacf 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -248,6 +248,7 @@ intelInitExtensions(struct gl_context *ctx)
    ctx->Extensions.MESA_pack_invert = true;
    ctx->Extensions.NV_conditional_render = true;
    ctx->Extensions.NV_primitive_restart = true;
+   ctx->Extensions.NV_texture_barrier = true;
    ctx->Extensions.NV_texture_env_combine4 = true;
    ctx->Extensions.NV_texture_rectangle = true;
    ctx->Extensions.TDFX_texture_compression_FXT1 = true;
diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c
index e16b0def0d4..cac33ac64a7 100644
--- a/src/mesa/drivers/dri/i965/intel_tex.c
+++ b/src/mesa/drivers/dri/i965/intel_tex.c
@@ -357,6 +357,14 @@ intel_set_texture_storage_for_buffer_object(struct gl_context *ctx,
    return true;
 }
 
+static void
+intel_texture_barrier(struct gl_context *ctx)
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   brw_emit_mi_flush(brw);
+}
+
 void
 intelInitTextureFuncs(struct dd_function_table *functions)
 {
@@ -372,4 +380,5 @@ intelInitTextureFuncs(struct dd_function_table *functions)
    functions->TextureView = intel_texture_view;
    functions->SetTextureStorageForBufferObject =
       intel_set_texture_storage_for_buffer_object;
+   functions->TextureBarrier = intel_texture_barrier;
 }
-- 
cgit v1.2.3


From 827d7948340f4cbc7de2d02971e4f36fbd52f7a8 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <t_arceri@yahoo.com.au>
Date: Wed, 23 Sep 2015 11:12:57 +1000
Subject: glsl: correctly detect inactive UBO arrays
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously the code was trying to get the packing type from the array not the
interface.

Cc: Ian Romanick <ian.d.romanick@intel.com>
Cc: Antia Puentes <apuentes@igalia.com>
Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
---
 src/glsl/link_uniform_block_active_visitor.cpp | 6 ++----
 src/glsl/opt_dead_code.cpp                     | 7 ++-----
 2 files changed, 4 insertions(+), 9 deletions(-)

(limited to 'src')

diff --git a/src/glsl/link_uniform_block_active_visitor.cpp b/src/glsl/link_uniform_block_active_visitor.cpp
index 510294783a0..72e07828a6d 100644
--- a/src/glsl/link_uniform_block_active_visitor.cpp
+++ b/src/glsl/link_uniform_block_active_visitor.cpp
@@ -77,9 +77,6 @@ link_uniform_block_active_visitor::visit(ir_variable *var)
    if (!var->is_in_buffer_block())
       return visit_continue;
 
-   const glsl_type *const block_type = var->is_interface_instance()
-      ? var->type : var->get_interface_type();
-
    /* Section 2.11.6 (Uniform Variables) of the OpenGL ES 3.0.3 spec says:
     *
     *     "All members of a named uniform block declared with a shared or
@@ -88,7 +85,8 @@ link_uniform_block_active_visitor::visit(ir_variable *var)
     *     also considered active, even if no member of the block is
     *     referenced."
     */
-   if (block_type->interface_packing == GLSL_INTERFACE_PACKING_PACKED)
+   if (var->get_interface_type()->interface_packing ==
+       GLSL_INTERFACE_PACKING_PACKED)
       return visit_continue;
 
    /* Process the block.  Bail if there was an error.
diff --git a/src/glsl/opt_dead_code.cpp b/src/glsl/opt_dead_code.cpp
index e4bf874700c..2cb7f41adef 100644
--- a/src/glsl/opt_dead_code.cpp
+++ b/src/glsl/opt_dead_code.cpp
@@ -119,11 +119,8 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned)
              * layouts, do not eliminate it.
              */
             if (entry->var->is_in_buffer_block()) {
-               const glsl_type *const block_type =
-                  entry->var->is_interface_instance()
-                  ? entry->var->type : entry->var->get_interface_type();
-
-               if (block_type->interface_packing != GLSL_INTERFACE_PACKING_PACKED)
+               if (entry->var->get_interface_type()->interface_packing !=
+                   GLSL_INTERFACE_PACKING_PACKED)
                   continue;
             }
 
-- 
cgit v1.2.3


From 1614c39a8fc205d7b1cb5b16737c233fbcc5b678 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Wed, 23 Sep 2015 00:30:23 -0400
Subject: st/mesa: keep track of saturated writes when eliminating dead code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It doesn't matter whether a write is saturated or not, in another
implementation it might even have been a separate opcode. This code was
most likely copied from the copy-propagation pass (where one does have
to distinguish saturation).

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index c3a8c119b1e..5896f6518b5 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4144,8 +4144,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
        */
       for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) {
          if (inst->dst[i].file == PROGRAM_TEMPORARY &&
-             !inst->dst[i].reladdr &&
-             !inst->saturate) {
+             !inst->dst[i].reladdr) {
             for (int c = 0; c < 4; c++) {
                if (inst->dst[i].writemask & (1 << c)) {
                   if (writes[4 * inst->dst[i].index + c]) {
-- 
cgit v1.2.3


From c8cb5ed93c8e7343390f188bbf1a8459380a5739 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Thu, 17 Sep 2015 03:22:53 -0700
Subject: st/xa: Fixups for PIPE_FORMAT_R8_UNORM A8 usage v2.

Check for PIPE_FORMAT_R8_UNORM when setting up the copy shader.
Also re-enable the dest alpha blending with A8 destination that
actually turned out to be correct.

Verified using rendercheck that the composite operators
overreverse, in, out, atop, atopreverse and xor seem to work fine
with a8 destination.

v2: Fix a copy-paste error.

Reported-by: Jose Fonseca <jfonseca@vmware.com>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
---
 src/gallium/state_trackers/xa/xa_composite.c | 34 ++++++++++++++++++++--------
 src/gallium/state_trackers/xa/xa_renderer.c  |  6 +++--
 2 files changed, 29 insertions(+), 11 deletions(-)

(limited to 'src')

diff --git a/src/gallium/state_trackers/xa/xa_composite.c b/src/gallium/state_trackers/xa/xa_composite.c
index e81eebaf541..bcb27ea1825 100644
--- a/src/gallium/state_trackers/xa/xa_composite.c
+++ b/src/gallium/state_trackers/xa/xa_composite.c
@@ -78,6 +78,27 @@ static const struct xa_composite_blend xa_blends[] = {
       0, 0, PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_ONE},
 };
 
+/*
+ * The hardware reads the alpha value stored in an L8 texture as color,
+ * and the one stored in an R8 texture as red. The fragment shader leaves
+ * the source alpha in all color channels, so blending must use the
+ * DST_COLOR factors in place of the DST_ALPHA ones; the result is then
+ * written through any color channel (L8) or the red channel (R8).
+ * Returns the blend factor to use for such a destination.
+ */
+static unsigned
+xa_convert_blend_for_luminance(unsigned factor)
+{
+    if (factor == PIPE_BLENDFACTOR_DST_ALPHA)
+	return PIPE_BLENDFACTOR_DST_COLOR;
+
+    if (factor == PIPE_BLENDFACTOR_INV_DST_ALPHA)
+	return PIPE_BLENDFACTOR_INV_DST_COLOR;
+
+    /* Every other factor is returned unchanged. */
+    return factor;
+}
+
 static boolean
 blend_for_op(struct xa_composite_blend *blend,
 	     enum xa_composite_op op,
@@ -111,16 +132,11 @@ blend_for_op(struct xa_composite_blend *blend,
     if (!dst_pic->srf)
 	return supported;
 
-    /*
-     * None of the hardware formats we might use for dst A8 are
-     * suitable for dst_alpha blending, since they present the
-     * alpha channel either in all color channels (L8_UNORM) or
-     * in the red channel only (R8_UNORM)
-     */
     if ((dst_pic->srf->tex->format == PIPE_FORMAT_L8_UNORM ||
-         dst_pic->srf->tex->format == PIPE_FORMAT_R8_UNORM) &&
-        blend->alpha_dst)
-        return FALSE;
+         dst_pic->srf->tex->format == PIPE_FORMAT_R8_UNORM)) {
+        blend->rgb_src = xa_convert_blend_for_luminance(blend->rgb_src);
+        blend->rgb_dst = xa_convert_blend_for_luminance(blend->rgb_dst);
+    }
 
     /*
      * If there's no dst alpha channel, adjust the blend op so that we'll treat
diff --git a/src/gallium/state_trackers/xa/xa_renderer.c b/src/gallium/state_trackers/xa/xa_renderer.c
index fda07e5b68e..bc55f877c48 100644
--- a/src/gallium/state_trackers/xa/xa_renderer.c
+++ b/src/gallium/state_trackers/xa/xa_renderer.c
@@ -465,9 +465,11 @@ renderer_copy_prepare(struct xa_context *r,
     }
 
     /* shaders */
-    if (src_texture->format == PIPE_FORMAT_L8_UNORM)
+    if (src_texture->format == PIPE_FORMAT_L8_UNORM ||
+        src_texture->format == PIPE_FORMAT_R8_UNORM)
 	fs_traits |= FS_SRC_LUMINANCE;
-    if (dst_surface->format == PIPE_FORMAT_L8_UNORM)
+    if (dst_surface->format == PIPE_FORMAT_L8_UNORM ||
+        dst_surface->format == PIPE_FORMAT_R8_UNORM)
 	fs_traits |= FS_DST_LUMINANCE;
     if (xa_format_a(dst_xa_format) != 0 &&
 	xa_format_a(src_xa_format) == 0)
-- 
cgit v1.2.3


From 200aee424790f3167fcb175f4798af27783fe364 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 28 Aug 2015 00:42:00 -0600
Subject: mesa: rework Driver.CopyImageSubData() and related code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, core Mesa's _mesa_CopyImageSubData() created temporary textures
to wrap renderbuffer sources/destinations.  This caused a bit of a mess in
the Mesa/gallium state tracker because we had to basically undo that
wrapping.

Instead, change ctx->Driver.CopyImageSubData() to take both gl_renderbuffer
and gl_texture_image src/dst pointers (one being null, the other non-null)
so the driver can handle renderbuffer vs. texture as needed.

For the i965 driver, we basically moved the code that wrapped textures
around renderbuffers from copyimage.c down into the meta and driver code.

The old code in copyimage.c also made some questionable calls to
_mesa_BindTexture(), etc. which weren't undone at the end.

v2 (Jason Ekstrand): Rework the intel bits
v3 (Brian Paul): Update the temporary st_CopyImageSubData() function.

Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Tested-by: Kai Wasserbäch <kai@dev.carbon-project.org>
Tested-by: Nick Sarnie <commendsarnex@gmail.com>
---
 src/mesa/drivers/common/meta.h               |   2 +
 src/mesa/drivers/common/meta_copy_image.c    | 103 ++++++++-
 src/mesa/drivers/dri/i965/intel_copy_image.c |  80 +++++--
 src/mesa/main/copyimage.c                    | 301 +++++++++++++++------------
 src/mesa/main/dd.h                           |  15 +-
 src/mesa/state_tracker/st_cb_texture.c       |  33 ++-
 6 files changed, 363 insertions(+), 171 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
index fe439153aa0..23fa209905d 100644
--- a/src/mesa/drivers/common/meta.h
+++ b/src/mesa/drivers/common/meta.h
@@ -494,8 +494,10 @@ _mesa_meta_and_swrast_BlitFramebuffer(struct gl_context *ctx,
 bool
 _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
                                          struct gl_texture_image *src_tex_image,
+                                         struct gl_renderbuffer *src_renderbuffer,
                                          int src_x, int src_y, int src_z,
                                          struct gl_texture_image *dst_tex_image,
+                                         struct gl_renderbuffer *dst_renderbuffer,
                                          int dst_x, int dst_y, int dst_z,
                                          int src_width, int src_height);
 
diff --git a/src/mesa/drivers/common/meta_copy_image.c b/src/mesa/drivers/common/meta_copy_image.c
index 149ed18503c..33490ee6615 100644
--- a/src/mesa/drivers/common/meta_copy_image.c
+++ b/src/mesa/drivers/common/meta_copy_image.c
@@ -35,6 +35,46 @@
 #include "mtypes.h"
 #include "meta.h"
 
+/**
+ * Create a temporary texture image that wraps a renderbuffer.  Returns NULL
+ * (after freeing the texture object) on failure; else the caller deletes it.
+ */
+static struct gl_texture_image *
+wrap_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb)
+{
+   const GLenum texTarget =
+      rb->NumSamples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
+   struct gl_texture_object *texObj;
+   struct gl_texture_image *texImage;
+
+   /* Texture ID is not significant since it never goes into the hash table */
+   texObj = ctx->Driver.NewTextureObject(ctx, 0, texTarget);
+   assert(texObj);
+   if (!texObj)
+      return NULL;
+
+   texImage = _mesa_get_tex_image(ctx, texObj, texTarget, 0);
+   assert(texImage);
+   if (!texImage)
+      goto fail;
+
+   if (!ctx->Driver.BindRenderbufferTexImage(ctx, rb, texImage)) {
+      _mesa_problem(ctx, "Failed to create texture from renderbuffer");
+      goto fail;
+   }
+
+   if (ctx->Driver.FinishRenderTexture && !rb->NeedsFinishRenderTexture) {
+      rb->NeedsFinishRenderTexture = true;
+      ctx->Driver.FinishRenderTexture(ctx, rb);
+   }
+
+   return texImage;
+
+fail:
+   ctx->Driver.DeleteTexture(ctx, texObj);
+   return NULL;
+}
+
 /* This function makes a texture view without bothering with all of the API
  * checks.  Most of them are the same for CopyTexSubImage so checking would
  * be redundant.  The one major difference is that we don't check for
@@ -112,11 +152,15 @@ make_view(struct gl_context *ctx, struct gl_texture_image *tex_image,
 bool
 _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
                                          struct gl_texture_image *src_tex_image,
+                                         struct gl_renderbuffer *src_renderbuffer,
                                          int src_x, int src_y, int src_z,
                                          struct gl_texture_image *dst_tex_image,
+                                         struct gl_renderbuffer *dst_renderbuffer,
                                          int dst_x, int dst_y, int dst_z,
                                          int src_width, int src_height)
 {
+   mesa_format src_format, dst_format;
+   GLint src_internal_format, dst_internal_format;
    GLuint src_view_texture = 0;
    struct gl_texture_image *src_view_tex_image;
    GLuint fbos[2];
@@ -124,15 +168,37 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
    GLbitfield mask;
    GLenum status, attachment;
 
-   if (_mesa_is_format_compressed(dst_tex_image->TexFormat))
+   if (src_renderbuffer) {
+      src_format = src_renderbuffer->Format;
+      src_internal_format = src_renderbuffer->InternalFormat;
+   } else {
+      assert(src_tex_image);
+      src_format = src_tex_image->TexFormat;
+      src_internal_format = src_tex_image->InternalFormat;
+   }
+
+   if (dst_renderbuffer) {
+      dst_format = dst_renderbuffer->Format;
+      dst_internal_format = dst_renderbuffer->InternalFormat;
+   } else {
+      assert(dst_tex_image);
+      dst_format = dst_tex_image->TexFormat;
+      dst_internal_format = dst_tex_image->InternalFormat;
+   }
+
+   if (_mesa_is_format_compressed(src_format))
       return false;
 
-   if (_mesa_is_format_compressed(src_tex_image->TexFormat))
+   if (_mesa_is_format_compressed(dst_format))
       return false;
 
-   if (src_tex_image->InternalFormat == dst_tex_image->InternalFormat) {
+   if (src_internal_format == dst_internal_format) {
       src_view_tex_image = src_tex_image;
    } else {
+      if (src_renderbuffer) {
+         assert(src_tex_image == NULL);
+         src_tex_image = wrap_renderbuffer(ctx, src_renderbuffer);
+      }
       if (!make_view(ctx, src_tex_image, &src_view_tex_image, &src_view_texture,
-                     dst_tex_image->InternalFormat))
+                     dst_internal_format))
          goto cleanup;
@@ -145,7 +211,7 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
    _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbos[0]);
    _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbos[1]);
 
-   switch (_mesa_get_format_base_format(src_tex_image->TexFormat)) {
+   switch (_mesa_get_format_base_format(src_format)) {
    case GL_DEPTH_COMPONENT:
       attachment = GL_DEPTH_ATTACHMENT;
       mask = GL_DEPTH_BUFFER_BIT;
@@ -165,15 +231,32 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
       _mesa_ReadBuffer(GL_COLOR_ATTACHMENT0);
    }
 
-   _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, attachment,
-                             src_view_tex_image, src_z);
+   if (src_view_tex_image) {
+      /* Prefer the tex image because, even if we have a renderbuffer, we may
+       * have had to wrap it in a texture view.
+       */
+      _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, attachment,
+                                src_view_tex_image, src_z);
+   } else {
+      _mesa_FramebufferRenderbuffer(GL_READ_FRAMEBUFFER,
+                                    attachment,
+                                    GL_RENDERBUFFER,
+                                    src_renderbuffer->Name);
+   }
 
    status = _mesa_CheckFramebufferStatus(GL_READ_FRAMEBUFFER);
    if (status != GL_FRAMEBUFFER_COMPLETE)
       goto meta_end;
 
-   _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, attachment,
-                             dst_tex_image, dst_z);
+   if (dst_renderbuffer) {
+      _mesa_FramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER,
+                                    attachment,
+                                    GL_RENDERBUFFER,
+                                    dst_renderbuffer->Name);
+   } else {
+      _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, attachment,
+                                dst_tex_image, dst_z);
+   }
 
    status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER);
    if (status != GL_FRAMEBUFFER_COMPLETE)
@@ -205,5 +288,9 @@ meta_end:
 cleanup:
    _mesa_DeleteTextures(1, &src_view_texture);
 
+   /* If we got a renderbuffer source, delete the temporary texture */
+   if (src_renderbuffer && src_tex_image)
+      ctx->Driver.DeleteTexture(ctx, src_tex_image->TexObject);
+
    return success;
 }
diff --git a/src/mesa/drivers/dri/i965/intel_copy_image.c b/src/mesa/drivers/dri/i965/intel_copy_image.c
index ac2738f59a0..d57651cef5f 100644
--- a/src/mesa/drivers/dri/i965/intel_copy_image.c
+++ b/src/mesa/drivers/dri/i965/intel_copy_image.c
@@ -25,10 +25,12 @@
  *    Jason Ekstrand <jason.ekstrand@intel.com>
  */
 
+#include "intel_fbo.h"
 #include "intel_tex.h"
 #include "intel_blit.h"
 #include "intel_mipmap_tree.h"
 #include "main/formats.h"
+#include "main/teximage.h"
 #include "drivers/common/meta.h"
 
 static bool
@@ -196,54 +198,86 @@ copy_image_with_memcpy(struct brw_context *brw,
    }
 }
 
+
 static void
 intel_copy_image_sub_data(struct gl_context *ctx,
                           struct gl_texture_image *src_image,
+                          struct gl_renderbuffer *src_renderbuffer,
                           int src_x, int src_y, int src_z,
                           struct gl_texture_image *dst_image,
+                          struct gl_renderbuffer *dst_renderbuffer,
                           int dst_x, int dst_y, int dst_z,
                           int src_width, int src_height)
 {
    struct brw_context *brw = brw_context(ctx);
-   struct intel_texture_image *intel_src_image = intel_texture_image(src_image);
-   struct intel_texture_image *intel_dst_image = intel_texture_image(dst_image);
+   struct intel_mipmap_tree *src_mt, *dst_mt;
+   unsigned src_level, dst_level;
 
    if (_mesa_meta_CopyImageSubData_uncompressed(ctx,
-                                                src_image, src_x, src_y, src_z,
-                                                dst_image, dst_x, dst_y, dst_z,
+                                                src_image, src_renderbuffer,
+                                                src_x, src_y, src_z,
+                                                dst_image, dst_renderbuffer,
+                                                dst_x, dst_y, dst_z,
                                                 src_width, src_height)) {
       return;
    }
 
-   if (intel_src_image->mt->num_samples > 0 ||
-       intel_dst_image->mt->num_samples > 0) {
+   if (src_image) {
+      src_mt = intel_texture_image(src_image)->mt;
+   } else {
+      assert(src_renderbuffer);
+      src_mt = intel_renderbuffer(src_renderbuffer)->mt;
+      src_image = src_renderbuffer->TexImage;
+   }
+
+   if (dst_image) {
+      dst_mt = intel_texture_image(dst_image)->mt;
+   } else {
+      assert(dst_renderbuffer);
+      dst_mt = intel_renderbuffer(dst_renderbuffer)->mt;
+      dst_image = dst_renderbuffer->TexImage;
+   }
+
+   if (src_mt->num_samples > 0 || dst_mt->num_samples > 0) {
       _mesa_problem(ctx, "Failed to copy multisampled texture with meta path\n");
       return;
    }
 
-   /* Cube maps actually have different images per face */
-   if (src_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
-      src_z = src_image->Face;
-   if (dst_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
-      dst_z = dst_image->Face;
+   if (src_image) {
+      src_level = src_image->Level + src_image->TexObject->MinLevel;
+
+      /* Cube maps actually have different images per face */
+      if (src_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
+         src_z = src_image->Face;
+   } else {
+      src_level = 0;
+   }
+
+   if (dst_image) {
+      dst_level = dst_image->Level + dst_image->TexObject->MinLevel;
+
+      /* Cube maps actually have different images per face */
+      if (dst_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
+         dst_z = dst_image->Face;
+   } else {
+      dst_level = 0;
+   }
 
    /* We are now going to try and copy the texture using the blitter.  If
     * that fails, we will fall back mapping the texture and using memcpy.
     * In either case, we need to do a full resolve.
     */
-   intel_miptree_all_slices_resolve_hiz(brw, intel_src_image->mt);
-   intel_miptree_all_slices_resolve_depth(brw, intel_src_image->mt);
-   intel_miptree_resolve_color(brw, intel_src_image->mt);
+   intel_miptree_all_slices_resolve_hiz(brw, src_mt);
+   intel_miptree_all_slices_resolve_depth(brw, src_mt);
+   intel_miptree_resolve_color(brw, src_mt);
 
-   intel_miptree_all_slices_resolve_hiz(brw, intel_dst_image->mt);
-   intel_miptree_all_slices_resolve_depth(brw, intel_dst_image->mt);
-   intel_miptree_resolve_color(brw, intel_dst_image->mt);
+   intel_miptree_all_slices_resolve_hiz(brw, dst_mt);
+   intel_miptree_all_slices_resolve_depth(brw, dst_mt);
+   intel_miptree_resolve_color(brw, dst_mt);
 
-   unsigned src_level = src_image->Level + src_image->TexObject->MinLevel;
-   unsigned dst_level = dst_image->Level + dst_image->TexObject->MinLevel;
-   if (copy_image_with_blitter(brw, intel_src_image->mt, src_level,
+   if (copy_image_with_blitter(brw, src_mt, src_level,
                                src_x, src_y, src_z,
-                               intel_dst_image->mt, dst_level,
+                               dst_mt, dst_level,
                                dst_x, dst_y, dst_z,
                                src_width, src_height))
       return;
@@ -251,9 +285,9 @@ intel_copy_image_sub_data(struct gl_context *ctx,
    /* This is a worst-case scenario software fallback that maps the two
     * textures and does a memcpy between them.
     */
-   copy_image_with_memcpy(brw, intel_src_image->mt, src_level,
+   copy_image_with_memcpy(brw, src_mt, src_level,
                           src_x, src_y, src_z,
-                          intel_dst_image->mt, dst_level,
+                          dst_mt, dst_level,
                           dst_x, dst_y, dst_z,
                           src_width, src_height);
 }
diff --git a/src/mesa/main/copyimage.c b/src/mesa/main/copyimage.c
index 05bc50dd2c6..f02e842f34d 100644
--- a/src/mesa/main/copyimage.c
+++ b/src/mesa/main/copyimage.c
@@ -41,22 +41,27 @@ enum mesa_block_class {
 };
 
 /**
- * Prepare the source or destination resource, including:
- * - Error checking
- * - Creating texture wrappers for renderbuffers
+ * Prepare the source or destination resource.  This involves error
+ * checking and returning the relevant gl_texture_image or gl_renderbuffer.
+ * Note that one of the resulting tex_image or renderbuffer pointers will be
+ * NULL and the other will be non-null.
+ *
  * \param name  the texture or renderbuffer name
- * \param target  GL_TEXTURE target or GL_RENDERBUFFER.  For the later, will
- *                be changed to a compatible GL_TEXTURE target.
+ * \param target  One of GL_TEXTURE_x target or GL_RENDERBUFFER
  * \param level  mipmap level
- * \param tex_obj  returns a pointer to a texture object
+ * \param z  src or dest Z
+ * \param depth  number of slices/faces/layers to copy
  * \param tex_image  returns a pointer to a texture image
- * \param tmp_tex  returns temporary texture object name
+ * \param renderbuffer  returns a pointer to a renderbuffer
  * \return true if success, false if error
  */
 static bool
-prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
-               struct gl_texture_object **tex_obj,
-               struct gl_texture_image **tex_image, GLuint *tmp_tex,
+prepare_target(struct gl_context *ctx, GLuint name, GLenum target,
+               int level, int z, int depth,
+               struct gl_texture_image **tex_image,
+               struct gl_renderbuffer **renderbuffer,
+               mesa_format *format,
+               GLenum *internalFormat,
                const char *dbg_prefix)
 {
    if (name == 0) {
@@ -72,7 +77,7 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
     *   - is TEXTURE_BUFFER, or
     *   - is one of the cubemap face selectors described in table 3.17,
     */
-   switch (*target) {
+   switch (target) {
    case GL_RENDERBUFFER:
       /* Not a texture target, but valid */
    case GL_TEXTURE_1D:
@@ -93,12 +98,13 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
    default:
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glCopyImageSubData(%sTarget = %s)", dbg_prefix,
-                  _mesa_enum_to_string(*target));
+                  _mesa_enum_to_string(target));
       return false;
    }
 
-   if (*target == GL_RENDERBUFFER) {
+   if (target == GL_RENDERBUFFER) {
       struct gl_renderbuffer *rb = _mesa_lookup_renderbuffer(ctx, name);
+
       if (!rb) {
          _mesa_error(ctx, GL_INVALID_VALUE,
                      "glCopyImageSubData(%sName = %u)", dbg_prefix, name);
@@ -117,49 +123,38 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
          return false;
       }
 
-      if (rb->NumSamples > 1)
-         *target = GL_TEXTURE_2D_MULTISAMPLE;
-      else
-         *target = GL_TEXTURE_2D;
-
-      *tmp_tex = 0;
-      _mesa_GenTextures(1, tmp_tex);
-      if (*tmp_tex == 0)
-         return false; /* Error already set by GenTextures */
-
-      _mesa_BindTexture(*target, *tmp_tex);
-      *tex_obj = _mesa_lookup_texture(ctx, *tmp_tex);
-      *tex_image = _mesa_get_tex_image(ctx, *tex_obj, *target, 0);
-
-      if (!ctx->Driver.BindRenderbufferTexImage(ctx, rb, *tex_image)) {
-         _mesa_problem(ctx, "Failed to create texture from renderbuffer");
-         return false;
-      }
-
-      if (ctx->Driver.FinishRenderTexture && !rb->NeedsFinishRenderTexture) {
-         rb->NeedsFinishRenderTexture = true;
-         ctx->Driver.FinishRenderTexture(ctx, rb);
-      }
+      *renderbuffer = rb;
+      *format = rb->Format;
+      *internalFormat = rb->InternalFormat;
+      *tex_image = NULL;
    } else {
-      *tex_obj = _mesa_lookup_texture(ctx, name);
-      if (!*tex_obj) {
+      struct gl_texture_object *texObj = _mesa_lookup_texture(ctx, name);
+
+      if (!texObj) {
          _mesa_error(ctx, GL_INVALID_VALUE,
                      "glCopyImageSubData(%sName = %u)", dbg_prefix, name);
          return false;
       }
 
-      _mesa_test_texobj_completeness(ctx, *tex_obj);
-      if (!(*tex_obj)->_BaseComplete ||
-          (level != 0 && !(*tex_obj)->_MipmapComplete)) {
+      _mesa_test_texobj_completeness(ctx, texObj);
+      if (!texObj->_BaseComplete ||
+          (level != 0 && !texObj->_MipmapComplete)) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
                      "glCopyImageSubData(%sName incomplete)", dbg_prefix);
          return false;
       }
 
-      if ((*tex_obj)->Target != *target) {
-         _mesa_error(ctx, GL_INVALID_ENUM,
+      /* Note that target will not be a cube face name */
+      if (texObj->Target != target) {
+         /*
+          * From GL_ARB_copy_image specification:
+          * "INVALID_VALUE is generated if either <srcName> or <dstName> does
+          * not correspond to a valid renderbuffer or texture object according
+          * to the corresponding target parameter."
+          */
+         _mesa_error(ctx, GL_INVALID_VALUE,
                      "glCopyImageSubData(%sTarget = %s)", dbg_prefix,
-                     _mesa_enum_to_string(*target));
+                     _mesa_enum_to_string(target));
          return false;
       }
 
@@ -169,12 +164,36 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
          return false;
       }
 
-      *tex_image = _mesa_select_tex_image(*tex_obj, *target, level);
+      if (target == GL_TEXTURE_CUBE_MAP) {
+         int i;
+
+         assert(z < MAX_FACES);  /* should have been caught earlier */
+
+         /* make sure all the cube faces are present */
+         for (i = 0; i < depth; i++) {
+            if (!texObj->Image[z+i][level]) {
+               /* missing cube face */
+               _mesa_error(ctx, GL_INVALID_OPERATION,
+                           "glCopyImageSubData(missing cube face)");
+               return false;
+            }
+         }
+
+         *tex_image = texObj->Image[z][level];
+      }
+      else {
+         *tex_image = _mesa_select_tex_image(texObj, target, level);
+      }
+
       if (!*tex_image) {
          _mesa_error(ctx, GL_INVALID_VALUE,
                      "glCopyImageSubData(%sLevel = %u)", dbg_prefix, level);
          return false;
       }
+
+      *renderbuffer = NULL;
+      *format = (*tex_image)->TexFormat;
+      *internalFormat = (*tex_image)->InternalFormat;
    }
 
    return true;
@@ -188,10 +207,14 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
  */
 static bool
 check_region_bounds(struct gl_context *ctx,
+                    GLenum target,
                     const struct gl_texture_image *tex_image,
+                    const struct gl_renderbuffer *renderbuffer,
                     int x, int y, int z, int width, int height, int depth,
                     const char *dbg_prefix)
 {
+   int surfWidth, surfHeight, surfDepth;
+
    if (width < 0 || height < 0 || depth < 0) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glCopyImageSubData(%sWidth, %sHeight, or %sDepth is negative)",
@@ -207,7 +230,14 @@ check_region_bounds(struct gl_context *ctx,
    }
 
    /* Check X direction */
-   if (x + width > tex_image->Width) {
+   if (target == GL_RENDERBUFFER) {
+      surfWidth = renderbuffer->Width;
+   }
+   else {
+      surfWidth = tex_image->Width;
+   }
+
+   if (x + width > surfWidth) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glCopyImageSubData(%sX or %sWidth exceeds image bounds)",
                   dbg_prefix, dbg_prefix);
@@ -215,66 +245,49 @@ check_region_bounds(struct gl_context *ctx,
    }
 
    /* Check Y direction */
-   switch (tex_image->TexObject->Target) {
+   switch (target) {
+   case GL_RENDERBUFFER:
+      surfHeight = renderbuffer->Height;
+      break;
    case GL_TEXTURE_1D:
    case GL_TEXTURE_1D_ARRAY:
-      if (y != 0 || height != 1) {
-         _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glCopyImageSubData(%sY or %sHeight exceeds image bounds)",
-                     dbg_prefix, dbg_prefix);
-         return false;
-      }
+      surfHeight = 1;
       break;
    default:
-      if (y + height > tex_image->Height) {
-         _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glCopyImageSubData(%sY or %sHeight exceeds image bounds)",
-                     dbg_prefix, dbg_prefix);
-         return false;
-      }
-      break;
+      surfHeight = tex_image->Height;
+   }
+
+   if (y + height > surfHeight) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glCopyImageSubData(%sY or %sHeight exceeds image bounds)",
+                  dbg_prefix, dbg_prefix);
+      return false;
    }
 
    /* Check Z direction */
-   switch (tex_image->TexObject->Target) {
+   switch (target) {
+   case GL_RENDERBUFFER:
    case GL_TEXTURE_1D:
    case GL_TEXTURE_2D:
    case GL_TEXTURE_2D_MULTISAMPLE:
    case GL_TEXTURE_RECTANGLE:
-      if (z != 0 || depth != 1) {
-         _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glCopyImageSubData(%sZ or %sDepth exceeds image bounds)",
-                     dbg_prefix, dbg_prefix);
-         return false;
-      }
+      surfDepth = 1;
       break;
    case GL_TEXTURE_CUBE_MAP:
-      if (z < 0 || z + depth > 6) {
-         _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glCopyImageSubData(%sZ or %sDepth exceeds image bounds)",
-                     dbg_prefix, dbg_prefix);
-         return false;
-      }
+      surfDepth = 6;
       break;
    case GL_TEXTURE_1D_ARRAY:
-      if (z < 0 || z + depth > tex_image->Height) {
-         _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glCopyImageSubData(%sZ or %sDepth exceeds image bounds)",
-                     dbg_prefix, dbg_prefix);
-         return false;
-      }
-      break;
-   case GL_TEXTURE_CUBE_MAP_ARRAY:
-   case GL_TEXTURE_2D_ARRAY:
-   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
-   case GL_TEXTURE_3D:
-      if (z < 0 || z + depth > tex_image->Depth) {
-         _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glCopyImageSubData(%sZ or %sDepth exceeds image bounds)",
-                     dbg_prefix, dbg_prefix);
-         return false;
-      }
+      surfDepth = tex_image->Height;
       break;
+   default:
+      surfDepth = tex_image->Depth;
+   }
+
+   if (z < 0 || z + depth > surfDepth) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glCopyImageSubData(%sZ or %sDepth exceeds image bounds)",
+                  dbg_prefix, dbg_prefix);
+      return false;
    }
 
    return true;
@@ -406,10 +419,12 @@ _mesa_CopyImageSubData(GLuint srcName, GLenum srcTarget, GLint srcLevel,
                        GLsizei srcWidth, GLsizei srcHeight, GLsizei srcDepth)
 {
    GET_CURRENT_CONTEXT(ctx);
-   GLuint tmpTexNames[2] = { 0, 0 };
-   struct gl_texture_object *srcTexObj, *dstTexObj;
    struct gl_texture_image *srcTexImage, *dstTexImage;
+   struct gl_renderbuffer *srcRenderbuffer, *dstRenderbuffer;
+   mesa_format srcFormat, dstFormat;
+   GLenum srcIntFormat, dstIntFormat;
    GLuint src_bw, src_bh, dst_bw, dst_bh;
+   int dstWidth, dstHeight, dstDepth;
    int i;
 
    if (MESA_VERBOSE & VERBOSE_API)
@@ -420,7 +435,7 @@ _mesa_CopyImageSubData(GLuint srcName, GLenum srcTarget, GLint srcLevel,
                   srcX, srcY, srcZ,
                   dstName, _mesa_enum_to_string(dstTarget), dstLevel,
                   dstX, dstY, dstZ,
-                  srcWidth, srcHeight, srcWidth);
+                  srcWidth, srcHeight, srcDepth);
 
    if (!ctx->Extensions.ARB_copy_image) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
@@ -428,67 +443,93 @@ _mesa_CopyImageSubData(GLuint srcName, GLenum srcTarget, GLint srcLevel,
       return;
    }
 
-   if (!prepare_target(ctx, srcName, &srcTarget, srcLevel,
-                       &srcTexObj, &srcTexImage, &tmpTexNames[0], "src"))
-      goto cleanup;
+   if (!prepare_target(ctx, srcName, srcTarget, srcLevel, srcZ, srcDepth,
+                       &srcTexImage, &srcRenderbuffer, &srcFormat,
+                       &srcIntFormat, "src"))
+      return;
 
-   if (!prepare_target(ctx, dstName, &dstTarget, dstLevel,
-                       &dstTexObj, &dstTexImage, &tmpTexNames[1], "dst"))
-      goto cleanup;
+   if (!prepare_target(ctx, dstName, dstTarget, dstLevel, dstZ, srcDepth,
+                       &dstTexImage, &dstRenderbuffer, &dstFormat,
+                       &dstIntFormat, "dst"))
+      return;
 
-   _mesa_get_format_block_size(srcTexImage->TexFormat, &src_bw, &src_bh);
+   _mesa_get_format_block_size(srcFormat, &src_bw, &src_bh);
    if ((srcX % src_bw != 0) || (srcY % src_bh != 0) ||
        (srcWidth % src_bw != 0) || (srcHeight % src_bh != 0)) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glCopyImageSubData(unaligned src rectangle)");
-      goto cleanup;
+      return;
    }
 
-   _mesa_get_format_block_size(dstTexImage->TexFormat, &dst_bw, &dst_bh);
+   _mesa_get_format_block_size(dstFormat, &dst_bw, &dst_bh);
    if ((dstX % dst_bw != 0) || (dstY % dst_bh != 0)) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glCopyImageSubData(unaligned dst rectangle)");
-      goto cleanup;
+      return;
    }
 
-   if (!check_region_bounds(ctx, srcTexImage, srcX, srcY, srcZ,
-                            srcWidth, srcHeight, srcDepth, "src"))
-      goto cleanup;
+   /* From the GL_ARB_copy_image spec:
+    *
+    * "The dimensions are always specified in texels, even for compressed
+    * texture formats. But it should be noted that if only one of the
+    * source and destination textures is compressed then the number of
+    * texels touched in the compressed image will be a factor of the
+    * block size larger than in the uncompressed image."
+    *
+    * So, if copying from compressed to uncompressed, the dest region is
+    * shrunk by the src block size factor.  If copying from uncompressed
+    * to compressed, the dest region is grown by the dest block size factor.
+    * Note that we're passed the _source_ width, height, depth and those
+    * dimensions are never changed.
+    */
+   dstWidth = srcWidth * dst_bw / src_bw;
+   dstHeight = srcHeight * dst_bh / src_bh;
+   dstDepth = srcDepth;
+
+   if (!check_region_bounds(ctx, srcTarget, srcTexImage, srcRenderbuffer,
+                            srcX, srcY, srcZ, srcWidth, srcHeight, srcDepth,
+                            "src"))
+      return;
 
-   if (!check_region_bounds(ctx, dstTexImage, dstX, dstY, dstZ,
-                            (srcWidth / src_bw) * dst_bw,
-                            (srcHeight / src_bh) * dst_bh, srcDepth, "dst"))
-      goto cleanup;
+   if (!check_region_bounds(ctx, dstTarget, dstTexImage, dstRenderbuffer,
+                            dstX, dstY, dstZ, dstWidth, dstHeight, dstDepth,
+                            "dst"))
+      return;
 
-   if (!copy_format_compatible(ctx, srcTexImage->InternalFormat,
-                               dstTexImage->InternalFormat)) {
+   if (!copy_format_compatible(ctx, srcIntFormat, dstIntFormat)) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glCopyImageSubData(internalFormat mismatch)");
-      goto cleanup;
+      return;
    }
 
+   /* loop over 2D slices/faces/layers */
    for (i = 0; i < srcDepth; ++i) {
-      int srcNewZ, dstNewZ;
-
-      if (srcTexObj->Target == GL_TEXTURE_CUBE_MAP) {
-         srcTexImage = srcTexObj->Image[i + srcZ][srcLevel];
-         srcNewZ = 0;
-      } else {
-         srcNewZ = srcZ + i;
+      int newSrcZ = srcZ + i;
+      int newDstZ = dstZ + i;
+
+      if (srcTexImage &&
+          srcTexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP) {
+         /* need to update srcTexImage pointer for the cube face */
+         assert(srcZ + i < MAX_FACES);
+         srcTexImage = srcTexImage->TexObject->Image[srcZ + i][srcLevel];
+         assert(srcTexImage);
+         newSrcZ = 0;
       }
 
-      if (dstTexObj->Target == GL_TEXTURE_CUBE_MAP) {
-         dstTexImage = dstTexObj->Image[i + dstZ][dstLevel];
-         dstNewZ = 0;
-      } else {
-         dstNewZ = dstZ + i;
+      if (dstTexImage &&
+          dstTexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP) {
+         /* need to update dstTexImage pointer for the cube face */
+         assert(dstZ + i < MAX_FACES);
+         dstTexImage = dstTexImage->TexObject->Image[dstZ + i][dstLevel];
+         assert(dstTexImage);
+         newDstZ = 0;
       }
 
-      ctx->Driver.CopyImageSubData(ctx, srcTexImage, srcX, srcY, srcNewZ,
-                                   dstTexImage, dstX, dstY, dstNewZ,
+      ctx->Driver.CopyImageSubData(ctx,
+                                   srcTexImage, srcRenderbuffer,
+                                   srcX, srcY, newSrcZ,
+                                   dstTexImage, dstRenderbuffer,
+                                   dstX, dstY, newDstZ,
                                    srcWidth, srcHeight);
    }
-
-cleanup:
-   _mesa_DeleteTextures(2, tmpTexNames);
 }
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 87eb63ea374..2c746fc45de 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -269,20 +269,25 @@ struct dd_function_table {
                            struct gl_renderbuffer *rb,
                            GLint x, GLint y,
                            GLsizei width, GLsizei height);
-
    /**
     * Called by glCopyImageSubData().
     *
-    * This function should copy one 2-D slice from srcTexImage to
-    * dstTexImage.  If one of the textures is 3-D or is a 1-D or 2-D array
+    * This function should copy one 2-D slice from src_teximage or
+    * src_renderbuffer to dst_teximage or dst_renderbuffer.  Either the
+    * teximage or renderbuffer pointer will be non-null to indicate which
+    * is the real src/dst.
+    *
+    * If one of the textures is 3-D or is a 1-D or 2-D array
     * texture, this function will be called multiple times: once for each
     * slice.  If one of the textures is a cube map, this function will be
     * called once for each face to be copied.
     */
    void (*CopyImageSubData)(struct gl_context *ctx,
-                            struct gl_texture_image *src_image,
+                            struct gl_texture_image *src_teximage,
+                            struct gl_renderbuffer *src_renderbuffer,
                             int src_x, int src_y, int src_z,
-                            struct gl_texture_image *dstTexImage,
+                            struct gl_texture_image *dst_teximage,
+                            struct gl_renderbuffer *dst_renderbuffer,
                             int dst_x, int dst_y, int dst_z,
                             int src_width, int src_height);
 
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 93335aefe6c..5d25fed317e 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1879,22 +1879,45 @@ st_TextureView(struct gl_context *ctx,
 static void
 st_CopyImageSubData(struct gl_context *ctx,
                     struct gl_texture_image *src_image,
+                    struct gl_renderbuffer *src_renderbuffer,
                     int src_x, int src_y, int src_z,
                     struct gl_texture_image *dst_image,
+                    struct gl_renderbuffer *dst_renderbuffer,
                     int dst_x, int dst_y, int dst_z,
                     int src_width, int src_height)
 {
    struct st_context *st = st_context(ctx);
    struct pipe_context *pipe = st->pipe;
-   struct st_texture_image *src = st_texture_image(src_image);
-   struct st_texture_image *dst = st_texture_image(dst_image);
-
+   struct pipe_resource *src_res, *dst_res;
    struct pipe_box box;
+   int src_level, dst_level;
+
+   if (src_image) {
+      struct st_texture_image *src = st_texture_image(src_image);
+      src_res = src->pt;
+      src_level = src_image->Level;
+   }
+   else {
+      struct st_renderbuffer *src = st_renderbuffer(src_renderbuffer);
+      src_res = src->texture;
+      src_level = 0;
+   }
+
+   if (dst_image) {
+      struct st_texture_image *dst = st_texture_image(dst_image);
+      dst_res = dst->pt;
+      dst_level = dst_image->Level;
+   }
+   else {
+      struct st_renderbuffer *dst = st_renderbuffer(dst_renderbuffer);
+      dst_res = dst->texture;
+      dst_level = 0;
+   }
 
    u_box_2d_zslice(src_x, src_y, src_z, src_width, src_height, &box);
-   pipe->resource_copy_region(pipe, dst->pt, dst_image->Level,
+   pipe->resource_copy_region(pipe, dst_res, dst_level,
                               dst_x, dst_y, dst_z,
-                              src->pt, src_image->Level,
+                              src_res, src_level,
                               &box);
 }
 
-- 
cgit v1.2.3


From 2ea16966ae66d4dd5c5dcb996d7996d9c734bbee Mon Sep 17 00:00:00 2001
From: Kristian Høgsberg Kristensen <krh@bitplanet.net>
Date: Wed, 23 Sep 2015 16:57:47 -0700
Subject: i965: Respect stride and subreg_offset for ATTR registers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When we assign hw regs to attributes, we don't incorporate the stride
and subreg_offset from the fs_reg. They're rarely used, but the integer
multiplication lowering uses an unusual stride and subreg_offset
combination, which breaks when one source is an attribute.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91970
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Kristian Høgsberg Kristensen <krh@bitplanet.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 225a3122c79..618bbd2866a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1564,7 +1564,10 @@ fs_visitor::assign_vs_urb_setup()
 
             inst->src[i].file = HW_REG;
             inst->src[i].fixed_hw_reg =
-               retype(brw_vec8_grf(grf, 0), inst->src[i].type);
+               stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
+                                  inst->src[i].subreg_offset),
+                      inst->exec_size * inst->src[i].stride,
+                      inst->exec_size, inst->src[i].stride);
          }
       }
    }
-- 
cgit v1.2.3


From 21c1c7ff81cb3cca9c1f5d907fe6c17219b5a137 Mon Sep 17 00:00:00 2001
From: Kristian Høgsberg Kristensen <krh@bitplanet.net>
Date: Thu, 24 Sep 2015 10:51:10 -0700
Subject: wayland: Add copyright notice for wayland-egl.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Kristian Høgsberg Kristensen <krh@bitplanet.net>
---
 src/egl/wayland/wayland-egl/wayland-egl.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'src')

diff --git a/src/egl/wayland/wayland-egl/wayland-egl.c b/src/egl/wayland/wayland-egl/wayland-egl.c
index ae78595b92c..80a5be5e32c 100644
--- a/src/egl/wayland/wayland-egl/wayland-egl.c
+++ b/src/egl/wayland/wayland-egl/wayland-egl.c
@@ -1,3 +1,32 @@
+/*
+ * Copyright © 2011 Kristian Høgsberg
+ * Copyright © 2011 Benjamin Franzke
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Kristian Høgsberg <krh@bitplanet.net>
+ *    Benjamin Franzke <benjaminfranzke@googlemail.com>
+ */
+
 #include <stdlib.h>
 
 #include <wayland-client.h>
-- 
cgit v1.2.3


From f3a081953393c7d40bd8df9ec22a2551d01098f5 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Wed, 23 Sep 2015 02:33:24 +0200
Subject: st/mesa: fix front buffer regression after dropping st_validate_state
 in Blit

Broken by: d082c5324914212f76e45be497229c7a0681f706
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92072

Cc: 10.6 11.0 <mesa-stable@lists.freedesktop.org>
Tested-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/mesa/state_tracker/st_cb_blit.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src')

diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c
index 4fdef7fb5a6..a05a5aff4c6 100644
--- a/src/mesa/state_tracker/st_cb_blit.c
+++ b/src/mesa/state_tracker/st_cb_blit.c
@@ -229,6 +229,7 @@ st_BlitFramebuffer(struct gl_context *ctx,
                   st_adjust_blit_for_msaa_resolve(&blit);
 
                   st->pipe->blit(st->pipe, &blit);
+                  dstRb->defined = true; /* front buffer tracking */
                }
             }
          }
@@ -266,6 +267,7 @@ st_BlitFramebuffer(struct gl_context *ctx,
                   st_adjust_blit_for_msaa_resolve(&blit);
 
                   st->pipe->blit(st->pipe, &blit);
+                  dstRb->defined = true; /* front buffer tracking */
                }
             }
          }
-- 
cgit v1.2.3


From 29dff6f67656c8e6e09249cc76b6efa0c03353a7 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 10 Sep 2015 17:42:31 +0200
Subject: radeonsi: handle index buffer alloc failures
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_state_draw.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 8cb98d7317c..b4c59f805e5 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -782,6 +782,10 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
 			u_upload_alloc(sctx->b.uploader, start_offset, count * 2,
 				       &out_offset, &out_buffer, &ptr);
+			if (!out_buffer) {
+				pipe_resource_reference(&ib.buffer, NULL);
+				return;
+			}
 
 			util_shorten_ubyte_elts_to_userptr(&sctx->b.b, &ib, 0,
 							   ib.offset + start_offset,
@@ -802,6 +806,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 			u_upload_data(sctx->b.uploader, start_offset, count * ib.index_size,
 				      (char*)ib.user_buffer + start_offset,
 				      &ib.offset, &ib.buffer);
+			if (!ib.buffer)
+				return;
 			/* info->start will be added by the drawing code */
 			ib.offset -= start_offset;
 		}
-- 
cgit v1.2.3


From cd27ff6a0f85ca35f0f7f2fa7971692e0028e2ed Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 10 Sep 2015 17:42:31 +0200
Subject: radeonsi: handle constant buffer alloc failures
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 92a7068e715..b07ab3b94ac 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -472,7 +472,8 @@ void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuf
 
 	u_upload_alloc(sctx->b.uploader, 0, size, const_offset,
 		       (struct pipe_resource**)rbuffer, &tmp);
-	util_memcpy_cpu_to_le32(tmp, ptr, size);
+	if (*rbuffer) /* *rbuffer is NULL if the upload alloc failed */
+		util_memcpy_cpu_to_le32(tmp, ptr, size);
 }
 
 static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint slot,
@@ -504,6 +505,11 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
 			si_upload_const_buffer(sctx,
 					       (struct r600_resource**)&buffer, input->user_buffer,
 					       input->buffer_size, &buffer_offset);
+			if (!buffer) {
+				/* Just unbind on failure. */
+				si_set_constant_buffer(ctx, shader, slot, NULL);
+				return;
+			}
 			va = r600_resource(buffer)->gpu_address + buffer_offset;
 		} else {
 			pipe_resource_reference(&buffer, input->buffer);
-- 
cgit v1.2.3


From 282b3780123bace557fc90127bd35b075ea0873e Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 10 Sep 2015 17:53:28 +0200
Subject: gallium/radeon: handle buffer_map staging buffer failures better
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeon/r600_buffer_common.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index 4adcccbb8ed..f341ecb41a5 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -305,12 +305,11 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 				data += box->x % R600_MAP_BUFFER_ALIGNMENT;
 				return r600_buffer_get_transfer(ctx, resource, level, usage, box,
 								ptransfer, data, staging, offset);
-			} else {
-				return NULL; /* error, shouldn't occur though */
 			}
+		} else {
+			/* At this point, the buffer is always idle (we checked it above). */
+			usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
 		}
-		/* At this point, the buffer is always idle (we checked it above). */
-		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
 	}
 	/* Using a staging buffer in GTT for larger reads is much faster. */
 	else if ((usage & PIPE_TRANSFER_READ) &&
-- 
cgit v1.2.3


From f95e695059c428a21a1e1a27d5cd5ccce2a97b0e Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 10 Sep 2015 17:54:41 +0200
Subject: gallium/radeon: handle buffer alloc failures in r600_draw_rectangle
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src')

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 836da76c0bc..d97b721a12f 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -78,6 +78,9 @@ void r600_draw_rectangle(struct blitter_context *blitter,
 	 * I guess the 4th one is derived from the first 3.
 	 * The vertex specification should match u_blitter's vertex element state. */
 	u_upload_alloc(rctx->uploader, 0, sizeof(float) * 24, &offset, &buf, (void**)&vb);
+	if (!buf)
+		return;
+
 	vb[0] = x1;
 	vb[1] = y1;
 	vb[2] = depth;
-- 
cgit v1.2.3


From dea834e639715caa9517a695a3fb04d5de9aa069 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 10 Sep 2015 18:14:36 +0200
Subject: gallium/radeon: add a fail path for depth MSAA texture readback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeon/r600_texture.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'src')

diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index a0259160f8c..fc69f48bb70 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -989,6 +989,11 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
 
 			if (usage & PIPE_TRANSFER_READ) {
 				struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);
+				if (!temp) {
+					R600_ERR("failed to create a temporary depth texture\n");
+					FREE(trans);
+					return NULL;
+				}
 
 				r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
 				rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
-- 
cgit v1.2.3


From 394d67a58f949245e8b3fad400e9efaa5829ec84 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 10 Sep 2015 18:15:40 +0200
Subject: radeonsi: report alloc failure from si_shader_binary_read
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 2e49a215763..a3df64824c6 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3774,11 +3774,14 @@ int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader)
 {
 	const struct radeon_shader_binary *binary = &shader->binary;
 	unsigned i;
+	int r;
 	bool dump  = r600_can_dump_shader(&sscreen->b,
 		shader->selector ? shader->selector->tokens : NULL);
 
 	si_shader_binary_read_config(sscreen, shader, 0);
-	si_shader_binary_upload(sscreen, shader);
+	r = si_shader_binary_upload(sscreen, shader);
+	if (r)
+		return r;
 
 	if (dump) {
 		if (!(sscreen->b.debug_flags & DBG_NO_ASM)) {
-- 
cgit v1.2.3


From 5c219ab55239ceef3285262ff68a502e419061e0 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 10 Sep 2015 18:16:26 +0200
Subject: radeonsi: add malloc fail paths to si_create_shader_state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index db45cc181c6..1f1965f6a56 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -666,8 +666,16 @@ static void *si_create_shader_state(struct pipe_context *ctx,
 	struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
 	int i;
 
+	if (!sel)
+		return NULL;
+
 	sel->type = pipe_shader_type;
 	sel->tokens = tgsi_dup_tokens(state->tokens);
+	if (!sel->tokens) {
+		FREE(sel);
+		return NULL;
+	}
+
 	sel->so = state->stream_output;
 	tgsi_scan_shader(state->tokens, &sel->info);
 	p_atomic_inc(&sscreen->b.num_shaders_created);
-- 
cgit v1.2.3


From 22d3ccf5a814bfc768e373d0c983a356f4e4efe3 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 10 Sep 2015 18:27:53 +0200
Subject: radeonsi: skip drawing if the tess factor ring allocation fails
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_state.h         |  2 +-
 src/gallium/drivers/radeonsi/si_state_draw.c    |  4 ++--
 src/gallium/drivers/radeonsi/si_state_shaders.c | 11 +++++++++--
 3 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 900b70f804c..3fc0799c2b4 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -277,7 +277,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
 			      unsigned force_level);
 
 /* si_state_shader.c */
-void si_update_shaders(struct si_context *sctx);
+bool si_update_shaders(struct si_context *sctx);
 void si_init_shader_functions(struct si_context *sctx);
 
 /* si_state_draw.c */
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index b4c59f805e5..6d8e0e509bf 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -759,8 +759,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	else
 		sctx->current_rast_prim = info->mode;
 
-	si_update_shaders(sctx);
-	if (!si_upload_shader_descriptors(sctx))
+	if (!si_update_shaders(sctx) ||
+	    !si_upload_shader_descriptors(sctx))
 		return;
 
 	if (info->indexed) {
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 1f1965f6a56..11b58e8b3ca 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1279,6 +1279,9 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
 	sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
 					   PIPE_USAGE_DEFAULT,
 					   32768 * sctx->screen->b.info.max_se);
+	if (!sctx->tf_ring)
+		return;
+
 	assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
 
 	/* Append these registers to the init config state. */
@@ -1385,15 +1388,18 @@ static void si_update_so(struct si_context *sctx, struct si_shader_selector *sha
 	sctx->b.streamout.stride_in_dw = shader->so.stride;
 }
 
-void si_update_shaders(struct si_context *sctx)
+bool si_update_shaders(struct si_context *sctx)
 {
 	struct pipe_context *ctx = (struct pipe_context*)sctx;
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 
 	/* Update stages before GS. */
 	if (sctx->tes_shader) {
-		if (!sctx->tf_ring)
+		if (!sctx->tf_ring) {
 			si_init_tess_factor_ring(sctx);
+			if (!sctx->tf_ring)
+				return false;
+		}
 
 		/* VS as LS */
 		si_shader_select(ctx, sctx->vs_shader);
@@ -1487,6 +1493,7 @@ void si_update_shaders(struct si_context *sctx)
 		if (sctx->b.chip_class == SI)
 			si_mark_atom_dirty(sctx, &sctx->db_render_state);
 	}
+	return true;
 }
 
 void si_init_shader_functions(struct si_context *sctx)
-- 
cgit v1.2.3


From 263f5a2cf97e455e48dbd7728cb0ac10fd699746 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 10 Sep 2015 18:27:53 +0200
Subject: radeonsi: skip drawing if GS ring allocations fail
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 11b58e8b3ca..bc7fdb3e94a 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1069,9 +1069,15 @@ static void si_init_gs_rings(struct si_context *sctx)
 
 	sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
 				       PIPE_USAGE_DEFAULT, esgs_ring_size);
+	if (!sctx->esgs_ring)
+		return;
 
 	sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
 					     PIPE_USAGE_DEFAULT, gsvs_ring_size);
+	if (!sctx->gsvs_ring) {
+		pipe_resource_reference(&sctx->esgs_ring, NULL);
+		return;
+	}
 
 	/* Append these registers to the init config state. */
 	if (sctx->b.chip_class >= CIK) {
@@ -1443,8 +1449,11 @@ bool si_update_shaders(struct si_context *sctx)
 		si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
 		si_update_so(sctx, sctx->gs_shader);
 
-		if (!sctx->gsvs_ring)
+		if (!sctx->gsvs_ring) {
 			si_init_gs_rings(sctx);
+			if (!sctx->gsvs_ring)
+				return false;
+		}
 
 		si_update_gs_rings(sctx);
 	} else {
-- 
cgit v1.2.3


From 5dbadb02572f875f5d2f22327eab6b93de61337c Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 10 Sep 2015 18:30:41 +0200
Subject: radeonsi: handle shader precompile failures
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index bc7fdb3e94a..74e113668aa 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -736,7 +736,12 @@ static void *si_create_shader_state(struct pipe_context *ctx,
 	}
 
 	if (sscreen->b.debug_flags & DBG_PRECOMPILE)
-		si_shader_select(ctx, sel);
+		if (si_shader_select(ctx, sel)) {
+			fprintf(stderr, "radeonsi: can't create a shader\n");
+			tgsi_free_tokens(sel->tokens);
+			FREE(sel);
+			return NULL;
+		}
 
 	return sel;
 }
-- 
cgit v1.2.3


From 9b6d9dd7d8d2779e2bd08787c26f51512b84f6d2 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 10 Sep 2015 18:31:33 +0200
Subject: radeonsi: handle fixed-func TCS shader create failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 74e113668aa..43d2e88d102 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1351,7 +1351,6 @@ static void si_generate_fixed_func_tcs(struct si_context *sctx)
 
 	sctx->fixed_func_tcs_shader =
 		ureg_create_shader_and_destroy(ureg, &sctx->b.b);
-	assert(sctx->fixed_func_tcs_shader);
 }
 
 static void si_update_vgt_shader_config(struct si_context *sctx)
@@ -1420,8 +1419,12 @@ bool si_update_shaders(struct si_context *sctx)
 			si_shader_select(ctx, sctx->tcs_shader);
 			si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
 		} else {
-			if (!sctx->fixed_func_tcs_shader)
+			if (!sctx->fixed_func_tcs_shader) {
 				si_generate_fixed_func_tcs(sctx);
+				if (!sctx->fixed_func_tcs_shader)
+					return false;
+			}
+
 			si_shader_select(ctx, sctx->fixed_func_tcs_shader);
 			si_pm4_bind_state(sctx, hs,
 					  sctx->fixed_func_tcs_shader->current->pm4);
-- 
cgit v1.2.3


From 237d7cccce15b91590afa90020ff71324217fdb9 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 10 Sep 2015 18:32:22 +0200
Subject: radeonsi: skip drawing if VS, TCS, TES, GS fail to compile or upload
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 30 +++++++++++++++++++------
 1 file changed, 23 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 43d2e88d102..aabd499c629 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1402,6 +1402,7 @@ bool si_update_shaders(struct si_context *sctx)
 {
 	struct pipe_context *ctx = (struct pipe_context*)sctx;
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+	int r;
 
 	/* Update stages before GS. */
 	if (sctx->tes_shader) {
@@ -1412,11 +1413,15 @@ bool si_update_shaders(struct si_context *sctx)
 		}
 
 		/* VS as LS */
-		si_shader_select(ctx, sctx->vs_shader);
+		r = si_shader_select(ctx, sctx->vs_shader);
+		if (r)
+			return false;
 		si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
 
 		if (sctx->tcs_shader) {
-			si_shader_select(ctx, sctx->tcs_shader);
+			r = si_shader_select(ctx, sctx->tcs_shader);
+			if (r)
+				return false;
 			si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
 		} else {
 			if (!sctx->fixed_func_tcs_shader) {
@@ -1425,12 +1430,17 @@ bool si_update_shaders(struct si_context *sctx)
 					return false;
 			}
 
-			si_shader_select(ctx, sctx->fixed_func_tcs_shader);
+			r = si_shader_select(ctx, sctx->fixed_func_tcs_shader);
+			if (r)
+				return false;
 			si_pm4_bind_state(sctx, hs,
 					  sctx->fixed_func_tcs_shader->current->pm4);
 		}
 
-		si_shader_select(ctx, sctx->tes_shader);
+		r = si_shader_select(ctx, sctx->tes_shader);
+		if (r)
+			return false;
+
 		if (sctx->gs_shader) {
 			/* TES as ES */
 			si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
@@ -1441,18 +1451,24 @@ bool si_update_shaders(struct si_context *sctx)
 		}
 	} else if (sctx->gs_shader) {
 		/* VS as ES */
-		si_shader_select(ctx, sctx->vs_shader);
+		r = si_shader_select(ctx, sctx->vs_shader);
+		if (r)
+			return false;
 		si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
 	} else {
 		/* VS as VS */
-		si_shader_select(ctx, sctx->vs_shader);
+		r = si_shader_select(ctx, sctx->vs_shader);
+		if (r)
+			return false;
 		si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
 		si_update_so(sctx, sctx->vs_shader);
 	}
 
 	/* Update GS. */
 	if (sctx->gs_shader) {
-		si_shader_select(ctx, sctx->gs_shader);
+		r = si_shader_select(ctx, sctx->gs_shader);
+		if (r)
+			return false;
 		si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
 		si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
 		si_update_so(sctx, sctx->gs_shader);
-- 
cgit v1.2.3


From 1f99b0be7e4e300c2b1761437f740a604ab036bc Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 10 Sep 2015 18:33:10 +0200
Subject: radeonsi: skip drawing if PS fails to compile or upload
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index aabd499c629..b006f524b81 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1487,18 +1487,9 @@ bool si_update_shaders(struct si_context *sctx)
 
 	si_update_vgt_shader_config(sctx);
 
-	si_shader_select(ctx, sctx->ps_shader);
-
-	if (!sctx->ps_shader->current) {
-		struct si_shader_selector *sel;
-
-		/* use a dummy shader if compiling the shader (variant) failed */
-		si_make_dummy_ps(sctx);
-		sel = sctx->dummy_pixel_shader;
-		si_shader_select(ctx, sel);
-		sctx->ps_shader->current = sel->current;
-	}
-
+	r = si_shader_select(ctx, sctx->ps_shader);
+	if (r)
+		return false;
 	si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
 
 	if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
-- 
cgit v1.2.3


From d556346b3590e8d5601c0831577f08e7b1ccecec Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 10 Sep 2015 18:40:51 +0200
Subject: radeonsi: skip drawing if updating the scratch buffer fails
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 63 +++++++++++++++++++------
 1 file changed, 49 insertions(+), 14 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index b006f524b81..eb5d9f59e84 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1152,14 +1152,16 @@ static void si_update_gs_rings(struct si_context *sctx)
 }
 
 /**
- * @returns 1 if \p sel has been updated to use a new scratch buffer and 0
- *          otherwise.
+ * @returns 1 if \p sel has been updated to use a new scratch buffer
+ *          0 if not
+ *          < 0 if there was a failure
  */
-static unsigned si_update_scratch_buffer(struct si_context *sctx,
+static int si_update_scratch_buffer(struct si_context *sctx,
 				    struct si_shader_selector *sel)
 {
 	struct si_shader *shader;
 	uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
+	int r;
 
 	if (!sel)
 		return 0;
@@ -1180,7 +1182,9 @@ static unsigned si_update_scratch_buffer(struct si_context *sctx,
 	si_shader_apply_scratch_relocs(sctx, shader, scratch_va);
 
 	/* Replace the shader bo with a new bo that has the relocs applied. */
-	si_shader_binary_upload(sctx->screen, shader);
+	r = si_shader_binary_upload(sctx->screen, shader);
+	if (r)
+		return r;
 
 	/* Update the shader state to use the new shader bo. */
 	si_shader_init_pm4_state(shader);
@@ -1219,7 +1223,7 @@ static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
 	return bytes;
 }
 
-static void si_update_spi_tmpring_size(struct si_context *sctx)
+static bool si_update_spi_tmpring_size(struct si_context *sctx)
 {
 	unsigned current_scratch_buffer_size =
 		si_get_current_scratch_buffer_size(sctx);
@@ -1227,6 +1231,7 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
 		si_get_max_scratch_bytes_per_wave(sctx);
 	unsigned scratch_needed_size = scratch_bytes_per_wave *
 		sctx->scratch_waves;
+	int r;
 
 	if (scratch_needed_size > 0) {
 
@@ -1239,6 +1244,8 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
 			sctx->scratch_buffer =
 					si_resource_create_custom(&sctx->screen->b.b,
 	                                PIPE_USAGE_DEFAULT, scratch_needed_size);
+			if (!sctx->scratch_buffer)
+				return false;
 		}
 
 		/* Update the shaders, so they are using the latest scratch.  The
@@ -1246,31 +1253,57 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
 		 * last used, so we still need to try to update them, even if
 		 * they require scratch buffers smaller than the current size.
 		 */
-		if (si_update_scratch_buffer(sctx, sctx->ps_shader))
+		r = si_update_scratch_buffer(sctx, sctx->ps_shader);
+		if (r < 0)
+			return false;
+		if (r == 1)
 			si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
-		if (si_update_scratch_buffer(sctx, sctx->gs_shader))
+
+		r = si_update_scratch_buffer(sctx, sctx->gs_shader);
+		if (r < 0)
+			return false;
+		if (r == 1)
 			si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
-		if (si_update_scratch_buffer(sctx, sctx->tcs_shader))
+
+		r = si_update_scratch_buffer(sctx, sctx->tcs_shader);
+		if (r < 0)
+			return false;
+		if (r == 1)
 			si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
 
 		/* VS can be bound as LS, ES, or VS. */
 		if (sctx->tes_shader) {
-			if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+			r = si_update_scratch_buffer(sctx, sctx->vs_shader);
+			if (r < 0)
+				return false;
+			if (r == 1)
 				si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
 		} else if (sctx->gs_shader) {
-			if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+			r = si_update_scratch_buffer(sctx, sctx->vs_shader);
+			if (r < 0)
+				return false;
+			if (r == 1)
 				si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
 		} else {
-			if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+			r = si_update_scratch_buffer(sctx, sctx->vs_shader);
+			if (r < 0)
+				return false;
+			if (r == 1)
 				si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
 		}
 
 		/* TES can be bound as ES or VS. */
 		if (sctx->gs_shader) {
-			if (si_update_scratch_buffer(sctx, sctx->tes_shader))
+			r = si_update_scratch_buffer(sctx, sctx->tes_shader);
+			if (r < 0)
+				return false;
+			if (r == 1)
 				si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
 		} else {
-			if (si_update_scratch_buffer(sctx, sctx->tes_shader))
+			r = si_update_scratch_buffer(sctx, sctx->tes_shader);
+			if (r < 0)
+				return false;
+			if (r == 1)
 				si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
 		}
 	}
@@ -1281,6 +1314,7 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
 
 	sctx->spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) |
 				S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
+	return true;
 }
 
 static void si_init_tess_factor_ring(struct si_context *sctx)
@@ -1502,7 +1536,8 @@ bool si_update_shaders(struct si_context *sctx)
 
 	if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
 	    si_pm4_state_changed(sctx, gs)) {
-		si_update_spi_tmpring_size(sctx);
+		if (!si_update_spi_tmpring_size(sctx))
+			return false;
 	}
 
 	if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) {
-- 
cgit v1.2.3


From b737d9c1dce073aa36b21c33cb30fbae89c38153 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 10 Sep 2015 18:42:22 +0200
Subject: radeonsi: don't forget to update scratch relocations for LS, HS, ES
 shaders
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index eb5d9f59e84..56f868d81db 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1534,8 +1534,12 @@ bool si_update_shaders(struct si_context *sctx)
 		si_mark_atom_dirty(sctx, &sctx->spi_map);
 	}
 
-	if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
-	    si_pm4_state_changed(sctx, gs)) {
+	if (si_pm4_state_changed(sctx, ls) ||
+	    si_pm4_state_changed(sctx, hs) ||
+	    si_pm4_state_changed(sctx, es) ||
+	    si_pm4_state_changed(sctx, gs) ||
+	    si_pm4_state_changed(sctx, vs) ||
+	    si_pm4_state_changed(sctx, ps)) {
 		if (!si_update_spi_tmpring_size(sctx))
 			return false;
 	}
-- 
cgit v1.2.3


From ae418a7b5640aadb625bb7a7d6e4aa0a98ccb77f Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 10 Sep 2015 19:25:14 +0200
Subject: radeonsi: handle dummy constant buffer allocation failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_pipe.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 01fa5252f71..9edee50ac8a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -180,6 +180,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	if (sctx->b.chip_class == CIK) {
 		sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
 								 PIPE_USAGE_DEFAULT, 16);
+		if (!sctx->null_const_buf.buffer)
+			goto fail;
 		sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;
 
 		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
-- 
cgit v1.2.3


From 7bbce21e458d912279159aa6ac122768c6c06551 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 10 Sep 2015 18:48:12 +0200
Subject: gallium/u_blitter: handle allocation failures
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: 11.0 <mesa-stable@lists.freedesktop.org>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/auxiliary/util/u_blitter.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src')

diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 9bba07aa18e..2fbf69c67d2 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -1190,6 +1190,8 @@ static void blitter_draw(struct blitter_context_priv *ctx,
 
    u_upload_data(ctx->upload, 0, sizeof(ctx->vertices), ctx->vertices,
                  &vb.buffer_offset, &vb.buffer);
+   if (!vb.buffer)
+      return;
    u_upload_unmap(ctx->upload);
 
    pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
@@ -2089,6 +2091,9 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,
 
    u_upload_data(ctx->upload, 0, num_channels*4, clear_value,
                  &vb.buffer_offset, &vb.buffer);
+   if (!vb.buffer)
+      goto out;
+
    vb.stride = 0;
 
    blitter_set_running_flag(ctx);
@@ -2112,6 +2117,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,
 
    util_draw_arrays(pipe, PIPE_PRIM_POINTS, 0, size / 4);
 
+out:
    blitter_restore_vertex_states(ctx);
    blitter_restore_render_cond(ctx);
    blitter_unset_running_flag(ctx);
-- 
cgit v1.2.3


From ebcc886d87e465f7bd3a172f9ff4dc825c238c5c Mon Sep 17 00:00:00 2001
From: Benjamin Bellec <b.bellec@gmail.com>
Date: Fri, 4 Sep 2015 20:27:22 +0200
Subject: gallium/radeon: remove the percentage symbol from HUD temperature
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The HUD adds '%' if max == 100.

Signed-off-by: Benjamin Bellec <b.bellec@gmail.com>
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index d97b721a12f..08839343b74 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -721,7 +721,7 @@ static int r600_get_driver_query_info(struct pipe_screen *screen,
 		{"VRAM-usage", R600_QUERY_VRAM_USAGE, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
 		{"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
 		{"GPU-load", R600_QUERY_GPU_LOAD, {100}},
-		{"temperature", R600_QUERY_GPU_TEMPERATURE, {100}},
+		{"temperature", R600_QUERY_GPU_TEMPERATURE, {125}},
 		{"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
 		{"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
 	};
-- 
cgit v1.2.3


From 1d040160f861955a14728bea48e697cfaed8e045 Mon Sep 17 00:00:00 2001
From: Mauro Rossi <issor.oruam@gmail.com>
Date: Wed, 23 Sep 2015 22:30:55 +0200
Subject: android: radeonsi: fix sid_tables.h missing LOCAL_MODULE_CLASS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/drivers/radeonsi/Android.mk | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/Android.mk b/src/gallium/drivers/radeonsi/Android.mk
index b469aca946e..7e5e54b3493 100644
--- a/src/gallium/drivers/radeonsi/Android.mk
+++ b/src/gallium/drivers/radeonsi/Android.mk
@@ -34,6 +34,7 @@ LOCAL_SHARED_LIBRARIES := libdrm libdrm_radeon
 LOCAL_MODULE := libmesa_pipe_radeonsi
 
 # generate sources
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
 intermediates := $(call local-generated-sources-dir)
 LOCAL_GENERATED_SOURCES := $(addprefix $(intermediates)/, $(GENERATED_SOURCES))
 
-- 
cgit v1.2.3


From 7fee23569b0e3a4d4636a83fb6751ee82987ec5f Mon Sep 17 00:00:00 2001
From: Alejandro Piñeiro <apinheiro@igalia.com>
Date: Wed, 23 Sep 2015 19:22:17 +0200
Subject: i965/vec4: check swizzle before discarding a uniform on a 3src
 operand

Without this commit, copy propagation is always rejected when it would
place a uniform in an instruction that has 3 sources. But 3-source
instructions can read a uniform if its swizzle selects a single value.

For example, this is what vec4_visitor::fix_3src_operand() is already
doing:

   if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle))
      return src;

Shader-db results (unfiltered) on NIR:
total instructions in shared programs: 6259650 -> 6241985 (-0.28%)
instructions in affected programs:     812755 -> 795090 (-2.17%)
helped:                                7930
HURT:                                  0

Shader-db results (unfiltered) on IR:
total instructions in shared programs: 6445822 -> 6441788 (-0.06%)
instructions in affected programs:     296630 -> 292596 (-1.36%)
helped:                                2533
HURT:                                  0

v2:
- Updated commit message, using Matt Turner's suggestions
- Move the check after we've created the final value, as Jason
  Ekstrand suggested
- Clean up the condition

v3:
- Move the check back to the original place, to keep things
  tidy, as suggested by Jason Ekstrand

v4:
- Fixed missing is_single_value_swizzle() as pointed out by Jason Ekstrand

Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index d3f0ddde258..5b6444e3210 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -325,7 +325,11 @@ try_copy_propagate(const struct brw_device_info *devinfo,
        inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE)
       return false;
 
-   if (inst->is_3src() && value.file == UNIFORM)
+   unsigned composed_swizzle = brw_compose_swizzle(inst->src[arg].swizzle,
+                                                   value.swizzle);
+   if (inst->is_3src() &&
+       value.file == UNIFORM &&
+       !brw_is_single_value_swizzle(composed_swizzle))
       return false;
 
    if (inst->is_send_from_grf())
@@ -380,8 +384,7 @@ try_copy_propagate(const struct brw_device_info *devinfo,
    if (inst->src[arg].negate)
       value.negate = !value.negate;
 
-   value.swizzle = brw_compose_swizzle(inst->src[arg].swizzle,
-                                       value.swizzle);
+   value.swizzle = composed_swizzle;
    if (has_source_modifiers &&
        value.type != inst->src[arg].type) {
       assert(can_change_source_types(inst));
-- 
cgit v1.2.3


From d6bb46bbe8e4ef90dedc5a04c7434a8113c10a8b Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Wed, 23 Sep 2015 18:06:19 -0700
Subject: glsl: Expose gl_MaxTess{Control,Evaluation}AtomicCounters.

... with only ARB_shader_atomic_counters.

I expected to see interactions with ARB_tessellation_shader in the
ARB_shader_atomic_counters spec, but they do not exist. It seems that we
should unconditionally expose these variables in the presence of
ARB_shader_atomic_counters:

   gl_MaxTessControlAtomicCounters
   gl_MaxTessEvaluationAtomicCounters

This partially reverts commit da7adb99e8. The commit also affected
gl_MaxTessControlImageUniforms and gl_MaxTessEvaluationImageUniforms
similarly but the ARB_shader_image_load_store spec does list an
interaction with ARB_tessellation_shader.

Cc: "11.0" <mesa-stable@lists.freedesktop.org>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92095
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/builtin_variables.cpp | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index b5e2908f951..0722051c08e 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -680,14 +680,10 @@ builtin_variable_generator::generate_constants()
       if (!state->es_shader) {
          add_const("gl_MaxGeometryAtomicCounters",
                    state->Const.MaxGeometryAtomicCounters);
-
-	 if (state->is_version(400, 0) ||
-             state->ARB_tessellation_shader_enable) {
-		 add_const("gl_MaxTessControlAtomicCounters",
-                           state->Const.MaxTessControlAtomicCounters);
-		 add_const("gl_MaxTessEvaluationAtomicCounters",
-                           state->Const.MaxTessEvaluationAtomicCounters);
-	 }
+         add_const("gl_MaxTessControlAtomicCounters",
+                   state->Const.MaxTessControlAtomicCounters);
+         add_const("gl_MaxTessEvaluationAtomicCounters",
+                   state->Const.MaxTessEvaluationAtomicCounters);
       }
    }
 
-- 
cgit v1.2.3


From 19604d30e1351868f7f54847c91ffec7b3fcd27e Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Sun, 20 Sep 2015 01:33:17 +0200
Subject: mesa: fix mipmap generation for immutable, compressed textures

If the immutable compressed texture didn't have the full mip pyramid,
this didn't work, because it tried to generate mip levels for non-existing
levels. _mesa_prepare_mipmap_level() would correctly handle this by returning
FALSE if the mip level didn't exist, however we actually created the
non-existing mip level right before that because we used _mesa_get_tex_image()
before calling _mesa_prepare_mipmap_level(). It would then proceed to crash
(we allocated the mip level, which is a bad idea on an immutable texture,
but didn't initialize the values, leading to assertion failures or segfaults).
Fix this by using _mesa_select_tex_image() instead and call it after
_mesa_prepare_mipmap_level(), as that function will allocate missing mip levels
for non-immutable textures already.
This fixes a (2 year old) crash with astromenace which was hack-fixed in ubuntu
packages instead: http://bugs.debian.org/718680 (I guess most apps do full mip
chains - I believe this app not doing it is actually unintentional, always one
level less than full mip chain...).

Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/main/mipmap.c | 36 +++++++++++++++---------------------
 1 file changed, 15 insertions(+), 21 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c
index 2bf5902fba4..ab16c2854a8 100644
--- a/src/mesa/main/mipmap.c
+++ b/src/mesa/main/mipmap.c
@@ -1922,11 +1922,8 @@ generate_mipmap_uncompressed(struct gl_context *ctx, GLenum target,
       }
 
       /* get dest gl_texture_image */
-      dstImage = _mesa_get_tex_image(ctx, texObj, target, level + 1);
-      if (!dstImage) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps");
-         return;
-      }
+      dstImage = _mesa_select_tex_image(texObj, target, level + 1);
+      assert(dstImage);
 
       if (target == GL_TEXTURE_1D_ARRAY) {
 	 srcDepth = srcHeight;
@@ -2110,7 +2107,19 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target,
                                          srcWidth, srcHeight, srcDepth,
                                          &dstWidth, &dstHeight, &dstDepth);
       if (!nextLevel)
-	 break;
+	 goto end;
+
+      if (!_mesa_prepare_mipmap_level(ctx, texObj, level + 1,
+                                      dstWidth, dstHeight, dstDepth,
+                                      border, srcImage->InternalFormat,
+                                      srcImage->TexFormat)) {
+         /* all done */
+         goto end;
+      }
+
+      /* get dest gl_texture_image */
+      dstImage = _mesa_select_tex_image(texObj, target, level + 1);
+      assert(dstImage);
 
       /* Compute dst image strides and alloc memory on first iteration */
       temp_dst_row_stride = _mesa_format_row_stride(temp_format, dstWidth);
@@ -2124,13 +2133,6 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target,
 	 }
       }
 
-      /* get dest gl_texture_image */
-      dstImage = _mesa_get_tex_image(ctx, texObj, target, level + 1);
-      if (!dstImage) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps");
-         goto end;
-      }
-
       /* for 2D arrays, setup array[depth] of slice pointers */
       for (i = 0; i < srcDepth; i++) {
          temp_src_slices[i] = temp_src + temp_src_img_stride * i;
@@ -2149,14 +2151,6 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target,
                                   dstWidth, dstHeight, dstDepth,
                                   temp_dst_slices, temp_dst_row_stride);
 
-      if (!_mesa_prepare_mipmap_level(ctx, texObj, level + 1,
-                                      dstWidth, dstHeight, dstDepth,
-                                      border, srcImage->InternalFormat,
-                                      srcImage->TexFormat)) {
-         /* all done */
-         goto end;
-      }
-
       /* The image space was allocated above so use glTexSubImage now */
       ctx->Driver.TexSubImage(ctx, 2, dstImage,
                               0, 0, 0, dstWidth, dstHeight, dstDepth,
-- 
cgit v1.2.3


From 4a1ba7e6bd3ddcab4647a1382d14165a08c0d3b0 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Thu, 17 Sep 2015 10:05:22 -0700
Subject: mesa/cs: Add _mesa_validate_DispatchCompute
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move API validation to _mesa_validate_DispatchCompute in
api_validate.c.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/main/api_validate.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
 src/mesa/main/api_validate.h |  4 ++++
 src/mesa/main/compute.c      | 28 +++++-----------------------
 3 files changed, 53 insertions(+), 23 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index 53c8fb893b5..b46226abf78 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -882,3 +882,47 @@ _mesa_validate_MultiDrawElementsIndirect(struct gl_context *ctx,
 
    return GL_TRUE;
 }
+
+static bool
+check_valid_to_compute(struct gl_context *ctx, const char *function)
+{
+   struct gl_shader_program *prog;
+
+   if (!_mesa_has_compute_shaders(ctx)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "unsupported function (%s) called",
+                  function);
+      return false;
+   }
+
+   prog = ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE];
+   if (prog == NULL || prog->_LinkedShaders[MESA_SHADER_COMPUTE] == NULL) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(no active compute shader)",
+                  function);
+      return false;
+   }
+
+   return true;
+}
+
+GLboolean
+_mesa_validate_DispatchCompute(struct gl_context *ctx,
+                               const GLuint *num_groups)
+{
+   int i;
+   FLUSH_CURRENT(ctx, 0);
+
+   if (!check_valid_to_compute(ctx, "glDispatchCompute"))
+      return GL_FALSE;
+
+   for (i = 0; i < 3; i++) {
+      if (num_groups[i] > ctx->Const.MaxComputeWorkGroupCount[i]) {
+         _mesa_error(ctx, GL_INVALID_VALUE,
+                     "glDispatchCompute(num_groups_%c)", 'x' + i);
+         return GL_FALSE;
+      }
+   }
+
+   return GL_TRUE;
+}
diff --git a/src/mesa/main/api_validate.h b/src/mesa/main/api_validate.h
index 0ce7b69d5de..ef2c7949a42 100644
--- a/src/mesa/main/api_validate.h
+++ b/src/mesa/main/api_validate.h
@@ -105,5 +105,9 @@ _mesa_validate_MultiDrawElementsIndirect(struct gl_context *ctx,
                                          GLsizei primcount,
                                          GLsizei stride);
 
+extern GLboolean
+_mesa_validate_DispatchCompute(struct gl_context *ctx,
+                               const GLuint *num_groups);
+
 
 #endif
diff --git a/src/mesa/main/compute.c b/src/mesa/main/compute.c
index 37a4ba70eed..f67ffbb6bfa 100644
--- a/src/mesa/main/compute.c
+++ b/src/mesa/main/compute.c
@@ -24,6 +24,7 @@
 #include "glheader.h"
 #include "compute.h"
 #include "context.h"
+#include "api_validate.h"
 
 void GLAPIENTRY
 _mesa_DispatchCompute(GLuint num_groups_x,
@@ -31,31 +32,12 @@ _mesa_DispatchCompute(GLuint num_groups_x,
                       GLuint num_groups_z)
 {
    GET_CURRENT_CONTEXT(ctx);
-   int i;
-   struct gl_shader_program *prog;
    const GLuint num_groups[3] = { num_groups_x, num_groups_y, num_groups_z };
 
-   if (ctx->Extensions.ARB_compute_shader) {
-      for (i = 0; i < 3; i++) {
-         if (num_groups[i] > ctx->Const.MaxComputeWorkGroupCount[i]) {
-            _mesa_error(ctx, GL_INVALID_VALUE,
-                        "glDispatchCompute(num_groups_%c)", 'x' + i);
-            return;
-         }
-      }
-      if (!_mesa_valid_to_render(ctx, "glDispatchCompute"))
-         return;
-      prog = ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE];
-      if (prog == NULL || prog->_LinkedShaders[MESA_SHADER_COMPUTE] == NULL) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "glDispatchCompute(no active compute shader)");
-         return;
-      }
-      ctx->Driver.DispatchCompute(ctx, num_groups);
-   } else {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "unsupported function (glDispatchCompute) called");
-   }
+   if (!_mesa_validate_DispatchCompute(ctx, num_groups))
+      return;
+
+   ctx->Driver.DispatchCompute(ctx, num_groups);
 }
 
 extern void GLAPIENTRY
-- 
cgit v1.2.3


From 12cf91db0236291ebaff71f602d929064b1ec096 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Thu, 17 Sep 2015 10:10:07 -0700
Subject: mesa/cs: Support GL_DISPATCH_INDIRECT_BUFFER
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2:
 * Use _mesa_has_compute_shaders (Ilia)

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/main/bufferobj.c        | 15 +++++++++++++++
 src/mesa/main/get.c              |  4 ++++
 src/mesa/main/get_hash_params.py |  1 +
 src/mesa/main/mtypes.h           |  1 +
 4 files changed, 21 insertions(+)

(limited to 'src')

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index f87cea97557..20963bda8f2 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -93,6 +93,11 @@ get_buffer_target(struct gl_context *ctx, GLenum target)
          return &ctx->DrawIndirectBuffer;
       }
       break;
+   case GL_DISPATCH_INDIRECT_BUFFER:
+      if (_mesa_has_compute_shaders(ctx)) {
+         return &ctx->DispatchIndirectBuffer;
+      }
+      break;
    case GL_TRANSFORM_FEEDBACK_BUFFER:
       if (ctx->Extensions.EXT_transform_feedback) {
          return &ctx->TransformFeedback.CurrentBuffer;
@@ -844,6 +849,9 @@ _mesa_init_buffer_objects( struct gl_context *ctx )
    _mesa_reference_buffer_object(ctx, &ctx->DrawIndirectBuffer,
 				 ctx->Shared->NullBufferObj);
 
+   _mesa_reference_buffer_object(ctx, &ctx->DispatchIndirectBuffer,
+				 ctx->Shared->NullBufferObj);
+
    for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) {
       _mesa_reference_buffer_object(ctx,
 				    &ctx->UniformBufferBindings[i].BufferObject,
@@ -888,6 +896,8 @@ _mesa_free_buffer_objects( struct gl_context *ctx )
 
    _mesa_reference_buffer_object(ctx, &ctx->DrawIndirectBuffer, NULL);
 
+   _mesa_reference_buffer_object(ctx, &ctx->DispatchIndirectBuffer, NULL);
+
    for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) {
       _mesa_reference_buffer_object(ctx,
 				    &ctx->UniformBufferBindings[i].BufferObject,
@@ -1233,6 +1243,11 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint *ids)
             _mesa_BindBuffer( GL_DRAW_INDIRECT_BUFFER, 0 );
          }
 
+         /* unbind ARB_compute_shader binding point */
+         if (ctx->DispatchIndirectBuffer == bufObj) {
+            _mesa_BindBuffer(GL_DISPATCH_INDIRECT_BUFFER, 0);
+         }
+
          /* unbind ARB_copy_buffer binding points */
          if (ctx->CopyReadBuffer == bufObj) {
             _mesa_BindBuffer( GL_COPY_READ_BUFFER, 0 );
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 926caef7031..e643d3a5e36 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -1036,6 +1036,10 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
          v->value_int = 0;
       }
       break;
+   /* GL_ARB_compute_shader */
+   case GL_DISPATCH_INDIRECT_BUFFER_BINDING:
+      v->value_int = ctx->DispatchIndirectBuffer->Name;
+      break;
    }
 }
 
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index e2fb2a1e3c9..805becb26b7 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -450,6 +450,7 @@ descriptor=[
   [ "MAX_COMPUTE_SHARED_MEMORY_SIZE", "CONST(MAX_COMPUTE_SHARED_MEMORY_SIZE), extra_ARB_compute_shader_es31" ],
   [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONST(MAX_COMPUTE_UNIFORM_COMPONENTS), extra_ARB_compute_shader_es31" ],
   [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader_es31" ],
+  [ "DISPATCH_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_compute_shader_es31" ],
 
 # GL_ARB_framebuffer_no_attachments / GLES 3.1
   ["MAX_FRAMEBUFFER_WIDTH", "CONTEXT_INT(Const.MaxFramebufferWidth), extra_ARB_framebuffer_no_attachments"],
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index fac45aae81c..d308b986175 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -4288,6 +4288,7 @@ struct gl_context
    struct gl_perf_monitor_state PerfMonitor;
 
    struct gl_buffer_object *DrawIndirectBuffer; /** < GL_ARB_draw_indirect */
+   struct gl_buffer_object *DispatchIndirectBuffer; /** < GL_ARB_compute_shader */
 
    struct gl_buffer_object *CopyReadBuffer; /**< GL_ARB_copy_buffer */
    struct gl_buffer_object *CopyWriteBuffer; /**< GL_ARB_copy_buffer */
-- 
cgit v1.2.3


From d11d018ce38b76a580242d64e61e8e30dad035e8 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Thu, 17 Sep 2015 11:14:45 -0700
Subject: mesa/cs: Implement glDispatchComputeIndirect
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/main/api_validate.c | 64 ++++++++++++++++++++++++++++++++++++++++++++
 src/mesa/main/api_validate.h |  4 +++
 src/mesa/main/compute.c      | 10 +++----
 src/mesa/main/dd.h           |  1 +
 4 files changed, 73 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index b46226abf78..a46c1944e96 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -926,3 +926,67 @@ _mesa_validate_DispatchCompute(struct gl_context *ctx,
 
    return GL_TRUE;
 }
+
+/* Validates an indirect dispatch that sources <size> bytes at <indirect>. */
+static GLboolean
+valid_dispatch_indirect(struct gl_context *ctx,
+                        GLintptr indirect,
+                        GLsizei size, const char *name)
+{
+   if (!check_valid_to_compute(ctx, name))
+      return GL_FALSE;
+
+   /* From the ARB_compute_shader specification:
+    *
+    * "An INVALID_OPERATION error is generated [...] if <indirect> is less
+    *  than zero or not a multiple of the size, in basic machine units, of
+    *  uint."
+    */
+   if ((GLintptr)indirect & (sizeof(GLuint) - 1)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(indirect is not aligned)", name);
+      return GL_FALSE;
+   }
+   if ((GLintptr)indirect < 0) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(indirect is less than zero)", name);
+      return GL_FALSE;
+   }
+
+   if (!_mesa_is_bufferobj(ctx->DispatchIndirectBuffer)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s: no buffer bound to DISPATCH_INDIRECT_BUFFER", name);
+      return GL_FALSE;
+   }
+
+   if (_mesa_check_disallowed_mapping(ctx->DispatchIndirectBuffer)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(DISPATCH_INDIRECT_BUFFER is mapped)", name);
+      return GL_FALSE;
+   }
+
+   /* From the ARB_compute_shader specification:
+    *
+    * "An INVALID_OPERATION error is generated if this command sources data
+    *  beyond the end of the buffer object [...]"
+    * Compare by subtraction so that <indirect> + <size> can never overflow.
+    */
+   if (ctx->DispatchIndirectBuffer->Size < size ||
+       indirect > ctx->DispatchIndirectBuffer->Size - size) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(DISPATCH_INDIRECT_BUFFER too small)", name);
+      return GL_FALSE;
+   }
+
+   return GL_TRUE;
+}
+
+GLboolean
+_mesa_validate_DispatchComputeIndirect(struct gl_context *ctx,
+                                       GLintptr indirect)
+{
+   FLUSH_CURRENT(ctx, 0);
+   /* The range validated is three GLuint values starting at <indirect>. */
+   return valid_dispatch_indirect(ctx, indirect, 3 * sizeof(GLuint),
+                                  "glDispatchComputeIndirect");
+}
diff --git a/src/mesa/main/api_validate.h b/src/mesa/main/api_validate.h
index ef2c7949a42..5d030a7ba37 100644
--- a/src/mesa/main/api_validate.h
+++ b/src/mesa/main/api_validate.h
@@ -109,5 +109,9 @@ extern GLboolean
 _mesa_validate_DispatchCompute(struct gl_context *ctx,
                                const GLuint *num_groups);
 
+extern GLboolean
+_mesa_validate_DispatchComputeIndirect(struct gl_context *ctx,
+                                       GLintptr indirect);
+
 
 #endif
diff --git a/src/mesa/main/compute.c b/src/mesa/main/compute.c
index f67ffbb6bfa..a0120cf0c64 100644
--- a/src/mesa/main/compute.c
+++ b/src/mesa/main/compute.c
@@ -45,10 +45,8 @@ _mesa_DispatchComputeIndirect(GLintptr indirect)
 {
    GET_CURRENT_CONTEXT(ctx);
 
-   if (ctx->Extensions.ARB_compute_shader) {
-      assert(!"TODO");
-   } else {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "unsupported function (glDispatchComputeIndirect) called");
-   }
+   if (!_mesa_validate_DispatchComputeIndirect(ctx, indirect))
+      return;
+
+   ctx->Driver.DispatchComputeIndirect(ctx, indirect);
 }
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 2c746fc45de..88f37273e1e 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -1021,6 +1021,7 @@ struct dd_function_table {
     */
    /*@{*/
    void (*DispatchCompute)(struct gl_context *ctx, const GLuint *num_groups);
+   void (*DispatchComputeIndirect)(struct gl_context *ctx, GLintptr indirect);
    /*@}*/
 };
 
-- 
cgit v1.2.3


From ebbe6cdad7ab082d2b191fe6c7c0eaa6921d55de Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Thu, 17 Sep 2015 16:25:24 -0700
Subject: i965/cs: Implement DispatchComputeIndirect support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_compute.c | 57 ++++++++++++++++++++++++++++++---
 src/mesa/drivers/dri/i965/brw_defines.h |  2 ++
 src/mesa/drivers/dri/i965/intel_reg.h   |  5 +++
 3 files changed, 60 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
index 5693ab507d4..c392152e48d 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -31,14 +31,46 @@
 #include "brw_draw.h"
 #include "brw_state.h"
 #include "intel_batchbuffer.h"
+#include "intel_buffer_objects.h"
 #include "brw_defines.h"
 
 
 static void
-brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
+brw_emit_gpgpu_walker(struct brw_context *brw,
+                      const void *compute_param,
+                      bool indirect)
 {
    const struct brw_cs_prog_data *prog_data = brw->cs.prog_data;
 
+   const GLuint *num_groups;
+   uint32_t indirect_flag;
+
+   if (!indirect) {
+      num_groups = (const GLuint *)compute_param;
+      indirect_flag = 0;
+   } else {
+      GLintptr indirect_offset = (GLintptr)compute_param;
+      static const GLuint indirect_group_counts[3] = { 0, 0, 0 };
+      num_groups = indirect_group_counts;
+
+      struct gl_buffer_object *indirect_buffer = brw->ctx.DispatchIndirectBuffer;
+      drm_intel_bo *bo = intel_bufferobj_buffer(brw,
+            intel_buffer_object(indirect_buffer),
+            indirect_offset, 3 * sizeof(GLuint));
+
+      indirect_flag = GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE;
+
+      brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo,
+                            I915_GEM_DOMAIN_VERTEX, 0,
+                            indirect_offset + 0);
+      brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo,
+                            I915_GEM_DOMAIN_VERTEX, 0,
+                            indirect_offset + 4);
+      brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo,
+                            I915_GEM_DOMAIN_VERTEX, 0,
+                            indirect_offset + 8);
+   }
+
    const unsigned simd_size = prog_data->simd_size;
    unsigned group_size = prog_data->local_size[0] *
       prog_data->local_size[1] * prog_data->local_size[2];
@@ -52,7 +84,7 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
 
    uint32_t dwords = brw->gen < 8 ? 11 : 15;
    BEGIN_BATCH(dwords);
-   OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2));
+   OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2) | indirect_flag);
    OUT_BATCH(0);
    if (brw->gen >= 8) {
       OUT_BATCH(0);                     /* Indirect Data Length */
@@ -83,7 +115,9 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
 
 
 static void
-brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
+brw_dispatch_compute_common(struct gl_context *ctx,
+                            const void *compute_param,
+                            bool indirect)
 {
    struct brw_context *brw = brw_context(ctx);
    int estimated_buffer_space_needed;
@@ -117,7 +151,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
    brw->no_batch_wrap = true;
    brw_upload_compute_state(brw);
 
-   brw_emit_gpgpu_walker(brw, num_groups);
+   brw_emit_gpgpu_walker(brw, compute_param, indirect);
 
    brw->no_batch_wrap = false;
 
@@ -155,9 +189,24 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
     */
 }
 
+/* DispatchCompute hook: num_groups is passed on as the direct parameter. */
+static void
+brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
+{
+   brw_dispatch_compute_common(ctx, num_groups, false);
+}
+
+/* DispatchComputeIndirect hook: the offset travels in the pointer argument. */
+static void
+brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect)
+{
+   void *const offset_param = (void *)indirect;
+   brw_dispatch_compute_common(ctx, offset_param, true);
+}
 
 void
 brw_init_compute_functions(struct dd_function_table *functions)
 {
    functions->DispatchCompute = brw_dispatch_compute;
+   functions->DispatchComputeIndirect = brw_dispatch_compute_indirect;
 }
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index f9d8d1b98f2..f0797985a19 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -2770,6 +2770,8 @@ enum brw_wm_barycentric_interp_mode {
 # define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK     INTEL_MASK(9, 0)
 #define MEDIA_STATE_FLUSH                       0x7004
 #define GPGPU_WALKER                            0x7105
+/* GEN7 DW0 */
+# define GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE   (1 << 10)
 /* GEN8+ DW2 */
 # define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT     0
 # define GPGPU_WALKER_INDIRECT_LENGTH_MASK      INTEL_MASK(15, 0)
diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h
index 58007d3047f..a261c2bbb8a 100644
--- a/src/mesa/drivers/dri/i965/intel_reg.h
+++ b/src/mesa/drivers/dri/i965/intel_reg.h
@@ -173,6 +173,11 @@
 #define GEN7_3DPRIM_START_INSTANCE      0x243C
 #define GEN7_3DPRIM_BASE_VERTEX         0x2440
 
+/* Auto-Compute / Indirect Registers */
+#define GEN7_GPGPU_DISPATCHDIMX         0x2500
+#define GEN7_GPGPU_DISPATCHDIMY         0x2504
+#define GEN7_GPGPU_DISPATCHDIMZ         0x2508
+
 #define GEN7_CACHE_MODE_1               0x7004
 # define GEN8_HIZ_NP_PMA_FIX_ENABLE        (1 << 11)
 # define GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE (1 << 13)
-- 
cgit v1.2.3


From a6b55beb78501e3447c55a2d3a4e4b5d6950b86c Mon Sep 17 00:00:00 2001
From: Tapani Pälli <tapani.palli@intel.com>
Date: Fri, 4 Sep 2015 11:22:15 +0300
Subject: mesa: add packed_varyings list to gl_shader
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is required to store information about packed varyings. Currently
these variables get lost and cannot be retrieved later in a sensible way
for program interface queries. The list will be used by the next patch.

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Marta Lofstedt <marta.lofstedt@intel.com>
---
 src/glsl/lower_packed_varyings.cpp | 16 ++++++++++++----
 src/mesa/main/mtypes.h             |  1 +
 2 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/glsl/lower_packed_varyings.cpp b/src/glsl/lower_packed_varyings.cpp
index cfe414ae088..5d66ca931cf 100644
--- a/src/glsl/lower_packed_varyings.cpp
+++ b/src/glsl/lower_packed_varyings.cpp
@@ -170,7 +170,7 @@ public:
                                  exec_list *out_instructions,
                                  exec_list *out_variables);
 
-   void run(exec_list *instructions);
+   void run(struct gl_shader *shader);
 
 private:
    void bitwise_assign_pack(ir_rvalue *lhs, ir_rvalue *rhs);
@@ -252,9 +252,9 @@ lower_packed_varyings_visitor::lower_packed_varyings_visitor(
 }
 
 void
-lower_packed_varyings_visitor::run(exec_list *instructions)
+lower_packed_varyings_visitor::run(struct gl_shader *shader)
 {
-   foreach_in_list(ir_instruction, node, instructions) {
+   foreach_in_list(ir_instruction, node, shader->ir) {
       ir_variable *var = node->as_variable();
       if (var == NULL)
          continue;
@@ -272,6 +272,14 @@ lower_packed_varyings_visitor::run(exec_list *instructions)
       assert(var->data.interpolation == INTERP_QUALIFIER_FLAT ||
              !var->type->contains_integer());
 
+      /* Clone the variable for program resource list before
+       * it gets modified and lost.
+       */
+      if (!shader->packed_varyings)
+         shader->packed_varyings = new (shader) exec_list;
+
+      shader->packed_varyings->push_tail(var->clone(shader, NULL));
+
       /* Change the old varying into an ordinary global. */
       assert(var->data.mode != ir_var_temporary);
       var->data.mode = ir_var_auto;
@@ -711,7 +719,7 @@ lower_packed_varyings(void *mem_ctx, unsigned locations_used,
                                          gs_input_vertices,
                                          &new_instructions,
                                          &new_variables);
-   visitor.run(instructions);
+   visitor.run(shader);
    if (mode == ir_var_shader_out) {
       if (shader->Stage == MESA_SHADER_GEOMETRY) {
          /* For geometry shaders, outputs need to be lowered before each call
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index d308b986175..22b1d14636f 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2292,6 +2292,7 @@ struct gl_shader
    struct gl_uniform_block *UniformBlocks;
 
    struct exec_list *ir;
+   struct exec_list *packed_varyings;
    struct glsl_symbol_table *symbols;
 
    bool uses_builtin_functions;
-- 
cgit v1.2.3


From 4639cea2921669527eb43dcb49724c05afb27e8e Mon Sep 17 00:00:00 2001
From: Tapani Pälli <tapani.palli@intel.com>
Date: Fri, 4 Sep 2015 11:30:34 +0300
Subject: glsl: add packed varyings to program resource list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This makes sure that the user is still able to query properties of
variables that have been packed by the lower_packed_varyings pass.

Fixes the following OpenGL ES 3.1 test:
   ES31-CTS.program_interface_query.separate-programs-vertex

v2: fix 'name included in packed list' check (Ilia Mirkin)
v3: iterate over instances of name using strtok_r (Ilia Mirkin)

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Marta Lofstedt <marta.lofstedt@intel.com>
---
 src/glsl/linker.cpp | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 76 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index fd69dbc2c73..53e276cc589 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -3087,6 +3087,35 @@ add_program_resource(struct gl_shader_program *prog, GLenum type,
    return true;
 }
 
+/* Checks whether var is a packed varying and, if so, whether the given
+ * name is one of the entries in the packed varying's list.
+ *
+ * If a variable is a packed varying, it has a name like
+ * 'packed:a,b,c' where a, b and c are separate variables.
+ * The list is scanned in place, so nothing is allocated or copied.
+ */
+static bool
+included_in_packed_varying(ir_variable *var, const char *name)
+{
+   if (strncmp(var->name, "packed:", 7) != 0)
+      return false;
+
+   const size_t name_len = strlen(name);
+   const char *item = var->name + 7;
+
+   /* Visit each comma-separated item; empty items never match. */
+   while (*item != '\0') {
+      const size_t item_len = strcspn(item, ",");
+      if (item_len > 0 && item_len == name_len &&
+          strncmp(item, name, item_len) == 0)
+         return true;
+      item += item_len;
+      if (*item == ',')
+         item++;
+   }
+   return false;
+}
+
 /**
  * Function builds a stage reference bitmask from variable name.
  */
@@ -3114,6 +3143,11 @@ build_stageref(struct gl_shader_program *shProg, const char *name,
          if (var) {
             unsigned baselen = strlen(var->name);
 
+            if (included_in_packed_varying(var, name)) {
+                  stages |= (1 << i);
+                  break;
+            }
+
             /* Type needs to match if specified, otherwise we might
              * pick a variable with same name but different interface.
              */
@@ -3139,9 +3173,9 @@ build_stageref(struct gl_shader_program *shProg, const char *name,
 
 static bool
 add_interface_variables(struct gl_shader_program *shProg,
-                        struct gl_shader *sh, GLenum programInterface)
+                        exec_list *ir, GLenum programInterface)
 {
-   foreach_in_list(ir_instruction, node, sh->ir) {
+   foreach_in_list(ir_instruction, node, ir) {
       ir_variable *var = node->as_variable();
       uint8_t mask = 0;
 
@@ -3176,6 +3210,12 @@ add_interface_variables(struct gl_shader_program *shProg,
          continue;
       };
 
+      /* Skip packed varyings, packed varyings are handled separately
+       * by add_packed_varyings.
+       */
+      if (strncmp(var->name, "packed:", 7) == 0)
+         continue;
+
       if (!add_program_resource(shProg, programInterface, var,
                                 build_stageref(shProg, var->name,
                                                var->data.mode) | mask))
@@ -3184,6 +3224,33 @@ add_interface_variables(struct gl_shader_program *shProg,
    return true;
 }
 
+/* Adds a stage's saved packed varyings to the program resource list;
+ * returns false as soon as add_program_resource() fails. */
+static bool
+add_packed_varyings(struct gl_shader_program *shProg, int stage)
+{
+   struct gl_shader *sh = shProg->_LinkedShaders[stage];
+
+   if (!sh || !sh->packed_varyings)
+      return true;
+
+   foreach_in_list(ir_instruction, node, sh->packed_varyings) {
+      ir_variable *var = node->as_variable();
+      /* Only shader inputs and outputs map to a program interface. */
+      if (!var || (var->data.mode != ir_var_shader_in &&
+                   var->data.mode != ir_var_shader_out))
+         continue;
+      /* Take the interface from this variable's mode (no fall-through). */
+      const GLenum iface = var->data.mode == ir_var_shader_in ?
+         GL_PROGRAM_INPUT : GL_PROGRAM_OUTPUT;
+      if (!add_program_resource(shProg, iface, var,
+                                build_stageref(shProg, var->name,
+                                               var->data.mode)))
+         return false;
+   }
+   return true;
+}
+
 /**
  * Builds up a list of program resources that point to existing
  * resource data.
@@ -3216,12 +3283,17 @@ build_program_resource_list(struct gl_shader_program *shProg)
    if (input_stage == MESA_SHADER_STAGES && output_stage == 0)
       return;
 
+   if (!add_packed_varyings(shProg, input_stage))
+      return;
+   if (!add_packed_varyings(shProg, output_stage))
+      return;
+
    /* Add inputs and outputs to the resource list. */
-   if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage],
+   if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage]->ir,
                                 GL_PROGRAM_INPUT))
       return;
 
-   if (!add_interface_variables(shProg, shProg->_LinkedShaders[output_stage],
+   if (!add_interface_variables(shProg, shProg->_LinkedShaders[output_stage]->ir,
                                 GL_PROGRAM_OUTPUT))
       return;
 
-- 
cgit v1.2.3


From 4cf908f9cbaac5563dd3ff355399e2b56addbca4 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Thu, 3 Sep 2015 13:58:25 +0200
Subject: mesa: set MAX_SHADER_STORAGE_BUFFERS to 16.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2:
- Set the value to 16 and drop the comment. (Kristian)

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/main/config.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
index b35031db3c9..f29de5f91fd 100644
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@@ -171,7 +171,7 @@
 #define MAX_PROGRAM_LOCAL_PARAMS       4096
 #define MAX_UNIFORMS                   4096
 #define MAX_UNIFORM_BUFFERS            15 /* + 1 default uniform buffer */
-#define MAX_SHADER_STORAGE_BUFFERS     7  /* + 1 default shader storage buffer */
+#define MAX_SHADER_STORAGE_BUFFERS     16
 /* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. */
 #define MAX_COMBINED_UNIFORM_BUFFERS   (MAX_UNIFORM_BUFFERS * 6)
 #define MAX_COMBINED_SHADER_STORAGE_BUFFERS   (MAX_SHADER_STORAGE_BUFFERS * 6)
-- 
cgit v1.2.3


From 332ff009ffcbdad2402f089060623c0a86fa253c Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Thu, 19 Mar 2015 11:27:21 +0100
Subject: i965: Use 64-byte offset alignment for shader storage buffers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This should be a cacheline (64 bytes) so that we can safely have the
CPU and GPU writing the same SSBO on non-cachecoherent systems (our
Atom CPUs). With UBOs, the GPU never writes, so there's no
problem. For an SSBO, the GPU and the CPU can be updating disjoint
regions of the buffer simultaneously and that will break if the
regions overlap the same cacheline.

v2:
- Use cacheline size (64 bytes) instead of 16 bytes (Kristian).
- Update commit log and add a comment in the code explaining
  why we use cacheline size (Ben).

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_context.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 7c1c13300dc..0cfc8435964 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -567,6 +567,15 @@ brw_initialize_context_constants(struct brw_context *brw)
     * However, unaligned accesses are slower, so enforce buffer alignment.
     */
    ctx->Const.UniformBufferOffsetAlignment = 16;
+
+   /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
+    * that we can safely have the CPU and GPU writing the same SSBO on
+    * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
+    * writes, so there's no problem. For an SSBO, the GPU and the CPU can
+    * be updating disjoint regions of the buffer simultaneously and that will
+    * break if the regions overlap the same cacheline.
+    */
+   ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
    ctx->Const.TextureBufferOffsetAlignment = 16;
    ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
 
-- 
cgit v1.2.3


From dfdeb94a5aedf838fe1f180fd68a7b5b2a3d7e99 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Thu, 19 Mar 2015 11:07:19 +0100
Subject: i965: Implement DriverFlags.NewShaderStorageBuffer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We use the same dirty state for SSBOs and UBOs because they share the
same infrastructure.

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_state_upload.c     | 1 +
 src/mesa/drivers/dri/i965/intel_buffer_objects.c | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 14627d5231b..bf06ed38bd7 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -484,6 +484,7 @@ void brw_init_state( struct brw_context *brw )
    ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
    ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
    ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
+   ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
    ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
    ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
    ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
index a51095f2d65..7a5b3fca595 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -106,6 +106,8 @@ alloc_buffer_object(struct brw_context *brw,
     */
    if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
       brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
+   if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER)
+      brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
    if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
       brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER;
    if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
-- 
cgit v1.2.3


From 36f392c4ef5fd8793127132f9b4465249e92c9ce Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Tue, 9 Jun 2015 09:16:33 +0200
Subject: i965: set ARB_shader_storage_buffer_object related constant values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2:
- Add tessellation shader constants assignment

v3:
- Set MaxShaderStorageBufferBindings to 36.

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_context.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 0cfc8435964..47203c1701b 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -579,6 +579,18 @@ brw_initialize_context_constants(struct brw_context *brw)
    ctx->Const.TextureBufferOffsetAlignment = 16;
    ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
 
+   /* FIXME: Tessellation stages are not yet supported in i965, so
+    * MaxCombinedShaderStorageBlocks doesn't take them into account.
+    */
+   ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = 12;
+   ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks = 12;
+   ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks = 0;
+   ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks = 0;
+   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = 12;
+   ctx->Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks = 12;
+   ctx->Const.MaxCombinedShaderStorageBlocks = 12 * 3;
+   ctx->Const.MaxShaderStorageBufferBindings = 36;
+
    if (brw->gen >= 6) {
       ctx->Const.MaxVarying = 32;
       ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
-- 
cgit v1.2.3


From bdbabc57e302b73e2db30f6d46918afb2b442c7b Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Thu, 19 Mar 2015 12:11:39 +0100
Subject: i965: Set MaxShaderStorageBuffers for compute shaders
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2:
- Set it after the driver's MaxShaderStorageBuffers value assignment.

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_context.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 47203c1701b..9dcdaf5cd4f 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -591,6 +591,9 @@ brw_initialize_context_constants(struct brw_context *brw)
    ctx->Const.MaxCombinedShaderStorageBlocks = 12 * 3;
    ctx->Const.MaxShaderStorageBufferBindings = 36;
 
+   if (_mesa_extension_override_enables.ARB_compute_shader)
+      ctx->Const.MaxShaderStorageBufferBindings += 12;
+
    if (brw->gen >= 6) {
       ctx->Const.MaxVarying = 32;
       ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
-- 
cgit v1.2.3


From 37da6a2acd549d145b1691bb1f6bab87b65c92a6 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Mon, 23 Mar 2015 13:48:43 +0100
Subject: i965: Upload Shader Storage Buffer Object surfaces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since these are a special kind of UBO, we emit them together, reusing the
same infrastructure. However, we use a RAW surface so we can reuse the
existing untyped read/write/atomic messages, which include a pixel mask
header that we need to set to obtain correct behavior with helper
invocations of the fragment shader.

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_context.h          |  6 +++
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 64 +++++++++++++++++++-----
 2 files changed, 57 insertions(+), 13 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index b05b8bd69bf..144d3e327d4 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1779,6 +1779,12 @@ void brw_create_constant_surface(struct brw_context *brw,
                                  uint32_t size,
                                  uint32_t *out_offset,
                                  bool dword_pitch);
+void brw_create_buffer_surface(struct brw_context *brw,
+                               drm_intel_bo *bo,
+                               uint32_t offset,
+                               uint32_t size,
+                               uint32_t *out_offset,
+                               bool dword_pitch);
 void brw_update_buffer_texture_surface(struct gl_context *ctx,
                                        unsigned unit,
                                        uint32_t *surf_offset);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 3af4b995a94..24ff2d6a443 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -410,6 +410,29 @@ brw_create_constant_surface(struct brw_context *brw,
                                        elements, stride, false);
 }
 
+/**
+ * Create the buffer surface. Shader buffer variables will be
+ * read from / write to this buffer with Data Port Read/Write
+ * instructions/messages.
+ */
+void
+brw_create_buffer_surface(struct brw_context *brw,
+                          drm_intel_bo *bo,
+                          uint32_t offset,
+                          uint32_t size,
+                          uint32_t *out_offset,
+                          bool dword_pitch)
+{
+   /* Use a raw surface so we can reuse existing untyped read/write/atomic
+    * messages. We need these specifically for the fragment shader since they
+    * include a pixel mask header that we need to ensure correct behavior
+    * with helper invocations, which cannot write to the buffer.
+    */
+   brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
+                                       BRW_SURFACEFORMAT_RAW,
+                                       size, 1, true);
+}
+
 /**
  * Set up a binding table entry for use by stream output logic (transform
  * feedback).
@@ -905,25 +928,40 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
    uint32_t *surf_offsets =
       &stage_state->surf_offset[prog_data->binding_table.ubo_start];
 
-   for (unsigned i = 0; i < shader->NumUniformBlocks; i++) {
-      struct gl_uniform_buffer_binding *binding;
+   for (int i = 0; i < shader->NumUniformBlocks; i++) {
       struct intel_buffer_object *intel_bo;
 
-      binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
-      intel_bo = intel_buffer_object(binding->BufferObject);
-      drm_intel_bo *bo =
-         intel_bufferobj_buffer(brw, intel_bo,
-                                binding->Offset,
-                                binding->BufferObject->Size - binding->Offset);
-
       /* Because behavior for referencing outside of the binding's size in the
        * glBindBufferRange case is undefined, we can just bind the whole buffer
        * glBindBufferBase wants and be a correct implementation.
        */
-      brw_create_constant_surface(brw, bo, binding->Offset,
-                                  bo->size - binding->Offset,
-                                  &surf_offsets[i],
-                                  dword_pitch);
+      if (!shader->UniformBlocks[i].IsShaderStorage) {
+         struct gl_uniform_buffer_binding *binding;
+         binding =
+            &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
+         intel_bo = intel_buffer_object(binding->BufferObject);
+         drm_intel_bo *bo =
+            intel_bufferobj_buffer(brw, intel_bo,
+                                   binding->Offset,
+                                   binding->BufferObject->Size - binding->Offset);
+         brw_create_constant_surface(brw, bo, binding->Offset,
+                                     bo->size - binding->Offset,
+                                     &surf_offsets[i],
+                                     dword_pitch);
+      } else {
+         struct gl_shader_storage_buffer_binding *binding;
+         binding =
+            &ctx->ShaderStorageBufferBindings[shader->UniformBlocks[i].Binding];
+         intel_bo = intel_buffer_object(binding->BufferObject);
+         drm_intel_bo *bo =
+            intel_bufferobj_buffer(brw, intel_bo,
+                                   binding->Offset,
+                                   binding->BufferObject->Size - binding->Offset);
+         brw_create_buffer_surface(brw, bo, binding->Offset,
+                                   bo->size - binding->Offset,
+                                   &surf_offsets[i],
+                                   dword_pitch);
+      }
    }
 
    if (shader->NumUniformBlocks)
-- 
cgit v1.2.3


From 2773a7cf1d929729de04abf516ce5bf9eac77834 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Thu, 9 Apr 2015 10:33:30 +0200
Subject: i965: handle visiting of ir_var_shader_storage variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 514ccd6daf2..ac086a72eb1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1074,11 +1074,12 @@ vec4_visitor::visit(ir_variable *ir)
       break;
 
    case ir_var_uniform:
+   case ir_var_shader_storage:
       reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
 
       /* Thanks to the lower_ubo_reference pass, we will see only
-       * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
-       * variables, so no need for them to be in variable_ht.
+       * ir_binop_{ubo,ssbo}_load expressions and not ir_dereference_variable
+       * for UBO/SSBO variables, so no need for them to be in variable_ht.
        *
        * Some uniforms, such as samplers and atomic counters, have no actual
        * storage, so we should ignore them.
-- 
cgit v1.2.3


From 6335c7923683ecf37bcd59007a144aa37c857343 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Thu, 9 Apr 2015 09:14:38 +0200
Subject: i965/fs: Do not split buffer variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Buffer variables are the same as uniforms, except that they are read/write,
so we want the same treatment.

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
index 9e92ae85e43..6000e35b9b9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
@@ -106,6 +106,7 @@ ir_vector_reference_visitor::get_variable_entry(ir_variable *var)
 
    switch (var->data.mode) {
    case ir_var_uniform:
+   case ir_var_shader_storage:
    case ir_var_shader_in:
    case ir_var_shader_out:
    case ir_var_system_value:
-- 
cgit v1.2.3


From f45d39f6afc436ee4c68a21382933b2b39879eef Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Tue, 25 Aug 2015 08:02:46 +0200
Subject: glsl: return error if unsized arrays are found in OpenGL ES
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/ast_to_hir.cpp | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'src')

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 72c6459da3c..b67ae704bb0 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -6252,6 +6252,22 @@ ast_interface_block::hir(exec_list *instructions,
       else if (state->stage == MESA_SHADER_TESS_CTRL && var_mode == ir_var_shader_out)
          handle_tess_ctrl_shader_output_decl(state, loc, var);
 
+      for (unsigned i = 0; i < num_variables; i++) {
+         /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+          *
+          * "If an array is declared as the last member of a shader storage
+          * block and the size is not specified at compile-time, it is
+          * sized at run-time. In all other cases, arrays are sized only
+          * at compile-time."
+          */
+         if (state->es_shader && fields[i].type->is_unsized_array()) {
+             _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+                              "only last member of a shader storage block "
+                              "can be defined as unsized array",
+                              fields[i].name);
+         }
+      }
+
       if (ir_variable *earlier =
           state->symbols->get_variable(this->instance_name)) {
          if (!redeclaring_per_vertex) {
-- 
cgit v1.2.3


From f3f64cd0c4b9cf3363056ddc9c4d7616614ce829 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Wed, 18 Mar 2015 15:32:03 +0100
Subject: glsl: add support for unsized arrays in shader storage blocks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

They can only be defined in the last position of a shader
storage block.

When an unsized array is used in different shaders, it might be
converted into differently sized arrays; avoid raising a linker error
in that case.

v2:
- Rework error condition and error messages (Timothy Arceri)

v3:
- Move OpenGL ES check to its own patch.

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/ast_array_index.cpp |   3 +-
 src/glsl/ast_to_hir.cpp      |  74 +++++++++++++++++++++++++-----
 src/glsl/ir.cpp              |   1 +
 src/glsl/ir.h                |  14 ++++++
 src/glsl/linker.cpp          | 107 ++++++++++++++++++++++++++++---------------
 5 files changed, 150 insertions(+), 49 deletions(-)

(limited to 'src')

diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp
index ae399f03a9b..dfb31073f82 100644
--- a/src/glsl/ast_array_index.cpp
+++ b/src/glsl/ast_array_index.cpp
@@ -226,7 +226,8 @@ _mesa_ast_array_index_to_hir(void *mem_ctx,
              * by the linker.
              */
          }
-         else {
+         else if (array->variable_referenced()->data.mode !=
+                  ir_var_shader_storage) {
             _mesa_glsl_error(&loc, state, "unsized array index must be constant");
          }
       } else if (array->type->fields.array->is_interface()
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index b67ae704bb0..92038a62d81 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -5880,6 +5880,19 @@ private:
    bool found;
 };
 
+static bool
+is_unsized_array_last_element(ir_variable *v)
+{
+   const glsl_type *interface_type = v->get_interface_type();
+   int length = interface_type->length;
+
+   assert(v->type->is_unsized_array());
+
+   /* Check if it is the last element of the interface */
+   if (strcmp(interface_type->fields.structure[length-1].name, v->name) == 0)
+      return true;
+   return false;
+}
 
 ir_rvalue *
 ast_interface_block::hir(exec_list *instructions,
@@ -6253,18 +6266,29 @@ ast_interface_block::hir(exec_list *instructions,
          handle_tess_ctrl_shader_output_decl(state, loc, var);
 
       for (unsigned i = 0; i < num_variables; i++) {
-         /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
-          *
-          * "If an array is declared as the last member of a shader storage
-          * block and the size is not specified at compile-time, it is
-          * sized at run-time. In all other cases, arrays are sized only
-          * at compile-time."
-          */
-         if (state->es_shader && fields[i].type->is_unsized_array()) {
-             _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
-                              "only last member of a shader storage block "
-                              "can be defined as unsized array",
-                              fields[i].name);
+         if (fields[i].type->is_unsized_array()) {
+            if (var_mode == ir_var_shader_storage) {
+               if (i != (num_variables - 1)) {
+                  _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+                                   "only last member of a shader storage block "
+                                   "can be defined as unsized array",
+                                   fields[i].name);
+               }
+            } else {
+               /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+               *
+               * "If an array is declared as the last member of a shader storage
+               * block and the size is not specified at compile-time, it is
+               * sized at run-time. In all other cases, arrays are sized only
+               * at compile-time."
+               */
+               if (state->es_shader) {
+                  _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+                                 "only last member of a shader storage block "
+                                 "can be defined as unsized array",
+                                 fields[i].name);
+               }
+            }
          }
       }
 
@@ -6359,6 +6383,32 @@ ast_interface_block::hir(exec_list *instructions,
          var->data.explicit_binding = this->layout.flags.q.explicit_binding;
          var->data.binding = this->layout.binding;
 
+         if (var->type->is_unsized_array()) {
+            if (var->is_in_shader_storage_block()) {
+               if (!is_unsized_array_last_element(var)) {
+                  _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+                                   "only last member of a shader storage block "
+                                   "can be defined as unsized array",
+                                   var->name);
+               }
+               var->data.from_ssbo_unsized_array = true;
+            } else {
+               /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+               *
+               * "If an array is declared as the last member of a shader storage
+               * block and the size is not specified at compile-time, it is
+               * sized at run-time. In all other cases, arrays are sized only
+               * at compile-time."
+               */
+               if (state->es_shader) {
+                  _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+                                 "only last member of a shader storage block "
+                                 "can be defined as unsized array",
+                                 var->name);
+               }
+            }
+         }
+
          state->symbols->add_variable(var);
          instructions->push_tail(var);
       }
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index fb58c3b4ef6..b9df9761920 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1658,6 +1658,7 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name,
    this->data.image_coherent = false;
    this->data.image_volatile = false;
    this->data.image_restrict = false;
+   this->data.from_ssbo_unsized_array = false;
 
    if (type != NULL) {
       if (type->base_type == GLSL_TYPE_SAMPLER)
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index cf1954b1257..48b6795cc09 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -452,6 +452,15 @@ public:
              this->interface_type != NULL;
    }
 
+   /**
+    * Determine whether or not a variable is part of a shader storage block.
+    */
+   inline bool is_in_shader_storage_block() const
+   {
+      return this->data.mode == ir_var_shader_storage &&
+             this->interface_type != NULL;
+   }
+
    /**
     * Determine whether or not a variable is the declaration of an interface
     * block
@@ -777,6 +786,11 @@ public:
       unsigned image_volatile:1;
       unsigned image_restrict:1;
 
+      /**
+       * ARB_shader_storage_buffer_object
+       */
+      unsigned from_ssbo_unsized_array:1; /**< unsized array buffer variable. */
+
       /**
        * Emit a warning if this variable is accessed.
        */
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 53e276cc589..47d8b5ad1bf 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -877,30 +877,40 @@ validate_intrastage_arrays(struct gl_shader_program *prog,
     * In addition, set the type of the linked variable to the
     * explicitly sized array.
     */
-   if (var->type->is_array() && existing->type->is_array() &&
-       (var->type->fields.array == existing->type->fields.array) &&
-       ((var->type->length == 0)|| (existing->type->length == 0))) {
-      if (var->type->length != 0) {
-         if (var->type->length <= existing->data.max_array_access) {
-            linker_error(prog, "%s `%s' declared as type "
-                         "`%s' but outermost dimension has an index"
-                         " of `%i'\n",
-                         mode_string(var),
-                         var->name, var->type->name,
-                         existing->data.max_array_access);
-         }
-         existing->type = var->type;
-         return true;
-      } else if (existing->type->length != 0) {
-         if(existing->type->length <= var->data.max_array_access) {
-            linker_error(prog, "%s `%s' declared as type "
-                         "`%s' but outermost dimension has an index"
-                         " of `%i'\n",
-                         mode_string(var),
-                         var->name, existing->type->name,
-                         var->data.max_array_access);
+   if (var->type->is_array() && existing->type->is_array()) {
+      if ((var->type->fields.array == existing->type->fields.array) &&
+          ((var->type->length == 0)|| (existing->type->length == 0))) {
+         if (var->type->length != 0) {
+            if (var->type->length <= existing->data.max_array_access) {
+               linker_error(prog, "%s `%s' declared as type "
+                           "`%s' but outermost dimension has an index"
+                           " of `%i'\n",
+                           mode_string(var),
+                           var->name, var->type->name,
+                           existing->data.max_array_access);
+            }
+            existing->type = var->type;
+            return true;
+         } else if (existing->type->length != 0) {
+            if(existing->type->length <= var->data.max_array_access &&
+               !existing->data.from_ssbo_unsized_array) {
+               linker_error(prog, "%s `%s' declared as type "
+                           "`%s' but outermost dimension has an index"
+                           " of `%i'\n",
+                           mode_string(var),
+                           var->name, existing->type->name,
+                           var->data.max_array_access);
+            }
+            return true;
          }
-         return true;
+      } else {
+         /* The arrays of structs could have different glsl_type pointers but
+          * they are actually the same type. Use record_compare() to check that.
+          */
+         if (existing->type->fields.array->is_record() &&
+             var->type->fields.array->is_record() &&
+             existing->type->fields.array->record_compare(var->type->fields.array))
+            return true;
       }
    }
    return false;
@@ -959,12 +969,24 @@ cross_validate_globals(struct gl_shader_program *prog,
                       && existing->type->record_compare(var->type)) {
                      existing->type = var->type;
                   } else {
-                     linker_error(prog, "%s `%s' declared as type "
-                                  "`%s' and type `%s'\n",
-                                  mode_string(var),
-                                  var->name, var->type->name,
-                                  existing->type->name);
-                     return;
+                     /* If it is an unsized array in a Shader Storage Block,
+                      * two different shaders can access to different elements.
+                      * Because of that, they might be converted to different
+                      * sized arrays, then check that they are compatible but
+                      * ignore the array size.
+                      */
+                     if (!(var->data.mode == ir_var_shader_storage &&
+                           var->data.from_ssbo_unsized_array &&
+                           existing->data.mode == ir_var_shader_storage &&
+                           existing->data.from_ssbo_unsized_array &&
+                           var->type->gl_type == existing->type->gl_type)) {
+                        linker_error(prog, "%s `%s' declared as type "
+                                    "`%s' and type `%s'\n",
+                                    mode_string(var),
+                                    var->name, var->type->name,
+                                    existing->type->name);
+                        return;
+                     }
                   }
 	       }
 	    }
@@ -1364,12 +1386,14 @@ public:
 
    virtual ir_visitor_status visit(ir_variable *var)
    {
-      fixup_type(&var->type, var->data.max_array_access);
+      fixup_type(&var->type, var->data.max_array_access,
+                 var->data.from_ssbo_unsized_array);
       if (var->type->is_interface()) {
          if (interface_contains_unsized_arrays(var->type)) {
             const glsl_type *new_type =
                resize_interface_members(var->type,
-                                        var->get_max_ifc_array_access());
+                                        var->get_max_ifc_array_access(),
+                                        var->is_in_shader_storage_block());
             var->type = new_type;
             var->change_interface_type(new_type);
          }
@@ -1378,7 +1402,8 @@ public:
          if (interface_contains_unsized_arrays(var->type->fields.array)) {
             const glsl_type *new_type =
                resize_interface_members(var->type->fields.array,
-                                        var->get_max_ifc_array_access());
+                                        var->get_max_ifc_array_access(),
+                                        var->is_in_shader_storage_block());
             var->change_interface_type(new_type);
             var->type = update_interface_members_array(var->type, new_type);
          }
@@ -1419,9 +1444,10 @@ private:
     * If the type pointed to by \c type represents an unsized array, replace
     * it with a sized array whose size is determined by max_array_access.
     */
-   static void fixup_type(const glsl_type **type, unsigned max_array_access)
+   static void fixup_type(const glsl_type **type, unsigned max_array_access,
+                          bool from_ssbo_unsized_array)
    {
-      if ((*type)->is_unsized_array()) {
+      if (!from_ssbo_unsized_array && (*type)->is_unsized_array()) {
          *type = glsl_type::get_array_instance((*type)->fields.array,
                                                max_array_access + 1);
          assert(*type != NULL);
@@ -1464,14 +1490,23 @@ private:
     */
    static const glsl_type *
    resize_interface_members(const glsl_type *type,
-                            const unsigned *max_ifc_array_access)
+                            const unsigned *max_ifc_array_access,
+                            bool is_ssbo)
    {
       unsigned num_fields = type->length;
       glsl_struct_field *fields = new glsl_struct_field[num_fields];
       memcpy(fields, type->fields.structure,
              num_fields * sizeof(*fields));
       for (unsigned i = 0; i < num_fields; i++) {
-         fixup_type(&fields[i].type, max_ifc_array_access[i]);
+         /* If SSBO last member is unsized array, we don't replace it by a sized
+          * array.
+          */
+         if (is_ssbo && i == (num_fields - 1))
+            fixup_type(&fields[i].type, max_ifc_array_access[i],
+                       true);
+         else
+            fixup_type(&fields[i].type, max_ifc_array_access[i],
+                       false);
       }
       glsl_interface_packing packing =
          (glsl_interface_packing) type->interface_packing;
-- 
cgit v1.2.3


From 68f5a4e6d2c5bec39b425a41eac6b190a8a7d14a Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Tue, 22 Sep 2015 11:05:28 +0200
Subject: glsl: fix indention in glsl_types.cpp
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

No functional changes.

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/glsl_types.cpp | 140 ++++++++++++++++++++++++------------------------
 1 file changed, 70 insertions(+), 70 deletions(-)

(limited to 'src')

diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
index 97c79fa4ca1..b4525eb9cf2 100644
--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -45,8 +45,8 @@ glsl_type::init_ralloc_type_ctx(void)
 }
 
 glsl_type::glsl_type(GLenum gl_type,
-		     glsl_base_type base_type, unsigned vector_elements,
-		     unsigned matrix_columns, const char *name) :
+                     glsl_base_type base_type, unsigned vector_elements,
+                     unsigned matrix_columns, const char *name) :
    gl_type(gl_type),
    base_type(base_type),
    sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
@@ -69,8 +69,8 @@ glsl_type::glsl_type(GLenum gl_type,
 }
 
 glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
-		     enum glsl_sampler_dim dim, bool shadow, bool array,
-		     unsigned type, const char *name) :
+                     enum glsl_sampler_dim dim, bool shadow, bool array,
+                     unsigned type, const char *name) :
    gl_type(gl_type),
    base_type(base_type),
    sampler_dimensionality(dim), sampler_shadow(shadow),
@@ -96,7 +96,7 @@ glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
 }
 
 glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
-		     const char *name) :
+                     const char *name) :
    gl_type(0),
    base_type(GLSL_TYPE_STRUCT),
    sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
@@ -112,12 +112,12 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
    assert(name != NULL);
    this->name = ralloc_strdup(this->mem_ctx, name);
    this->fields.structure = ralloc_array(this->mem_ctx,
-					 glsl_struct_field, length);
+                                         glsl_struct_field, length);
 
    for (i = 0; i < length; i++) {
       this->fields.structure[i].type = fields[i].type;
       this->fields.structure[i].name = ralloc_strdup(this->fields.structure,
-						     fields[i].name);
+                                                     fields[i].name);
       this->fields.structure[i].location = fields[i].location;
       this->fields.structure[i].interpolation = fields[i].interpolation;
       this->fields.structure[i].centroid = fields[i].centroid;
@@ -130,7 +130,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
 }
 
 glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
-		     enum glsl_interface_packing packing, const char *name) :
+                     enum glsl_interface_packing packing, const char *name) :
    gl_type(0),
    base_type(GLSL_TYPE_INTERFACE),
    sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
@@ -146,11 +146,11 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
    assert(name != NULL);
    this->name = ralloc_strdup(this->mem_ctx, name);
    this->fields.structure = ralloc_array(this->mem_ctx,
-					 glsl_struct_field, length);
+                                         glsl_struct_field, length);
    for (i = 0; i < length; i++) {
       this->fields.structure[i].type = fields[i].type;
       this->fields.structure[i].name = ralloc_strdup(this->fields.structure,
-						     fields[i].name);
+                                                     fields[i].name);
       this->fields.structure[i].location = fields[i].location;
       this->fields.structure[i].interpolation = fields[i].interpolation;
       this->fields.structure[i].centroid = fields[i].centroid;
@@ -186,8 +186,8 @@ glsl_type::contains_sampler() const
       return this->fields.array->contains_sampler();
    } else if (this->is_record()) {
       for (unsigned int i = 0; i < this->length; i++) {
-	 if (this->fields.structure[i].type->contains_sampler())
-	    return true;
+         if (this->fields.structure[i].type->contains_sampler())
+            return true;
       }
       return false;
    } else {
@@ -203,8 +203,8 @@ glsl_type::contains_integer() const
       return this->fields.array->contains_integer();
    } else if (this->is_record()) {
       for (unsigned int i = 0; i < this->length; i++) {
-	 if (this->fields.structure[i].type->contains_integer())
-	    return true;
+         if (this->fields.structure[i].type->contains_integer())
+            return true;
       }
       return false;
    } else {
@@ -219,8 +219,8 @@ glsl_type::contains_double() const
       return this->fields.array->contains_double();
    } else if (this->is_record()) {
       for (unsigned int i = 0; i < this->length; i++) {
-	 if (this->fields.structure[i].type->contains_double())
-	    return true;
+         if (this->fields.structure[i].type->contains_double())
+            return true;
       }
       return false;
    } else {
@@ -255,8 +255,8 @@ glsl_type::contains_subroutine() const
       return this->fields.array->contains_subroutine();
    } else if (this->is_record()) {
       for (unsigned int i = 0; i < this->length; i++) {
-	 if (this->fields.structure[i].type->contains_subroutine())
-	    return true;
+         if (this->fields.structure[i].type->contains_subroutine())
+            return true;
       }
       return false;
    } else {
@@ -301,8 +301,8 @@ glsl_type::contains_image() const
       return this->fields.array->contains_image();
    } else if (this->is_record()) {
       for (unsigned int i = 0; i < this->length; i++) {
-	 if (this->fields.structure[i].type->contains_image())
-	    return true;
+         if (this->fields.structure[i].type->contains_image())
+            return true;
       }
       return false;
    } else {
@@ -502,21 +502,21 @@ glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns)
    if (columns == 1) {
       switch (base_type) {
       case GLSL_TYPE_UINT:
-	 return uvec(rows);
+         return uvec(rows);
       case GLSL_TYPE_INT:
-	 return ivec(rows);
+         return ivec(rows);
       case GLSL_TYPE_FLOAT:
-	 return vec(rows);
+         return vec(rows);
       case GLSL_TYPE_DOUBLE:
-	 return dvec(rows);
+         return dvec(rows);
       case GLSL_TYPE_BOOL:
-	 return bvec(rows);
+         return bvec(rows);
       default:
-	 return error_type;
+         return error_type;
       }
    } else {
       if ((base_type != GLSL_TYPE_FLOAT && base_type != GLSL_TYPE_DOUBLE) || (rows == 1))
-	 return error_type;
+         return error_type;
 
       /* GLSL matrix types are named mat{COLUMNS}x{ROWS}.  Only the following
        * combinations are valid:
@@ -738,10 +738,10 @@ glsl_type::record_compare(const glsl_type *b) const
 
    for (unsigned i = 0; i < this->length; i++) {
       if (this->fields.structure[i].type != b->fields.structure[i].type)
-	 return false;
+         return false;
       if (strcmp(this->fields.structure[i].name,
-		 b->fields.structure[i].name) != 0)
-	 return false;
+                 b->fields.structure[i].name) != 0)
+         return false;
       if (this->fields.structure[i].matrix_layout
          != b->fields.structure[i].matrix_layout)
         return false;
@@ -802,8 +802,8 @@ glsl_type::record_key_hash(const void *a)
 
 const glsl_type *
 glsl_type::get_record_instance(const glsl_struct_field *fields,
-			       unsigned num_fields,
-			       const char *name)
+                               unsigned num_fields,
+                               const char *name)
 {
    const glsl_type key(fields, num_fields, name);
 
@@ -836,9 +836,9 @@ glsl_type::get_record_instance(const glsl_struct_field *fields,
 
 const glsl_type *
 glsl_type::get_interface_instance(const glsl_struct_field *fields,
-				  unsigned num_fields,
-				  enum glsl_interface_packing packing,
-				  const char *block_name)
+                                  unsigned num_fields,
+                                  enum glsl_interface_packing packing,
+                                  const char *block_name)
 {
    const glsl_type key(fields, num_fields, packing, block_name);
 
@@ -976,7 +976,7 @@ glsl_type::field_type(const char *name) const
 
    for (unsigned i = 0; i < this->length; i++) {
       if (strcmp(name, this->fields.structure[i].name) == 0)
-	 return this->fields.structure[i].type;
+         return this->fields.structure[i].type;
    }
 
    return error_type;
@@ -992,7 +992,7 @@ glsl_type::field_index(const char *name) const
 
    for (unsigned i = 0; i < this->length; i++) {
       if (strcmp(name, this->fields.structure[i].name) == 0)
-	 return i;
+         return i;
    }
 
    return -1;
@@ -1017,7 +1017,7 @@ glsl_type::component_slots() const
       unsigned size = 0;
 
       for (unsigned i = 0; i < this->length; i++)
-	 size += this->fields.structure[i].type->component_slots();
+         size += this->fields.structure[i].type->component_slots();
 
       return size;
    }
@@ -1153,12 +1153,12 @@ glsl_type::std140_base_alignment(bool row_major) const
    if (this->is_scalar() || this->is_vector()) {
       switch (this->vector_elements) {
       case 1:
-	 return N;
+         return N;
       case 2:
-	 return 2 * N;
+         return 2 * N;
       case 3:
       case 4:
-	 return 4 * N;
+         return 4 * N;
       }
    }
 
@@ -1183,13 +1183,13 @@ glsl_type::std140_base_alignment(bool row_major) const
     */
    if (this->is_array()) {
       if (this->fields.array->is_scalar() ||
-	  this->fields.array->is_vector() ||
-	  this->fields.array->is_matrix()) {
-	 return MAX2(this->fields.array->std140_base_alignment(row_major), 16);
+          this->fields.array->is_vector() ||
+          this->fields.array->is_matrix()) {
+         return MAX2(this->fields.array->std140_base_alignment(row_major), 16);
       } else {
-	 assert(this->fields.array->is_record() ||
+         assert(this->fields.array->is_record() ||
                 this->fields.array->is_array());
-	 return this->fields.array->std140_base_alignment(row_major);
+         return this->fields.array->std140_base_alignment(row_major);
       }
    }
 
@@ -1208,11 +1208,11 @@ glsl_type::std140_base_alignment(bool row_major) const
       int r = this->vector_elements;
 
       if (row_major) {
-	 vec_type = get_instance(base_type, c, 1);
-	 array_type = glsl_type::get_array_instance(vec_type, r);
+         vec_type = get_instance(base_type, c, 1);
+         array_type = glsl_type::get_array_instance(vec_type, r);
       } else {
-	 vec_type = get_instance(base_type, r, 1);
-	 array_type = glsl_type::get_array_instance(vec_type, c);
+         vec_type = get_instance(base_type, r, 1);
+         array_type = glsl_type::get_array_instance(vec_type, c);
       }
 
       return array_type->std140_base_alignment(false);
@@ -1242,9 +1242,9 @@ glsl_type::std140_base_alignment(bool row_major) const
             field_row_major = false;
          }
 
-	 const struct glsl_type *field_type = this->fields.structure[i].type;
-	 base_alignment = MAX2(base_alignment,
-			       field_type->std140_base_alignment(field_row_major));
+         const struct glsl_type *field_type = this->fields.structure[i].type;
+         base_alignment = MAX2(base_alignment,
+                               field_type->std140_base_alignment(field_row_major));
       }
       return base_alignment;
    }
@@ -1296,25 +1296,25 @@ glsl_type::std140_size(bool row_major) const
       unsigned int array_len;
 
       if (this->is_array()) {
-	 element_type = this->fields.array;
-	 array_len = this->length;
+         element_type = this->fields.array;
+         array_len = this->length;
       } else {
-	 element_type = this;
-	 array_len = 1;
+         element_type = this;
+         array_len = 1;
       }
 
       if (row_major) {
          vec_type = get_instance(element_type->base_type,
                                  element_type->matrix_columns, 1);
 
-	 array_len *= element_type->vector_elements;
+         array_len *= element_type->vector_elements;
       } else {
-	 vec_type = get_instance(element_type->base_type,
-				 element_type->vector_elements, 1);
-	 array_len *= element_type->matrix_columns;
+         vec_type = get_instance(element_type->base_type,
+                                 element_type->vector_elements, 1);
+         array_len *= element_type->matrix_columns;
       }
       const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
-								  array_len);
+                                                                  array_len);
 
       return array_type->std140_size(false);
    }
@@ -1331,11 +1331,11 @@ glsl_type::std140_size(bool row_major) const
     */
    if (this->is_array()) {
       if (this->fields.array->is_record()) {
-	 return this->length * this->fields.array->std140_size(row_major);
+         return this->length * this->fields.array->std140_size(row_major);
       } else {
-	 unsigned element_base_align =
-	    this->fields.array->std140_base_alignment(row_major);
-	 return this->length * MAX2(element_base_align, 16);
+         unsigned element_base_align =
+            this->fields.array->std140_base_alignment(row_major);
+         return this->length * MAX2(element_base_align, 16);
       }
    }
 
@@ -1365,10 +1365,10 @@ glsl_type::std140_size(bool row_major) const
             field_row_major = false;
          }
 
-	 const struct glsl_type *field_type = this->fields.structure[i].type;
-	 unsigned align = field_type->std140_base_alignment(field_row_major);
-	 size = glsl_align(size, align);
-	 size += field_type->std140_size(field_row_major);
+         const struct glsl_type *field_type = this->fields.structure[i].type;
+         unsigned align = field_type->std140_base_alignment(field_row_major);
+         size = glsl_align(size, align);
+         size += field_type->std140_size(field_row_major);
 
          max_align = MAX2(align, max_align);
 
-- 
cgit v1.2.3


From 1440d2a6833902d9c966fe8ad7db46a7f787391c Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Mon, 21 Sep 2015 09:09:11 +0200
Subject: glsl: Add unsized array support to glsl_type::std140_size()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/glsl_types.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
index b4525eb9cf2..07d72489057 100644
--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -1351,7 +1351,7 @@ glsl_type::std140_size(bool row_major) const
     *     rounded up to the next multiple of the base alignment of the
     *     structure.
     */
-   if (this->is_record()) {
+   if (this->is_record() || this->is_interface()) {
       unsigned size = 0;
       unsigned max_align = 0;
 
@@ -1367,6 +1367,11 @@ glsl_type::std140_size(bool row_major) const
 
          const struct glsl_type *field_type = this->fields.structure[i].type;
          unsigned align = field_type->std140_base_alignment(field_row_major);
+
+         /* Ignore unsized arrays when calculating size */
+         if (field_type->is_unsized_array())
+            continue;
+
          size = glsl_align(size, align);
          size += field_type->std140_size(field_row_major);
 
-- 
cgit v1.2.3


From 273f61a0051a794d1a39d70fb1dbf46a3ca3c63f Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Mon, 13 Apr 2015 16:17:07 +0200
Subject: glsl: Add parser/compiler support for unsized array's length()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The unsized array length is computed with the following formula:

array.length() =
   max((buffer_object_size - offset_of_array) / stride_of_array, 0)

Of these, only the buffer size needs to be provided by the backends; the
frontend already knows the values of the two other variables.

This patch identifies the cases where we need to get the length of an
unsized array, injecting ir_unop_ssbo_unsized_array_length expressions
that will be lowered (in a later patch) to inject the formula mentioned
above.

It also adds the ir_unop_get_buffer_size expression that drivers will
implement to provide the buffer length.

v2:
- Do not define a triop that will force backends to implement the
  entire formula, they should only need to provide the buffer size
  since the other values are known by the frontend (Curro).

v3:
- Call state->has_shader_storage_buffer_objects() in ast_function.cpp instead
  of using state->ARB_shader_storage_buffer_object_enable (Tapani).

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/ast_function.cpp                             | 13 +++++++++----
 src/glsl/ir.cpp                                       |  7 +++++++
 src/glsl/ir.h                                         | 19 ++++++++++++++++++-
 src/glsl/ir_validate.cpp                              | 11 +++++++++++
 src/glsl/link_uniforms.cpp                            | 10 ++++++++--
 .../drivers/dri/i965/brw_fs_channel_expressions.cpp   |  2 ++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp        |  8 ++++++++
 src/mesa/program/ir_to_mesa.cpp                       |  2 ++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp            |  5 +++++
 9 files changed, 70 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index 803edf5a14d..ff5ecb954f0 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -1593,11 +1593,16 @@ ast_function_expression::handle_method(exec_list *instructions,
 
       if (op->type->is_array()) {
          if (op->type->is_unsized_array()) {
-            _mesa_glsl_error(&loc, state, "length called on unsized array");
-            goto fail;
+            if (!state->has_shader_storage_buffer_objects()) {
+               _mesa_glsl_error(&loc, state, "length called on unsized array"
+                                             " only available with "
+                                             "ARB_shader_storage_buffer_object");
+            }
+            /* Calculate the length of an unsized array at run time */
+            result = new(ctx) ir_expression(ir_unop_ssbo_unsized_array_length, op);
+         } else {
+            result = new(ctx) ir_constant(op->type->array_size());
          }
-
-         result = new(ctx) ir_constant(op->type->array_size());
       } else if (op->type->is_vector()) {
          if (state->ARB_shading_language_420pack_enable) {
             /* .length() returns int. */
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index b9df9761920..2c45b9edc0f 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -342,6 +342,11 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
 					   op0->type->vector_elements, 1);
       break;
 
+   case ir_unop_get_buffer_size:
+   case ir_unop_ssbo_unsized_array_length:
+      this->type = glsl_type::int_type;
+      break;
+
    default:
       assert(!"not reached: missing automatic type setup for ir_expression");
       this->type = op0->type;
@@ -571,6 +576,8 @@ static const char *const operator_strs[] = {
    "noise",
    "subroutine_to_int",
    "interpolate_at_centroid",
+   "get_buffer_size",
+   "ssbo_unsized_array_length",
    "+",
    "-",
    "*",
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 48b6795cc09..43a2bf0ae1c 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1424,10 +1424,27 @@ enum ir_expression_operation {
     */
    ir_unop_interpolate_at_centroid,
 
+   /**
+    * Ask the driver for the total size of a buffer block.
+    *
+    * operand0 is the ir_constant buffer block index in the linked shader.
+    */
+   ir_unop_get_buffer_size,
+
+   /**
+    * Calculate length of an unsized array inside a buffer block.
+    * This opcode is going to be replaced in a lowering pass inside
+    * the linker.
+    *
+    * operand0 is the unsized array's ir_rvalue for the calculation
+    * of its length.
+    */
+   ir_unop_ssbo_unsized_array_length,
+
    /**
     * A sentinel marking the last of the unary operations.
     */
-   ir_last_unop = ir_unop_interpolate_at_centroid,
+   ir_last_unop = ir_unop_ssbo_unsized_array_length,
 
    ir_binop_add,
    ir_binop_sub,
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index 3f0dea74e27..935571ae1d6 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -409,6 +409,17 @@ ir_validate::visit_leave(ir_expression *ir)
       assert(ir->operands[0]->type->is_float());
       break;
 
+   case ir_unop_get_buffer_size:
+      assert(ir->type == glsl_type::int_type);
+      assert(ir->operands[0]->type == glsl_type::uint_type);
+      break;
+
+   case ir_unop_ssbo_unsized_array_length:
+      assert(ir->type == glsl_type::int_type);
+      assert(ir->operands[0]->type->is_array());
+      assert(ir->operands[0]->type->is_unsized_array());
+      break;
+
    case ir_unop_d2f:
       assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
       assert(ir->type->base_type == GLSL_TYPE_FLOAT);
diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index 238546ba22e..67a6e1bea17 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -231,9 +231,15 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
       if (record_type == NULL && t->fields.array->is_record())
          record_type = t->fields.array;
 
-      record_array_count *= t->length;
+      unsigned length = t->length;
+      /* Shader storage block unsized arrays: add subscript [0] to variable
+       * names */
+      if (t->is_unsized_array())
+         length = 1;
 
-      for (unsigned i = 0; i < t->length; i++) {
+      record_array_count *= length;
+
+      for (unsigned i = 0; i < length; i++) {
 	 size_t new_length = name_length;
 
 	 /* Append the subscript to the current variable name */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index a8883a35ef2..277b6cc3a60 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -379,6 +379,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
    }
 
    case ir_binop_ubo_load:
+   case ir_unop_get_buffer_size:
       unreachable("not yet supported");
 
    case ir_triop_fma:
@@ -430,6 +431,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
    case ir_triop_vector_insert:
    case ir_quadop_bitfield_insert:
    case ir_quadop_vector:
+   case ir_unop_ssbo_unsized_array_length:
       unreachable("should have been lowered");
 
    case ir_unop_unpack_half_2x16_split_x:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index ac086a72eb1..3443e5cb759 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1585,6 +1585,10 @@ vec4_visitor::visit(ir_expression *ir)
       emit(MOV(result_dst, op[0]));
       break;
 
+   case ir_unop_ssbo_unsized_array_length:
+      unreachable("not reached: should be handled by lower_ubo_reference");
+      break;
+
    case ir_binop_add:
       emit(ADD(result_dst, op[0], op[1]));
       break;
@@ -1791,6 +1795,10 @@ vec4_visitor::visit(ir_expression *ir)
       emit(RNDE(result_dst, op[0]));
       break;
 
+   case ir_unop_get_buffer_size:
+      unreachable("not reached: not implemented");
+      break;
+
    case ir_binop_min:
       emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
       break;
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index afb400f714c..4201a80cf62 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1344,9 +1344,11 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
    case ir_unop_dFdy_coarse:
    case ir_unop_dFdy_fine:
    case ir_unop_subroutine_to_int:
+   case ir_unop_get_buffer_size:
       assert(!"not supported");
       break;
 
+   case ir_unop_ssbo_unsized_array_length:
    case ir_quadop_vector:
       /* This operation should have already been handled.
        */
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 5896f6518b5..633e90ffa38 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2217,10 +2217,15 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
    case ir_triop_vector_insert:
    case ir_binop_carry:
    case ir_binop_borrow:
+   case ir_unop_ssbo_unsized_array_length:
       /* This operation is not supported, or should have already been handled.
        */
       assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
       break;
+
+   case ir_unop_get_buffer_size:
+      assert(!"Not implemented yet");
+      break;
    }
 
    this->result = result_src;
-- 
cgit v1.2.3


From 750c694474776fd545262aaf3190ccd92d6bb0ec Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Wed, 8 Jul 2015 17:42:14 +0200
Subject: glsl: implement unsized array length
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2:
- Reduce the number of lines over 80 character line width
  limit. (Thomas Hellan)

v3:
- Inject the formula to compute the array length in the IR, backends
  only need to provide the buffer size (Curro)
- Create an auxiliary function to simplify code (Jordan Justen)
- Rename variables (Jordan Justen)

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/lower_ubo_reference.cpp | 182 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 182 insertions(+)

(limited to 'src')

diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index 8b0810781fe..8694383c4ed 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -166,6 +166,18 @@ public:
                     bool row_major, int matrix_columns,
                     unsigned write_mask);
 
+   ir_visitor_status visit_enter(class ir_expression *);
+   ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
+   void check_ssbo_unsized_array_length_expression(class ir_expression *);
+   void check_ssbo_unsized_array_length_assignment(ir_assignment *ir);
+
+   ir_expression *process_ssbo_unsized_array_length(ir_rvalue **,
+                                                    ir_dereference *,
+                                                    ir_variable *);
+   ir_expression *emit_ssbo_get_buffer_size();
+
+   unsigned calculate_unsized_array_stride(ir_dereference *deref);
+
    void *mem_ctx;
    struct gl_shader *shader;
    struct gl_uniform_buffer_variable *ubo_var;
@@ -738,6 +750,175 @@ lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref,
                row_major, matrix_columns, write_mask);
 }
 
+ir_visitor_status
+lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
+{
+   check_ssbo_unsized_array_length_expression(ir);
+   return rvalue_visit(ir);
+}
+
+ir_expression *
+lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr)
+{
+   if (expr->operation !=
+       ir_expression_operation(ir_unop_ssbo_unsized_array_length))
+      return NULL;
+
+   ir_rvalue *rvalue = expr->operands[0]->as_rvalue();
+   if (!rvalue ||
+       !rvalue->type->is_array() || !rvalue->type->is_unsized_array())
+      return NULL;
+
+   ir_dereference *deref = expr->operands[0]->as_dereference();
+   if (!deref)
+      return NULL;
+
+   ir_variable *var = expr->operands[0]->variable_referenced();
+   if (!var || !var->is_in_shader_storage_block())
+      return NULL;
+   return process_ssbo_unsized_array_length(&rvalue, deref, var);
+}
+
+void
+lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir)
+{
+   if (ir->operation ==
+       ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
+         /* Don't replace this unop if it is found alone. It is going to be
+          * removed by the optimization passes or replaced if it is part of
+          * an ir_assignment or another ir_expression.
+          */
+         return;
+   }
+
+   for (unsigned i = 0; i < ir->get_num_operands(); i++) {
+      if (ir->operands[i]->ir_type != ir_type_expression)
+         continue;
+      ir_expression *expr = (ir_expression *) ir->operands[i];
+      ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
+      if (!temp)
+         continue;
+
+      delete expr;
+      ir->operands[i] = temp;
+   }
+}
+
+void
+lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir)
+{
+   if (!ir->rhs || ir->rhs->ir_type != ir_type_expression)
+      return;
+
+   ir_expression *expr = (ir_expression *) ir->rhs;
+   ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
+   if (!temp)
+      return;
+
+   delete expr;
+   ir->rhs = temp;
+   return;
+}
+
+ir_expression *
+lower_ubo_reference_visitor::emit_ssbo_get_buffer_size()
+{
+   ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
+   return new(mem_ctx) ir_expression(ir_unop_get_buffer_size,
+                                     glsl_type::int_type,
+                                     block_ref);
+}
+
+unsigned
+lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref)
+{
+   unsigned array_stride = 0;
+
+   switch (deref->ir_type) {
+   case ir_type_dereference_variable:
+   {
+      ir_dereference_variable *deref_var = (ir_dereference_variable *)deref;
+      const struct glsl_type *unsized_array_type = NULL;
+      /* An unsized array can be sized by other lowering passes, so pick
+       * the first field of the array which has the data type of the unsized
+       * array.
+       */
+      unsized_array_type = deref_var->var->type->fields.array;
+
+      /* Whether or not the field is row-major (because it might be a
+       * bvec2 or something) does not affect the array itself. We need
+       * to know whether an array element in its entirety is row-major.
+       */
+      const bool array_row_major =
+         is_dereferenced_thing_row_major(deref_var);
+
+      array_stride = unsized_array_type->std140_size(array_row_major);
+      array_stride = glsl_align(array_stride, 16);
+      break;
+   }
+   case ir_type_dereference_record:
+   {
+      ir_dereference_record *deref_record = (ir_dereference_record *) deref;
+      const struct glsl_type *deref_record_type =
+         deref_record->record->as_dereference()->type;
+      unsigned record_length = deref_record_type->length;
+      /* Unsized array is always the last element of the interface */
+      const struct glsl_type *unsized_array_type =
+         deref_record_type->fields.structure[record_length - 1].type->fields.array;
+
+      const bool array_row_major =
+         is_dereferenced_thing_row_major(deref_record);
+      array_stride = unsized_array_type->std140_size(array_row_major);
+      array_stride = glsl_align(array_stride, 16);
+      break;
+   }
+   default:
+      unreachable("Unsupported dereference type");
+   }
+   return array_stride;
+}
+
+ir_expression *
+lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue,
+                                                               ir_dereference *deref,
+                                                               ir_variable *var)
+{
+   mem_ctx = ralloc_parent(*rvalue);
+
+   ir_rvalue *base_offset = NULL;
+   unsigned const_offset;
+   bool row_major;
+   int matrix_columns;
+   int unsized_array_stride = calculate_unsized_array_stride(deref);
+
+   /* Compute the offset to the start of the dereference as well as other
+    * information we need to calculate the length.
+    */
+   setup_for_load_or_store(var, deref,
+                           &base_offset, &const_offset,
+                           &row_major, &matrix_columns);
+   /* array.length() =
+    *  max((buffer_object_size - offset_of_array) / stride_of_array, 0)
+    */
+   ir_expression *buffer_size = emit_ssbo_get_buffer_size();
+
+   ir_expression *offset_of_array = new(mem_ctx)
+      ir_expression(ir_binop_add, base_offset,
+                    new(mem_ctx) ir_constant(const_offset));
+   ir_expression *offset_of_array_int = new(mem_ctx)
+      ir_expression(ir_unop_u2i, offset_of_array);
+
+   ir_expression *sub = new(mem_ctx)
+      ir_expression(ir_binop_sub, buffer_size, offset_of_array_int);
+   ir_expression *div =  new(mem_ctx)
+      ir_expression(ir_binop_div, sub,
+                    new(mem_ctx) ir_constant(unsized_array_stride));
+   ir_expression *max = new(mem_ctx)
+      ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0));
+
+   return max;
+}
+
 void
 lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
 {
@@ -777,6 +958,7 @@ lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
 ir_visitor_status
 lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
 {
+   check_ssbo_unsized_array_length_assignment(ir);
    check_for_ssbo_store(ir);
    return rvalue_visit(ir);
 }
-- 
cgit v1.2.3


From 003ce30e36b1a0757053a1f1407cdd314a32db10 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Mon, 1 Jun 2015 09:44:55 +0200
Subject: nir: Implement ir_unop_get_buffer_size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is how backends provide the buffer size required to compute
the size of unsized arrays in the previous patch.

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/nir/glsl_to_nir.cpp  | 10 ++++++++++
 src/glsl/nir/nir_intrinsics.h |  7 +++++++
 2 files changed, 17 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index c13f953d914..d114da49272 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -1322,6 +1322,16 @@ nir_visitor::visit(ir_expression *ir)
          unreachable("not reached");
       }
       break;
+   case ir_unop_get_buffer_size: {
+      nir_intrinsic_instr *load = nir_intrinsic_instr_create(
+         this->shader,
+         nir_intrinsic_get_buffer_size);
+      load->num_components = ir->type->vector_elements;
+      load->src[0] = evaluate_rvalue(ir->operands[0]);
+      add_instr(&load->instr, ir->type->vector_elements);
+      return;
+   }
+
    case ir_binop_add:
    case ir_binop_sub:
    case ir_binop_mul:
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index b21460da5c0..271dc42d984 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -61,6 +61,13 @@ INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0,
 INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0,
           NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
+/*
+ * Ask the driver for the size of a given buffer. It takes the buffer index
+ * as source.
+ */
+INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0,
+          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
 /*
  * a barrier is an intrinsic with no inputs/outputs but which can't be moved
  * around/optimized in general
-- 
cgit v1.2.3


From 6485880232df46c0cdded0b063b8841a7855bd32 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Fri, 28 Aug 2015 09:39:49 +0200
Subject: i965/vec4: Implement VS_OPCODE_GET_BUFFER_SIZE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Note that Skylake needs to include a header in the sampler message,
so this will need some tweaks to work there.

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_defines.h          |  3 +++
 src/mesa/drivers/dri/i965/brw_shader.cpp         |  3 +++
 src/mesa/drivers/dri/i965/brw_vec4.cpp           |  1 +
 src/mesa/drivers/dri/i965/brw_vec4.h             |  6 +++++
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 31 ++++++++++++++++++++++++
 5 files changed, 44 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index f0797985a19..0eada4fa380 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1084,6 +1084,9 @@ enum opcode {
    VS_OPCODE_PULL_CONSTANT_LOAD,
    VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
    VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
+
+   VS_OPCODE_GET_BUFFER_SIZE,
+
    VS_OPCODE_UNPACK_FLAGS_SIMD4X2,
 
    /**
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index c311a038e4f..13771b7def0 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -745,6 +745,9 @@ brw_instruction_name(enum opcode op)
    case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
       return "set_simd4x2_header_gen9";
 
+   case VS_OPCODE_GET_BUFFER_SIZE:
+      return "vs_get_buffer_size";
+
    case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
       return "unpack_flags_simd4x2";
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index c6510453479..c61b38548f7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -332,6 +332,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
    case SHADER_OPCODE_TG4:
    case SHADER_OPCODE_TG4_OFFSET:
    case SHADER_OPCODE_SAMPLEINFO:
+   case VS_OPCODE_GET_BUFFER_SIZE:
       return inst->header_size;
    default:
       unreachable("not reached");
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index de74ec9bb68..ed711902de4 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -560,6 +560,12 @@ private:
                                          struct brw_reg offset);
    void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
                                          struct brw_reg dst);
+
+   void generate_get_buffer_size(vec4_instruction *inst,
+                                 struct brw_reg dst,
+                                 struct brw_reg src,
+                                 struct brw_reg index);
+
    void generate_unpack_flags(struct brw_reg dst);
 
    const struct brw_compiler *compiler;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 05f20441adb..c7546070bf9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1032,6 +1032,32 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
    brw_mark_surface_used(&prog_data->base, surf_index);
 }
 
+void
+vec4_generator::generate_get_buffer_size(vec4_instruction *inst,
+                                         struct brw_reg dst,
+                                         struct brw_reg src,
+                                         struct brw_reg surf_index)
+{
+   assert(devinfo->gen >= 7);
+   assert(surf_index.type == BRW_REGISTER_TYPE_UD &&
+          surf_index.file == BRW_IMMEDIATE_VALUE);
+
+   brw_SAMPLE(p,
+              dst,
+              inst->base_mrf,
+              src,
+              surf_index.dw1.ud,
+              0,
+              GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
+              1, /* response length */
+              inst->mlen,
+              inst->header_size > 0,
+              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
+              BRW_SAMPLER_RETURN_FORMAT_SINT32);
+
+   brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
+}
+
 void
 vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
                                                  struct brw_reg dst,
@@ -1409,6 +1435,11 @@ vec4_generator::generate_code(const cfg_t *cfg)
          generate_set_simd4x2_header_gen9(inst, dst);
          break;
 
+
+      case VS_OPCODE_GET_BUFFER_SIZE:
+         generate_get_buffer_size(inst, dst, src[0], src[1]);
+         break;
+
       case GS_OPCODE_URB_WRITE:
          generate_gs_urb_write(inst);
          break;
-- 
cgit v1.2.3


From 65d7f5fe9f4284f7de867b4c412f086c6dcca176 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Wed, 26 Aug 2015 12:21:54 +0200
Subject: i965/vec4/nir: implement nir_intrinsic_get_buffer_size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index c681ae438ef..0db2291d84e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -538,6 +538,32 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       break;
    }
 
+   case nir_intrinsic_get_buffer_size: {
+      nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
+      unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+
+      assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
+
+      src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start +
+                                   ubo_index);
+      dst_reg result_dst = get_nir_dest(instr->dest);
+      vec4_instruction *inst = new(mem_ctx)
+         vec4_instruction(VS_OPCODE_GET_BUFFER_SIZE, result_dst);
+
+      inst->base_mrf = 2;
+      inst->mlen = 1; /* always at least one */
+      inst->src[1] = src_reg(surf_index);
+
+      /* MRF for the first parameter */
+      src_reg lod = src_reg(0);
+      int param_base = inst->base_mrf;
+      int writemask = WRITEMASK_X;
+      emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod));
+
+      emit(inst);
+      break;
+   }
+
    case nir_intrinsic_load_vertex_id:
       unreachable("should be lowered by lower_vertex_id()");
 
-- 
cgit v1.2.3


From b23eb643ebab9ef250ce026a7e2f651de9be10f6 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Mon, 13 Apr 2015 16:55:49 +0200
Subject: i965/fs: Implement FS_OPCODE_GET_BUFFER_SIZE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_defines.h        |  1 +
 src/mesa/drivers/dri/i965/brw_fs.cpp           |  1 +
 src/mesa/drivers/dri/i965/brw_fs.h             |  3 ++
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 47 ++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_shader.cpp       |  3 ++
 5 files changed, 55 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 0eada4fa380..7045d2ce043 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1067,6 +1067,7 @@ enum opcode {
    FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
    FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
    FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
+   FS_OPCODE_GET_BUFFER_SIZE,
    FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
    FS_OPCODE_DISCARD_JUMP,
    FS_OPCODE_SET_SAMPLE_ID,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 618bbd2866a..a8f5520fb94 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -883,6 +883,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
       return 1;
    case FS_OPCODE_FB_WRITE:
       return 2;
+   case FS_OPCODE_GET_BUFFER_SIZE:
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
    case SHADER_OPCODE_GEN4_SCRATCH_READ:
       return 1;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 1886dd8e5ff..58205084549 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -431,6 +431,9 @@ private:
 			 struct brw_reg *src);
    void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
                      struct brw_reg sampler_index);
+   void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst,
+                                 struct brw_reg src,
+                                 struct brw_reg surf_index);
    void generate_math_gen6(fs_inst *inst,
                            struct brw_reg dst,
                            struct brw_reg src0,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index c65084d0608..6f8b75e339f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -543,6 +543,50 @@ fs_generator::generate_math_g45(fs_inst *inst,
              BRW_MATH_PRECISION_FULL);
 }
 
+void
+fs_generator::generate_get_buffer_size(fs_inst *inst,
+                                       struct brw_reg dst,
+                                       struct brw_reg src,
+                                       struct brw_reg surf_index)
+{
+   assert(devinfo->gen >= 7);
+   assert(surf_index.file == BRW_IMMEDIATE_VALUE);
+
+   uint32_t simd_mode;
+   int rlen = 4;
+
+   switch (inst->exec_size) {
+   case 8:
+      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
+      break;
+   case 16:
+      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+      break;
+   default:
+      unreachable("Invalid width for texture instruction");
+   }
+
+   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
+      rlen = 8;
+      dst = vec16(dst);
+   }
+
+   brw_SAMPLE(p,
+              retype(dst, BRW_REGISTER_TYPE_UW),
+              inst->base_mrf,
+              src,
+              surf_index.dw1.ud,
+              0,
+              GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
+              rlen, /* response length */
+              inst->mlen,
+              inst->header_size > 0,
+              simd_mode,
+              BRW_SAMPLER_RETURN_FORMAT_SINT32);
+
+   brw_mark_surface_used(prog_data, surf_index.dw1.ud);
+}
+
 void
 fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
                            struct brw_reg sampler_index)
@@ -1916,6 +1960,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          src[0].subnr = 4 * type_sz(src[0].type);
          brw_MOV(p, dst, stride(src[0], 8, 4, 1));
          break;
+      case FS_OPCODE_GET_BUFFER_SIZE:
+         generate_get_buffer_size(inst, dst, src[0], src[1]);
+         break;
       case SHADER_OPCODE_TEX:
       case FS_OPCODE_TXB:
       case SHADER_OPCODE_TXD:
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 13771b7def0..785cb2722fe 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -697,6 +697,9 @@ brw_instruction_name(enum opcode op)
    case FS_OPCODE_PIXEL_Y:
       return "pixel_y";
 
+   case FS_OPCODE_GET_BUFFER_SIZE:
+      return "fs_get_buffer_size";
+
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
       return "uniform_pull_const";
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
-- 
cgit v1.2.3


From f5dd2c182275a9de57e5186491012c402a6248e0 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Mon, 1 Jun 2015 09:45:51 +0200
Subject: i965/fs/nir: implement nir_intrinsic_get_buffer_size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2:
- Remove inst->regs_written assignment as the instruction only
  writes to one register.

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index e4ddadc79a8..97aef61657f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1734,6 +1734,30 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_get_buffer_size: {
+      nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
+      unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+      int reg_width = dispatch_width / 8;
+
+      assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
+
+      /* Set LOD = 0 */
+      fs_reg source = fs_reg(0);
+
+      int mlen = 1 * reg_width;
+      fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
+                                  BRW_REGISTER_TYPE_UD);
+      bld.LOAD_PAYLOAD(src_payload, &source, 1, 0);
+
+      fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start + ubo_index);
+      fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, dest,
+                               src_payload, surf_index);
+      inst->header_size = 0;
+      inst->mlen = mlen;
+      bld.emit(inst);
+      break;
+   }
+
    default:
       unreachable("unknown intrinsic");
    }
-- 
cgit v1.2.3


From eaa6f01c8d0c49d8ae637d8104163da9c45526ca Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Thu, 16 Apr 2015 13:18:01 +0200
Subject: i965/wm: emit null buffer surfaces when null buffers are attached
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Otherwise we can expect odd things to happen if, for example, we ask
for the size of the attached buffer from shader code, since that
might query this value from the surface we uploaded and get random
results.

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 44 ++++++++++++++----------
 1 file changed, 26 insertions(+), 18 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 24ff2d6a443..61031d29db3 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -939,28 +939,36 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
          struct gl_uniform_buffer_binding *binding;
          binding =
             &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
-         intel_bo = intel_buffer_object(binding->BufferObject);
-         drm_intel_bo *bo =
-            intel_bufferobj_buffer(brw, intel_bo,
-                                   binding->Offset,
-                                   binding->BufferObject->Size - binding->Offset);
-         brw_create_constant_surface(brw, bo, binding->Offset,
-                                     bo->size - binding->Offset,
-                                     &surf_offsets[i],
-                                     dword_pitch);
+         if (binding->BufferObject == ctx->Shared->NullBufferObj) {
+            brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &surf_offsets[i]);
+         } else {
+            intel_bo = intel_buffer_object(binding->BufferObject);
+            drm_intel_bo *bo =
+               intel_bufferobj_buffer(brw, intel_bo,
+                                      binding->Offset,
+                                      binding->BufferObject->Size - binding->Offset);
+            brw_create_constant_surface(brw, bo, binding->Offset,
+                                        bo->size - binding->Offset,
+                                        &surf_offsets[i],
+                                        dword_pitch);
+         }
       } else {
          struct gl_shader_storage_buffer_binding *binding;
          binding =
             &ctx->ShaderStorageBufferBindings[shader->UniformBlocks[i].Binding];
-         intel_bo = intel_buffer_object(binding->BufferObject);
-         drm_intel_bo *bo =
-            intel_bufferobj_buffer(brw, intel_bo,
-                                   binding->Offset,
-                                   binding->BufferObject->Size - binding->Offset);
-         brw_create_buffer_surface(brw, bo, binding->Offset,
-                                   bo->size - binding->Offset,
-                                   &surf_offsets[i],
-                                   dword_pitch);
+         if (binding->BufferObject == ctx->Shared->NullBufferObj) {
+            brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &surf_offsets[i]);
+         } else {
+            intel_bo = intel_buffer_object(binding->BufferObject);
+            drm_intel_bo *bo =
+               intel_bufferobj_buffer(brw, intel_bo,
+                                      binding->Offset,
+                                      binding->BufferObject->Size - binding->Offset);
+            brw_create_buffer_surface(brw, bo, binding->Offset,
+                                      bo->size - binding->Offset,
+                                      &surf_offsets[i],
+                                      dword_pitch);
+         }
       }
    }
 
-- 
cgit v1.2.3


From 5bb5eeea008a01f5bc44570915ea4b7ac432cef9 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Fri, 24 Apr 2015 07:02:19 +0200
Subject: i965/wm: surfaces should have the API buffer size, not the drm buffer
 size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The returned drm buffer object has a size that is a multiple of 4096, but that
should not be exposed to the API user, who is working with a different size.

As far as I can see this problem is only visible in the calculation of the
length of unsized arrays used in SSBOs, as the implementation of this needs
to query the underlying buffer size via a message.

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 61031d29db3..f2aaa0b178e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -948,7 +948,7 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
                                       binding->Offset,
                                       binding->BufferObject->Size - binding->Offset);
             brw_create_constant_surface(brw, bo, binding->Offset,
-                                        bo->size - binding->Offset,
+                                        binding->BufferObject->Size - binding->Offset,
                                         &surf_offsets[i],
                                         dword_pitch);
          }
@@ -965,7 +965,7 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
                                       binding->Offset,
                                       binding->BufferObject->Size - binding->Offset);
             brw_create_buffer_surface(brw, bo, binding->Offset,
-                                      bo->size - binding->Offset,
+                                      binding->BufferObject->Size - binding->Offset,
                                       &surf_offsets[i],
                                       dword_pitch);
          }
-- 
cgit v1.2.3


From 130031168de44e38e84000f279951ef70a99cb66 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Wed, 2 Sep 2015 15:58:01 +0200
Subject: glsl: layout qualifier can appear more than once since OpenGL 4.20
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This also applies when the GL_ARB_shading_language_420pack extension is enabled.

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/glsl_parser.yy | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 59e4527b238..4dd5384dac8 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -2595,9 +2595,15 @@ interface_block:
    {
       $$ = $1;
    }
-   | layout_qualifier basic_interface_block
+   | layout_qualifier interface_block
    {
-      ast_interface_block *block = $2;
+      ast_interface_block *block = (ast_interface_block *) $2;
+
+      if (!state->has_420pack() && block->layout.has_layout()) {
+         _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers");
+         YYERROR;
+      }
+
       if (!block->layout.merge_qualifier(& @1, state, $1)) {
          YYERROR;
       }
-- 
cgit v1.2.3


From 9c1f10b1bc548a00482a0b6b789c4cdb12f5302e Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Thu, 3 Sep 2015 09:47:56 +0200
Subject: glsl: ignore default qualifier declarations when checking for
 duplicate layout qualifiers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/ast.h                  | 3 +++
 src/glsl/glsl_parser.yy         | 5 ++++-
 src/glsl/glsl_parser_extras.cpp | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index d8c6cea7832..335f426d418 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -639,6 +639,9 @@ struct ast_type_qualifier {
     */
    glsl_base_type image_base_type;
 
+   /** Flag to know if this represents a default value for a qualifier */
+   bool is_default_qualifier;
+
    /**
     * Return true if and only if an interpolation qualifier is present.
     */
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 4dd5384dac8..42108a3c1b6 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -2599,7 +2599,8 @@ interface_block:
    {
       ast_interface_block *block = (ast_interface_block *) $2;
 
-      if (!state->has_420pack() && block->layout.has_layout()) {
+      if (!state->has_420pack() && block->layout.has_layout() &&
+          !block->layout.is_default_qualifier) {
          _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers");
          YYERROR;
       }
@@ -2608,6 +2609,8 @@ interface_block:
          YYERROR;
       }
 
+      block->layout.is_default_qualifier = false;
+
       foreach_list_typed (ast_declarator_list, member, link, &block->declarations) {
          ast_type_qualifier& qualifier = member->type->qualifier;
          if (qualifier.flags.q.stream && qualifier.stream != block->layout.stream) {
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index dae5261daca..7d976c26f8c 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -242,6 +242,7 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
    this->default_uniform_qualifier = new(this) ast_type_qualifier();
    this->default_uniform_qualifier->flags.q.shared = 1;
    this->default_uniform_qualifier->flags.q.column_major = 1;
+   this->default_uniform_qualifier->is_default_qualifier = true;
 
    this->fs_uses_gl_fragcoord = false;
    this->fs_redeclares_gl_fragcoord = false;
-- 
cgit v1.2.3


From 3763a0e0a740ccbb5d8e0912361a7ed97111751c Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Thu, 10 Sep 2015 10:00:12 +0200
Subject: glsl: Move interface block processing to glsl_parser_extras.cpp
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

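No functional changes intended. NOTE(review): the moved code does not carry
over the `buffer` (shader storage block) version/extension check that the
parser rule had, so `buffer` blocks now take the generic "#version 150
required" branch; confirm whether this is deliberate.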

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/ast.h                  |   5 ++
 src/glsl/glsl_parser.yy         | 127 +---------------------------------------
 src/glsl/glsl_parser_extras.cpp | 122 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 128 insertions(+), 126 deletions(-)

(limited to 'src')

diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index 335f426d418..cca32b334b2 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -1172,4 +1172,9 @@ extern void
 check_builtin_array_max_size(const char *name, unsigned size,
                              YYLTYPE loc, struct _mesa_glsl_parse_state *state);
 
+extern void _mesa_ast_process_interface_block(YYLTYPE *locp,
+                                              _mesa_glsl_parse_state *state,
+                                              ast_interface_block *const block,
+                                              const struct ast_type_qualifier q);
+
 #endif /* AST_H */
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 42108a3c1b6..7f00929d1e4 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -2634,132 +2634,7 @@ basic_interface_block:
       block->block_name = $2;
       block->declarations.push_degenerate_list_at_head(& $4->link);
 
-      if ($1.flags.q.buffer) {
-         if (!state->has_shader_storage_buffer_objects()) {
-            _mesa_glsl_error(& @1, state,
-                             "#version 430 / GL_ARB_shader_storage_buffer_object "
-                             "required for defining shader storage blocks");
-         } else if (state->ARB_shader_storage_buffer_object_warn) {
-            _mesa_glsl_warning(& @1, state,
-                               "#version 430 / GL_ARB_shader_storage_buffer_object "
-                               "required for defining shader storage blocks");
-         }
-      } else if ($1.flags.q.uniform) {
-         if (!state->has_uniform_buffer_objects()) {
-            _mesa_glsl_error(& @1, state,
-                             "#version 140 / GL_ARB_uniform_buffer_object "
-                             "required for defining uniform blocks");
-         } else if (state->ARB_uniform_buffer_object_warn) {
-            _mesa_glsl_warning(& @1, state,
-                               "#version 140 / GL_ARB_uniform_buffer_object "
-                               "required for defining uniform blocks");
-         }
-      } else {
-         if (state->es_shader || state->language_version < 150) {
-            _mesa_glsl_error(& @1, state,
-                             "#version 150 required for using "
-                             "interface blocks");
-         }
-      }
-
-      /* From the GLSL 1.50.11 spec, section 4.3.7 ("Interface Blocks"):
-       * "It is illegal to have an input block in a vertex shader
-       *  or an output block in a fragment shader"
-       */
-      if ((state->stage == MESA_SHADER_VERTEX) && $1.flags.q.in) {
-         _mesa_glsl_error(& @1, state,
-                          "`in' interface block is not allowed for "
-                          "a vertex shader");
-      } else if ((state->stage == MESA_SHADER_FRAGMENT) && $1.flags.q.out) {
-         _mesa_glsl_error(& @1, state,
-                          "`out' interface block is not allowed for "
-                          "a fragment shader");
-      }
-
-      /* Since block arrays require names, and both features are added in
-       * the same language versions, we don't have to explicitly
-       * version-check both things.
-       */
-      if (block->instance_name != NULL) {
-         state->check_version(150, 300, & @1, "interface blocks with "
-                               "an instance name are not allowed");
-      }
-
-      uint64_t interface_type_mask;
-      struct ast_type_qualifier temp_type_qualifier;
-
-      /* Get a bitmask containing only the in/out/uniform/buffer
-       * flags, allowing us to ignore other irrelevant flags like
-       * interpolation qualifiers.
-       */
-      temp_type_qualifier.flags.i = 0;
-      temp_type_qualifier.flags.q.uniform = true;
-      temp_type_qualifier.flags.q.buffer = true;
-      temp_type_qualifier.flags.q.in = true;
-      temp_type_qualifier.flags.q.out = true;
-      interface_type_mask = temp_type_qualifier.flags.i;
-
-      /* Get the block's interface qualifier.  The interface_qualifier
-       * production rule guarantees that only one bit will be set (and
-       * it will be in/out/uniform).
-       */
-      uint64_t block_interface_qualifier = $1.flags.i;
-
-      block->layout.flags.i |= block_interface_qualifier;
-
-      if (state->stage == MESA_SHADER_GEOMETRY &&
-          state->has_explicit_attrib_stream()) {
-         /* Assign global layout's stream value. */
-         block->layout.flags.q.stream = 1;
-         block->layout.flags.q.explicit_stream = 0;
-         block->layout.stream = state->out_qualifier->stream;
-      }
-
-      foreach_list_typed (ast_declarator_list, member, link, &block->declarations) {
-         ast_type_qualifier& qualifier = member->type->qualifier;
-         if ((qualifier.flags.i & interface_type_mask) == 0) {
-            /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
-             * "If no optional qualifier is used in a member declaration, the
-             *  qualifier of the variable is just in, out, or uniform as declared
-             *  by interface-qualifier."
-             */
-            qualifier.flags.i |= block_interface_qualifier;
-         } else if ((qualifier.flags.i & interface_type_mask) !=
-                    block_interface_qualifier) {
-            /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
-             * "If optional qualifiers are used, they can include interpolation
-             *  and storage qualifiers and they must declare an input, output,
-             *  or uniform variable consistent with the interface qualifier of
-             *  the block."
-             */
-            _mesa_glsl_error(& @1, state,
-                             "uniform/in/out qualifier on "
-                             "interface block member does not match "
-                             "the interface block");
-         }
-
-         /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks":
-          *
-          * "GLSL ES 3.0 does not support interface blocks for shader inputs or
-          * outputs."
-          *
-          * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":.
-          *
-          * "Only variables output from a shader can be candidates for
-          * invariance."
-          *
-          * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks":
-          *
-          * "If optional qualifiers are used, they can include interpolation
-          * qualifiers, auxiliary storage qualifiers, and storage qualifiers
-          * and they must declare an input, output, or uniform member
-          * consistent with the interface qualifier of the block"
-          */
-         if (qualifier.flags.q.invariant)
-            _mesa_glsl_error(&@1, state,
-                             "invariant qualifiers cannot be used "
-                             "with interface blocks members");
-      }
+      _mesa_ast_process_interface_block(& @1, state, block, $1);
 
       $$ = block;
    }
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 7d976c26f8c..67ef014c068 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -859,6 +859,128 @@ _mesa_ast_set_aggregate_type(const glsl_type *type,
    }
 }
 
+void
+_mesa_ast_process_interface_block(YYLTYPE *locp,
+                                  _mesa_glsl_parse_state *state,
+                                  ast_interface_block *const block,
+                                  const struct ast_type_qualifier q)
+{
+   if (q.flags.q.uniform) {
+      if (!state->has_uniform_buffer_objects()) {
+         _mesa_glsl_error(locp, state,
+                          "#version 140 / GL_ARB_uniform_buffer_object "
+                          "required for defining uniform blocks");
+      } else if (state->ARB_uniform_buffer_object_warn) {
+         _mesa_glsl_warning(locp, state,
+                            "#version 140 / GL_ARB_uniform_buffer_object "
+                            "required for defining uniform blocks");
+      }
+   } else {
+      if (state->es_shader || state->language_version < 150) {
+         _mesa_glsl_error(locp, state,
+                          "#version 150 required for using "
+                          "interface blocks");
+      }
+   }
+
+   /* From the GLSL 1.50.11 spec, section 4.3.7 ("Interface Blocks"):
+    * "It is illegal to have an input block in a vertex shader
+    *  or an output block in a fragment shader"
+    */
+   if ((state->stage == MESA_SHADER_VERTEX) && q.flags.q.in) {
+      _mesa_glsl_error(locp, state,
+                       "`in' interface block is not allowed for "
+                       "a vertex shader");
+   } else if ((state->stage == MESA_SHADER_FRAGMENT) && q.flags.q.out) {
+      _mesa_glsl_error(locp, state,
+                       "`out' interface block is not allowed for "
+                       "a fragment shader");
+   }
+
+   /* Since block arrays require names, and both features are added in
+    * the same language versions, we don't have to explicitly
+    * version-check both things.
+    */
+   if (block->instance_name != NULL) {
+      state->check_version(150, 300, locp, "interface blocks with "
+                           "an instance name are not allowed");
+   }
+
+   uint64_t interface_type_mask;
+   struct ast_type_qualifier temp_type_qualifier;
+
+   /* Get a bitmask containing only the in/out/uniform
+    * flags, allowing us to ignore other irrelevant flags like
+    * interpolation qualifiers.
+    */
+   temp_type_qualifier.flags.i = 0;
+   temp_type_qualifier.flags.q.uniform = true;
+   temp_type_qualifier.flags.q.in = true;
+   temp_type_qualifier.flags.q.out = true;
+   interface_type_mask = temp_type_qualifier.flags.i;
+
+   /* Get the block's interface qualifier.  The interface_qualifier
+    * production rule guarantees that only one bit will be set (and
+    * it will be in/out/uniform).
+    */
+   uint64_t block_interface_qualifier = q.flags.i;
+
+   block->layout.flags.i |= block_interface_qualifier;
+
+   if (state->stage == MESA_SHADER_GEOMETRY &&
+       state->has_explicit_attrib_stream()) {
+      /* Assign global layout's stream value. */
+      block->layout.flags.q.stream = 1;
+      block->layout.flags.q.explicit_stream = 0;
+      block->layout.stream = state->out_qualifier->stream;
+   }
+
+   foreach_list_typed (ast_declarator_list, member, link, &block->declarations) {
+      ast_type_qualifier& qualifier = member->type->qualifier;
+      if ((qualifier.flags.i & interface_type_mask) == 0) {
+         /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
+          * "If no optional qualifier is used in a member declaration, the
+          *  qualifier of the variable is just in, out, or uniform as declared
+          *  by interface-qualifier."
+          */
+         qualifier.flags.i |= block_interface_qualifier;
+      } else if ((qualifier.flags.i & interface_type_mask) !=
+                 block_interface_qualifier) {
+         /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
+          * "If optional qualifiers are used, they can include interpolation
+          *  and storage qualifiers and they must declare an input, output,
+          *  or uniform variable consistent with the interface qualifier of
+          *  the block."
+          */
+         _mesa_glsl_error(locp, state,
+                          "uniform/in/out qualifier on "
+                          "interface block member does not match "
+                          "the interface block");
+      }
+
+      /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks":
+       *
+       * "GLSL ES 3.0 does not support interface blocks for shader inputs or
+       * outputs."
+       *
+       * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":.
+       *
+       * "Only variables output from a shader can be candidates for
+       * invariance."
+       *
+       * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks":
+       *
+       * "If optional qualifiers are used, they can include interpolation
+       * qualifiers, auxiliary storage qualifiers, and storage qualifiers
+       * and they must declare an input, output, or uniform member
+       * consistent with the interface qualifier of the block"
+       */
+      if (qualifier.flags.q.invariant)
+         _mesa_glsl_error(locp, state,
+                          "invariant qualifiers cannot be used "
+                          "with interface blocks members");
+   }
+}
 
 void
 _mesa_ast_type_qualifier_print(const struct ast_type_qualifier *q)
-- 
cgit v1.2.3


From a40f917c4b0100a7ec9a97a12ce0959f83a3f61b Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Thu, 3 Sep 2015 12:00:16 +0200
Subject: glsl: allow default qualifiers for shader storage block definitions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Default layout qualifier declarations of this kind:

    layout(xxx) buffer;

were not supported by commit 84fc5fece006.

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/glsl_parser.yy         | 46 ++++++++++++++++++++++++++++++++++++++++-
 src/glsl/glsl_parser_extras.cpp | 20 ++++++++++++++++--
 src/glsl/glsl_parser_extras.h   |  7 +++++++
 3 files changed, 70 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 7f00929d1e4..028974e7eb9 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -169,6 +169,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
 %token <identifier> IDENTIFIER TYPE_IDENTIFIER NEW_IDENTIFIER
 %type <identifier> any_identifier
 %type <interface_block> instance_name_opt
+%type <interface_block> buffer_instance_name_opt
 %token <real> FLOATCONSTANT
 %token <dreal> DOUBLECONSTANT
 %token <n> INTCONSTANT UINTCONSTANT BOOLCONSTANT
@@ -218,6 +219,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
 %type <type_qualifier> subroutine_qualifier
 %type <subroutine_list> subroutine_type_list
 %type <type_qualifier> interface_qualifier
+%type <type_qualifier> buffer_interface_qualifier
 %type <type_specifier> type_specifier
 %type <type_specifier> type_specifier_nonarray
 %type <array_specifier> array_specifier
@@ -2638,6 +2640,17 @@ basic_interface_block:
 
       $$ = block;
    }
+   | buffer_interface_qualifier NEW_IDENTIFIER '{' member_list '}' buffer_instance_name_opt ';'
+   {
+      ast_interface_block *const block = $6;
+
+      block->block_name = $2;
+      block->declarations.push_degenerate_list_at_head(& $4->link);
+
+      _mesa_ast_process_interface_block(& @1, state, block, $1);
+
+      $$ = block;
+   }
    ;
 
 interface_qualifier:
@@ -2656,7 +2669,10 @@ interface_qualifier:
       memset(& $$, 0, sizeof($$));
       $$.flags.q.uniform = 1;
    }
-   | BUFFER
+   ;
+
+buffer_interface_qualifier:
+   BUFFER
    {
       memset(& $$, 0, sizeof($$));
       $$.flags.q.buffer = 1;
@@ -2683,6 +2699,26 @@ instance_name_opt:
    }
    ;
 
+buffer_instance_name_opt:
+   /* empty */
+   {
+      $$ = new(state) ast_interface_block(*state->default_shader_storage_qualifier,
+                                          NULL, NULL);
+   }
+   | NEW_IDENTIFIER
+   {
+      $$ = new(state) ast_interface_block(*state->default_shader_storage_qualifier,
+                                          $1, NULL);
+      $$->set_location(@1);
+   }
+   | NEW_IDENTIFIER array_specifier
+   {
+      $$ = new(state) ast_interface_block(*state->default_shader_storage_qualifier,
+                                          $1, $2);
+      $$->set_location_range(@1, @2);
+   }
+   ;
+
 member_list:
    member_declaration
    {
@@ -2729,6 +2765,14 @@ layout_defaults:
       $$ = NULL;
    }
 
+   | layout_qualifier BUFFER ';'
+   {
+      if (!state->default_shader_storage_qualifier->merge_qualifier(& @1, state, $1)) {
+         YYERROR;
+      }
+      $$ = NULL;
+   }
+
    | layout_qualifier IN_TOK ';'
    {
       $$ = NULL;
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 67ef014c068..223596be798 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -244,6 +244,11 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
    this->default_uniform_qualifier->flags.q.column_major = 1;
    this->default_uniform_qualifier->is_default_qualifier = true;
 
+   this->default_shader_storage_qualifier = new(this) ast_type_qualifier();
+   this->default_shader_storage_qualifier->flags.q.shared = 1;
+   this->default_shader_storage_qualifier->flags.q.column_major = 1;
+   this->default_shader_storage_qualifier->is_default_qualifier = true;
+
    this->fs_uses_gl_fragcoord = false;
    this->fs_redeclares_gl_fragcoord = false;
    this->fs_origin_upper_left = false;
@@ -865,7 +870,17 @@ _mesa_ast_process_interface_block(YYLTYPE *locp,
                                   ast_interface_block *const block,
                                   const struct ast_type_qualifier q)
 {
-   if (q.flags.q.uniform) {
+   if (q.flags.q.buffer) {
+      if (!state->has_shader_storage_buffer_objects()) {
+         _mesa_glsl_error(locp, state,
+                          "#version 430 / GL_ARB_shader_storage_buffer_object "
+                          "required for defining shader storage blocks");
+      } else if (state->ARB_shader_storage_buffer_object_warn) {
+         _mesa_glsl_warning(locp, state,
+                            "#version 430 / GL_ARB_shader_storage_buffer_object "
+                            "required for defining shader storage blocks");
+      }
+   } else if (q.flags.q.uniform) {
       if (!state->has_uniform_buffer_objects()) {
          _mesa_glsl_error(locp, state,
                           "#version 140 / GL_ARB_uniform_buffer_object "
@@ -909,7 +924,7 @@ _mesa_ast_process_interface_block(YYLTYPE *locp,
    uint64_t interface_type_mask;
    struct ast_type_qualifier temp_type_qualifier;
 
-   /* Get a bitmask containing only the in/out/uniform
+   /* Get a bitmask containing only the in/out/uniform/buffer
     * flags, allowing us to ignore other irrelevant flags like
     * interpolation qualifiers.
     */
@@ -917,6 +932,7 @@ _mesa_ast_process_interface_block(YYLTYPE *locp,
    temp_type_qualifier.flags.q.uniform = true;
    temp_type_qualifier.flags.q.in = true;
    temp_type_qualifier.flags.q.out = true;
+   temp_type_qualifier.flags.q.buffer = true;
    interface_type_mask = temp_type_qualifier.flags.i;
 
    /* Get the block's interface qualifier.  The interface_qualifier
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index d3b091d9dce..5b757d3716f 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -274,6 +274,13 @@ struct _mesa_glsl_parse_state {
     */
    struct ast_type_qualifier *default_uniform_qualifier;
 
+   /**
+    * Default shader storage layout qualifiers tracked during parsing.
+    * Currently affects shader storage blocks and shader storage buffer
+    * variables in those blocks.
+    */
+   struct ast_type_qualifier *default_shader_storage_qualifier;
+
    /**
     * Variables to track different cases if a fragment shader redeclares
     * built-in variable gl_FragCoord.
-- 
cgit v1.2.3


From 35476c2bae5d59adf5fcfce8c83958ed076264e5 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Mon, 31 Aug 2015 07:44:47 +0200
Subject: glsl: Add std430 related member functions to glsl_type class
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

They are used to calculate size, base alignment and array stride values
for a glsl_type following std430 rules.

v2:
- Paste OpenGL 4.3 spec wording as it mentions stride of array. (Jordan)

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/glsl_types.cpp | 207 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/glsl/glsl_types.h   |  19 +++++
 2 files changed, 226 insertions(+)

(limited to 'src')

diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
index 07d72489057..93034a67f01 100644
--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -1388,6 +1388,213 @@ glsl_type::std140_size(bool row_major) const
    return -1;
 }
 
+unsigned
+glsl_type::std430_base_alignment(bool row_major) const
+{
+
+   unsigned N = is_double() ? 8 : 4;
+
+   /* (1) If the member is a scalar consuming <N> basic machine units, the
+    *     base alignment is <N>.
+    *
+    * (2) If the member is a two- or four-component vector with components
+    *     consuming <N> basic machine units, the base alignment is 2<N> or
+    *     4<N>, respectively.
+    *
+    * (3) If the member is a three-component vector with components consuming
+    *     <N> basic machine units, the base alignment is 4<N>.
+    */
+   if (this->is_scalar() || this->is_vector()) {
+      switch (this->vector_elements) {
+      case 1:
+         return N;
+      case 2:
+         return 2 * N;
+      case 3:
+      case 4:
+         return 4 * N;
+      }
+   }
+
+   /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
+    *
+    * "When using the std430 storage layout, shader storage blocks will be
+    * laid out in buffer storage identically to uniform and shader storage
+    * blocks using the std140 layout, except that the base alignment and
+    * stride of arrays of scalars and vectors in rule 4 and of structures
+    * in rule 9 are not rounded up a multiple of the base alignment of a vec4.
+    */
+
+   /* (4) If the member is an array of scalars or vectors, the base alignment
+    *     and array stride are set to match the base alignment of a single
+    *     array element, according to rules (1), (2), and (3).
+    *
+    * As the std430 wording quoted above says, the result is not rounded up
+    * to the base alignment of a vec4 (unlike std140), so an array simply
+    * takes the base alignment of its element type.  Arrays of matrices or
+    * structures recurse into the matrix and structure cases below.
+    * Rules (1)-(3) for the scalar/vector element are handled at the top.
+    */
+   if (this->is_array())
+      return this->fields.array->std430_base_alignment(row_major);
+
+   /* (5) If the member is a column-major matrix with <C> columns and
+    *     <R> rows, the matrix is stored identically to an array of
+    *     <C> column vectors with <R> components each, according to
+    *     rule (4).
+    *
+    * (7) If the member is a row-major matrix with <C> columns and <R>
+    *     rows, the matrix is stored identically to an array of <R>
+    *     row vectors with <C> components each, according to rule (4).
+    */
+   if (this->is_matrix()) {
+      const struct glsl_type *vec_type, *array_type;
+      int c = this->matrix_columns;
+      int r = this->vector_elements;
+
+      if (row_major) {
+         vec_type = get_instance(base_type, c, 1);
+         array_type = glsl_type::get_array_instance(vec_type, r);
+      } else {
+         vec_type = get_instance(base_type, r, 1);
+         array_type = glsl_type::get_array_instance(vec_type, c);
+      }
+
+      return array_type->std430_base_alignment(false);
+   }
+
+   /* (9) If the member is a structure, the base alignment of the
+    *     structure is <N>, where <N> is the largest base alignment
+    *     value of any of its members (for std430 this is not rounded
+    *     up to the base alignment of a vec4). The individual members of
+    *     this sub-structure are then assigned offsets by applying this set
+    *     of rules recursively, where the base offset of the first
+    *     member of the sub-structure is equal to the aligned offset
+    *     of the structure. The structure may have padding at the end;
+    *     the base offset of the member following the sub-structure is
+    *     rounded up to the next multiple of the base alignment of the
+    *     structure.
+    */
+   if (this->is_record()) {
+      unsigned base_alignment = 0;
+      for (unsigned i = 0; i < this->length; i++) {
+         bool field_row_major = row_major;
+         const enum glsl_matrix_layout matrix_layout =
+            glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+         if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+            field_row_major = true;
+         } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+            field_row_major = false;
+         }
+
+         const struct glsl_type *field_type = this->fields.structure[i].type;
+         base_alignment = MAX2(base_alignment,
+                               field_type->std430_base_alignment(field_row_major));
+      }
+      return base_alignment;
+   }
+   assert(!"not reached");
+   return -1;
+}
+
+unsigned
+glsl_type::std430_array_stride(bool row_major) const
+{
+   unsigned N = is_double() ? 8 : 4;
+
+   /* Notice that the array stride of a vec3 is not 3 * N but 4 * N.
+    * See OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout"
+    *
+    * (3) If the member is a three-component vector with components consuming
+    *     <N> basic machine units, the base alignment is 4<N>.
+    */
+   if (this->is_vector() && this->vector_elements == 3)
+      return 4 * N;
+
+   /* By default use std430_size(row_major) */
+   return this->std430_size(row_major);
+}
+
+unsigned
+glsl_type::std430_size(bool row_major) const
+{
+   unsigned N = is_double() ? 8 : 4;
+
+   /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
+    *
+    * "When using the std430 storage layout, shader storage blocks will be
+    * laid out in buffer storage identically to uniform and shader storage
+    * blocks using the std140 layout, except that the base alignment and
+    * stride of arrays of scalars and vectors in rule 4 and of structures
+    * in rule 9 are not rounded up a multiple of the base alignment of a vec4.
+    */
+   if (this->is_scalar() || this->is_vector())
+         return this->vector_elements * N;
+
+   if (this->without_array()->is_matrix()) {
+      const struct glsl_type *element_type;
+      const struct glsl_type *vec_type;
+      unsigned int array_len;
+
+      if (this->is_array()) {
+         element_type = this->fields.array;
+         array_len = this->length;
+      } else {
+         element_type = this;
+         array_len = 1;
+      }
+
+      if (row_major) {
+         vec_type = get_instance(element_type->base_type,
+                                 element_type->matrix_columns, 1);
+
+         array_len *= element_type->vector_elements;
+      } else {
+         vec_type = get_instance(element_type->base_type,
+                                 element_type->vector_elements, 1);
+         array_len *= element_type->matrix_columns;
+      }
+      const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
+                                                                  array_len);
+
+      return array_type->std430_size(false);
+   }
+
+   if (this->is_array()) {
+      if (this->fields.array->is_record())
+         return this->length * this->fields.array->std430_size(row_major);
+      else
+         return this->length * this->fields.array->std430_base_alignment(row_major);
+   }
+
+   if (this->is_record() || this->is_interface()) {
+      unsigned size = 0;
+      unsigned max_align = 0;
+
+      for (unsigned i = 0; i < this->length; i++) {
+         bool field_row_major = row_major;
+         const enum glsl_matrix_layout matrix_layout =
+            glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+         if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+            field_row_major = true;
+         } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+            field_row_major = false;
+         }
+
+         const struct glsl_type *field_type = this->fields.structure[i].type;
+         unsigned align = field_type->std430_base_alignment(field_row_major);
+         size = glsl_align(size, align);
+         size += field_type->std430_size(field_row_major);
+
+         max_align = MAX2(align, max_align);
+      }
+      size = glsl_align(size, max_align);
+      return size;
+   }
+
+   assert(!"not reached");
+   return -1;
+}
 
 unsigned
 glsl_type::count_attribute_slots() const
diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h
index 860276a2b17..785f5aa74bf 100644
--- a/src/glsl/glsl_types.h
+++ b/src/glsl/glsl_types.h
@@ -333,6 +333,25 @@ struct glsl_type {
     */
    unsigned std140_size(bool row_major) const;
 
+   /**
+    * Alignment in bytes of the start of this type in a std430 shader
+    * storage block.
+    */
+   unsigned std430_base_alignment(bool row_major) const;
+
+   /**
+    * Calculate array stride in bytes of this type in a std430 shader storage
+    * block.
+    */
+   unsigned std430_array_stride(bool row_major) const;
+
+   /**
+    * Size in bytes of this type in a std430 shader storage block.
+    *
+    * Note that this is not GL_BUFFER_SIZE
+    */
+   unsigned std430_size(bool row_major) const;
+
    /**
     * \brief Can this type be implicitly converted to another?
     *
-- 
cgit v1.2.3


From 8f0167c65b2df73cf2ef094358ba162fe0028d14 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Mon, 31 Aug 2015 07:45:53 +0200
Subject: glsl: Add parser/compiler support for std430 interface packing
 qualifier
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2:
- Fix a missing check in has_layout()

v3:
- Mention shader storage block in error message for layout qualifiers
  (Kristian).

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/ast.h                   |  1 +
 src/glsl/ast_to_hir.cpp          | 21 +++++++++++++++++----
 src/glsl/ast_type.cpp            |  2 ++
 src/glsl/glsl_parser.yy          |  2 ++
 src/glsl/glsl_types.h            |  3 ++-
 src/glsl/link_uniform_blocks.cpp | 15 ++++++++++++---
 src/mesa/main/mtypes.h           |  3 ++-
 7 files changed, 38 insertions(+), 9 deletions(-)

(limited to 'src')

diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index cca32b334b2..4c314366133 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -491,6 +491,7 @@ struct ast_type_qualifier {
 	 /** \name Layout qualifiers for GL_ARB_uniform_buffer_object */
 	 /** \{ */
          unsigned std140:1;
+         unsigned std430:1;
          unsigned shared:1;
          unsigned packed:1;
          unsigned column_major:1;
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 92038a62d81..d6071ef59fc 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2920,11 +2920,13 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
        var->data.depth_layout = ir_depth_layout_none;
 
    if (qual->flags.q.std140 ||
+       qual->flags.q.std430 ||
        qual->flags.q.packed ||
        qual->flags.q.shared) {
       _mesa_glsl_error(loc, state,
-                       "uniform block layout qualifiers std140, packed, and "
-                       "shared can only be applied to uniform blocks, not "
+                       "uniform and shader storage block layout qualifiers "
+                       "std140, std430, packed, and shared can only be "
+                       "applied to uniform or shader storage blocks, not "
                        "members");
    }
 
@@ -5691,12 +5693,14 @@ ast_process_structure_or_interface_block(exec_list *instructions,
          const struct ast_type_qualifier *const qual =
             & decl_list->type->qualifier;
          if (qual->flags.q.std140 ||
+             qual->flags.q.std430 ||
              qual->flags.q.packed ||
              qual->flags.q.shared) {
             _mesa_glsl_error(&loc, state,
                              "uniform/shader storage block layout qualifiers "
-                             "std140, packed, and shared can only be applied "
-                             "to uniform/shader storage blocks, not members");
+                             "std140, std430, packed, and shared can only be "
+                             "applied to uniform/shader storage blocks, not "
+                             "members");
          }
 
          if (qual->flags.q.constant) {
@@ -5908,6 +5912,13 @@ ast_interface_block::hir(exec_list *instructions,
                        this->block_name);
    }
 
+   if (!this->layout.flags.q.buffer &&
+       this->layout.flags.q.std430) {
+      _mesa_glsl_error(&loc, state,
+                       "std430 storage block layout qualifier is supported "
+                       "only for shader storage blocks");
+   }
+
    /* The ast_interface_block has a list of ast_declarator_lists.  We
     * need to turn those into ir_variables with an association
     * with this uniform block.
@@ -5917,6 +5928,8 @@ ast_interface_block::hir(exec_list *instructions,
       packing = GLSL_INTERFACE_PACKING_SHARED;
    } else if (this->layout.flags.q.packed) {
       packing = GLSL_INTERFACE_PACKING_PACKED;
+   } else if (this->layout.flags.q.std430) {
+      packing = GLSL_INTERFACE_PACKING_STD430;
    } else {
       /* The default layout is std140.
        */
diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp
index a4671e203e2..08a4504296b 100644
--- a/src/glsl/ast_type.cpp
+++ b/src/glsl/ast_type.cpp
@@ -65,6 +65,7 @@ ast_type_qualifier::has_layout() const
           || this->flags.q.depth_less
           || this->flags.q.depth_unchanged
           || this->flags.q.std140
+          || this->flags.q.std430
           || this->flags.q.shared
           || this->flags.q.column_major
           || this->flags.q.row_major
@@ -123,6 +124,7 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
    ubo_layout_mask.flags.q.std140 = 1;
    ubo_layout_mask.flags.q.packed = 1;
    ubo_layout_mask.flags.q.shared = 1;
+   ubo_layout_mask.flags.q.std430 = 1;
 
    ast_type_qualifier ubo_binding_mask;
    ubo_binding_mask.flags.i = 0;
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 028974e7eb9..4cb018a5862 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -1199,6 +1199,8 @@ layout_qualifier_id:
             $$.flags.q.std140 = 1;
          } else if (match_layout_qualifier($1, "shared", state) == 0) {
             $$.flags.q.shared = 1;
+         } else if (match_layout_qualifier($1, "std430", state) == 0) {
+            $$.flags.q.std430 = 1;
          } else if (match_layout_qualifier($1, "column_major", state) == 0) {
             $$.flags.q.column_major = 1;
          /* "row_major" is a reserved word in GLSL 1.30+. Its token is parsed
diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h
index 785f5aa74bf..d58d8189e21 100644
--- a/src/glsl/glsl_types.h
+++ b/src/glsl/glsl_types.h
@@ -77,7 +77,8 @@ enum glsl_sampler_dim {
 enum glsl_interface_packing {
    GLSL_INTERFACE_PACKING_STD140,
    GLSL_INTERFACE_PACKING_SHARED,
-   GLSL_INTERFACE_PACKING_PACKED
+   GLSL_INTERFACE_PACKING_PACKED,
+   GLSL_INTERFACE_PACKING_STD430
 };
 
 enum glsl_matrix_layout {
diff --git a/src/glsl/link_uniform_blocks.cpp b/src/glsl/link_uniform_blocks.cpp
index 4df39e200d5..c891d030234 100644
--- a/src/glsl/link_uniform_blocks.cpp
+++ b/src/glsl/link_uniform_blocks.cpp
@@ -119,8 +119,16 @@ private:
          v->IndexName = v->Name;
       }
 
-      const unsigned alignment = type->std140_base_alignment(v->RowMajor);
-      unsigned size = type->std140_size(v->RowMajor);
+      unsigned alignment = 0;
+      unsigned size = 0;
+
+      if (v->Type->interface_packing == GLSL_INTERFACE_PACKING_STD430) {
+         alignment = type->std430_base_alignment(v->RowMajor);
+         size = type->std430_size(v->RowMajor);
+      } else {
+         alignment = type->std140_base_alignment(v->RowMajor);
+         size = type->std140_size(v->RowMajor);
+      }
 
       this->offset = glsl_align(this->offset, alignment);
       v->Offset = this->offset;
@@ -255,7 +263,8 @@ link_uniform_blocks(void *mem_ctx,
                  == unsigned(ubo_packing_shared));
    STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_PACKED)
                  == unsigned(ubo_packing_packed));
-
+   STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD430)
+                 == unsigned(ubo_packing_std430));
 
    hash_table_foreach (block_hash, entry) {
       const struct link_uniform_block_active *const b =
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 22b1d14636f..d53d99ad843 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2454,7 +2454,8 @@ enum gl_uniform_block_packing
 {
    ubo_packing_std140,
    ubo_packing_shared,
-   ubo_packing_packed
+   ubo_packing_packed,
+   ubo_packing_std430
 };
 
 
-- 
cgit v1.2.3


From 1be180b941ac9e0652a6b2d19713ddccd8a5151f Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Fri, 4 Sep 2015 17:53:48 +0200
Subject: glsl: Add std430 support to program_resource_visitor's member
 functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

They are used to calculate the offset and array stride of uniform/shader
storage buffer variables. Take the interface packing into account so that
the right values are computed for std430.

v2:
- Fix commit log line length and indention. (Jordan)

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/link_uniform_blocks.cpp | 19 ++++++--
 src/glsl/link_uniforms.cpp       | 98 +++++++++++++++++++++++++++-------------
 src/glsl/linker.h                |  6 ++-
 3 files changed, 84 insertions(+), 39 deletions(-)

(limited to 'src')

diff --git a/src/glsl/link_uniform_blocks.cpp b/src/glsl/link_uniform_blocks.cpp
index c891d030234..8f65f4a3ab7 100644
--- a/src/glsl/link_uniform_blocks.cpp
+++ b/src/glsl/link_uniform_blocks.cpp
@@ -68,14 +68,18 @@ private:
    }
 
    virtual void enter_record(const glsl_type *type, const char *,
-                             bool row_major) {
+                             bool row_major, const unsigned packing) {
       assert(type->is_record());
-      this->offset = glsl_align(
+      if (packing == GLSL_INTERFACE_PACKING_STD430)
+         this->offset = glsl_align(
+            this->offset, type->std430_base_alignment(row_major));
+      else
+         this->offset = glsl_align(
             this->offset, type->std140_base_alignment(row_major));
    }
 
    virtual void leave_record(const glsl_type *type, const char *,
-                             bool row_major) {
+                             bool row_major, const unsigned packing) {
       assert(type->is_record());
 
       /* If this is the last field of a structure, apply rule #9.  The
@@ -85,12 +89,17 @@ private:
        *     the member following the sub-structure is rounded up to the next
        *     multiple of the base alignment of the structure."
        */
-      this->offset = glsl_align(
+      if (packing == GLSL_INTERFACE_PACKING_STD430)
+         this->offset = glsl_align(
+            this->offset, type->std430_base_alignment(row_major));
+      else
+         this->offset = glsl_align(
             this->offset, type->std140_base_alignment(row_major));
    }
 
    virtual void visit_field(const glsl_type *type, const char *name,
                             bool row_major, const glsl_type *,
+                            const unsigned packing,
                             bool /* last_field */)
    {
       assert(this->index < this->num_variables);
@@ -122,7 +131,7 @@ private:
       unsigned alignment = 0;
       unsigned size = 0;
 
-      if (v->Type->interface_packing == GLSL_INTERFACE_PACKING_STD430) {
+      if (packing == GLSL_INTERFACE_PACKING_STD430) {
          alignment = type->std430_base_alignment(v->RowMajor);
          size = type->std430_size(v->RowMajor);
       } else {
diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index 67a6e1bea17..7d6b51de06d 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -65,7 +65,9 @@ program_resource_visitor::process(const glsl_type *type, const char *name)
 
    unsigned record_array_count = 1;
    char *name_copy = ralloc_strdup(NULL, name);
-   recursion(type, &name_copy, strlen(name), false, NULL, false,
+   unsigned packing = type->interface_packing;
+
+   recursion(type, &name_copy, strlen(name), false, NULL, packing, false,
              record_array_count);
    ralloc_free(name_copy);
 }
@@ -78,6 +80,10 @@ program_resource_visitor::process(ir_variable *var)
    const bool row_major =
       var->data.matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;
 
+   const unsigned packing = var->get_interface_type() ?
+      var->get_interface_type()->interface_packing :
+      var->type->interface_packing;
+
    /* false is always passed for the row_major parameter to the other
     * processing functions because no information is available to do
     * otherwise.  See the warning in linker.h.
@@ -114,8 +120,8 @@ program_resource_visitor::process(ir_variable *var)
           * lowering is only applied to non-uniform interface blocks, so we
           * can safely pass false for row_major.
           */
-         recursion(var->type, &name, new_length, row_major, NULL, false,
-                   record_array_count);
+         recursion(var->type, &name, new_length, row_major, NULL, packing,
+                   false, record_array_count);
       }
       ralloc_free(name);
    } else if (var->data.from_named_ifc_block_nonarray) {
@@ -139,26 +145,26 @@ program_resource_visitor::process(ir_variable *var)
        * is only applied to non-uniform interface blocks, so we can safely
        * pass false for row_major.
        */
-      recursion(var->type, &name, strlen(name), row_major, NULL, false,
-                record_array_count);
+      recursion(var->type, &name, strlen(name), row_major, NULL, packing,
+                false, record_array_count);
       ralloc_free(name);
    } else if (t->without_array()->is_record()) {
       char *name = ralloc_strdup(NULL, var->name);
-      recursion(var->type, &name, strlen(name), row_major, NULL, false,
-                record_array_count);
+      recursion(var->type, &name, strlen(name), row_major, NULL, packing,
+                false, record_array_count);
       ralloc_free(name);
    } else if (t->is_interface()) {
       char *name = ralloc_strdup(NULL, var->type->name);
-      recursion(var->type, &name, strlen(name), row_major, NULL, false,
-                record_array_count);
+      recursion(var->type, &name, strlen(name), row_major, NULL, packing,
+                false, record_array_count);
       ralloc_free(name);
    } else if (t->is_array() && t->fields.array->is_interface()) {
       char *name = ralloc_strdup(NULL, var->type->fields.array->name);
-      recursion(var->type, &name, strlen(name), row_major, NULL, false,
-                record_array_count);
+      recursion(var->type, &name, strlen(name), row_major, NULL, packing,
+                false, record_array_count);
       ralloc_free(name);
    } else {
-      this->visit_field(t, var->name, row_major, NULL, false);
+      this->visit_field(t, var->name, row_major, NULL, packing, false);
    }
 }
 
@@ -166,6 +172,7 @@ void
 program_resource_visitor::recursion(const glsl_type *t, char **name,
                                     size_t name_length, bool row_major,
                                     const glsl_type *record_type,
+                                    const unsigned packing,
                                     bool last_field,
                                     unsigned record_array_count)
 {
@@ -180,7 +187,7 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
          record_type = t;
 
       if (t->is_record())
-         this->enter_record(t, *name, row_major);
+         this->enter_record(t, *name, row_major, packing);
 
       for (unsigned i = 0; i < t->length; i++) {
 	 const char *field = t->fields.structure[i].name;
@@ -214,6 +221,7 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
          recursion(t->fields.structure[i].type, name, new_length,
                    field_row_major,
                    record_type,
+                   packing,
                    (i + 1) == t->length, record_array_count);
 
          /* Only the first leaf-field of the record gets called with the
@@ -224,7 +232,7 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
 
       if (t->is_record()) {
          (*name)[name_length] = '\0';
-         this->leave_record(t, *name, row_major);
+         this->leave_record(t, *name, row_major, packing);
       }
    } else if (t->is_array() && (t->fields.array->is_record()
                                 || t->fields.array->is_interface())) {
@@ -247,6 +255,7 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
 
          recursion(t->fields.array, name, new_length, row_major,
                    record_type,
+                   packing,
                    (i + 1) == t->length, record_array_count);
 
          /* Only the first leaf-field of the record gets called with the
@@ -256,7 +265,7 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
       }
    } else {
       this->set_record_array_count(record_array_count);
-      this->visit_field(t, *name, row_major, record_type, last_field);
+      this->visit_field(t, *name, row_major, record_type, packing, last_field);
    }
 }
 
@@ -264,6 +273,7 @@ void
 program_resource_visitor::visit_field(const glsl_type *type, const char *name,
                                       bool row_major,
                                       const glsl_type *,
+                                      const unsigned,
                                       bool /* last_field */)
 {
    visit_field(type, name, row_major);
@@ -277,12 +287,14 @@ program_resource_visitor::visit_field(const glsl_struct_field *field)
 }
 
 void
-program_resource_visitor::enter_record(const glsl_type *, const char *, bool)
+program_resource_visitor::enter_record(const glsl_type *, const char *, bool,
+                                       const unsigned)
 {
 }
 
 void
-program_resource_visitor::leave_record(const glsl_type *, const char *, bool)
+program_resource_visitor::leave_record(const glsl_type *, const char *, bool,
+                                       const unsigned)
 {
 }
 
@@ -677,25 +689,34 @@ private:
    }
 
    virtual void enter_record(const glsl_type *type, const char *,
-                             bool row_major) {
+                             bool row_major, const unsigned packing) {
       assert(type->is_record());
       if (this->ubo_block_index == -1)
          return;
-      this->ubo_byte_offset = glsl_align(
+      if (packing == GLSL_INTERFACE_PACKING_STD430)
+         this->ubo_byte_offset = glsl_align(
+            this->ubo_byte_offset, type->std430_base_alignment(row_major));
+      else
+         this->ubo_byte_offset = glsl_align(
             this->ubo_byte_offset, type->std140_base_alignment(row_major));
    }
 
    virtual void leave_record(const glsl_type *type, const char *,
-                             bool row_major) {
+                             bool row_major, const unsigned packing) {
       assert(type->is_record());
       if (this->ubo_block_index == -1)
          return;
-      this->ubo_byte_offset = glsl_align(
+      if (packing == GLSL_INTERFACE_PACKING_STD430)
+         this->ubo_byte_offset = glsl_align(
+            this->ubo_byte_offset, type->std430_base_alignment(row_major));
+      else
+         this->ubo_byte_offset = glsl_align(
             this->ubo_byte_offset, type->std140_base_alignment(row_major));
    }
 
    virtual void visit_field(const glsl_type *type, const char *name,
                             bool row_major, const glsl_type *record_type,
+                            const unsigned packing,
                             bool /* last_field */)
    {
       assert(!type->without_array()->is_record());
@@ -770,16 +791,25 @@ private:
          this->uniforms[id].storage = this->values;
 
       if (this->ubo_block_index != -1) {
-	 this->uniforms[id].block_index = this->ubo_block_index;
-
-	 const unsigned alignment = type->std140_base_alignment(row_major);
-	 this->ubo_byte_offset = glsl_align(this->ubo_byte_offset, alignment);
-	 this->uniforms[id].offset = this->ubo_byte_offset;
-	 this->ubo_byte_offset += type->std140_size(row_major);
-
-	 if (type->is_array()) {
-	    this->uniforms[id].array_stride =
-	       glsl_align(type->fields.array->std140_size(row_major), 16);
+         this->uniforms[id].block_index = this->ubo_block_index;
+
+         unsigned alignment = type->std140_base_alignment(row_major);
+         if (packing == GLSL_INTERFACE_PACKING_STD430)
+            alignment = type->std430_base_alignment(row_major);
+         this->ubo_byte_offset = glsl_align(this->ubo_byte_offset, alignment);
+         this->uniforms[id].offset = this->ubo_byte_offset;
+         if (packing == GLSL_INTERFACE_PACKING_STD430)
+            this->ubo_byte_offset += type->std430_size(row_major);
+         else
+            this->ubo_byte_offset += type->std140_size(row_major);
+
+         if (type->is_array()) {
+            if (packing == GLSL_INTERFACE_PACKING_STD430)
+               this->uniforms[id].array_stride =
+                  type->fields.array->std430_array_stride(row_major);
+            else
+               this->uniforms[id].array_stride =
+                  glsl_align(type->fields.array->std140_size(row_major), 16);
 	 } else {
 	    this->uniforms[id].array_stride = 0;
 	 }
@@ -790,7 +820,11 @@ private:
             const unsigned items = row_major ? matrix->matrix_columns : matrix->vector_elements;
 
             assert(items <= 4);
-            this->uniforms[id].matrix_stride = glsl_align(items * N, 16);
+            if (packing == GLSL_INTERFACE_PACKING_STD430)
+               this->uniforms[id].matrix_stride = items < 3 ? items * N :
+                                                          glsl_align(items * N, 16);
+            else
+               this->uniforms[id].matrix_stride = glsl_align(items * N, 16);
 	    this->uniforms[id].row_major = row_major;
 	 } else {
 	    this->uniforms[id].matrix_stride = 0;
diff --git a/src/glsl/linker.h b/src/glsl/linker.h
index b31052e767e..7c2bd59aeb7 100644
--- a/src/glsl/linker.h
+++ b/src/glsl/linker.h
@@ -150,6 +150,7 @@ protected:
     */
    virtual void visit_field(const glsl_type *type, const char *name,
                             bool row_major, const glsl_type *record_type,
+                            const unsigned packing,
                             bool last_field);
 
    /**
@@ -173,10 +174,10 @@ protected:
    virtual void visit_field(const glsl_struct_field *field);
 
    virtual void enter_record(const glsl_type *type, const char *name,
-                             bool row_major);
+                             bool row_major, const unsigned packing);
 
    virtual void leave_record(const glsl_type *type, const char *name,
-                             bool row_major);
+                             bool row_major, const unsigned packing);
 
    virtual void set_record_array_count(unsigned record_array_count);
 
@@ -190,6 +191,7 @@ private:
     */
    void recursion(const glsl_type *t, char **name, size_t name_length,
                   bool row_major, const glsl_type *record_type,
+                  const unsigned packing,
                   bool last_field, unsigned record_array_count);
 };
 
-- 
cgit v1.2.3


From e854a9800120ac0b1930da27f39207a35a637779 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Mon, 13 Apr 2015 09:50:53 +0200
Subject: glsl: add std430 interface packing support to ssbo related operations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2:
- Get interface packing information from interface's type, not the
  variable type.
- Simplify is_std430 condition in emit_access() for readability (Jordan)
- Add a comment explaining why the array of three-component vector case is
  different in std430 than the rest of the cases.
- Add calls to std430_array_stride().

v3:
- Simplify size_mul change for std430's case (Jordan)
- Fix commit log lines length (Jordan)
- Pass 'packing' instead of 'is_std430' to emit_access() (Kristian)

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/lower_ubo_reference.cpp | 111 ++++++++++++++++++++++++++++-----------
 1 file changed, 81 insertions(+), 30 deletions(-)

(limited to 'src')

diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index 8694383c4ed..4aaa2598a83 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -147,7 +147,8 @@ public:
                                 ir_rvalue **offset,
                                 unsigned *const_offset,
                                 bool *row_major,
-                                int *matrix_columns);
+                                int *matrix_columns,
+                                unsigned packing);
    ir_expression *ubo_load(const struct glsl_type *type,
 			   ir_rvalue *offset);
    ir_call *ssbo_load(const struct glsl_type *type,
@@ -164,7 +165,7 @@ public:
    void emit_access(bool is_write, ir_dereference *deref,
                     ir_variable *base_offset, unsigned int deref_offset,
                     bool row_major, int matrix_columns,
-                    unsigned write_mask);
+                    unsigned packing, unsigned write_mask);
 
    ir_visitor_status visit_enter(class ir_expression *);
    ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
@@ -176,7 +177,8 @@ public:
                                                     ir_variable *);
    ir_expression *emit_ssbo_get_buffer_size();
 
-   unsigned calculate_unsized_array_stride(ir_dereference *deref);
+   unsigned calculate_unsized_array_stride(ir_dereference *deref,
+                                           unsigned packing);
 
    void *mem_ctx;
    struct gl_shader *shader;
@@ -257,7 +259,8 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
                                                      ir_rvalue **offset,
                                                      unsigned *const_offset,
                                                      bool *row_major,
-                                                     int *matrix_columns)
+                                                     int *matrix_columns,
+                                                     unsigned packing)
 {
    /* Determine the name of the interface block */
    ir_rvalue *nonconst_block_index;
@@ -343,8 +346,15 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
             const bool array_row_major =
                is_dereferenced_thing_row_major(deref_array);
 
-            array_stride = deref_array->type->std140_size(array_row_major);
-            array_stride = glsl_align(array_stride, 16);
+            /* The array type will give the correct interface packing
+             * information
+             */
+            if (packing == GLSL_INTERFACE_PACKING_STD430) {
+               array_stride = deref_array->type->std430_array_stride(array_row_major);
+            } else {
+               array_stride = deref_array->type->std140_size(array_row_major);
+               array_stride = glsl_align(array_stride, 16);
+            }
          }
 
          ir_rvalue *array_index = deref_array->array_index;
@@ -380,7 +390,12 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
 
             ralloc_free(field_deref);
 
-            unsigned field_align = type->std140_base_alignment(field_row_major);
+            unsigned field_align = 0;
+
+            if (packing == GLSL_INTERFACE_PACKING_STD430)
+               field_align = type->std430_base_alignment(field_row_major);
+            else
+               field_align = type->std140_base_alignment(field_row_major);
 
             intra_struct_offset = glsl_align(intra_struct_offset, field_align);
 
@@ -388,7 +403,10 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
                        deref_record->field) == 0)
                break;
 
-            intra_struct_offset += type->std140_size(field_row_major);
+            if (packing == GLSL_INTERFACE_PACKING_STD430)
+               intra_struct_offset += type->std430_size(field_row_major);
+            else
+               intra_struct_offset += type->std140_size(field_row_major);
 
             /* If the field just examined was itself a structure, apply rule
              * #9:
@@ -437,13 +455,15 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
    unsigned const_offset;
    bool row_major;
    int matrix_columns;
+   unsigned packing = var->get_interface_type()->interface_packing;
 
    /* Compute the offset to the start if the dereference as well as other
     * information we need to configure the write
     */
    setup_for_load_or_store(var, deref,
                            &offset, &const_offset,
-                           &row_major, &matrix_columns);
+                           &row_major, &matrix_columns,
+                           packing);
    assert(offset);
 
    /* Now that we've calculated the offset to the start of the
@@ -463,7 +483,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
 
    deref = new(mem_ctx) ir_dereference_variable(load_var);
    emit_access(false, deref, load_offset, const_offset,
-               row_major, matrix_columns, 0);
+               row_major, matrix_columns, packing, 0);
    *rvalue = deref;
 
    progress = true;
@@ -581,6 +601,7 @@ lower_ubo_reference_visitor::emit_access(bool is_write,
                                          unsigned int deref_offset,
                                          bool row_major,
                                          int matrix_columns,
+                                         unsigned packing,
                                          unsigned write_mask)
 {
    if (deref->type->is_record()) {
@@ -599,7 +620,7 @@ lower_ubo_reference_visitor::emit_access(bool is_write,
 
          emit_access(is_write, field_deref, base_offset,
                      deref_offset + field_offset,
-                     row_major, 1,
+                     row_major, 1, packing,
                      writemask_for_size(field_deref->type->vector_elements));
 
          field_offset += field->type->std140_size(row_major);
@@ -608,7 +629,8 @@ lower_ubo_reference_visitor::emit_access(bool is_write,
    }
 
    if (deref->type->is_array()) {
-      unsigned array_stride =
+      unsigned array_stride = packing == GLSL_INTERFACE_PACKING_STD430 ?
+         deref->type->fields.array->std430_array_stride(row_major) :
          glsl_align(deref->type->fields.array->std140_size(row_major), 16);
 
       for (unsigned i = 0; i < deref->type->length; i++) {
@@ -618,7 +640,7 @@ lower_ubo_reference_visitor::emit_access(bool is_write,
                                               element);
          emit_access(is_write, element_deref, base_offset,
                      deref_offset + i * array_stride,
-                     row_major, 1,
+                     row_major, 1, packing,
                      writemask_for_size(element_deref->type->vector_elements));
       }
       return;
@@ -637,18 +659,33 @@ lower_ubo_reference_visitor::emit_access(bool is_write,
             int size_mul = deref->type->is_double() ? 8 : 4;
             emit_access(is_write, col_deref, base_offset,
                         deref_offset + i * size_mul,
-                        row_major, deref->type->matrix_columns,
+                        row_major, deref->type->matrix_columns, packing,
                         writemask_for_size(col_deref->type->vector_elements));
          } else {
-            /* std140 always rounds the stride of arrays (and matrices) to a
-             * vec4, so matrices are always 16 between columns/rows. With
-             * doubles, they will be 32 apart when there are more than 2 rows.
-             */
-            int size_mul = (deref->type->is_double() &&
-                            deref->type->vector_elements > 2) ? 32 : 16;
+            int size_mul;
+
+            /* std430 doesn't round up vec2 size to a vec4 size */
+            if (packing == GLSL_INTERFACE_PACKING_STD430 &&
+                deref->type->vector_elements == 2 &&
+                !deref->type->is_double()) {
+               size_mul = 8;
+            } else {
+               /* std140 always rounds the stride of arrays (and matrices) to a
+                * vec4, so matrices are always 16 between columns/rows. With
+                * doubles, they will be 32 apart when there are more than 2 rows.
+                *
+                * For both std140 and std430, if the member is a
+                * three-'component vector with components consuming N basic
+                * machine units, the base alignment is 4N. For vec4, base
+                * alignment is 4N.
+                */
+               size_mul = (deref->type->is_double() &&
+                           deref->type->vector_elements > 2) ? 32 : 16;
+            }
+
             emit_access(is_write, col_deref, base_offset,
                         deref_offset + i * size_mul,
-                        row_major, deref->type->matrix_columns,
+                        row_major, deref->type->matrix_columns, packing,
                         writemask_for_size(col_deref->type->vector_elements));
          }
       }
@@ -727,13 +764,15 @@ lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref,
    unsigned const_offset;
    bool row_major;
    int matrix_columns;
+   unsigned packing = var->get_interface_type()->interface_packing;
 
    /* Compute the offset to the start if the dereference as well as other
     * information we need to configure the write
     */
    setup_for_load_or_store(var, deref,
                            &offset, &const_offset,
-                           &row_major, &matrix_columns);
+                           &row_major, &matrix_columns,
+                           packing);
    assert(offset);
 
    /* Now emit writes from the temporary to memory */
@@ -747,7 +786,7 @@ lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref,
 
    deref = new(mem_ctx) ir_dereference_variable(write_var);
    emit_access(true, deref, write_offset, const_offset,
-               row_major, matrix_columns, write_mask);
+               row_major, matrix_columns, packing, write_mask);
 }
 
 ir_visitor_status
@@ -830,7 +869,8 @@ lower_ubo_reference_visitor::emit_ssbo_get_buffer_size()
 }
 
 unsigned
-lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref)
+lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref,
+                                                            unsigned packing)
 {
    unsigned array_stride = 0;
 
@@ -852,8 +892,12 @@ lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *dere
       const bool array_row_major =
          is_dereferenced_thing_row_major(deref_var);
 
-      array_stride = unsized_array_type->std140_size(array_row_major);
-      array_stride = glsl_align(array_stride, 16);
+      if (packing == GLSL_INTERFACE_PACKING_STD430) {
+         array_stride = unsized_array_type->std430_array_stride(array_row_major);
+      } else {
+         array_stride = unsized_array_type->std140_size(array_row_major);
+         array_stride = glsl_align(array_stride, 16);
+      }
       break;
    }
    case ir_type_dereference_record:
@@ -868,8 +912,13 @@ lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *dere
 
       const bool array_row_major =
          is_dereferenced_thing_row_major(deref_record);
-      array_stride = unsized_array_type->std140_size(array_row_major);
-      array_stride = glsl_align(array_stride, 16);
+
+      if (packing == GLSL_INTERFACE_PACKING_STD430) {
+         array_stride = unsized_array_type->std430_array_stride(array_row_major);
+      } else {
+         array_stride = unsized_array_type->std140_size(array_row_major);
+         array_stride = glsl_align(array_stride, 16);
+      }
       break;
    }
    default:
@@ -889,14 +938,16 @@ lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalu
    unsigned const_offset;
    bool row_major;
    int matrix_columns;
-   int unsized_array_stride = calculate_unsized_array_stride(deref);
+   unsigned packing = var->get_interface_type()->interface_packing;
+   int unsized_array_stride = calculate_unsized_array_stride(deref, packing);
 
    /* Compute the offset to the start if the dereference as well as other
     * information we need to calculate the length.
     */
    setup_for_load_or_store(var, deref,
                            &base_offset, &const_offset,
-                           &row_major, &matrix_columns);
+                           &row_major, &matrix_columns,
+                           packing);
    /* array.length() =
     *  max((buffer_object_size - offset_of_array) / stride_of_array, 0)
     */
-- 
cgit v1.2.3


From a7b4ab45d08d8469daefb9f2af34ad6860b9fc3b Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Tue, 21 Apr 2015 12:12:05 +0200
Subject: glsl: a shader storage buffer must be smaller than the maximum size
 allowed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Otherwise, generate a link time error as per the
ARB_shader_storage_buffer_object spec.

v2:
- Fix error message (Jordan)

v3:
- Move std140_size() changes to its own patch (Kristian)

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/link_uniform_blocks.cpp | 19 +++++++++++++++++++
 src/glsl/linker.cpp              |  2 +-
 src/glsl/linker.h                |  1 +
 3 files changed, 21 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/link_uniform_blocks.cpp b/src/glsl/link_uniform_blocks.cpp
index 8f65f4a3ab7..7ceffee799e 100644
--- a/src/glsl/link_uniform_blocks.cpp
+++ b/src/glsl/link_uniform_blocks.cpp
@@ -187,6 +187,7 @@ struct block {
 
 unsigned
 link_uniform_blocks(void *mem_ctx,
+                    struct gl_context *ctx,
                     struct gl_shader_program *prog,
                     struct gl_shader **shader_list,
                     unsigned num_shaders,
@@ -308,6 +309,15 @@ link_uniform_blocks(void *mem_ctx,
 
             blocks[i].UniformBufferSize = parcel.buffer_size;
 
+            /* Check SSBO size is lower than maximum supported size for SSBO */
+            if (b->is_shader_storage &&
+                parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
+               linker_error(prog, "shader storage block `%s' has size %d, "
+                            "which is larger than than the maximum allowed (%d)",
+                            block_type->name,
+                            parcel.buffer_size,
+                            ctx->Const.MaxShaderStorageBlockSize);
+            }
             blocks[i].NumUniforms =
                (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms);
 
@@ -328,6 +338,15 @@ link_uniform_blocks(void *mem_ctx,
 
          blocks[i].UniformBufferSize = parcel.buffer_size;
 
+         /* Check SSBO size is lower than maximum supported size for SSBO */
+         if (b->is_shader_storage &&
+             parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
+            linker_error(prog, "shader storage block `%s' has size %d, "
+                         "which is larger than than the maximum allowed (%d)",
+                         block_type->name,
+                         parcel.buffer_size,
+                         ctx->Const.MaxShaderStorageBlockSize);
+         }
          blocks[i].NumUniforms =
             (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms);
 
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 47d8b5ad1bf..c0520be4f15 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1996,7 +1996,7 @@ link_intrastage_shaders(void *mem_ctx,
 
    /* Link up uniform blocks defined within this stage. */
    const unsigned num_uniform_blocks =
-      link_uniform_blocks(mem_ctx, prog, shader_list, num_shaders,
+      link_uniform_blocks(mem_ctx, ctx, prog, shader_list, num_shaders,
                           &uniform_blocks);
    if (!prog->LinkStatus)
       return NULL;
diff --git a/src/glsl/linker.h b/src/glsl/linker.h
index 7c2bd59aeb7..c80be1c7e22 100644
--- a/src/glsl/linker.h
+++ b/src/glsl/linker.h
@@ -53,6 +53,7 @@ link_uniform_blocks_are_compatible(const gl_uniform_block *a,
 
 extern unsigned
 link_uniform_blocks(void *mem_ctx,
+                    struct gl_context *ctx,
                     struct gl_shader_program *prog,
                     struct gl_shader **shader_list,
                     unsigned num_shaders,
-- 
cgit v1.2.3


From 138e4ae8aee3c13e83c732ba0f6d705e8001050c Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Wed, 22 Apr 2015 16:58:45 +0200
Subject: glsl: number of active shader storage blocks must be within allowed
 limits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Notice that we should differentiate between shader storage blocks and
uniform blocks, since they have different limits.

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/linker.cpp | 43 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 39 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index c0520be4f15..9be957a3f80 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2799,6 +2799,8 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
 
    unsigned blocks[MESA_SHADER_STAGES] = {0};
    unsigned total_uniform_blocks = 0;
+   unsigned shader_blocks[MESA_SHADER_STAGES] = {0};
+   unsigned total_shader_storage_blocks = 0;
 
    for (unsigned i = 0; i < prog->NumUniformBlocks; i++) {
       if (prog->UniformBlocks[i].UniformBufferSize > ctx->Const.MaxUniformBlockSize) {
@@ -2810,8 +2812,15 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
 
       for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) {
 	 if (prog->UniformBlockStageIndex[j][i] != -1) {
-	    blocks[j]++;
-	    total_uniform_blocks++;
+            struct gl_shader *sh = prog->_LinkedShaders[j];
+            int stage_index = prog->UniformBlockStageIndex[j][i];
+            if (sh && sh->UniformBlocks[stage_index].IsShaderStorage) {
+               shader_blocks[j]++;
+               total_shader_storage_blocks++;
+            } else {
+               blocks[j]++;
+               total_uniform_blocks++;
+            }
 	 }
       }
 
@@ -2832,6 +2841,24 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
 	    }
 	 }
       }
+
+      if (total_shader_storage_blocks > ctx->Const.MaxCombinedShaderStorageBlocks) {
+         linker_error(prog, "Too many combined shader storage blocks (%d/%d)\n",
+                      total_shader_storage_blocks,
+                      ctx->Const.MaxCombinedShaderStorageBlocks);
+      } else {
+         for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+            const unsigned max_shader_storage_blocks =
+               ctx->Const.Program[i].MaxShaderStorageBlocks;
+            if (shader_blocks[i] > max_shader_storage_blocks) {
+               linker_error(prog, "Too many %s shader storage blocks (%d/%d)\n",
+                            _mesa_shader_stage_to_string(i),
+                            shader_blocks[i],
+                            max_shader_storage_blocks);
+               break;
+            }
+         }
+      }
    }
 }
 
@@ -2886,6 +2913,7 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog)
 {
    unsigned total_image_units = 0;
    unsigned fragment_outputs = 0;
+   unsigned total_shader_storage_blocks = 0;
 
    if (!ctx->Extensions.ARB_shader_image_load_store)
       return;
@@ -2901,6 +2929,12 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog)
 
          total_image_units += sh->NumImages;
 
+         for (unsigned j = 0; j < prog->NumUniformBlocks; j++) {
+            int stage_index = prog->UniformBlockStageIndex[i][j];
+            if (stage_index != -1 && sh->UniformBlocks[stage_index].IsShaderStorage)
+               total_shader_storage_blocks++;
+         }
+
          if (i == MESA_SHADER_FRAGMENT) {
             foreach_in_list(ir_instruction, node, sh->ir) {
                ir_variable *var = node->as_variable();
@@ -2914,9 +2948,10 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog)
    if (total_image_units > ctx->Const.MaxCombinedImageUniforms)
       linker_error(prog, "Too many combined image uniforms\n");
 
-   if (total_image_units + fragment_outputs >
+   if (total_image_units + fragment_outputs + total_shader_storage_blocks >
        ctx->Const.MaxCombinedShaderOutputResources)
-      linker_error(prog, "Too many combined image uniforms and fragment outputs\n");
+      linker_error(prog, "Too many combined image uniforms, shader storage "
+                         " buffers and fragment outputs\n");
 }
 
 
-- 
cgit v1.2.3


From eb9a9b62b17d00f6536357a4de254899ae4ed2c7 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Fri, 15 May 2015 12:26:42 +0200
Subject: glsl: ignore buffer variables when counting uniform components
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/link_uniforms.cpp | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index 7d6b51de06d..50a80732d73 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -322,7 +322,8 @@ public:
       : num_active_uniforms(0), num_hidden_uniforms(0), num_values(0),
         num_shader_samplers(0), num_shader_images(0),
         num_shader_uniform_components(0), num_shader_subroutines(0),
-        is_ubo_var(false), map(map), hidden_map(hidden_map)
+        is_ubo_var(false), is_shader_storage(false), map(map),
+        hidden_map(hidden_map)
    {
       /* empty */
    }
@@ -339,6 +340,7 @@ public:
    {
       this->current_var = var;
       this->is_ubo_var = var->is_in_buffer_block();
+      this->is_shader_storage = var->is_in_shader_storage_block();
       if (var->is_interface_instance())
          program_resource_visitor::process(var->get_interface_type(),
                                            var->get_interface_type()->name);
@@ -379,6 +381,7 @@ public:
    unsigned num_shader_subroutines;
 
    bool is_ubo_var;
+   bool is_shader_storage;
 
    struct string_to_uint_map *map;
 
@@ -409,13 +412,14 @@ private:
           * components in the default block.  The spec allows image
           * uniforms to use up no more than one scalar slot.
           */
-         this->num_shader_uniform_components += values;
+         if(!is_shader_storage)
+            this->num_shader_uniform_components += values;
       } else {
 	 /* Accumulate the total number of uniform slots used by this shader.
 	  * Note that samplers do not count against this limit because they
 	  * don't use any storage on current hardware.
 	  */
-	 if (!is_ubo_var)
+	 if (!is_ubo_var && !is_shader_storage)
 	    this->num_shader_uniform_components += values;
       }
 
@@ -1118,8 +1122,10 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
       sh->num_combined_uniform_components = sh->num_uniform_components;
 
       for (unsigned i = 0; i < sh->NumUniformBlocks; i++) {
-	 sh->num_combined_uniform_components +=
-	    sh->UniformBlocks[i].UniformBufferSize / 4;
+         if (!sh->UniformBlocks[i].IsShaderStorage) {
+	    sh->num_combined_uniform_components +=
+	       sh->UniformBlocks[i].UniformBufferSize / 4;
+         }
       }
    }
 
-- 
cgit v1.2.3


From 203cd1bf28b88e041a693c867b4dd705236b0d5f Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Tue, 23 Jun 2015 08:53:36 +0200
Subject: glsl: shader storage blocks use different max block size values than
 uniforms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/linker.cpp | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 9be957a3f80..aebf2560dab 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2803,13 +2803,23 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
    unsigned total_shader_storage_blocks = 0;
 
    for (unsigned i = 0; i < prog->NumUniformBlocks; i++) {
-      if (prog->UniformBlocks[i].UniformBufferSize > ctx->Const.MaxUniformBlockSize) {
+      /* Don't check SSBOs for Uniform Block Size */
+      if (!prog->UniformBlocks[i].IsShaderStorage &&
+          prog->UniformBlocks[i].UniformBufferSize > ctx->Const.MaxUniformBlockSize) {
          linker_error(prog, "Uniform block %s too big (%d/%d)\n",
                       prog->UniformBlocks[i].Name,
                       prog->UniformBlocks[i].UniformBufferSize,
                       ctx->Const.MaxUniformBlockSize);
       }
 
+      if (prog->UniformBlocks[i].IsShaderStorage &&
+          prog->UniformBlocks[i].UniformBufferSize > ctx->Const.MaxShaderStorageBlockSize) {
+         linker_error(prog, "Shader storage block %s too big (%d/%d)\n",
+                      prog->UniformBlocks[i].Name,
+                      prog->UniformBlocks[i].UniformBufferSize,
+                      ctx->Const.MaxShaderStorageBlockSize);
+      }
+
       for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) {
 	 if (prog->UniformBlockStageIndex[j][i] != -1) {
             struct gl_shader *sh = prog->_LinkedShaders[j];
-- 
cgit v1.2.3


From 402cb7ce13da9319c96b585c1f39810a3719fae8 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Fri, 24 Apr 2015 17:42:55 +0300
Subject: i965/vec4: Introduce VEC4 IR builder.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

See "i965/fs: Introduce FS IR builder." for the rationale.

v2: Drop scalarizing VEC4 builder.
v3: Take a backend_shader as constructor argument.  Improve handling
    of debug annotations and execution control flags.  Rename "instr"
    variable.  Initialize cursor to NULL by default and add method to
    explicitly point the builder at the end of the program.

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/Makefile.sources   |   1 +
 src/mesa/drivers/dri/i965/brw_vec4_builder.h | 602 +++++++++++++++++++++++++++
 2 files changed, 603 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_builder.h

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 2ef392a9f16..17cdabf7f11 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -116,6 +116,7 @@ i965_FILES = \
 	brw_urb.c \
 	brw_util.c \
 	brw_util.h \
+	brw_vec4_builder.h \
 	brw_vec4_copy_propagation.cpp \
 	brw_vec4.cpp \
 	brw_vec4_cse.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_builder.h b/src/mesa/drivers/dri/i965/brw_vec4_builder.h
new file mode 100644
index 00000000000..a90cadb77db
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_builder.h
@@ -0,0 +1,602 @@
+/* -*- c++ -*- */
+/*
+ * Copyright © 2010-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_VEC4_BUILDER_H
+#define BRW_VEC4_BUILDER_H
+
+#include "brw_ir_vec4.h"
+#include "brw_ir_allocator.h"
+#include "brw_context.h"
+
+namespace brw {
+   /**
+    * Toolbox to assemble a VEC4 IR program out of individual instructions.
+    *
+    * This object is meant to have an interface consistent with
+    * brw::fs_builder.  They cannot be fully interchangeable because
+    * brw::fs_builder generates scalar code while brw::vec4_builder generates
+    * vector code.
+    */
+   class vec4_builder {
+   public:
+      /** Type used in this IR to represent a source of an instruction. */
+      typedef brw::src_reg src_reg;
+
+      /** Type used in this IR to represent the destination of an instruction. */
+      typedef brw::dst_reg dst_reg;
+
+      /** Type used in this IR to represent an instruction. */
+      typedef vec4_instruction instruction;
+
+      /**
+       * Construct a vec4_builder that inserts instructions into \p shader.
+       */
+      vec4_builder(backend_shader *shader) :
+         shader(shader), block(NULL), cursor(NULL),
+         force_writemask_all(false),
+         annotation()
+      {
+      }
+
+      /**
+       * Construct a vec4_builder that inserts instructions into \p shader
+       * before instruction \p inst in basic block \p block.  The execution
+       * controls and debug annotation are copied from \p inst.
+       */
+      vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst) :
+         shader(shader), block(block), cursor(inst),
+         force_writemask_all(inst->force_writemask_all)
+      {
+         annotation.str = inst->annotation;
+         annotation.ir = inst->ir;
+      }
+
+      /**
+       * Construct a vec4_builder that inserts instructions before \p cursor
+       * in basic block \p block, inheriting other code generation parameters
+       * from this.
+       */
+      vec4_builder
+      at(bblock_t *block, exec_node *cursor) const
+      {
+         vec4_builder bld = *this;
+         bld.block = block;
+         bld.cursor = cursor;
+         return bld;
+      }
+
+      /**
+       * Construct a vec4_builder appending instructions at the end of the
+       * instruction list of the shader, inheriting other code generation
+       * parameters from this.
+       */
+      vec4_builder
+      at_end() const
+      {
+         return at(NULL, (exec_node *)&shader->instructions.tail);
+      }
+
+      /**
+       * Construct a builder with per-channel control flow execution masking
+       * disabled if \p b is true.  If control flow execution masking is
+       * already disabled this has no effect.
+       */
+      vec4_builder
+      exec_all(bool b = true) const
+      {
+         vec4_builder bld = *this;
+         if (b)
+            bld.force_writemask_all = true;
+         return bld;
+      }
+
+      /**
+       * Construct a builder with the given debug annotation info.
+       */
+      vec4_builder
+      annotate(const char *str, const void *ir = NULL) const
+      {
+         vec4_builder bld = *this;
+         bld.annotation.str = str;
+         bld.annotation.ir = ir;
+         return bld;
+      }
+
+      /**
+       * Get the SIMD width in use.
+       */
+      unsigned
+      dispatch_width() const
+      {
+         return 8;
+      }
+
+      /**
+       * Allocate a virtual register of natural vector size (four for this IR)
+       * and SIMD width.  \p n gives the amount of space to allocate in
+       * dispatch_width units (which is just enough space for four logical
+       * components in this IR).
+       */
+      dst_reg
+      vgrf(enum brw_reg_type type, unsigned n = 1) const
+      {
+         assert(dispatch_width() <= 32);
+
+         if (n > 0)
+            return retype(dst_reg(GRF, shader->alloc.allocate(
+                                     n * DIV_ROUND_UP(type_sz(type), 4))),
+                           type);
+         else
+            return retype(null_reg_ud(), type);
+      }
+
+      /**
+       * Create a null register of floating type.
+       */
+      dst_reg
+      null_reg_f() const
+      {
+         return dst_reg(retype(brw_null_vec(dispatch_width()),
+                               BRW_REGISTER_TYPE_F));
+      }
+
+      /**
+       * Create a null register of signed integer type.
+       */
+      dst_reg
+      null_reg_d() const
+      {
+         return dst_reg(retype(brw_null_vec(dispatch_width()),
+                               BRW_REGISTER_TYPE_D));
+      }
+
+      /**
+       * Create a null register of unsigned integer type.
+       */
+      dst_reg
+      null_reg_ud() const
+      {
+         return dst_reg(retype(brw_null_vec(dispatch_width()),
+                               BRW_REGISTER_TYPE_UD));
+      }
+
+      /**
+       * Insert an instruction into the program.
+       */
+      instruction *
+      emit(const instruction &inst) const
+      {
+         return emit(new(shader->mem_ctx) instruction(inst));
+      }
+
+      /**
+       * Create and insert a nullary control instruction into the program.
+       */
+      instruction *
+      emit(enum opcode opcode) const
+      {
+         return emit(instruction(opcode));
+      }
+
+      /**
+       * Create and insert a nullary instruction into the program.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst) const
+      {
+         return emit(instruction(opcode, dst));
+      }
+
+      /**
+       * Create and insert a unary instruction into the program.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
+      {
+         switch (opcode) {
+         case SHADER_OPCODE_RCP:
+         case SHADER_OPCODE_RSQ:
+         case SHADER_OPCODE_SQRT:
+         case SHADER_OPCODE_EXP2:
+         case SHADER_OPCODE_LOG2:
+         case SHADER_OPCODE_SIN:
+         case SHADER_OPCODE_COS:
+            return fix_math_instruction(
+               emit(instruction(opcode, dst,
+                                fix_math_operand(src0))));
+
+         default:
+            return emit(instruction(opcode, dst, src0));
+         }
+      }
+
+      /**
+       * Create and insert a binary instruction into the program.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
+           const src_reg &src1) const
+      {
+         switch (opcode) {
+         case SHADER_OPCODE_POW:
+         case SHADER_OPCODE_INT_QUOTIENT:
+         case SHADER_OPCODE_INT_REMAINDER:
+            return fix_math_instruction(
+               emit(instruction(opcode, dst,
+                                fix_math_operand(src0),
+                                fix_math_operand(src1))));
+
+         default:
+            return emit(instruction(opcode, dst, src0, src1));
+         }
+      }
+
+      /**
+       * Create and insert a ternary instruction into the program.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
+           const src_reg &src1, const src_reg &src2) const
+      {
+         switch (opcode) {
+         case BRW_OPCODE_BFE:
+         case BRW_OPCODE_BFI2:
+         case BRW_OPCODE_MAD:
+         case BRW_OPCODE_LRP:
+            return emit(instruction(opcode, dst,
+                                    fix_3src_operand(src0),
+                                    fix_3src_operand(src1),
+                                    fix_3src_operand(src2)));
+
+         default:
+            return emit(instruction(opcode, dst, src0, src1, src2));
+         }
+      }
+
+      /**
+       * Insert a preallocated instruction into the program.
+       */
+      instruction *
+      emit(instruction *inst) const
+      {
+         inst->force_writemask_all = force_writemask_all;
+         inst->annotation = annotation.str;
+         inst->ir = annotation.ir;
+
+         if (block)
+            static_cast<instruction *>(cursor)->insert_before(block, inst);
+         else
+            cursor->insert_before(inst);
+
+         return inst;
+      }
+
+      /**
+       * Select \p src0 if the comparison of both sources with the given
+       * conditional mod evaluates to true, otherwise select \p src1.
+       *
+       * Generally useful to get the minimum or maximum of two values.
+       */
+      void
+      emit_minmax(const dst_reg &dst, const src_reg &src0,
+                  const src_reg &src1, brw_conditional_mod mod) const
+      {
+         if (shader->devinfo->gen >= 6) {
+            set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
+                                 fix_unsigned_negate(src1)));
+         } else {
+            CMP(null_reg_d(), src0, src1, mod);
+            set_predicate(BRW_PREDICATE_NORMAL,
+                          SEL(dst, src0, src1));
+         }
+      }
+
+      /**
+       * Copy any live channel from \p src to the first channel of the result.
+       */
+      src_reg
+      emit_uniformize(const src_reg &src) const
+      {
+         const vec4_builder ubld = exec_all();
+         const dst_reg chan_index =
+            writemask(vgrf(BRW_REGISTER_TYPE_UD), WRITEMASK_X);
+         const dst_reg dst = vgrf(src.type);
+
+         ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
+         ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, src_reg(chan_index));
+
+         return src_reg(dst);
+      }
+
+      /**
+       * Assorted arithmetic ops.
+       * @{
+       */
+#define ALU1(op)                                        \
+      instruction *                                     \
+      op(const dst_reg &dst, const src_reg &src0) const \
+      {                                                 \
+         return emit(BRW_OPCODE_##op, dst, src0);       \
+      }
+
+#define ALU2(op)                                                        \
+      instruction *                                                     \
+      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
+      {                                                                 \
+         return emit(BRW_OPCODE_##op, dst, src0, src1);                 \
+      }
+
+#define ALU2_ACC(op)                                                    \
+      instruction *                                                     \
+      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
+      {                                                                 \
+         instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1);    \
+         inst->writes_accumulator = true;                               \
+         return inst;                                                   \
+      }
+
+#define ALU3(op)                                                        \
+      instruction *                                                     \
+      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1,  \
+         const src_reg &src2) const                                     \
+      {                                                                 \
+         return emit(BRW_OPCODE_##op, dst, src0, src1, src2);           \
+      }
+
+      ALU2(ADD)
+      ALU2_ACC(ADDC)
+      ALU2(AND)
+      ALU2(ASR)
+      ALU2(AVG)
+      ALU3(BFE)
+      ALU2(BFI1)
+      ALU3(BFI2)
+      ALU1(BFREV)
+      ALU1(CBIT)
+      ALU2(CMPN)
+      ALU3(CSEL)
+      ALU2(DP2)
+      ALU2(DP3)
+      ALU2(DP4)
+      ALU2(DPH)
+      ALU1(F16TO32)
+      ALU1(F32TO16)
+      ALU1(FBH)
+      ALU1(FBL)
+      ALU1(FRC)
+      ALU2(LINE)
+      ALU1(LZD)
+      ALU2(MAC)
+      ALU2_ACC(MACH)
+      ALU3(MAD)
+      ALU1(MOV)
+      ALU2(MUL)
+      ALU1(NOT)
+      ALU2(OR)
+      ALU2(PLN)
+      ALU1(RNDD)
+      ALU1(RNDE)
+      ALU1(RNDU)
+      ALU1(RNDZ)
+      ALU2(SAD2)
+      ALU2_ACC(SADA2)
+      ALU2(SEL)
+      ALU2(SHL)
+      ALU2(SHR)
+      ALU2_ACC(SUBB)
+      ALU2(XOR)
+
+#undef ALU3
+#undef ALU2_ACC
+#undef ALU2
+#undef ALU1
+      /** @} */
+
+      /**
+       * CMP: Sets the low bit of the destination channels with the result
+       * of the comparison, while the upper bits are undefined, and updates
+       * the flag register with the packed 16 bits of the result.
+       */
+      instruction *
+      CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
+          brw_conditional_mod condition) const
+      {
+         /* Take the instruction:
+          *
+          * CMP null<d> src0<f> src1<f>
+          *
+          * Original gen4 does type conversion to the destination type
+          * before comparison, producing garbage results for floating
+          * point comparisons.
+          *
+          * The destination type doesn't matter on newer generations,
+          * so we set the type to match src0 so we can compact the
+          * instruction.
+          */
+         return set_condmod(condition,
+                            emit(BRW_OPCODE_CMP, retype(dst, src0.type),
+                                 fix_unsigned_negate(src0),
+                                 fix_unsigned_negate(src1)));
+      }
+
+      /**
+       * Gen4 predicated IF.
+       */
+      instruction *
+      IF(brw_predicate predicate) const
+      {
+         return set_predicate(predicate, emit(BRW_OPCODE_IF));
+      }
+
+      /**
+       * Gen6 IF with embedded comparison.
+       */
+      instruction *
+      IF(const src_reg &src0, const src_reg &src1,
+         brw_conditional_mod condition) const
+      {
+         assert(shader->devinfo->gen == 6);
+         return set_condmod(condition,
+                            emit(BRW_OPCODE_IF,
+                                 null_reg_d(),
+                                 fix_unsigned_negate(src0),
+                                 fix_unsigned_negate(src1)));
+      }
+
+      /**
+       * Emit a linear interpolation instruction.
+       */
+      instruction *
+      LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
+          const src_reg &a) const
+      {
+         if (shader->devinfo->gen >= 6) {
+            /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
+             * we need to reorder the operands.
+             */
+            return emit(BRW_OPCODE_LRP, dst, a, y, x);
+
+         } else {
+            /* We can't use the LRP instruction.  Emit x*(1-a) + y*a. */
+            const dst_reg y_times_a = vgrf(dst.type);
+            const dst_reg one_minus_a = vgrf(dst.type);
+            const dst_reg x_times_one_minus_a = vgrf(dst.type);
+
+            MUL(y_times_a, y, a);
+            ADD(one_minus_a, negate(a), src_reg(1.0f));
+            MUL(x_times_one_minus_a, x, src_reg(one_minus_a));
+            return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a));
+         }
+      }
+
+      backend_shader *shader;
+
+   protected:
+      /**
+       * Workaround for negation of UD registers.  See comment in
+       * fs_generator::generate_code() for the details.
+       */
+      src_reg
+      fix_unsigned_negate(const src_reg &src) const
+      {
+         if (src.type == BRW_REGISTER_TYPE_UD && src.negate) {
+            dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD);
+            MOV(temp, src);
+            return src_reg(temp);
+         } else {
+            return src;
+         }
+      }
+
+      /**
+       * Workaround for register access modes not supported by the ternary
+       * instruction encoding.
+       */
+      src_reg
+      fix_3src_operand(const src_reg &src) const
+      {
+         /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
+          * able to use vertical stride of zero to replicate the vec4 uniform, like
+          *
+          *    g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
+          *
+          * But you can't, since vertical stride is always four in three-source
+          * instructions. Instead, insert a MOV instruction to do the replication so
+          * that the three-source instruction can consume it.
+          */
+
+         /* The MOV is only needed if the source is a uniform or immediate. */
+         if (src.file != UNIFORM && src.file != IMM)
+            return src;
+
+         if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle))
+            return src;
+
+         const dst_reg expanded = vgrf(src.type);
+         emit(VEC4_OPCODE_UNPACK_UNIFORM, expanded, src);
+         return src_reg(expanded);
+      }
+
+      /**
+       * Workaround for register access modes not supported by the math
+       * instruction.
+       */
+      src_reg
+      fix_math_operand(const src_reg &src) const
+      {
+         /* The gen6 math instruction ignores the source modifiers --
+          * swizzle, abs, negate, and at least some parts of the register
+          * region description.
+          *
+          * Rather than trying to enumerate all these cases, *always* expand the
+          * operand to a temp GRF for gen6.
+          *
+          * For gen7, keep the operand as-is, except if immediate, which gen7 still
+          * can't use.
+          */
+         if (shader->devinfo->gen == 6 ||
+             (shader->devinfo->gen == 7 && src.file == IMM)) {
+            const dst_reg tmp = vgrf(src.type);
+            MOV(tmp, src);
+            return src_reg(tmp);
+         } else {
+            return src;
+         }
+      }
+
+      /**
+       * Workaround other weirdness of the math instruction.
+       */
+      instruction *
+      fix_math_instruction(instruction *inst) const
+      {
+         if (shader->devinfo->gen == 6 &&
+             inst->dst.writemask != WRITEMASK_XYZW) {
+            const dst_reg tmp = vgrf(inst->dst.type);
+            MOV(inst->dst, src_reg(tmp));
+            inst->dst = tmp;
+
+         } else if (shader->devinfo->gen < 6) {
+            const unsigned sources = (inst->src[1].file == BAD_FILE ? 1 : 2);
+            inst->base_mrf = 1;
+            inst->mlen = sources;
+         }
+
+         return inst;
+      }
+
+      bblock_t *block;
+      exec_node *cursor;
+
+      bool force_writemask_all;
+
+      /** Debug annotation info. */
+      struct {
+         const char *str;
+         const void *ir;
+      } annotation;
+   };
+}
+
+#endif
-- 
cgit v1.2.3


From d5503ce39ffb056de8d3c9c78870aa7f1b3432d2 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Wed, 29 Apr 2015 02:12:27 +0300
Subject: i965/vec4: Import helpers to convert vectors into arrays and back.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These functions handle the conversion of a vec4 into the form expected
by the dataport unit in message and message return payloads.  The
conversion is not always trivial because some messages don't support
SIMD4x2 for some generations, in which case a strided copy may be
necessary.

v2: Split from the FS implementation.
v3: Rewrite to avoid evil array_reg, emit_collect and emit_zip.

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/Makefile.sources         |  2 +
 .../drivers/dri/i965/brw_vec4_surface_builder.cpp  | 98 ++++++++++++++++++++++
 .../drivers/dri/i965/brw_vec4_surface_builder.h    | 30 +++++++
 3 files changed, 130 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_surface_builder.h

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 17cdabf7f11..cc3ecaf7a8b 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -130,6 +130,8 @@ i965_FILES = \
 	brw_vec4_nir.cpp \
 	brw_vec4_gs_nir.cpp \
 	brw_vec4_reg_allocate.cpp \
+	brw_vec4_surface_builder.cpp \
+	brw_vec4_surface_builder.h \
 	brw_vec4_visitor.cpp \
 	brw_vec4_vp.cpp \
 	brw_vec4_vs_visitor.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
new file mode 100644
index 00000000000..b77cd74ebaa
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright © 2013-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4_surface_builder.h"
+
+using namespace brw;
+
+namespace {
+   namespace array_utils {
+      /**
+       * Copy one out of every \p src_stride logical components of the argument
+       * into one out of every \p dst_stride logical components of the result.
+       */
+      src_reg
+      emit_stride(const vec4_builder &bld, const src_reg &src, unsigned size,
+                  unsigned dst_stride, unsigned src_stride)
+      {
+         if (src_stride == 1 && dst_stride == 1) {
+            return src;
+         } else {
+            const dst_reg dst = bld.vgrf(src.type,
+                                         DIV_ROUND_UP(size * dst_stride, 4));
+
+            for (unsigned i = 0; i < size; ++i)
+               bld.MOV(writemask(offset(dst, i * dst_stride / 4),
+                                 1 << (i * dst_stride % 4)),
+                       swizzle(offset(src, i * src_stride / 4),
+                               brw_swizzle_for_mask(1 << (i * src_stride % 4))));
+
+            return src_reg(dst);
+         }
+      }
+
+      /**
+       * Convert a VEC4 into an array of registers with the layout expected by
+       * the recipient shared unit.  If \p has_simd4x2 is true the argument is
+       * left unmodified in SIMD4x2 form, otherwise it will be rearranged into
+       * a SIMD8 vector.
+       */
+      src_reg
+      emit_insert(const vec4_builder &bld, const src_reg &src,
+                  unsigned n, bool has_simd4x2)
+      {
+         if (src.file == BAD_FILE || n == 0) {
+            return src_reg();
+
+         } else {
+            /* Pad unused components with zeroes. */
+            const unsigned mask = (1 << n) - 1;
+            const dst_reg tmp = bld.vgrf(src.type);
+
+            bld.MOV(writemask(tmp, mask), src);
+            if (n < 4)
+               bld.MOV(writemask(tmp, ~mask), 0);
+
+            return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1);
+         }
+      }
+
+      /**
+       * Convert an array of registers back into a VEC4 according to the
+       * layout expected from some shared unit.  If \p has_simd4x2 is true the
+       * argument is left unmodified in SIMD4x2 form, otherwise it will be
+       * rearranged from SIMD8 form.
+       */
+      src_reg
+      emit_extract(const vec4_builder &bld, const src_reg src,
+                   unsigned n, bool has_simd4x2)
+      {
+         if (src.file == BAD_FILE || n == 0) {
+            return src_reg();
+
+         } else {
+            return emit_stride(bld, src, n, 1, has_simd4x2 ? 1 : 4);
+         }
+      }
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.h b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.h
new file mode 100644
index 00000000000..8a1a22ee841
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.h
@@ -0,0 +1,30 @@
+/* -*- c++ -*- */
+/*
+ * Copyright © 2013-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_VEC4_SURFACE_BUILDER_H
+#define BRW_VEC4_SURFACE_BUILDER_H
+
+#include "brw_vec4_builder.h"
+
+#endif
-- 
cgit v1.2.3


From f17c6b90660f4e156e76f4fa333af86cda2a0125 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Sat, 1 Aug 2015 17:15:36 +0300
Subject: i965/vec4: Import surface message builder functions.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement helper functions that can be used to construct and send
untyped and typed surface read, write and atomic messages to the
shared dataport unit.

v2: Split from the FS implementation.
v3: Rewrite to avoid evil array_reg, emit_collect and emit_zip.

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 .../drivers/dri/i965/brw_vec4_surface_builder.cpp  | 234 +++++++++++++++++++++
 .../drivers/dri/i965/brw_vec4_surface_builder.h    |  39 ++++
 2 files changed, 273 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
index b77cd74ebaa..a7c286d3ac1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
@@ -96,3 +96,237 @@ namespace {
       }
    }
 }
+
+namespace brw {
+   namespace surface_access {
+      namespace {
+         using namespace array_utils;
+
+         /**
+          * Generate a send opcode for a surface message and return the
+          * result.
+          */
+         src_reg
+         emit_send(const vec4_builder &bld, enum opcode op,
+                   const src_reg &header,
+                   const src_reg &addr, unsigned addr_sz,
+                   const src_reg &src, unsigned src_sz,
+                   const src_reg &surface,
+                   unsigned arg, unsigned ret_sz,
+                   brw_predicate pred = BRW_PREDICATE_NONE)
+         {
+            /* Calculate the total number of components of the payload. */
+            const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1);
+            const unsigned sz = header_sz + addr_sz + src_sz;
+
+            /* Construct the payload. */
+            const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
+            unsigned n = 0;
+
+            if (header_sz)
+               bld.exec_all().MOV(offset(payload, n++),
+                                  retype(header, BRW_REGISTER_TYPE_UD));
+
+            for (unsigned i = 0; i < addr_sz; i++)
+               bld.MOV(offset(payload, n++),
+                       offset(retype(addr, BRW_REGISTER_TYPE_UD), i));
+
+            for (unsigned i = 0; i < src_sz; i++)
+               bld.MOV(offset(payload, n++),
+                       offset(retype(src, BRW_REGISTER_TYPE_UD), i));
+
+            /* Reduce the dynamically uniform surface index to a single
+             * scalar.
+             */
+            const src_reg usurface = bld.emit_uniformize(surface);
+
+            /* Emit the message send instruction. */
+            const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz);
+            vec4_instruction *inst =
+               bld.emit(op, dst, src_reg(payload), usurface, arg);
+            inst->mlen = sz;
+            inst->regs_written = ret_sz;
+            inst->header_size = header_sz;
+            inst->predicate = pred;
+
+            return src_reg(dst);
+         }
+      }
+
+      /**
+       * Emit an untyped surface read opcode.  \p dims determines the number
+       * of components of the address and \p size the number of components of
+       * the returned value.
+       */
+      src_reg
+      emit_untyped_read(const vec4_builder &bld,
+                        const src_reg &surface, const src_reg &addr,
+                        unsigned dims, unsigned size,
+                        brw_predicate pred)
+      {
+         return emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ, src_reg(),
+                          emit_insert(bld, addr, dims, true), 1,
+                          src_reg(), 0,
+                          surface, size, 1, pred);
+      }
+
+      /**
+       * Emit an untyped surface write opcode.  \p dims determines the number
+       * of components of the address and \p size the number of components of
+       * the argument.
+       */
+      void
+      emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
+                         const src_reg &addr, const src_reg &src,
+                         unsigned dims, unsigned size,
+                         brw_predicate pred)
+      {
+         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
+                                   bld.shader->devinfo->is_haswell);
+         emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
+                   emit_insert(bld, addr, dims, has_simd4x2),
+                   has_simd4x2 ? 1 : dims,
+                   emit_insert(bld, src, size, has_simd4x2),
+                   has_simd4x2 ? 1 : size,
+                   surface, size, 0, pred);
+      }
+
+      /**
+       * Emit an untyped surface atomic opcode.  \p dims determines the number
+       * of components of the address and \p rsize the number of components of
+       * the returned value (either zero or one).
+       */
+      src_reg
+      emit_untyped_atomic(const vec4_builder &bld,
+                          const src_reg &surface, const src_reg &addr,
+                          const src_reg &src0, const src_reg &src1,
+                          unsigned dims, unsigned rsize, unsigned op,
+                          brw_predicate pred)
+      {
+         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
+                                   bld.shader->devinfo->is_haswell);
+
+         /* Zip the components of both sources, they are represented as the X
+          * and Y components of the same vector.
+          */
+         const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
+         const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
+
+         if (size >= 1)
+            bld.MOV(writemask(srcs, WRITEMASK_X), src0);
+         if (size >= 2)
+            bld.MOV(writemask(srcs, WRITEMASK_Y), src1);
+
+         return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC, src_reg(),
+                          emit_insert(bld, addr, dims, has_simd4x2),
+                          has_simd4x2 ? 1 : dims,
+                          emit_insert(bld, src_reg(srcs), size, has_simd4x2),
+                          has_simd4x2 ? 1 : size,
+                          surface, op, rsize, pred);
+      }
+
+      namespace {
+         /**
+          * Initialize the header present in typed surface messages.
+          */
+         src_reg
+         emit_typed_message_header(const vec4_builder &bld)
+         {
+            const vec4_builder ubld = bld.exec_all();
+            const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);
+
+            ubld.MOV(dst, src_reg(0));
+
+            if (bld.shader->devinfo->gen == 7 &&
+                !bld.shader->devinfo->is_haswell) {
+               /* The sample mask is used on IVB for the SIMD8 messages that
+                * have no SIMD4x2 variant.  We only use the two X channels
+                * in that case, mask everything else out.
+                */
+               ubld.MOV(writemask(dst, WRITEMASK_W), src_reg(0x11));
+            }
+
+            return src_reg(dst);
+         }
+      }
+
+      /**
+       * Emit a typed surface read opcode.  \p dims determines the number of
+       * components of the address and \p size the number of components of the
+       * returned value.
+       */
+      src_reg
+      emit_typed_read(const vec4_builder &bld, const src_reg &surface,
+                      const src_reg &addr, unsigned dims, unsigned size)
+      {
+         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
+                                   bld.shader->devinfo->is_haswell);
+         const src_reg tmp =
+            emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_READ,
+                      emit_typed_message_header(bld),
+                      emit_insert(bld, addr, dims, has_simd4x2),
+                      has_simd4x2 ? 1 : dims,
+                      src_reg(), 0,
+                      surface, size,
+                      has_simd4x2 ? 1 : size);
+
+         return emit_extract(bld, tmp, size, has_simd4x2);
+      }
+
+      /**
+       * Emit a typed surface write opcode.  \p dims determines the number of
+       * components of the address and \p size the number of components of the
+       * argument.
+       */
+      void
+      emit_typed_write(const vec4_builder &bld, const src_reg &surface,
+                       const src_reg &addr, const src_reg &src,
+                       unsigned dims, unsigned size)
+      {
+         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
+                                   bld.shader->devinfo->is_haswell);
+         emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_WRITE,
+                   emit_typed_message_header(bld),
+                   emit_insert(bld, addr, dims, has_simd4x2),
+                   has_simd4x2 ? 1 : dims,
+                   emit_insert(bld, src, size, has_simd4x2),
+                   has_simd4x2 ? 1 : size,
+                   surface, size, 0);
+      }
+
+      /**
+       * Emit a typed surface atomic opcode.  \p dims determines the number of
+       * components of the address and \p rsize the number of components of
+       * the returned value (either zero or one).
+       */
+      src_reg
+      emit_typed_atomic(const vec4_builder &bld,
+                        const src_reg &surface, const src_reg &addr,
+                        const src_reg &src0, const src_reg &src1,
+                        unsigned dims, unsigned rsize, unsigned op,
+                        brw_predicate pred)
+      {
+         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
+                                   bld.shader->devinfo->is_haswell);
+
+         /* Zip the components of both sources, they are represented as the X
+          * and Y components of the same vector.
+          */
+         const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
+         const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
+
+         if (size >= 1)
+            bld.MOV(writemask(srcs, WRITEMASK_X), src0);
+         if (size >= 2)
+            bld.MOV(writemask(srcs, WRITEMASK_Y), src1);
+
+         return emit_send(bld, SHADER_OPCODE_TYPED_ATOMIC,
+                          emit_typed_message_header(bld),
+                          emit_insert(bld, addr, dims, has_simd4x2),
+                          has_simd4x2 ? 1 : dims,
+                          emit_insert(bld, src_reg(srcs), size, has_simd4x2),
+                          has_simd4x2 ? 1 : size,
+                          surface, op, rsize, pred);
+      }
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.h b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.h
index 8a1a22ee841..6e61c0fce9b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.h
@@ -27,4 +27,43 @@
 
 #include "brw_vec4_builder.h"
 
+namespace brw {
+   namespace surface_access {
+      src_reg
+      emit_untyped_read(const vec4_builder &bld,
+                        const src_reg &surface, const src_reg &addr,
+                        unsigned dims, unsigned size,
+                        brw_predicate pred = BRW_PREDICATE_NONE);
+
+      void
+      emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
+                         const src_reg &addr, const src_reg &src,
+                         unsigned dims, unsigned size,
+                         brw_predicate pred = BRW_PREDICATE_NONE);
+
+      src_reg
+      emit_untyped_atomic(const vec4_builder &bld,
+                          const src_reg &surface, const src_reg &addr,
+                          const src_reg &src0, const src_reg &src1,
+                          unsigned dims, unsigned rsize, unsigned op,
+                          brw_predicate pred = BRW_PREDICATE_NONE);
+
+      src_reg
+      emit_typed_read(const vec4_builder &bld, const src_reg &surface,
+                      const src_reg &addr, unsigned dims, unsigned size);
+
+      void
+      emit_typed_write(const vec4_builder &bld, const src_reg &surface,
+                       const src_reg &addr, const src_reg &src,
+                       unsigned dims, unsigned size);
+
+      src_reg
+      emit_typed_atomic(const vec4_builder &bld, const src_reg &surface,
+                        const src_reg &addr,
+                        const src_reg &src0, const src_reg &src1,
+                        unsigned dims, unsigned rsize, unsigned op,
+                        brw_predicate pred = BRW_PREDICATE_NONE);
+   }
+}
+
 #endif
-- 
cgit v1.2.3


From 9bb7d9ecf881a10f198fba2c52ffb8bf3d557d6a Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Thu, 9 Jul 2015 10:26:42 +0200
Subject: nir: Implement __intrinsic_store_ssbo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2 (Connor):
 - Make the STORE() macro take arguments for the extra sources (and their
   size) and any extra indices required.

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/nir/glsl_to_nir.cpp  | 36 ++++++++++++++++++++++++++++++++++++
 src/glsl/nir/nir_intrinsics.h | 20 ++++++++++++--------
 2 files changed, 48 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index d114da49272..add6b6261b4 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -649,6 +649,8 @@ nir_visitor::visit(ir_call *ir)
          op = nir_intrinsic_image_size;
       } else if (strcmp(ir->callee_name(), "__intrinsic_image_samples") == 0) {
          op = nir_intrinsic_image_samples;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_store_ssbo") == 0) {
+         op = nir_intrinsic_store_ssbo;
       } else {
          unreachable("not reached");
       }
@@ -747,6 +749,40 @@ nir_visitor::visit(ir_call *ir)
       }
       case nir_intrinsic_memory_barrier:
          break;
+      case nir_intrinsic_store_ssbo: {
+         exec_node *param = ir->actual_parameters.get_head();
+         ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+
+         param = param->get_next();
+         ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+         param = param->get_next();
+         ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+
+         param = param->get_next();
+         ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+         assert(write_mask);
+
+         /* Check if we need the indirect version */
+         ir_constant *const_offset = offset->as_constant();
+         if (!const_offset) {
+            op = nir_intrinsic_store_ssbo_indirect;
+            ralloc_free(instr);
+            instr = nir_intrinsic_instr_create(shader, op);
+            instr->src[2] = evaluate_rvalue(offset);
+            instr->const_index[0] = 0;
+         } else {
+            instr->const_index[0] = const_offset->value.u[0];
+         }
+
+         instr->const_index[1] = write_mask->value.u[0];
+
+         instr->src[0] = evaluate_rvalue(val);
+         instr->num_components = val->type->vector_elements;
+
+         instr->src[1] = evaluate_rvalue(block);
+         break;
+      }
       default:
          unreachable("not reached");
       }
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index 271dc42d984..d0dd7b9ee60 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -205,15 +205,19 @@ LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 /*
  * Stores work the same way as loads, except now the first register input is
  * the value or array to store and the optional second input is the indirect
- * offset.
+ * offset. SSBO stores are similar, but they accept an extra source for the
+ * block index and an extra index with the writemask to use.
  */
 
-#define STORE(name, num_indices, flags) \
-   INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \
-   INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
-             num_indices, flags) \
+#define STORE(name, extra_srcs, extra_srcs_size, extra_indices, flags) \
+   INTRINSIC(store_##name, 1 + extra_srcs, \
+             ARR(0, extra_srcs_size, extra_srcs_size, extra_srcs_size), \
+             false, 0, 0, 1 + extra_indices, flags) \
+   INTRINSIC(store_##name##_indirect, 2 + extra_srcs, \
+             ARR(0, 1, extra_srcs_size, extra_srcs_size), \
+             false, 0, 0, 1 + extra_indices, flags)
 
-STORE(output, 1, 0)
-/* STORE(ssbo, 2, 0) */
+STORE(output, 0, 0, 0, 0)
+STORE(ssbo, 1, 1, 1, 0)
 
-LAST_INTRINSIC(store_output_indirect)
+LAST_INTRINSIC(store_ssbo_indirect)
-- 
cgit v1.2.3


From 337dad8ceeb4f313a47b4ddb31805f355c3fc3a5 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Mon, 1 Jun 2015 09:26:01 +0200
Subject: i965/nir/fs: Implement nir_intrinsic_store_ssbo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 71 ++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 97aef61657f..bcb5e1bc881 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -29,8 +29,10 @@
 #include "brw_fs.h"
 #include "brw_fs_surface_builder.h"
 #include "brw_nir.h"
+#include "brw_fs_surface_builder.h"
 
 using namespace brw;
+using namespace brw::surface_access;
 
 void
 fs_visitor::emit_nir_code()
@@ -1699,6 +1701,75 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_store_ssbo_indirect:
+      has_indirect = true;
+      /* fallthrough */
+   case nir_intrinsic_store_ssbo: {
+      assert(devinfo->gen >= 7);
+
+      /* Block index */
+      fs_reg surf_index;
+      nir_const_value *const_uniform_block =
+         nir_src_as_const_value(instr->src[1]);
+      if (const_uniform_block) {
+         unsigned index = stage_prog_data->binding_table.ubo_start +
+                          const_uniform_block->u[0];
+         surf_index = fs_reg(index);
+         brw_mark_surface_used(prog_data, index);
+      } else {
+         surf_index = vgrf(glsl_type::uint_type);
+         bld.ADD(surf_index, get_nir_src(instr->src[1]),
+                  fs_reg(stage_prog_data->binding_table.ubo_start));
+         surf_index = bld.emit_uniformize(surf_index);
+
+         brw_mark_surface_used(prog_data,
+                               stage_prog_data->binding_table.ubo_start +
+                               shader_prog->NumUniformBlocks - 1);
+      }
+
+      /* Offset */
+      fs_reg offset_reg = vgrf(glsl_type::uint_type);
+      unsigned const_offset_bytes = 0;
+      if (has_indirect) {
+         bld.MOV(offset_reg, get_nir_src(instr->src[2]));
+      } else {
+         const_offset_bytes = instr->const_index[0];
+         bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+      }
+
+      /* Value */
+      fs_reg val_reg = get_nir_src(instr->src[0]);
+
+      /* Writemask */
+      unsigned writemask = instr->const_index[1];
+
+      /* Write each component present in the writemask */
+      unsigned skipped_channels = 0;
+      for (int i = 0; i < instr->num_components; i++) {
+         int component_mask = 1 << i;
+         if (writemask & component_mask) {
+            if (skipped_channels) {
+               if (!has_indirect) {
+                  const_offset_bytes += 4 * skipped_channels;
+                  bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+               } else {
+                  bld.ADD(offset_reg, offset_reg,
+                           brw_imm_ud(4 * skipped_channels));
+               }
+               skipped_channels = 0;
+            }
+
+            emit_untyped_write(bld, surf_index, offset_reg,
+                               offset(val_reg, bld, i),
+                               1 /* dims */, 1 /* size */,
+                               BRW_PREDICATE_NONE);
+         }
+
+         skipped_channels++;
+      }
+      break;
+   }
+
    case nir_intrinsic_store_output_indirect:
       has_indirect = true;
       /* fallthrough */
-- 
cgit v1.2.3


From 922b3d1bb16b4b6b292cb159e5fe3d0615ca725c Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Thu, 10 Sep 2015 12:00:04 +0200
Subject: i965/nir/vec4: Implement nir_intrinsic_store_ssbo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 148 +++++++++++++++++++++++++++++
 1 file changed, 148 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 0db2291d84e..d67c2d86f91 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -23,8 +23,13 @@
 
 #include "brw_nir.h"
 #include "brw_vec4.h"
+#include "brw_vec4_builder.h"
+#include "brw_vec4_surface_builder.h"
 #include "glsl/ir_uniform.h"
 
+using namespace brw;
+using namespace brw::surface_access;
+
 namespace brw {
 
 void
@@ -564,6 +569,149 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       break;
    }
 
+   case nir_intrinsic_store_ssbo_indirect:
+      has_indirect = true;
+      /* fallthrough */
+   case nir_intrinsic_store_ssbo: {
+      assert(devinfo->gen >= 7);
+
+      /* Block index */
+      src_reg surf_index;
+      nir_const_value *const_uniform_block =
+         nir_src_as_const_value(instr->src[1]);
+      if (const_uniform_block) {
+         unsigned index = prog_data->base.binding_table.ubo_start +
+                          const_uniform_block->u[0];
+         surf_index = src_reg(index);
+         brw_mark_surface_used(&prog_data->base, index);
+      } else {
+         surf_index = src_reg(this, glsl_type::uint_type);
+         emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[1], 1),
+                  src_reg(prog_data->base.binding_table.ubo_start)));
+         surf_index = emit_uniformize(surf_index);
+
+         brw_mark_surface_used(&prog_data->base,
+                               prog_data->base.binding_table.ubo_start +
+                               shader_prog->NumUniformBlocks - 1);
+      }
+
+      /* Offset */
+      src_reg offset_reg = src_reg(this, glsl_type::uint_type);
+      unsigned const_offset_bytes = 0;
+      if (has_indirect) {
+         emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[2], 1)));
+      } else {
+         const_offset_bytes = instr->const_index[0];
+         emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
+      }
+
+      /* Value */
+      src_reg val_reg = get_nir_src(instr->src[0], 4);
+
+      /* Writemask */
+      unsigned write_mask = instr->const_index[1];
+
+      /* IvyBridge does not have a native SIMD4x2 untyped write message so untyped
+       * writes will use SIMD8 mode. In order to hide this and keep symmetry across
+       * typed and untyped messages and across hardware platforms, the
+       * current implementation of the untyped messages will transparently convert
+       * the SIMD4x2 payload into an equivalent SIMD8 payload by transposing it
+       * and enabling only channel X on the SEND instruction.
+       *
+       * The above works well for full vector writes, but not for partial writes
+       * where we want to write some channels and not others, like when we have
+       * code such as v.xyw = vec3(1,2,4). Because the untyped write messages are
+       * quite restrictive with regards to the channel enables we can configure in
+       * the message descriptor (not all combinations are allowed) we cannot simply
+       * implement these scenarios with a single message while keeping the
+       * aforementioned symmetry in the implementation. For now we decided that
+       * it is better to keep the symmetry to reduce complexity, so in situations
+       * such as the one described we end up emitting two untyped write messages
+       * (one for xy and another for w).
+       *
+       * The code below packs consecutive channels into a single write message,
+       * detects gaps in the vector write and if needed, sends a second message
+       * with the remaining channels. If in the future we decide that we want to
+       * emit a single message at the expense of losing the symmetry in the
+       * implementation we can:
+       *
+       * 1) For IvyBridge: Only use the red channel of the untyped write SIMD8
+       *    message payload. In this mode we can write up to 8 offsets and dwords
+       *    to the red channel only (for the two vec4s in the SIMD4x2 execution)
+       *    and select which of the 8 channels carry data to write by setting the
+       *    appropriate writemask in the dst register of the SEND instruction.
+       *    This would require writing a new generator opcode specifically for
+       *    IvyBridge since we would need to prepare a SIMD8 payload that could
+       *    use any channel, not just X.
+       *
+       * 2) For Haswell+: Simply send a single write message but set the writemask
+       *    on the dst of the SEND instruction to select the channels we want to
+       *    write. This would require modifying the current messages to receive
+       *    and honor the writemask provided.
+       */
+      const vec4_builder bld = vec4_builder(this).at_end()
+                               .annotate(current_annotation, base_ir);
+
+      int swizzle[4] = { 0, 0, 0, 0};
+      int num_channels = 0;
+      unsigned skipped_channels = 0;
+      int num_components = instr->num_components;
+      for (int i = 0; i < num_components; i++) {
+         /* Check if this channel needs to be written. If so, record the
+          * channel we need to take the data from in the swizzle array
+          */
+         int component_mask = 1 << i;
+         int write_test = write_mask & component_mask;
+         if (write_test)
+            swizzle[num_channels++] = i;
+
+         /* If we don't have to write this channel it means we have a gap in the
+          * vector, so write the channels we accumulated until now, if any. Do
+          * the same if this was the last component in the vector.
+          */
+         if (!write_test || i == num_components - 1) {
+            if (num_channels > 0) {
+               /* We have channels to write, so update the offset we need to
+                * write at to skip the channels we skipped, if any.
+                */
+               if (skipped_channels > 0) {
+                  if (!has_indirect) {
+                     const_offset_bytes += 4 * skipped_channels;
+                     offset_reg = src_reg(const_offset_bytes);
+                  } else {
+                     emit(ADD(dst_reg(offset_reg), offset_reg,
+                              brw_imm_ud(4 * skipped_channels)));
+                  }
+               }
+
+               /* Swizzle the data register so we take the data from the channels
+                * we need to write and send the write message. This will write
+                * num_channels consecutive dwords starting at offset.
+                */
+               val_reg.swizzle =
+                  BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+               emit_untyped_write(bld, surf_index, offset_reg, val_reg,
+                                  1 /* dims */, num_channels /* size */,
+                                  BRW_PREDICATE_NONE);
+
+               /* If we have to do a second write we will have to update the
+                * offset so that we jump over the channels we have just written
+                * now.
+                */
+               skipped_channels = num_channels;
+
+               /* Restart the count for the next write message */
+               num_channels = 0;
+            }
+
+            /* We did not write the current channel, so increase skipped count */
+            skipped_channels++;
+         }
+      }
+
+      break;
+   }
+
    case nir_intrinsic_load_vertex_id:
       unreachable("should be lowered by lower_vertex_id()");
 
-- 
cgit v1.2.3


From 3e70c968de363168fc491ca673a4798284a10c44 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Wed, 23 Sep 2015 08:48:21 +0200
Subject: nir: modify the instruction insertion in nir_visitor::visit(ir_call
 *ir)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch moves the nir_instr_insert_after_cf_list call into each case
in the intrinsics switch at nir_visitor::visit(ir_call *ir) and
defines a nir_dest variable which will be used when handling
ir->return_deref after the switch.

This patch simplifies the nir_intrinsic_load_ssbo implementation
changes we are going to make next.

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/nir/glsl_to_nir.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index add6b6261b4..d0b769a9c13 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -656,6 +656,7 @@ nir_visitor::visit(ir_call *ir)
       }
 
       nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
+      nir_dest *dest = &instr->dest;
 
       switch (op) {
       case nir_intrinsic_atomic_counter_read_var:
@@ -665,6 +666,7 @@ nir_visitor::visit(ir_call *ir)
             (ir_dereference *) ir->actual_parameters.get_head();
          instr->variables[0] = evaluate_deref(&instr->instr, param);
          nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+         nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
          break;
       }
       case nir_intrinsic_image_load:
@@ -702,8 +704,10 @@ nir_visitor::visit(ir_call *ir)
          }
 
          if (op == nir_intrinsic_image_size ||
-             op == nir_intrinsic_image_samples)
+             op == nir_intrinsic_image_samples) {
+            nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
             break;
+         }
 
          /* Set the address argument, extending the coordinate vector to four
           * components.
@@ -745,9 +749,11 @@ nir_visitor::visit(ir_call *ir)
             instr->src[3] = evaluate_rvalue((ir_dereference *)param);
             param = param->get_next();
          }
+         nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
          break;
       }
       case nir_intrinsic_memory_barrier:
+         nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
          break;
       case nir_intrinsic_store_ssbo: {
          exec_node *param = ir->actual_parameters.get_head();
@@ -771,6 +777,7 @@ nir_visitor::visit(ir_call *ir)
             instr = nir_intrinsic_instr_create(shader, op);
             instr->src[2] = evaluate_rvalue(offset);
             instr->const_index[0] = 0;
+            dest = &instr->dest;
          } else {
             instr->const_index[0] = const_offset->value.u[0];
          }
@@ -781,14 +788,13 @@ nir_visitor::visit(ir_call *ir)
          instr->num_components = val->type->vector_elements;
 
          instr->src[1] = evaluate_rvalue(block);
+         nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
          break;
       }
       default:
          unreachable("not reached");
       }
 
-      nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
-
       if (ir->return_deref) {
          nir_intrinsic_instr *store_instr =
             nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
@@ -796,7 +802,7 @@ nir_visitor::visit(ir_call *ir)
 
          store_instr->variables[0] =
             evaluate_deref(&store_instr->instr, ir->return_deref);
-         store_instr->src[0] = nir_src_for_ssa(&instr->dest.ssa);
+         store_instr->src[0] = nir_src_for_ssa(&dest->ssa);
 
          nir_instr_insert_after_cf_list(this->cf_node_list,
                                         &store_instr->instr);
-- 
cgit v1.2.3


From e59ae238b606af0f3ec5c722ac2d1495caed091a Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Thu, 9 Jul 2015 10:29:18 +0200
Subject: nir: Implement __intrinsic_load_ssbo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2:
- Fix ssbo loads with boolean variables.

v3:
- Simplify the changes (Kristian)

Reviewed-by: Connor Abbott <connor.w.abbott@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/nir/glsl_to_nir.cpp            | 67 +++++++++++++++++++++++++++++++++
 src/glsl/nir/nir_intrinsics.h           |  2 +-
 src/glsl/nir/nir_lower_phis_to_scalar.c |  2 +
 3 files changed, 70 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index d0b769a9c13..d1e2488a69e 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -651,6 +651,8 @@ nir_visitor::visit(ir_call *ir)
          op = nir_intrinsic_image_samples;
       } else if (strcmp(ir->callee_name(), "__intrinsic_store_ssbo") == 0) {
          op = nir_intrinsic_store_ssbo;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_load_ssbo") == 0) {
+         op = nir_intrinsic_load_ssbo;
       } else {
          unreachable("not reached");
       }
@@ -791,6 +793,71 @@ nir_visitor::visit(ir_call *ir)
          nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
          break;
       }
+      case nir_intrinsic_load_ssbo: {
+         exec_node *param = ir->actual_parameters.get_head();
+         ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+
+         param = param->get_next();
+         ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+         /* Check if we need the indirect version */
+         ir_constant *const_offset = offset->as_constant();
+         if (!const_offset) {
+            op = nir_intrinsic_load_ssbo_indirect;
+            ralloc_free(instr);
+            instr = nir_intrinsic_instr_create(shader, op);
+            instr->src[1] = evaluate_rvalue(offset);
+            instr->const_index[0] = 0;
+            dest = &instr->dest;
+         } else {
+            instr->const_index[0] = const_offset->value.u[0];
+         }
+
+         instr->src[0] = evaluate_rvalue(block);
+
+         const glsl_type *type = ir->return_deref->var->type;
+         instr->num_components = type->vector_elements;
+
+         /* Setup destination register */
+         nir_ssa_dest_init(&instr->instr, &instr->dest,
+                           type->vector_elements, NULL);
+
+         /* Insert the created nir instruction now since in the case of boolean
+          * result we will need to emit another instruction after it
+          */
+         nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+
+         /*
+          * In SSBO/UBO's, a true boolean value is any non-zero value, but we
+          * consider a true boolean to be ~0. Fix this up with a != 0
+          * comparison.
+          */
+         if (type->base_type == GLSL_TYPE_BOOL) {
+            nir_load_const_instr *const_zero =
+               nir_load_const_instr_create(shader, 1);
+            const_zero->value.u[0] = 0;
+            nir_instr_insert_after_cf_list(this->cf_node_list,
+                                           &const_zero->instr);
+
+            nir_alu_instr *load_ssbo_compare =
+               nir_alu_instr_create(shader, nir_op_ine);
+            load_ssbo_compare->src[0].src.is_ssa = true;
+            load_ssbo_compare->src[0].src.ssa = &instr->dest.ssa;
+            load_ssbo_compare->src[1].src.is_ssa = true;
+            load_ssbo_compare->src[1].src.ssa = &const_zero->def;
+            for (unsigned i = 0; i < type->vector_elements; i++)
+               load_ssbo_compare->src[1].swizzle[i] = 0;
+            nir_ssa_dest_init(&load_ssbo_compare->instr,
+                              &load_ssbo_compare->dest.dest,
+                              type->vector_elements, NULL);
+            load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1;
+            nir_instr_insert_after_cf_list(this->cf_node_list,
+                                           &load_ssbo_compare->instr);
+            dest = &load_ssbo_compare->dest.dest;
+         }
+
+         break;
+      }
       default:
          unreachable("not reached");
       }
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index d0dd7b9ee60..8cab7ea0ae0 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -200,7 +200,7 @@ SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */
 LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 LOAD(ubo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-/* LOAD(ssbo, 1, 0) */
+LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
 
 /*
  * Stores work the same way as loads, except now the first register input is
diff --git a/src/glsl/nir/nir_lower_phis_to_scalar.c b/src/glsl/nir/nir_lower_phis_to_scalar.c
index d72a71dfb6c..aa124d9e6cc 100644
--- a/src/glsl/nir/nir_lower_phis_to_scalar.c
+++ b/src/glsl/nir/nir_lower_phis_to_scalar.c
@@ -94,6 +94,8 @@ is_phi_src_scalarizable(nir_phi_src *src,
       case nir_intrinsic_load_uniform_indirect:
       case nir_intrinsic_load_ubo:
       case nir_intrinsic_load_ubo_indirect:
+      case nir_intrinsic_load_ssbo:
+      case nir_intrinsic_load_ssbo_indirect:
       case nir_intrinsic_load_input:
       case nir_intrinsic_load_input_indirect:
          return true;
-- 
cgit v1.2.3


From 5b186aafe7a8d3f96a99ad2fddd2bff99d99e923 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Mon, 1 Jun 2015 09:28:29 +0200
Subject: i965/nir/fs: Implement nir_intrinsic_load_ssbo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 62 ++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index bcb5e1bc881..a05698e53e2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1567,6 +1567,68 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_load_ssbo_indirect:
+      has_indirect = true;
+      /* fallthrough */
+   case nir_intrinsic_load_ssbo: {
+      assert(devinfo->gen >= 7);
+
+      nir_const_value *const_uniform_block =
+         nir_src_as_const_value(instr->src[0]);
+
+      fs_reg surf_index;
+      if (const_uniform_block) {
+         unsigned index = stage_prog_data->binding_table.ubo_start +
+                          const_uniform_block->u[0];
+         surf_index = fs_reg(index);
+         brw_mark_surface_used(prog_data, index);
+      } else {
+         surf_index = vgrf(glsl_type::uint_type);
+         bld.ADD(surf_index, get_nir_src(instr->src[0]),
+                 fs_reg(stage_prog_data->binding_table.ubo_start));
+         surf_index = bld.emit_uniformize(surf_index);
+
+         /* Assume this may touch any UBO. It would be nice to provide
+          * a tighter bound, but the array information is already lowered away.
+          */
+         brw_mark_surface_used(prog_data,
+                               stage_prog_data->binding_table.ubo_start +
+                               shader_prog->NumUniformBlocks - 1);
+      }
+
+      /* Get the offset to read from */
+      fs_reg offset_reg = vgrf(glsl_type::uint_type);
+      unsigned const_offset_bytes = 0;
+      if (has_indirect) {
+         bld.MOV(offset_reg, get_nir_src(instr->src[1]));
+      } else {
+         const_offset_bytes = instr->const_index[0];
+         bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+      }
+
+      /* Read the vector */
+      for (int i = 0; i < instr->num_components; i++) {
+         fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+                                                1 /* dims */, 1 /* size */,
+                                                BRW_PREDICATE_NONE);
+         read_result.type = dest.type;
+         bld.MOV(dest, read_result);
+         dest = offset(dest, bld, 1);
+
+         /* Vector components are stored contiguous in memory */
+         if (i < instr->num_components) {
+            if (!has_indirect) {
+               const_offset_bytes += 4;
+               bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+            } else {
+               bld.ADD(offset_reg, offset_reg, brw_imm_ud(4));
+            }
+         }
+      }
+
+      break;
+   }
+
    case nir_intrinsic_load_input_indirect:
       has_indirect = true;
       /* fallthrough */
-- 
cgit v1.2.3


From e3f9c7829c609e8a32da9f36c9829843f2204a37 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Thu, 10 Sep 2015 12:01:02 +0200
Subject: i965/nir/vec4: Implement nir_intrinsic_load_ssbo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 54 ++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index d67c2d86f91..2b282094946 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -712,6 +712,60 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       break;
    }
 
+   case nir_intrinsic_load_ssbo_indirect:
+      has_indirect = true;
+      /* fallthrough */
+   case nir_intrinsic_load_ssbo: {
+      assert(devinfo->gen >= 7);
+
+      nir_const_value *const_uniform_block =
+         nir_src_as_const_value(instr->src[0]);
+
+      src_reg surf_index;
+      if (const_uniform_block) {
+         unsigned index = prog_data->base.binding_table.ubo_start +
+                          const_uniform_block->u[0];
+         surf_index = src_reg(index);
+
+         brw_mark_surface_used(&prog_data->base, index);
+      } else {
+         surf_index = src_reg(this, glsl_type::uint_type);
+         emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], 1),
+                  src_reg(prog_data->base.binding_table.ubo_start)));
+         surf_index = emit_uniformize(surf_index);
+
+         /* Assume this may touch any UBO. It would be nice to provide
+          * a tighter bound, but the array information is already lowered away.
+          */
+         brw_mark_surface_used(&prog_data->base,
+                               prog_data->base.binding_table.ubo_start +
+                               shader_prog->NumUniformBlocks - 1);
+      }
+
+      src_reg offset_reg = src_reg(this, glsl_type::uint_type);
+      unsigned const_offset_bytes = 0;
+      if (has_indirect) {
+         emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[1], 1)));
+      } else {
+         const_offset_bytes = instr->const_index[0];
+         emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
+      }
+
+      /* Read the vector */
+      const vec4_builder bld = vec4_builder(this).at_end()
+         .annotate(current_annotation, base_ir);
+
+      src_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+                                              1 /* dims */, 4 /* size*/,
+                                              BRW_PREDICATE_NONE);
+      dst_reg dest = get_nir_dest(instr->dest);
+      read_result.type = dest.type;
+      read_result.swizzle = brw_swizzle_for_size(instr->num_components);
+      emit(MOV(dest, read_result));
+
+      break;
+   }
+
    case nir_intrinsic_load_vertex_id:
       unreachable("should be lowered by lower_vertex_id()");
 
-- 
cgit v1.2.3


From 475d9c32d1331eb330c8594ed6f84e668157500e Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Tue, 19 May 2015 09:02:06 +0200
Subject: nir/glsl_to_nir: ignore an instruction's dest if it hasn't any
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Connor Abbott <connor.w.abbott@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/nir/glsl_to_nir.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index d1e2488a69e..f8f3fd85bd4 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -1036,7 +1036,8 @@ nir_visitor::add_instr(nir_instr *instr, unsigned num_components)
 {
    nir_dest *dest = get_instr_dest(instr);
 
-   nir_ssa_dest_init(instr, dest, num_components, NULL);
+   if (dest)
+      nir_ssa_dest_init(instr, dest, num_components, NULL);
 
    nir_instr_insert_after_cf_list(this->cf_node_list, instr);
    this->result = instr;
-- 
cgit v1.2.3


From 586142658e2927a68ce02c44d3bdcaa628cac717 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Wed, 23 Sep 2015 09:04:53 +0200
Subject: glsl: atomic counters can be declared as buffer-qualified variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/ast_to_hir.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index d6071ef59fc..97c6350b1f0 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2789,7 +2789,7 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
    }
 
    if (var->type->contains_atomic()) {
-      if (var->data.mode == ir_var_uniform) {
+      if (var->data.mode == ir_var_uniform || var->data.mode == ir_var_shader_storage) {
          if (var->data.explicit_binding) {
             unsigned *offset =
                &state->atomic_counter_offsets[var->data.binding];
@@ -2807,8 +2807,8 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
          }
       } else if (var->data.mode != ir_var_function_in) {
          _mesa_glsl_error(loc, state, "atomic counters may only be declared as "
-                          "function parameters or uniform-qualified "
-                          "global variables");
+                          "function parameters, uniform-qualified or "
+                          "buffer-qualified global variables");
       }
    }
 
-- 
cgit v1.2.3


From 2cacebaad3da88853e9b0d19522d24e11a7f4f91 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Thu, 23 Apr 2015 11:06:12 +0200
Subject: glsl: Rename atomic counter functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Shader Storage Buffer Objects will add new atomic functions that are not
associated with counters, so it is better to have the atomic
counter-specific functions explicitly include the word "counter" in
their names.

Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/builtin_functions.cpp | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'src')

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 7e77c9321b3..ede9dd867df 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -737,9 +737,9 @@ private:
    B1(interpolateAtOffset)
    B1(interpolateAtSample)
 
-   ir_function_signature *_atomic_intrinsic(builtin_available_predicate avail);
-   ir_function_signature *_atomic_op(const char *intrinsic,
-                                     builtin_available_predicate avail);
+   ir_function_signature *_atomic_counter_intrinsic(builtin_available_predicate avail);
+   ir_function_signature *_atomic_counter_op(const char *intrinsic,
+                                             builtin_available_predicate avail);
 
    B1(min3)
    B1(max3)
@@ -872,13 +872,13 @@ void
 builtin_builder::create_intrinsics()
 {
    add_function("__intrinsic_atomic_read",
-                _atomic_intrinsic(shader_atomic_counters),
+                _atomic_counter_intrinsic(shader_atomic_counters),
                 NULL);
    add_function("__intrinsic_atomic_increment",
-                _atomic_intrinsic(shader_atomic_counters),
+                _atomic_counter_intrinsic(shader_atomic_counters),
                 NULL);
    add_function("__intrinsic_atomic_predecrement",
-                _atomic_intrinsic(shader_atomic_counters),
+                _atomic_counter_intrinsic(shader_atomic_counters),
                 NULL);
 
    add_image_functions(false);
@@ -2541,16 +2541,16 @@ builtin_builder::create_builtins()
                 NULL);
 
    add_function("atomicCounter",
-                _atomic_op("__intrinsic_atomic_read",
-                           shader_atomic_counters),
+                _atomic_counter_op("__intrinsic_atomic_read",
+                                   shader_atomic_counters),
                 NULL);
    add_function("atomicCounterIncrement",
-                _atomic_op("__intrinsic_atomic_increment",
-                           shader_atomic_counters),
+                _atomic_counter_op("__intrinsic_atomic_increment",
+                                   shader_atomic_counters),
                 NULL);
    add_function("atomicCounterDecrement",
-                _atomic_op("__intrinsic_atomic_predecrement",
-                           shader_atomic_counters),
+                _atomic_counter_op("__intrinsic_atomic_predecrement",
+                                   shader_atomic_counters),
                 NULL);
 
    add_function("min3",
@@ -4841,7 +4841,7 @@ builtin_builder::_interpolateAtSample(const glsl_type *type)
 }
 
 ir_function_signature *
-builtin_builder::_atomic_intrinsic(builtin_available_predicate avail)
+builtin_builder::_atomic_counter_intrinsic(builtin_available_predicate avail)
 {
    ir_variable *counter = in_var(glsl_type::atomic_uint_type, "counter");
    MAKE_INTRINSIC(glsl_type::uint_type, avail, 1, counter);
@@ -4849,8 +4849,8 @@ builtin_builder::_atomic_intrinsic(builtin_available_predicate avail)
 }
 
 ir_function_signature *
-builtin_builder::_atomic_op(const char *intrinsic,
-                            builtin_available_predicate avail)
+builtin_builder::_atomic_counter_op(const char *intrinsic,
+                                    builtin_available_predicate avail)
 {
    ir_variable *counter = in_var(glsl_type::atomic_uint_type, "atomic_counter");
    MAKE_SIG(glsl_type::uint_type, avail, 1, counter);
-- 
cgit v1.2.3


From ea0a1f5beb22982a886ba862ba95f92c9e35165a Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Thu, 23 Apr 2015 11:21:54 +0200
Subject: glsl: Add atomic functions from ARB_shader_storage_buffer_object
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/builtin_functions.cpp | 185 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 185 insertions(+)

(limited to 'src')

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index ede9dd867df..f0f6be21b7d 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -400,6 +400,12 @@ shader_atomic_counters(const _mesa_glsl_parse_state *state)
    return state->has_atomic_counters();
 }
 
+static bool
+shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
+{
+   return state->ARB_shader_storage_buffer_object_enable;
+}
+
 static bool
 shader_trinary_minmax(const _mesa_glsl_parse_state *state)
 {
@@ -741,6 +747,17 @@ private:
    ir_function_signature *_atomic_counter_op(const char *intrinsic,
                                              builtin_available_predicate avail);
 
+   ir_function_signature *_atomic_ssbo_intrinsic2(builtin_available_predicate avail,
+                                                  const glsl_type *type);
+   ir_function_signature *_atomic_ssbo_op2(const char *intrinsic,
+                                           builtin_available_predicate avail,
+                                           const glsl_type *type);
+   ir_function_signature *_atomic_ssbo_intrinsic3(builtin_available_predicate avail,
+                                                  const glsl_type *type);
+   ir_function_signature *_atomic_ssbo_op3(const char *intrinsic,
+                                           builtin_available_predicate avail,
+                                           const glsl_type *type);
+
    B1(min3)
    B1(max3)
    B1(mid3)
@@ -881,6 +898,55 @@ builtin_builder::create_intrinsics()
                 _atomic_counter_intrinsic(shader_atomic_counters),
                 NULL);
 
+   add_function("__intrinsic_ssbo_atomic_add",
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::uint_type),
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::int_type),
+                NULL);
+   add_function("__intrinsic_ssbo_atomic_min",
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::uint_type),
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::int_type),
+                NULL);
+   add_function("__intrinsic_ssbo_atomic_max",
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::uint_type),
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::int_type),
+                NULL);
+   add_function("__intrinsic_ssbo_atomic_and",
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::uint_type),
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::int_type),
+                NULL);
+   add_function("__intrinsic_ssbo_atomic_or",
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::uint_type),
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::int_type),
+                NULL);
+   add_function("__intrinsic_ssbo_atomic_xor",
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::uint_type),
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::int_type),
+                NULL);
+   add_function("__intrinsic_ssbo_atomic_exchange",
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::uint_type),
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::int_type),
+                NULL);
+   add_function("__intrinsic_ssbo_atomic_comp_swap",
+                _atomic_ssbo_intrinsic3(shader_storage_buffer_object,
+                                        glsl_type::uint_type),
+                _atomic_ssbo_intrinsic3(shader_storage_buffer_object,
+                                        glsl_type::int_type),
+                NULL);
+
    add_image_functions(false);
 
    add_function("__intrinsic_memory_barrier",
@@ -2553,6 +2619,71 @@ builtin_builder::create_builtins()
                                    shader_atomic_counters),
                 NULL);
 
+   add_function("atomicAdd",
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_add",
+                                 shader_storage_buffer_object,
+                                 glsl_type::uint_type),
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_add",
+                                 shader_storage_buffer_object,
+                                 glsl_type::int_type),
+                NULL);
+   add_function("atomicMin",
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_min",
+                                 shader_storage_buffer_object,
+                                 glsl_type::uint_type),
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_min",
+                                 shader_storage_buffer_object,
+                                 glsl_type::int_type),
+                NULL);
+   add_function("atomicMax",
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_max",
+                                 shader_storage_buffer_object,
+                                 glsl_type::uint_type),
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_max",
+                                 shader_storage_buffer_object,
+                                 glsl_type::int_type),
+                NULL);
+   add_function("atomicAnd",
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_and",
+                                 shader_storage_buffer_object,
+                                 glsl_type::uint_type),
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_and",
+                                 shader_storage_buffer_object,
+                                 glsl_type::int_type),
+                NULL);
+   add_function("atomicOr",
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_or",
+                                 shader_storage_buffer_object,
+                                 glsl_type::uint_type),
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_or",
+                                 shader_storage_buffer_object,
+                                 glsl_type::int_type),
+                NULL);
+   add_function("atomicXor",
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_xor",
+                                 shader_storage_buffer_object,
+                                 glsl_type::uint_type),
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_xor",
+                                 shader_storage_buffer_object,
+                                 glsl_type::int_type),
+                NULL);
+   add_function("atomicExchange",
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_exchange",
+                                 shader_storage_buffer_object,
+                                 glsl_type::uint_type),
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_exchange",
+                                 shader_storage_buffer_object,
+                                 glsl_type::int_type),
+                NULL);
+   add_function("atomicCompSwap",
+                _atomic_ssbo_op3("__intrinsic_ssbo_atomic_comp_swap",
+                                 shader_storage_buffer_object,
+                                 glsl_type::uint_type),
+                _atomic_ssbo_op3("__intrinsic_ssbo_atomic_comp_swap",
+                                 shader_storage_buffer_object,
+                                 glsl_type::int_type),
+                NULL);
+
    add_function("min3",
                 _min3(glsl_type::float_type),
                 _min3(glsl_type::vec2_type),
@@ -4848,6 +4979,27 @@ builtin_builder::_atomic_counter_intrinsic(builtin_available_predicate avail)
    return sig;
 }
 
+ir_function_signature *
+builtin_builder::_atomic_ssbo_intrinsic2(builtin_available_predicate avail,
+                                         const glsl_type *type)
+{
+   ir_variable *atomic = in_var(type, "atomic");
+   ir_variable *data = in_var(type, "data");
+   MAKE_INTRINSIC(type, avail, 2, atomic, data);
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_atomic_ssbo_intrinsic3(builtin_available_predicate avail,
+                                         const glsl_type *type)
+{
+   ir_variable *atomic = in_var(type, "atomic");
+   ir_variable *data1 = in_var(type, "data1");
+   ir_variable *data2 = in_var(type, "data2");
+   MAKE_INTRINSIC(type, avail, 3, atomic, data1, data2);
+   return sig;
+}
+
 ir_function_signature *
 builtin_builder::_atomic_counter_op(const char *intrinsic,
                                     builtin_available_predicate avail)
@@ -4862,6 +5014,39 @@ builtin_builder::_atomic_counter_op(const char *intrinsic,
    return sig;
 }
 
+ir_function_signature *
+builtin_builder::_atomic_ssbo_op2(const char *intrinsic,
+                                  builtin_available_predicate avail,
+                                  const glsl_type *type)
+{
+   ir_variable *atomic = in_var(type, "atomic_var");
+   ir_variable *data = in_var(type, "atomic_data");
+   MAKE_SIG(type, avail, 2, atomic, data);
+
+   ir_variable *retval = body.make_temp(type, "atomic_retval");
+   body.emit(call(shader->symbols->get_function(intrinsic), retval,
+                  sig->parameters));
+   body.emit(ret(retval));
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_atomic_ssbo_op3(const char *intrinsic,
+                                  builtin_available_predicate avail,
+                                  const glsl_type *type)
+{
+   ir_variable *atomic = in_var(type, "atomic_var");
+   ir_variable *data1 = in_var(type, "atomic_data1");
+   ir_variable *data2 = in_var(type, "atomic_data2");
+   MAKE_SIG(type, avail, 3, atomic, data1, data2);
+
+   ir_variable *retval = body.make_temp(type, "atomic_retval");
+   body.emit(call(shader->symbols->get_function(intrinsic), retval,
+                  sig->parameters));
+   body.emit(ret(retval));
+   return sig;
+}
+
 ir_function_signature *
 builtin_builder::_min3(const glsl_type *type)
 {
-- 
cgit v1.2.3


From da659087b9620805c155c3954f560995ed96d5b4 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Tue, 1 Sep 2015 11:57:07 +0200
Subject: glsl: use ir_rvalue instead of ir_dereference in auxiliary functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In a later commit we will need to handle ir_swizzle nodes too, which are
not an ir_dereference. That can happen, for example, when we pass a
component of an integer vector as argument to any of the SSBO atomic
functions.

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/lower_ubo_reference.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index 4aaa2598a83..ca0be5298a6 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -57,7 +57,7 @@ using namespace ir_builder;
  * thing referenced is row-major.
  */
 static bool
-is_dereferenced_thing_row_major(const ir_dereference *deref)
+is_dereferenced_thing_row_major(const ir_rvalue *deref)
 {
    bool matrix = false;
    const ir_rvalue *ir = deref;
@@ -143,7 +143,7 @@ public:
    ir_visitor_status visit_enter(ir_assignment *ir);
 
    void setup_for_load_or_store(ir_variable *var,
-                                ir_dereference *deref,
+                                ir_rvalue *deref,
                                 ir_rvalue **offset,
                                 unsigned *const_offset,
                                 bool *row_major,
@@ -196,7 +196,7 @@ public:
  * \c UniformBlocks array.
  */
 static const char *
-interface_field_name(void *mem_ctx, char *base_name, ir_dereference *d,
+interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
                      ir_rvalue **nonconst_block_index)
 {
    ir_rvalue *previous_index = NULL;
@@ -255,7 +255,7 @@ interface_field_name(void *mem_ctx, char *base_name, ir_dereference *d,
 
 void
 lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
-                                                     ir_dereference *deref,
+                                                     ir_rvalue *deref,
                                                      ir_rvalue **offset,
                                                      unsigned *const_offset,
                                                      bool *row_major,
-- 
cgit v1.2.3


From d2719b6e4f6bdbbd29ac66903d3d9dad9bd01386 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Wed, 8 Jul 2015 15:59:05 +0200
Subject: glsl: lower SSBO atomic intrinsics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The first argument to SSBO atomics is a reference to a SSBO buffer variable
so we want to compute its block index and offset and provide these values
to an internal version of the intrinsic that takes them instead of the
buffer variable reference.

v2:
- Support single components of integer vectors to be passed in as arguments.
- Get interface packing information from interface's type.

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/lower_ubo_reference.cpp | 159 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 159 insertions(+)

(limited to 'src')

diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index ca0be5298a6..31885cd3dec 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -180,6 +180,10 @@ public:
    unsigned calculate_unsized_array_stride(ir_dereference *deref,
                                            unsigned packing);
 
+   ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir);
+   ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir);
+   ir_visitor_status visit_enter(ir_call *ir);
+
    void *mem_ctx;
    struct gl_shader *shader;
    struct gl_uniform_buffer_variable *ubo_var;
@@ -242,7 +246,12 @@ interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
 
          break;
       }
+      case ir_type_swizzle: {
+         ir_swizzle *s = (ir_swizzle *) d;
 
+         d = s->val->as_dereference();
+         break;
+      }
       default:
          assert(!"Should not get here.");
          break;
@@ -427,6 +436,16 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
          break;
       }
 
+      case ir_type_swizzle: {
+         ir_swizzle *deref_swizzle = (ir_swizzle *) deref;
+
+         assert(deref_swizzle->mask.num_components == 1);
+
+         *const_offset += deref_swizzle->mask.x * sizeof(int);
+         deref = deref_swizzle->val->as_dereference();
+         break;
+      }
+
       default:
          assert(!"not reached");
          deref = NULL;
@@ -1014,6 +1033,146 @@ lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
    return rvalue_visit(ir);
 }
 
+/* Lowers the intrinsic call to a new internal intrinsic that swaps the
+ * access to the buffer variable in the first parameter by an offset
+ * and block index. This involves creating the new internal intrinsic
+ * (i.e. the new function signature).
+ */
+ir_call *
+lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
+{
+   /* SSBO atomics usually have 2 parameters, the buffer variable and an
+    * integer argument. The exception is CompSwap, which has an additional
+    * integer parameter.
+    */
+   int param_count = ir->actual_parameters.length();
+   assert(param_count == 2 || param_count == 3);
+
+   /* First argument must be a scalar integer buffer variable */
+   exec_node *param = ir->actual_parameters.get_head();
+   ir_instruction *inst = (ir_instruction *) param;
+   assert(inst->ir_type == ir_type_dereference_variable ||
+          inst->ir_type == ir_type_dereference_array ||
+          inst->ir_type == ir_type_dereference_record ||
+          inst->ir_type == ir_type_swizzle);
+
+   ir_rvalue *deref = (ir_rvalue *) inst;
+   assert(deref->type->is_scalar() && deref->type->is_integer());
+
+   ir_variable *var = deref->variable_referenced();
+   assert(var);
+
+   /* Compute the offset to the start of the dereference and the
+    * block index
+    */
+   mem_ctx = ralloc_parent(shader->ir);
+
+   ir_rvalue *offset = NULL;
+   unsigned const_offset;
+   bool row_major;
+   int matrix_columns;
+   unsigned packing = var->get_interface_type()->interface_packing;
+
+   setup_for_load_or_store(var, deref,
+                           &offset, &const_offset,
+                           &row_major, &matrix_columns,
+                           packing);
+   assert(offset);
+   assert(!row_major);
+   assert(matrix_columns == 1);
+
+   ir_rvalue *deref_offset =
+      add(offset, new(mem_ctx) ir_constant(const_offset));
+   ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);
+
+   /* Create the new internal function signature that will take a block
+    * index and offset instead of a buffer variable
+    */
+   exec_list sig_params;
+   ir_variable *sig_param = new(mem_ctx)
+      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
+   sig_params.push_tail(sig_param);
+
+   sig_param = new(mem_ctx)
+      ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
+   sig_params.push_tail(sig_param);
+
+   const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
+      glsl_type::int_type : glsl_type::uint_type;
+   param = param->get_next();
+   sig_param = new(mem_ctx)
+         ir_variable(type, "data1", ir_var_function_in);
+   sig_params.push_tail(sig_param);
+
+   if (param_count == 3) {
+      param = param->get_next();
+      sig_param = new(mem_ctx)
+            ir_variable(type, "data2", ir_var_function_in);
+      sig_params.push_tail(sig_param);
+   }
+
+   ir_function_signature *sig =
+      new(mem_ctx) ir_function_signature(deref->type,
+                                         shader_storage_buffer_object);
+   assert(sig);
+   sig->replace_parameters(&sig_params);
+   sig->is_intrinsic = true;
+
+   char func_name[64];
+   sprintf(func_name, "%s_internal", ir->callee_name());
+   ir_function *f = new(mem_ctx) ir_function(func_name);
+   f->add_signature(sig);
+
+   /* Now, create the call to the internal intrinsic */
+   exec_list call_params;
+   call_params.push_tail(block_index);
+   call_params.push_tail(deref_offset);
+   param = ir->actual_parameters.get_head()->get_next();
+   ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
+   call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
+   if (param_count == 3) {
+      param = param->get_next();
+      param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
+      call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
+   }
+   ir_dereference_variable *return_deref =
+      ir->return_deref->clone(mem_ctx, NULL);
+   return new(mem_ctx) ir_call(sig, return_deref, &call_params);
+}
+
+ir_call *
+lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
+{
+   const char *callee = ir->callee_name();
+   if (!strcmp("__intrinsic_ssbo_atomic_add", callee) ||
+       !strcmp("__intrinsic_ssbo_atomic_min", callee) ||
+       !strcmp("__intrinsic_ssbo_atomic_max", callee) ||
+       !strcmp("__intrinsic_ssbo_atomic_and", callee) ||
+       !strcmp("__intrinsic_ssbo_atomic_or", callee) ||
+       !strcmp("__intrinsic_ssbo_atomic_xor", callee) ||
+       !strcmp("__intrinsic_ssbo_atomic_exchange", callee) ||
+       !strcmp("__intrinsic_ssbo_atomic_comp_swap", callee)) {
+      return lower_ssbo_atomic_intrinsic(ir);
+   }
+
+   return ir;
+}
+
+
+ir_visitor_status
+lower_ubo_reference_visitor::visit_enter(ir_call *ir)
+{
+   ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir);
+   if (new_ir != ir) {
+      progress = true;
+      base_ir->replace_with(new_ir);
+      return visit_continue_with_parent;
+   }
+
+   return rvalue_visit(ir);
+}
+
+
 } /* unnamed namespace */
 
 void
-- 
cgit v1.2.3


From 9d5c0be5d518a0ac7381ebe40b1fa2bef9839bdf Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Mon, 1 Jun 2015 09:40:39 +0200
Subject: nir: Implement lowered SSBO atomic intrinsics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The original GLSL IR intrinsics have been lowered to an internal
version that accepts a block index and an offset instead of a
SSBO reference.

v2 (Connor):
  - Document the sources used by the atomic intrinsics.

Reviewed-by: Connor Abbott <connor.w.abbott@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/nir/glsl_to_nir.cpp  | 56 +++++++++++++++++++++++++++++++++++++++++++
 src/glsl/nir/nir_intrinsics.h | 26 ++++++++++++++++++++
 2 files changed, 82 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index f8f3fd85bd4..f03a107a901 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -653,6 +653,22 @@ nir_visitor::visit(ir_call *ir)
          op = nir_intrinsic_store_ssbo;
       } else if (strcmp(ir->callee_name(), "__intrinsic_load_ssbo") == 0) {
          op = nir_intrinsic_load_ssbo;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_add_internal") == 0) {
+         op = nir_intrinsic_ssbo_atomic_add;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_and_internal") == 0) {
+         op = nir_intrinsic_ssbo_atomic_and;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_or_internal") == 0) {
+         op = nir_intrinsic_ssbo_atomic_or;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_xor_internal") == 0) {
+         op = nir_intrinsic_ssbo_atomic_xor;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_min_internal") == 0) {
+         op = nir_intrinsic_ssbo_atomic_min;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_max_internal") == 0) {
+         op = nir_intrinsic_ssbo_atomic_max;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_exchange_internal") == 0) {
+         op = nir_intrinsic_ssbo_atomic_exchange;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_comp_swap_internal") == 0) {
+         op = nir_intrinsic_ssbo_atomic_comp_swap;
       } else {
          unreachable("not reached");
       }
@@ -855,7 +871,47 @@ nir_visitor::visit(ir_call *ir)
                                            &load_ssbo_compare->instr);
             dest = &load_ssbo_compare->dest.dest;
          }
+         break;
+      }
+      case nir_intrinsic_ssbo_atomic_add:
+      case nir_intrinsic_ssbo_atomic_min:
+      case nir_intrinsic_ssbo_atomic_max:
+      case nir_intrinsic_ssbo_atomic_and:
+      case nir_intrinsic_ssbo_atomic_or:
+      case nir_intrinsic_ssbo_atomic_xor:
+      case nir_intrinsic_ssbo_atomic_exchange:
+      case nir_intrinsic_ssbo_atomic_comp_swap: {
+         int param_count = ir->actual_parameters.length();
+         assert(param_count == 3 || param_count == 4);
+
+         /* Block index */
+         exec_node *param = ir->actual_parameters.get_head();
+         ir_instruction *inst = (ir_instruction *) param;
+         instr->src[0] = evaluate_rvalue(inst->as_rvalue());
+
+         /* Offset */
+         param = param->get_next();
+         inst = (ir_instruction *) param;
+         instr->src[1] = evaluate_rvalue(inst->as_rvalue());
+
+         /* data1 parameter (this is always present) */
+         param = param->get_next();
+         inst = (ir_instruction *) param;
+         instr->src[2] = evaluate_rvalue(inst->as_rvalue());
+
+         /* data2 parameter (only with atomic_comp_swap) */
+         if (param_count == 4) {
+            assert(op == nir_intrinsic_ssbo_atomic_comp_swap);
+            param = param->get_next();
+            inst = (ir_instruction *) param;
+            instr->src[3] = evaluate_rvalue(inst->as_rvalue());
+         }
 
+         /* Atomic result */
+         assert(ir->return_deref);
+         nir_ssa_dest_init(&instr->instr, &instr->dest,
+                           ir->return_deref->type->vector_elements, NULL);
+         nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
          break;
       }
       default:
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index 8cab7ea0ae0..06f1b02383a 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -156,6 +156,32 @@ INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0,
 INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,
           NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
+/*
+ * SSBO atomic intrinsics
+ *
+ * All of the SSBO atomic memory operations read a value from memory,
+ * compute a new value using one of the operations below, write the new
+ * value to memory, and return the original value read.
+ *
+ * All operations take 3 sources except CompSwap, which takes 4. These
+ * sources represent:
+ *
+ * 0: The SSBO buffer index.
+ * 1: The offset into the SSBO buffer of the variable that the atomic
+ *    operation will operate on.
+ * 2: The data parameter to the atomic function (i.e. the value to add
+ *    in ssbo_atomic_add, etc).
+ * 3: For CompSwap only: the second data parameter.
+ */
+INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_min, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_max, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0)
+
 #define SYSTEM_VALUE(name, components, num_indices) \
    INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
    NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-- 
cgit v1.2.3


From 14af6f4698a9f60c080b9adda4d3b4c45b157bd7 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Mon, 1 Jun 2015 09:41:47 +0200
Subject: i965/nir/fs: Implement nir_intrinsic_ssbo_atomic_*
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_fs.h       |  2 +
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 77 ++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 58205084549..50ce4155f16 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -256,6 +256,8 @@ public:
                        nir_ssa_undef_instr *instr);
    void nir_emit_intrinsic(const brw::fs_builder &bld,
                            nir_intrinsic_instr *instr);
+   void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
+                             int op, nir_intrinsic_instr *instr);
    void nir_emit_texture(const brw::fs_builder &bld,
                          nir_tex_instr *instr);
    void nir_emit_jump(const brw::fs_builder &bld,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index a05698e53e2..a2bc5c64e4e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1867,6 +1867,37 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_ssbo_atomic_add:
+      nir_emit_ssbo_atomic(bld, BRW_AOP_ADD, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_min:
+      if (dest.type == BRW_REGISTER_TYPE_D)
+         nir_emit_ssbo_atomic(bld, BRW_AOP_IMIN, instr);
+      else
+         nir_emit_ssbo_atomic(bld, BRW_AOP_UMIN, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_max:
+      if (dest.type == BRW_REGISTER_TYPE_D)
+         nir_emit_ssbo_atomic(bld, BRW_AOP_IMAX, instr);
+      else
+         nir_emit_ssbo_atomic(bld, BRW_AOP_UMAX, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_and:
+      nir_emit_ssbo_atomic(bld, BRW_AOP_AND, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_or:
+      nir_emit_ssbo_atomic(bld, BRW_AOP_OR, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_xor:
+      nir_emit_ssbo_atomic(bld, BRW_AOP_XOR, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_exchange:
+      nir_emit_ssbo_atomic(bld, BRW_AOP_MOV, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+      nir_emit_ssbo_atomic(bld, BRW_AOP_CMPWR, instr);
+      break;
+
    case nir_intrinsic_get_buffer_size: {
       nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
       unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
@@ -1896,6 +1927,52 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
    }
 }
 
+void
+fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
+                                 int op, nir_intrinsic_instr *instr)
+{
+   fs_reg dest;
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+      dest = get_nir_dest(instr->dest);
+
+   fs_reg surface;
+   nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
+   if (const_surface) {
+      unsigned surf_index = stage_prog_data->binding_table.ubo_start +
+                            const_surface->u[0];
+      surface = fs_reg(surf_index);
+      brw_mark_surface_used(prog_data, surf_index);
+   } else {
+      surface = vgrf(glsl_type::uint_type);
+      bld.ADD(surface, get_nir_src(instr->src[0]),
+              fs_reg(stage_prog_data->binding_table.ubo_start));
+
+      /* Assume this may touch any UBO. This is the same as we do for other
+       * UBO/SSBO accesses with non-constant surface.
+       */
+      brw_mark_surface_used(prog_data,
+                            stage_prog_data->binding_table.ubo_start +
+                            shader_prog->NumUniformBlocks - 1);
+   }
+
+   fs_reg offset = get_nir_src(instr->src[1]);
+   fs_reg data1 = get_nir_src(instr->src[2]);
+   fs_reg data2;
+   if (op == BRW_AOP_CMPWR)
+      data2 = get_nir_src(instr->src[3]);
+
+   /* Emit the actual atomic operation */
+
+   fs_reg atomic_result =
+      surface_access::emit_untyped_atomic(bld, surface, offset,
+                                          data1, data2,
+                                          1 /* dims */, 1 /* rsize */,
+                                          op,
+                                          BRW_PREDICATE_NONE);
+   dest.type = atomic_result.type;
+   bld.MOV(dest, atomic_result);
+}
+
 void
 fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
 {
-- 
cgit v1.2.3


From 5ef169034c77ede86546d8dc42f7f22abcd6faa0 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Fri, 7 Aug 2015 11:31:13 +0200
Subject: i965/nir/vec4: Implement nir_intrinsic_ssbo_atomic_*
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_vec4.h       |  1 +
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 78 ++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index ed711902de4..ac9bd4a11fa 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -424,6 +424,7 @@ public:
    virtual void nir_emit_jump(nir_jump_instr *instr);
    virtual void nir_emit_texture(nir_tex_instr *instr);
    virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
+   virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
 
    dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
    dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 2b282094946..2d2e5753eff 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -766,6 +766,37 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       break;
    }
 
+   case nir_intrinsic_ssbo_atomic_add:
+      nir_emit_ssbo_atomic(BRW_AOP_ADD, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_min:
+      if (dest.type == BRW_REGISTER_TYPE_D)
+         nir_emit_ssbo_atomic(BRW_AOP_IMIN, instr);
+      else
+         nir_emit_ssbo_atomic(BRW_AOP_UMIN, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_max:
+      if (dest.type == BRW_REGISTER_TYPE_D)
+         nir_emit_ssbo_atomic(BRW_AOP_IMAX, instr);
+      else
+         nir_emit_ssbo_atomic(BRW_AOP_UMAX, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_and:
+      nir_emit_ssbo_atomic(BRW_AOP_AND, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_or:
+      nir_emit_ssbo_atomic(BRW_AOP_OR, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_xor:
+      nir_emit_ssbo_atomic(BRW_AOP_XOR, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_exchange:
+      nir_emit_ssbo_atomic(BRW_AOP_MOV, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+      nir_emit_ssbo_atomic(BRW_AOP_CMPWR, instr);
+      break;
+
    case nir_intrinsic_load_vertex_id:
       unreachable("should be lowered by lower_vertex_id()");
 
@@ -895,6 +926,53 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    }
 }
 
+void
+vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr)
+{
+   dst_reg dest;
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+      dest = get_nir_dest(instr->dest);
+
+   src_reg surface;
+   nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
+   if (const_surface) {
+      unsigned surf_index = prog_data->base.binding_table.ubo_start +
+                            const_surface->u[0];
+      surface = src_reg(surf_index);
+      brw_mark_surface_used(&prog_data->base, surf_index);
+   } else {
+      surface = src_reg(this, glsl_type::uint_type);
+      emit(ADD(dst_reg(surface), get_nir_src(instr->src[0]),
+               src_reg(prog_data->base.binding_table.ubo_start)));
+
+      /* Assume this may touch any UBO. This is the same as we do for other
+       * UBO/SSBO accesses with non-constant surface.
+       */
+      brw_mark_surface_used(&prog_data->base,
+                            prog_data->base.binding_table.ubo_start +
+                            shader_prog->NumUniformBlocks - 1);
+   }
+
+   src_reg offset = get_nir_src(instr->src[1], 1);
+   src_reg data1 = get_nir_src(instr->src[2], 1);
+   src_reg data2;
+   if (op == BRW_AOP_CMPWR)
+      data2 = get_nir_src(instr->src[3], 1);
+
+   /* Emit the actual atomic operation */
+   const vec4_builder bld =
+      vec4_builder(this).at_end().annotate(current_annotation, base_ir);
+
+   src_reg atomic_result =
+      surface_access::emit_untyped_atomic(bld, surface, offset,
+                                          data1, data2,
+                                          1 /* dims */, 1 /* rsize */,
+                                          op,
+                                          BRW_PREDICATE_NONE);
+   dest.type = atomic_result.type;
+   bld.MOV(dest, atomic_result);
+}
+
 static unsigned
 brw_swizzle_for_nir_swizzle(uint8_t swizzle[4])
 {
-- 
cgit v1.2.3


From a07d0c26574203415da343718d906375accf95b3 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Fri, 24 Apr 2015 12:34:00 +0200
Subject: glsl: First argument to atomic functions must be a buffer variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2:
  - Add "ssbo" to the names of the static functions so it is clear that this
    is specific to SSBO atomics.

v3:
  - Move the check after the loop (Kristian Høgsberg)

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/ast_function.cpp | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

(limited to 'src')

diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index ff5ecb954f0..26d4c62ce36 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -142,6 +142,31 @@ verify_image_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state,
    return true;
 }
 
+static bool
+verify_first_atomic_ssbo_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state,
+                                   ir_variable *var)
+{
+   if (!var || !var->is_in_shader_storage_block()) {
+      _mesa_glsl_error(loc, state, "First argument to atomic function "
+                       "must be a buffer variable");
+      return false;
+   }
+   return true;
+}
+
+static bool
+is_atomic_ssbo_function(const char *func_name)
+{
+   return !strcmp(func_name, "atomicAdd") ||
+          !strcmp(func_name, "atomicMin") ||
+          !strcmp(func_name, "atomicMax") ||
+          !strcmp(func_name, "atomicAnd") ||
+          !strcmp(func_name, "atomicOr") ||
+          !strcmp(func_name, "atomicXor") ||
+          !strcmp(func_name, "atomicExchange") ||
+          !strcmp(func_name, "atomicCompSwap");
+}
+
 /**
  * Verify that 'out' and 'inout' actual parameters are lvalues.  Also, verify
  * that 'const_in' formal parameters (an extension in our IR) correspond to
@@ -256,6 +281,23 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,
       actual_ir_node  = actual_ir_node->next;
       actual_ast_node = actual_ast_node->next;
    }
+
+   /* The first parameter of atomic functions must be a buffer variable */
+   const char *func_name = sig->function_name();
+   bool is_atomic_ssbo = is_atomic_ssbo_function(func_name);
+   if (is_atomic_ssbo) {
+      const ir_rvalue *const actual = (ir_rvalue *) actual_ir_parameters.head;
+
+      const ast_expression *const actual_ast =
+         exec_node_data(ast_expression, actual_ast_parameters.head, link);
+      YYLTYPE loc = actual_ast->get_location();
+
+      if (!verify_first_atomic_ssbo_parameter(&loc, state,
+                                              actual->variable_referenced())) {
+         return false;
+      }
+   }
+
    return true;
 }
 
-- 
cgit v1.2.3


From 4b7b1cf3c0b1682d5ae8983df2bbf5dea50081cd Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Tue, 21 Apr 2015 16:58:29 +0200
Subject: mesa: add glShaderStorageBlockBinding()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Defined in ARB_shader_storage_buffer_object extension.

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/main/uniforms.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/mesa/main/uniforms.h |  4 ++++
 2 files changed, 56 insertions(+)

(limited to 'src')

diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index 973b877befb..c491707e937 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -1036,6 +1036,58 @@ _mesa_UniformBlockBinding(GLuint program,
    }
 }
 
+void GLAPIENTRY
+_mesa_ShaderStorageBlockBinding(GLuint program,
+			        GLuint shaderStorageBlockIndex,
+			        GLuint shaderStorageBlockBinding)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader_program *shProg;
+
+   if (!ctx->Extensions.ARB_shader_storage_buffer_object) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glShaderStorageBlockBinding");
+      return;
+   }
+
+   shProg = _mesa_lookup_shader_program_err(ctx, program,
+					    "glShaderStorageBlockBinding");
+   if (!shProg)
+      return;
+
+   if (shaderStorageBlockIndex >= shProg->NumUniformBlocks) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+		  "glShaderStorageBlockBinding(block index %u >= %u)",
+		  shaderStorageBlockIndex, shProg->NumUniformBlocks);
+      return;
+   }
+
+   if (shaderStorageBlockBinding >= ctx->Const.MaxShaderStorageBufferBindings) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+		  "glShaderStorageBlockBinding(block binding %u >= %u)",
+		  shaderStorageBlockBinding,
+                  ctx->Const.MaxShaderStorageBufferBindings);
+      return;
+   }
+
+   if (shProg->UniformBlocks[shaderStorageBlockIndex].Binding !=
+       shaderStorageBlockBinding) {
+      int i;
+
+      FLUSH_VERTICES(ctx, 0);
+      ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer;
+
+      shProg->UniformBlocks[shaderStorageBlockIndex].Binding = shaderStorageBlockBinding;
+
+      for (i = 0; i < MESA_SHADER_STAGES; i++) {
+	 int stage_index = shProg->UniformBlockStageIndex[i][shaderStorageBlockIndex];
+
+	 if (stage_index != -1) {
+	    struct gl_shader *sh = shProg->_LinkedShaders[i];
+	    sh->UniformBlocks[stage_index].Binding = shaderStorageBlockBinding;
+	 }
+      }
+   }
+}
 
 /**
  * Generic program resource property query.
diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h
index c3c9c1e7dd8..96172b72aa1 100644
--- a/src/mesa/main/uniforms.h
+++ b/src/mesa/main/uniforms.h
@@ -225,6 +225,10 @@ _mesa_UniformBlockBinding(GLuint program,
 			  GLuint uniformBlockIndex,
 			  GLuint uniformBlockBinding);
 void GLAPIENTRY
+_mesa_ShaderStorageBlockBinding(GLuint program,
+                                GLuint shaderStorageBlockIndex,
+                                GLuint shaderStorageBlockBinding);
+void GLAPIENTRY
 _mesa_GetActiveAtomicCounterBufferiv(GLuint program, GLuint bufferIndex,
                                      GLenum pname, GLint *params);
 void GLAPIENTRY
-- 
cgit v1.2.3


From 2e16dd1350703865104253a9d871e4c0a3257933 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Tue, 28 Apr 2015 10:08:17 +0200
Subject: mesa: Add queries for GL_SHADER_STORAGE_BUFFER
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These handle querying the buffer name attached to a given binding point
as well as the start offset and size of that buffer.

Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/main/get.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'src')

diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index e643d3a5e36..2390850a0af 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -1001,6 +1001,10 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
    case GL_UNIFORM_BUFFER_BINDING:
       v->value_int = ctx->UniformBuffer->Name;
       break;
+   /* GL_ARB_shader_storage_buffer_object */
+   case GL_SHADER_STORAGE_BUFFER_BINDING:
+      v->value_int = ctx->ShaderStorageBuffer->Name;
+      break;
    /* GL_ARB_timer_query */
    case GL_TIMESTAMP:
       if (ctx->Driver.GetTimestamp) {
@@ -1939,6 +1943,33 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
       v->value_int = ctx->UniformBufferBindings[index].Size;
       return TYPE_INT;
 
+   /* ARB_shader_storage_buffer_object */
+   case GL_SHADER_STORAGE_BUFFER_BINDING:
+      if (!ctx->Extensions.ARB_shader_storage_buffer_object)
+         goto invalid_enum;
+      if (index >= ctx->Const.MaxShaderStorageBufferBindings)
+         goto invalid_value;
+      v->value_int = ctx->ShaderStorageBufferBindings[index].BufferObject->Name;
+      return TYPE_INT;
+
+   case GL_SHADER_STORAGE_BUFFER_START:
+      if (!ctx->Extensions.ARB_shader_storage_buffer_object)
+         goto invalid_enum;
+      if (index >= ctx->Const.MaxShaderStorageBufferBindings)
+         goto invalid_value;
+      v->value_int = ctx->ShaderStorageBufferBindings[index].Offset < 0 ? 0 :
+                     ctx->ShaderStorageBufferBindings[index].Offset;
+      return TYPE_INT;
+
+   case GL_SHADER_STORAGE_BUFFER_SIZE:
+      if (!ctx->Extensions.ARB_shader_storage_buffer_object)
+         goto invalid_enum;
+      if (index >= ctx->Const.MaxShaderStorageBufferBindings)
+         goto invalid_value;
+      v->value_int = ctx->ShaderStorageBufferBindings[index].Size < 0 ? 0 :
+                     ctx->ShaderStorageBufferBindings[index].Size;
+      return TYPE_INT;
+
    /* ARB_texture_multisample / GL3.2 */
    case GL_SAMPLE_MASK_VALUE:
       if (index != 0)
-- 
cgit v1.2.3


From 3b2037f88c974ce6b194f099db32716f152b15e7 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Thu, 14 May 2015 07:39:52 +0200
Subject: glsl: fix UNIFORM_BUFFER_START or UNIFORM_BUFFER_SIZE query when no
 buffer object is bound
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

According to ARB_uniform_buffer_object spec:

"If the parameter (starting offset or size) was not specified when the
 buffer object was bound (e.g. if bound with BindBufferBase), or if no
 buffer object is bound to <index>, zero is returned."

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/main/get.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 2390850a0af..77184ee7986 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -1932,7 +1932,8 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
 	 goto invalid_value;
       if (!ctx->Extensions.ARB_uniform_buffer_object)
 	 goto invalid_enum;
-      v->value_int = ctx->UniformBufferBindings[index].Offset;
+      v->value_int = ctx->UniformBufferBindings[index].Offset < 0 ? 0 :
+                     ctx->UniformBufferBindings[index].Offset;
       return TYPE_INT;
 
    case GL_UNIFORM_BUFFER_SIZE:
@@ -1940,7 +1941,8 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
 	 goto invalid_value;
       if (!ctx->Extensions.ARB_uniform_buffer_object)
 	 goto invalid_enum;
-      v->value_int = ctx->UniformBufferBindings[index].Size;
+      v->value_int = ctx->UniformBufferBindings[index].Size < 0 ? 0 :
+                     ctx->UniformBufferBindings[index].Size;
       return TYPE_INT;
 
    /* ARB_shader_storage_buffer_object */
-- 
cgit v1.2.3


From f4c8c01a3db984a08dc02b72c2904b0532e0847f Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Tue, 28 Apr 2015 11:27:24 +0200
Subject: glsl: Allow use of memory qualifiers with
 ARB_shader_storage_buffer_object.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/glsl_lexer.ll | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/glsl/glsl_lexer.ll b/src/glsl/glsl_lexer.ll
index 90e84ed1bfe..21428177c97 100644
--- a/src/glsl/glsl_lexer.ll
+++ b/src/glsl/glsl_lexer.ll
@@ -406,11 +406,11 @@ image2DShadow           KEYWORD(130, 300, 0, 0, IMAGE2DSHADOW);
 image1DArrayShadow      KEYWORD(130, 300, 0, 0, IMAGE1DARRAYSHADOW);
 image2DArrayShadow      KEYWORD(130, 300, 0, 0, IMAGE2DARRAYSHADOW);
 
-coherent       KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, COHERENT);
-volatile       KEYWORD_WITH_ALT(110, 100, 420, 310, yyextra->ARB_shader_image_load_store_enable, VOLATILE);
-restrict       KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, RESTRICT);
-readonly       KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, READONLY);
-writeonly      KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, WRITEONLY);
+coherent       KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, COHERENT);
+volatile       KEYWORD_WITH_ALT(110, 100, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, VOLATILE);
+restrict       KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, RESTRICT);
+readonly       KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, READONLY);
+writeonly      KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, WRITEONLY);
 
 atomic_uint     KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_atomic_counters_enable, ATOMIC_UINT);
 
-- 
cgit v1.2.3


From f1b647fdd1028bf475ed258c4dd8b833339ec796 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Tue, 28 Apr 2015 12:09:58 +0200
Subject: glsl: Apply memory qualifiers to buffer variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2:
  - Save memory qualifier info in the top level members of a shader
    storage block.
  - Add checks to record_compare(), which is used when comparing
    shader storage buffer declarations in different shaders.
  - Always report an error for incompatible readonly/writeonly
    definitions, whether they are present at block or field level.

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/ast_to_hir.cpp | 63 ++++++++++++++++++++++++++++++++++++++++++++++---
 src/glsl/glsl_types.cpp | 20 ++++++++++++++++
 src/glsl/glsl_types.h   | 11 +++++++++
 3 files changed, 91 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 97c6350b1f0..0b8b4016a7e 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -5606,10 +5606,19 @@ ast_process_structure_or_interface_block(exec_list *instructions,
                                          bool is_interface,
                                          enum glsl_matrix_layout matrix_layout,
                                          bool allow_reserved_names,
-                                         ir_variable_mode var_mode)
+                                         ir_variable_mode var_mode,
+                                         ast_type_qualifier *layout)
 {
    unsigned decl_count = 0;
 
+   /* For blocks that accept memory qualifiers (i.e. shader storage), verify
+    * that we don't have incompatible qualifiers
+    */
+   if (layout && layout->flags.q.read_only && layout->flags.q.write_only) {
+      _mesa_glsl_error(&loc, state,
+                       "Interface block sets both readonly and writeonly");
+   }
+
    /* Make an initial pass over the list of fields to determine how
     * many there are.  Each element in this list is an ast_declarator_list.
     * This means that we actually need to count the number of elements in the
@@ -5771,6 +5780,44 @@ ast_process_structure_or_interface_block(exec_list *instructions,
                    || fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR);
          }
 
+         /* Image qualifiers are allowed on buffer variables, which can only
+          * be defined inside shader storage buffer objects
+          */
+         if (layout && var_mode == ir_var_shader_storage) {
+            if (qual->flags.q.read_only && qual->flags.q.write_only) {
+               _mesa_glsl_error(&loc, state,
+                                "buffer variable `%s' can't be "
+                                "readonly and writeonly.", fields[i].name);
+            }
+
+            /* For readonly and writeonly qualifiers the field definition,
+             * if set, overwrites the layout qualifier.
+             */
+            bool read_only = layout->flags.q.read_only;
+            bool write_only = layout->flags.q.write_only;
+
+            if (qual->flags.q.read_only) {
+               read_only = true;
+               write_only = false;
+            } else if (qual->flags.q.write_only) {
+               read_only = false;
+               write_only = true;
+            }
+
+            fields[i].image_read_only = read_only;
+            fields[i].image_write_only = write_only;
+
+            /* For other qualifiers, we set the flag if either the layout
+             * qualifier or the field qualifier are set
+             */
+            fields[i].image_coherent = qual->flags.q.coherent ||
+                                        layout->flags.q.coherent;
+            fields[i].image_volatile = qual->flags.q._volatile ||
+                                        layout->flags.q._volatile;
+            fields[i].image_restrict = qual->flags.q.restrict_flag ||
+                                        layout->flags.q.restrict_flag;
+         }
+
          i++;
       }
    }
@@ -5825,7 +5872,8 @@ ast_struct_specifier::hir(exec_list *instructions,
                                                false,
                                                GLSL_MATRIX_LAYOUT_INHERITED,
                                                false /* allow_reserved_names */,
-                                               ir_var_auto);
+                                               ir_var_auto,
+                                               NULL);
 
    validate_identifier(this->name, loc, state);
 
@@ -5980,7 +6028,8 @@ ast_interface_block::hir(exec_list *instructions,
                                                true,
                                                matrix_layout,
                                                redeclaring_per_vertex,
-                                               var_mode);
+                                               var_mode,
+                                               &this->layout);
 
    state->struct_specifier_depth--;
 
@@ -6364,6 +6413,14 @@ ast_interface_block::hir(exec_list *instructions,
 
          var->data.stream = this->layout.stream;
 
+         if (var->data.mode == ir_var_shader_storage) {
+            var->data.image_read_only = fields[i].image_read_only;
+            var->data.image_write_only = fields[i].image_write_only;
+            var->data.image_coherent = fields[i].image_coherent;
+            var->data.image_volatile = fields[i].image_volatile;
+            var->data.image_restrict = fields[i].image_restrict;
+         }
+
          /* Examine var name here since var may get deleted in the next call */
          bool var_is_gl_id = is_gl_identifier(var->name);
 
diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
index 93034a67f01..0ead0f2a327 100644
--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -124,6 +124,11 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
       this->fields.structure[i].sample = fields[i].sample;
       this->fields.structure[i].matrix_layout = fields[i].matrix_layout;
       this->fields.structure[i].patch = fields[i].patch;
+      this->fields.structure[i].image_read_only = fields[i].image_read_only;
+      this->fields.structure[i].image_write_only = fields[i].image_write_only;
+      this->fields.structure[i].image_coherent = fields[i].image_coherent;
+      this->fields.structure[i].image_volatile = fields[i].image_volatile;
+      this->fields.structure[i].image_restrict = fields[i].image_restrict;
    }
 
    mtx_unlock(&glsl_type::mutex);
@@ -760,6 +765,21 @@ glsl_type::record_compare(const glsl_type *b) const
       if (this->fields.structure[i].patch
           != b->fields.structure[i].patch)
          return false;
+      if (this->fields.structure[i].image_read_only
+          != b->fields.structure[i].image_read_only)
+         return false;
+      if (this->fields.structure[i].image_write_only
+          != b->fields.structure[i].image_write_only)
+         return false;
+      if (this->fields.structure[i].image_coherent
+          != b->fields.structure[i].image_coherent)
+         return false;
+      if (this->fields.structure[i].image_volatile
+          != b->fields.structure[i].image_volatile)
+         return false;
+      if (this->fields.structure[i].image_restrict
+          != b->fields.structure[i].image_restrict)
+         return false;
    }
 
    return true;
diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h
index d58d8189e21..23ada15b854 100644
--- a/src/glsl/glsl_types.h
+++ b/src/glsl/glsl_types.h
@@ -810,6 +810,17 @@ struct glsl_struct_field {
     */
    int stream;
 
+
+   /**
+    * Image qualifiers, applicable to buffer variables defined in shader
+    * storage buffer objects (SSBOs)
+    */
+   unsigned image_read_only:1;
+   unsigned image_write_only:1;
+   unsigned image_coherent:1;
+   unsigned image_volatile:1;
+   unsigned image_restrict:1;
+
    glsl_struct_field(const struct glsl_type *_type, const char *_name)
       : type(_type), name(_name), location(-1), interpolation(0), centroid(0),
         sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0),
-- 
cgit v1.2.3


From 6ef82f039c6fc82dc0910e842a47c4a69ab44e12 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Fri, 8 May 2015 09:41:58 +0200
Subject: glsl: Allow memory qualifiers on shader storage buffer blocks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2:
  - Memory qualifiers on shader storage buffer objects do not come in the form
    of layout qualifiers, they are block-level qualifiers.

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/glsl_parser.yy | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'src')

diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 4cb018a5862..f0abeb03215 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -2628,6 +2628,20 @@ interface_block:
       }
       $$ = block;
    }
+   | memory_qualifier interface_block
+   {
+      ast_interface_block *block = (ast_interface_block *)$2;
+
+      if (!block->layout.flags.q.buffer) {
+            _mesa_glsl_error(& @1, state,
+                             "memory qualifiers can only be used in the "
+                             "declaration of shader storage blocks");
+      }
+      if (!block->layout.merge_qualifier(& @1, state, $1)) {
+         YYERROR;
+      }
+      $$ = block;
+   }
    ;
 
 basic_interface_block:
-- 
cgit v1.2.3


From 995a719499d4bd97b27f1e8c7b506202257007b6 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Wed, 5 Aug 2015 10:30:46 +0200
Subject: glsl: Do not allow assignments to read-only buffer variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2:
  - Merge the error check for the readonly qualifier with the already
    existing check for variables flagged as readonly (Timothy).
  - Limit the check to buffer variables, image variables have different
    semantics involved (Curro).

Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/ast_to_hir.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 0b8b4016a7e..cad4c0300d7 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -820,7 +820,16 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
                           "assignment to %s",
                           non_lvalue_description);
          error_emitted = true;
-      } else if (lhs_var != NULL && lhs_var->data.read_only) {
+      } else if (lhs_var != NULL && (lhs_var->data.read_only ||
+                 (lhs_var->data.mode == ir_var_shader_storage &&
+                  lhs_var->data.image_read_only))) {
+         /* We can have image_read_only set on both images and buffer variables,
+          * but in the former there is a distinction between assignments to
+          * the variable itself (read_only) and to the memory they point to
+          * (image_read_only), while in the case of buffer variables there is
+          * no such distinction, that is why this check here is limited to
+          * buffer variables alone.
+          */
          _mesa_glsl_error(&lhs_loc, state,
                           "assignment to read-only variable '%s'",
                           lhs_var->name);
-- 
cgit v1.2.3


From 0f18945cb612493d787377d8cbb138c18738f683 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Tue, 28 Apr 2015 14:25:56 +0200
Subject: glsl: Do not allow reads from write-only buffer variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The error location won't be right, but fixing that would require checking
for this as we process each type of AST node that can involve a variable
read.

v2:
  - Limit the check to buffer variables, image variables have different
    semantics involved.

Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/ast_to_hir.cpp | 56 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

(limited to 'src')

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index cad4c0300d7..6b2e140cf43 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -67,6 +67,48 @@ static void
 remove_per_vertex_blocks(exec_list *instructions,
                          _mesa_glsl_parse_state *state, ir_variable_mode mode);
 
+/**
+ * IR visitor that stops at the first read of a write-only buffer variable
+ * (mode ir_var_shader_storage with image_write_only set) and remembers it.
+ */
+class read_from_write_only_variable_visitor : public ir_hierarchical_visitor
+{
+public:
+   read_from_write_only_variable_visitor() : found(NULL)
+   {
+   }
+   /* Dereferences visited while in_assignee is set are skipped. */
+   virtual ir_visitor_status visit(ir_dereference_variable *ir)
+   {
+      if (this->in_assignee)
+         return visit_continue;
+
+      ir_variable *var = ir->variable_referenced();
+      /* We can have image_write_only set on both images and buffer variables,
+       * but in the former there is a distinction between reads from
+       * the variable itself (write_only) and from the memory they point to
+       * (image_write_only), while in the case of buffer variables there is
+       * no such distinction, that is why this check here is limited to
+       * buffer variables alone.
+       */
+      if (!var || var->data.mode != ir_var_shader_storage)
+         return visit_continue;
+
+      if (var->data.image_write_only) {
+         found = var;
+         return visit_stop;
+      }
+
+      return visit_continue;
+   }
+   /* Returns the variable recorded by visit(), or NULL if there was none. */
+   ir_variable *get_variable() {
+      return found;
+   }
+
+private:
+   ir_variable *found;
+};
 
 void
 _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
@@ -162,6 +204,20 @@ _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
     */
    remove_per_vertex_blocks(instructions, state, ir_var_shader_in);
    remove_per_vertex_blocks(instructions, state, ir_var_shader_out);
+
+   /* Check that we don't have reads from write-only variables */
+   read_from_write_only_variable_visitor v;
+   v.run(instructions);
+   ir_variable *error_var = v.get_variable();
+   if (error_var) {
+      /* It would be nice to have proper location information, but for that
+       * we would need to check this as we process each kind of AST node
+       */
+      YYLTYPE loc;
+      memset(&loc, 0, sizeof(loc));
+      _mesa_glsl_error(&loc, state, "Read from write-only variable `%s'",
+                       error_var->name);
+   }
 }
 
 
-- 
cgit v1.2.3


From 9b477ad49d3f82503a1b8ba23dedfc05cd848fe8 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Wed, 6 May 2015 08:11:02 +0200
Subject: main: Add SHADER_STORAGE_BLOCK and BUFFER_VARIABLE support for
 ARB_program_interface_query
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Including TOP_LEVEL_ARRAY_SIZE and TOP_LEVEL_ARRAY_STRIDE queries.

v2:
- Use std430_array_stride() to get top level array stride following std430's rules.

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/ir_uniform.h            |   5 +
 src/glsl/link_uniforms.cpp       |   3 +
 src/glsl/linker.cpp              |  10 +-
 src/mesa/main/program_resource.c |   7 +-
 src/mesa/main/shader_query.cpp   | 265 +++++++++++++++++++++++++++++++++++++--
 5 files changed, 278 insertions(+), 12 deletions(-)

(limited to 'src')

diff --git a/src/glsl/ir_uniform.h b/src/glsl/ir_uniform.h
index 0b6f7201a20..858a7da6bb9 100644
--- a/src/glsl/ir_uniform.h
+++ b/src/glsl/ir_uniform.h
@@ -194,6 +194,11 @@ struct gl_uniform_storage {
     * This is a built-in uniform that should not be modified through any gl API.
     */
    bool builtin;
+
+   /**
+    * This is a shader storage buffer variable, not a uniform.
+    */
+   bool is_shader_storage;
 };
 
 #ifdef __cplusplus
diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index 50a80732d73..1c901e2cecb 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -794,6 +794,9 @@ private:
       if (!this->uniforms[id].builtin)
          this->uniforms[id].storage = this->values;
 
+      this->uniforms[id].is_shader_storage =
+         current_var->is_in_shader_storage_block();
+
       if (this->ubo_block_index != -1) {
          this->uniforms[id].block_index = this->ubo_block_index;
 
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index aebf2560dab..9d419ac9d39 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -3406,14 +3406,18 @@ build_program_resource_list(struct gl_shader_program *shProg)
          }
       }
 
-      if (!add_program_resource(shProg, GL_UNIFORM,
+      bool is_shader_storage =  shProg->UniformStorage[i].is_shader_storage;
+      GLenum type = is_shader_storage ? GL_BUFFER_VARIABLE : GL_UNIFORM;
+      if (!add_program_resource(shProg, type,
                                 &shProg->UniformStorage[i], stageref))
          return;
    }
 
-   /* Add program uniform blocks. */
+   /* Add program uniform blocks and shader storage blocks. */
    for (unsigned i = 0; i < shProg->NumUniformBlocks; i++) {
-      if (!add_program_resource(shProg, GL_UNIFORM_BLOCK,
+      bool is_shader_storage = shProg->UniformBlocks[i].IsShaderStorage;
+      GLenum type = is_shader_storage ? GL_SHADER_STORAGE_BLOCK : GL_UNIFORM_BLOCK;
+      if (!add_program_resource(shProg, type,
           &shProg->UniformBlocks[i], 0))
          return;
    }
diff --git a/src/mesa/main/program_resource.c b/src/mesa/main/program_resource.c
index 23d2b4d2da0..c609abeed45 100644
--- a/src/mesa/main/program_resource.c
+++ b/src/mesa/main/program_resource.c
@@ -41,6 +41,8 @@ supported_interface_enum(struct gl_context *ctx, GLenum iface)
    case GL_PROGRAM_OUTPUT:
    case GL_TRANSFORM_FEEDBACK_VARYING:
    case GL_ATOMIC_COUNTER_BUFFER:
+   case GL_BUFFER_VARIABLE:
+   case GL_SHADER_STORAGE_BLOCK:
       return true;
    case GL_VERTEX_SUBROUTINE:
    case GL_FRAGMENT_SUBROUTINE:
@@ -58,8 +60,6 @@ supported_interface_enum(struct gl_context *ctx, GLenum iface)
    case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
    case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
       return _mesa_has_tessellation(ctx) && _mesa_has_shader_subroutine(ctx);
-   case GL_BUFFER_VARIABLE:
-   case GL_SHADER_STORAGE_BLOCK:
    default:
       return false;
    }
@@ -121,6 +121,7 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface,
    case GL_MAX_NUM_ACTIVE_VARIABLES:
       switch (programInterface) {
       case GL_UNIFORM_BLOCK:
+      case GL_SHADER_STORAGE_BLOCK:
          for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) {
             if (shProg->ProgramResourceList[i].Type == programInterface) {
                struct gl_uniform_block *block =
@@ -247,8 +248,10 @@ _mesa_GetProgramResourceIndex(GLuint program, GLenum programInterface,
    case GL_PROGRAM_INPUT:
    case GL_PROGRAM_OUTPUT:
    case GL_UNIFORM:
+   case GL_BUFFER_VARIABLE:
    case GL_TRANSFORM_FEEDBACK_VARYING:
    case GL_UNIFORM_BLOCK:
+   case GL_SHADER_STORAGE_BLOCK:
       res = _mesa_program_resource_find_name(shProg, programInterface, name,
                                              &array_index);
       if (!res || array_index > 0)
diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index ee7320221e2..99d9e1088d0 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -431,6 +431,7 @@ _mesa_program_resource_name(struct gl_program_resource *res)
    const ir_variable *var;
    switch (res->Type) {
    case GL_UNIFORM_BLOCK:
+   case GL_SHADER_STORAGE_BLOCK:
       return RESOURCE_UBO(res)->Name;
    case GL_TRANSFORM_FEEDBACK_VARYING:
       return RESOURCE_XFB(res)->Name;
@@ -445,6 +446,7 @@ _mesa_program_resource_name(struct gl_program_resource *res)
    case GL_PROGRAM_OUTPUT:
       return RESOURCE_VAR(res)->name;
    case GL_UNIFORM:
+   case GL_BUFFER_VARIABLE:
       return RESOURCE_UNI(res)->name;
    case GL_VERTEX_SUBROUTINE_UNIFORM:
    case GL_GEOMETRY_SUBROUTINE_UNIFORM:
@@ -484,6 +486,7 @@ _mesa_program_resource_array_size(struct gl_program_resource *res)
    case GL_COMPUTE_SUBROUTINE_UNIFORM:
    case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
    case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+   case GL_BUFFER_VARIABLE:
       return RESOURCE_UNI(res)->array_elements;
    case GL_VERTEX_SUBROUTINE:
    case GL_GEOMETRY_SUBROUTINE:
@@ -493,6 +496,7 @@ _mesa_program_resource_array_size(struct gl_program_resource *res)
    case GL_TESS_EVALUATION_SUBROUTINE:
    case GL_ATOMIC_COUNTER_BUFFER:
    case GL_UNIFORM_BLOCK:
+   case GL_SHADER_STORAGE_BLOCK:
       return 0;
    default:
       assert(!"support for resource type not implemented");
@@ -538,6 +542,7 @@ _mesa_program_resource_find_name(struct gl_shader_program *shProg,
       if (strncmp(rname, name, baselen) == 0) {
          switch (programInterface) {
          case GL_UNIFORM_BLOCK:
+         case GL_SHADER_STORAGE_BLOCK:
             /* Basename match, check if array or struct. */
             if (name[baselen] == '\0' ||
                 name[baselen] == '[' ||
@@ -546,6 +551,7 @@ _mesa_program_resource_find_name(struct gl_shader_program *shProg,
             }
             break;
          case GL_TRANSFORM_FEEDBACK_VARYING:
+         case GL_BUFFER_VARIABLE:
          case GL_UNIFORM:
          case GL_VERTEX_SUBROUTINE_UNIFORM:
          case GL_GEOMETRY_SUBROUTINE_UNIFORM:
@@ -607,6 +613,7 @@ _mesa_program_resource_index(struct gl_shader_program *shProg,
 
    switch (res->Type) {
    case GL_UNIFORM_BLOCK:
+   case GL_SHADER_STORAGE_BLOCK:
       return RESOURCE_UBO(res)- shProg->UniformBlocks;
    case GL_ATOMIC_COUNTER_BUFFER:
       return RESOURCE_ATC(res) - shProg->AtomicBuffers;
@@ -632,6 +639,7 @@ _mesa_program_resource_find_index(struct gl_shader_program *shProg,
       switch (res->Type) {
       case GL_UNIFORM_BLOCK:
       case GL_ATOMIC_COUNTER_BUFFER:
+      case GL_SHADER_STORAGE_BLOCK:
          if (_mesa_program_resource_index(shProg, res) == index)
             return res;
          break;
@@ -651,6 +659,7 @@ _mesa_program_resource_find_index(struct gl_shader_program *shProg,
       case GL_COMPUTE_SUBROUTINE:
       case GL_TESS_CONTROL_SUBROUTINE:
       case GL_TESS_EVALUATION_SUBROUTINE:
+      case GL_BUFFER_VARIABLE:
          if (++idx == (int) index)
             return res;
          break;
@@ -804,6 +813,192 @@ program_resource_location(struct gl_shader_program *shProg,
    }
 }
 
+static char*
+get_top_level_name(const char *name)
+{
+   const char *first_dot = strchr(name, '.');
+   const char *first_square_bracket = strchr(name, '[');
+   int name_size = 0;
+   /* Returns a newly allocated copy of the leading identifier of name: the
+    * text before the first '.' or '[', whichever comes first, or all of name
+    * when it contains neither.  The result comes from strndup(), so the
+    * caller must free() it.
+    *
+    * Callers use this to isolate the top-level name that the
+    * ARB_program_interface_query TOP_LEVEL_ARRAY_SIZE and
+    * TOP_LEVEL_ARRAY_STRIDE properties are defined in terms of.
+    */
+
+   /* Neither '.' nor '[': the whole name is already the top-level name. */
+   if (!first_square_bracket && !first_dot)
+      name_size = strlen(name);
+   else if ((!first_square_bracket ||
+            (first_dot && first_dot < first_square_bracket)))
+      name_size = first_dot - name;
+   else
+      name_size = first_square_bracket - name;
+
+   return strndup(name, name_size);
+}
+
+static char*
+get_var_name(const char *name)
+{
+   /* Copy of whatever follows the first '.', or of all of name if none. */
+   const char *first_dot = strchr(name, '.');
+
+   if (first_dot)
+      return strdup(first_dot + 1);
+   return strdup(name);
+}
+
+static GLint
+program_resource_top_level_array_size(struct gl_shader_program *shProg,
+                                      struct gl_program_resource *res,
+                                      const char *name)
+{
+   int block_index = RESOURCE_UNI(res)->block_index;
+   int array_size = -1; /* returned unchanged if no matching member is found */
+   char *var_name = get_top_level_name(name);
+   char *interface_name =
+      get_top_level_name(shProg->UniformBlocks[block_index].Name);
+
+   if (strcmp(var_name, interface_name) == 0) {
+      /* Instanced SSBO: name starts with the block name, use what follows */
+      char *temp_name = get_var_name(name);
+      free(var_name);
+      var_name = get_top_level_name(temp_name);
+      free(temp_name);
+   }
+
+   for (unsigned i = 0; i < shProg->NumShaders; i++) {
+      if (shProg->Shaders[i] == NULL)
+         continue;
+
+      const gl_shader *stage = shProg->Shaders[i];
+      foreach_in_list(ir_instruction, node, stage->ir) {
+         ir_variable *var = node->as_variable();
+         if (!var || !var->get_interface_type() ||
+             var->data.mode != ir_var_shader_storage)
+            continue;
+
+         const glsl_type *interface = var->get_interface_type();
+
+         if (strcmp(interface_name, interface->name) != 0)
+            continue;
+
+         for (unsigned i = 0; i < interface->length; i++) { /* shadows outer i */
+            const glsl_struct_field *field = &interface->fields.structure[i];
+            if (strcmp(field->name, var_name) != 0)
+               continue;
+            /* From GL_ARB_program_interface_query spec:
+             *
+             * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer
+             * identifying the number of active array elements of the top-level
+             * shader storage block member containing to the active variable is
+             * written to <params>.  If the top-level block member is not
+             * declared as an array, the value one is written to <params>.  If
+             * the top-level block member is an array with no declared size,
+             * the value zero is written to <params>."
+             */
+            if (field->type->is_unsized_array())
+               array_size = 0;
+            else if (field->type->is_array())
+               array_size = field->type->length;
+            else
+               array_size = 1;
+            goto found_top_level_array_size;
+         }
+      }
+   }
+found_top_level_array_size:
+   free(interface_name);
+   free(var_name);
+   return array_size;
+}
+
+/* Value for the GL_TOP_LEVEL_ARRAY_STRIDE query on buffer variable 'name'.
+ * Returns -1 if no matching shader storage block member is found in the IR. */
+static GLint
+program_resource_top_level_array_stride(struct gl_shader_program *shProg,
+                                        struct gl_program_resource *res,
+                                        const char *name)
+{
+   int block_index = RESOURCE_UNI(res)->block_index;
+   int array_stride = -1;
+   char *var_name = get_top_level_name(name);
+   char *interface_name =
+      get_top_level_name(shProg->UniformBlocks[block_index].Name);
+
+   if (strcmp(var_name, interface_name) == 0) {
+      /* Instanced SSBO: name starts with the block name, use what follows */
+      char *temp_name = get_var_name(name);
+      free(var_name);
+      var_name = get_top_level_name(temp_name);
+      free(temp_name);
+   }
+
+   for (unsigned i = 0; i < shProg->NumShaders; i++) {
+      if (shProg->Shaders[i] == NULL)
+         continue;
+
+      const gl_shader *stage = shProg->Shaders[i];
+      foreach_in_list(ir_instruction, node, stage->ir) {
+         ir_variable *var = node->as_variable();
+         if (!var || !var->get_interface_type() ||
+             var->data.mode != ir_var_shader_storage)
+            continue;
+
+         const glsl_type *interface = var->get_interface_type();
+         if (strcmp(interface_name, interface->name) != 0)
+            continue;
+
+         for (unsigned j = 0; j < interface->length; j++) {
+            const glsl_struct_field *field = &interface->fields.structure[j];
+            if (strcmp(field->name, var_name) != 0)
+               continue;
+            /* From GL_ARB_program_interface_query:
+             *
+             * "For the property TOP_LEVEL_ARRAY_STRIDE, a single integer
+             *  identifying the stride between array elements of the top-level
+             *  shader storage block member containing the active variable is
+             *  written to <params>.  For top-level block members declared as
+             *  arrays, the value written is the difference, in basic machine
+             *  units, between the offsets of the active variable for
+             *  consecutive elements in the top-level array.  For top-level
+             *  block members not declared as an array, zero is written to
+             *  <params>."
+             */
+            if (field->type->is_array()) {
+               const enum glsl_matrix_layout matrix_layout =
+                  glsl_matrix_layout(field->matrix_layout);
+               bool row_major = matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;
+               const glsl_type *array_type = field->type->fields.array;
+
+               if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) {
+                  if (array_type->is_record()) {
+                     array_stride = array_type->std140_size(row_major);
+                     array_stride = glsl_align(array_stride, 16);
+                  } else {
+                     unsigned base_align = array_type->std140_base_alignment(row_major);
+                     array_stride = MAX2(base_align, 16);
+                  }
+               } else {
+                  array_stride = array_type->std430_array_stride(row_major);
+               }
+            } else {
+               array_stride = 0;
+            }
+            goto found_top_level_array_stride;
+         }
+      }
+   }
+found_top_level_array_stride:
+   free(interface_name);
+   free(var_name);
+   return array_stride;
+}
+
 /**
  * Function implements following location queries:
  *    glGetUniformLocation
@@ -880,7 +1075,7 @@ is_resource_referenced(struct gl_shader_program *shProg,
    if (res->Type == GL_ATOMIC_COUNTER_BUFFER)
       return RESOURCE_ATC(res)->StageReferences[stage];
 
-   if (res->Type == GL_UNIFORM_BLOCK)
+   if (res->Type == GL_UNIFORM_BLOCK || res->Type == GL_SHADER_STORAGE_BLOCK)
       return shProg->UniformBlockStageIndex[stage][index] != -1;
 
    return res->StageReferences & (1 << stage);
@@ -893,7 +1088,8 @@ get_buffer_property(struct gl_shader_program *shProg,
 {
    GET_CURRENT_CONTEXT(ctx);
    if (res->Type != GL_UNIFORM_BLOCK &&
-       res->Type != GL_ATOMIC_COUNTER_BUFFER)
+       res->Type != GL_ATOMIC_COUNTER_BUFFER &&
+       res->Type != GL_SHADER_STORAGE_BLOCK)
       goto invalid_operation;
 
    if (res->Type == GL_UNIFORM_BLOCK) {
@@ -929,6 +1125,39 @@ get_buffer_property(struct gl_shader_program *shProg,
          }
          return RESOURCE_UBO(res)->NumUniforms;
       }
+   } else if (res->Type == GL_SHADER_STORAGE_BLOCK) {
+      switch (prop) {
+      case GL_BUFFER_BINDING:
+         *val = RESOURCE_UBO(res)->Binding;
+         return 1;
+      case GL_BUFFER_DATA_SIZE:
+         *val = RESOURCE_UBO(res)->UniformBufferSize;
+         return 1;
+      case GL_NUM_ACTIVE_VARIABLES:
+         *val = 0;
+         for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) {
+            const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName;
+            struct gl_program_resource *uni =
+               _mesa_program_resource_find_name(shProg, GL_BUFFER_VARIABLE,
+                                                iname, NULL);
+            if (!uni)
+               continue;
+            (*val)++;
+         }
+         return 1;
+      case GL_ACTIVE_VARIABLES:
+         for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) {
+            const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName;
+            struct gl_program_resource *uni =
+               _mesa_program_resource_find_name(shProg, GL_BUFFER_VARIABLE,
+                                                iname, NULL);
+            if (!uni)
+               continue;
+            *val++ =
+               _mesa_program_resource_index(shProg, uni);
+         }
+         return RESOURCE_UBO(res)->NumUniforms;
+      }
    } else if (res->Type == GL_ATOMIC_COUNTER_BUFFER) {
       switch (prop) {
       case GL_BUFFER_BINDING:
@@ -967,6 +1196,10 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
    if (res->Type != type)\
       goto invalid_operation;
 
+#define VALIDATE_TYPE_2(type1, type2)\
+   if (res->Type != type1 && res->Type != type2)\
+      goto invalid_operation;
+
    switch(prop) {
    case GL_NAME_LENGTH:
       switch (res->Type) {
@@ -984,6 +1217,7 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
    case GL_TYPE:
       switch (res->Type) {
       case GL_UNIFORM:
+      case GL_BUFFER_VARIABLE:
          *val = RESOURCE_UNI(res)->type->gl_type;
          return 1;
       case GL_PROGRAM_INPUT:
@@ -999,6 +1233,7 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
    case GL_ARRAY_SIZE:
       switch (res->Type) {
       case GL_UNIFORM:
+      case GL_BUFFER_VARIABLE:
             *val = MAX2(RESOURCE_UNI(res)->array_elements, 1);
             return 1;
       case GL_PROGRAM_INPUT:
@@ -1012,23 +1247,23 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
          goto invalid_operation;
       }
    case GL_OFFSET:
-      VALIDATE_TYPE(GL_UNIFORM);
+      VALIDATE_TYPE_2(GL_UNIFORM, GL_BUFFER_VARIABLE);
       *val = RESOURCE_UNI(res)->offset;
       return 1;
    case GL_BLOCK_INDEX:
-      VALIDATE_TYPE(GL_UNIFORM);
+      VALIDATE_TYPE_2(GL_UNIFORM, GL_BUFFER_VARIABLE);
       *val = RESOURCE_UNI(res)->block_index;
       return 1;
    case GL_ARRAY_STRIDE:
-      VALIDATE_TYPE(GL_UNIFORM);
+      VALIDATE_TYPE_2(GL_UNIFORM, GL_BUFFER_VARIABLE);
       *val = RESOURCE_UNI(res)->array_stride;
       return 1;
    case GL_MATRIX_STRIDE:
-      VALIDATE_TYPE(GL_UNIFORM);
+      VALIDATE_TYPE_2(GL_UNIFORM, GL_BUFFER_VARIABLE);
       *val = RESOURCE_UNI(res)->matrix_stride;
       return 1;
    case GL_IS_ROW_MAJOR:
-      VALIDATE_TYPE(GL_UNIFORM);
+      VALIDATE_TYPE_2(GL_UNIFORM, GL_BUFFER_VARIABLE);
       *val = RESOURCE_UNI(res)->row_major;
       return 1;
    case GL_ATOMIC_COUNTER_BUFFER_INDEX:
@@ -1054,6 +1289,8 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
       case GL_PROGRAM_INPUT:
       case GL_PROGRAM_OUTPUT:
       case GL_UNIFORM_BLOCK:
+      case GL_BUFFER_VARIABLE:
+      case GL_SHADER_STORAGE_BLOCK:
       case GL_ATOMIC_COUNTER_BUFFER:
          *val = is_resource_referenced(shProg, res, index,
                                        stage_from_enum(prop));
@@ -1117,6 +1354,19 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
       }
       return count;
    }
+
+   case GL_TOP_LEVEL_ARRAY_SIZE:
+      VALIDATE_TYPE(GL_BUFFER_VARIABLE);
+      *val = program_resource_top_level_array_size(shProg, res,
+                                                   _mesa_program_resource_name(res));
+      return 1;
+
+   case GL_TOP_LEVEL_ARRAY_STRIDE:
+      VALIDATE_TYPE(GL_BUFFER_VARIABLE);
+      *val = program_resource_top_level_array_stride(shProg, res,
+                                                     _mesa_program_resource_name(res));
+      return 1;
+
    /* GL_ARB_tessellation_shader */
    case GL_IS_PER_PATCH:
       switch (res->Type) {
@@ -1132,6 +1382,7 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
    }
 
 #undef VALIDATE_TYPE
+#undef VALIDATE_TYPE_2
 
 invalid_enum:
    _mesa_error(ctx, GL_INVALID_ENUM, "%s(%s prop %s)", caller,
-- 
cgit v1.2.3


From 26011fa22a44e539d262b050f0c5b97a7a16a466 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Tue, 21 Apr 2015 08:23:36 +0200
Subject: main/tests: add ARB_shader_storage_buffer_object tokens to
 enum_strings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/main/tests/enum_strings.cpp | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'src')

diff --git a/src/mesa/main/tests/enum_strings.cpp b/src/mesa/main/tests/enum_strings.cpp
index 8218cc9a685..96b2246f48b 100644
--- a/src/mesa/main/tests/enum_strings.cpp
+++ b/src/mesa/main/tests/enum_strings.cpp
@@ -1780,6 +1780,7 @@ const struct enum_info everything[] = {
    { 0x8E5F, "GL_MAX_PROGRAM_TEXTURE_GATHER_OFFSET" },
    { 0x8F36, "GL_COPY_READ_BUFFER" },
    { 0x8F37, "GL_COPY_WRITE_BUFFER" },
+   { 0x8F39, "GL_MAX_COMBINED_SHADER_OUTPUT_RESOURCES" },
    { 0x8F90, "GL_RED_SNORM" },
    { 0x8F91, "GL_RG_SNORM" },
    { 0x8F92, "GL_RGB_SNORM" },
@@ -1797,6 +1798,20 @@ const struct enum_info everything[] = {
    { 0x8F9E, "GL_PRIMITIVE_RESTART_INDEX" },
    { 0x8F9F, "GL_MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB" },
    { 0x906F, "GL_RGB10_A2UI" },
+   { 0x90D2, "GL_SHADER_STORAGE_BUFFER" },
+   { 0x90D3, "GL_SHADER_STORAGE_BUFFER_BINDING" },
+   { 0x90D4, "GL_SHADER_STORAGE_BUFFER_START" },
+   { 0x90D5, "GL_SHADER_STORAGE_BUFFER_SIZE" },
+   { 0x90D6, "GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS" },
+   { 0x90D7, "GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS" },
+   { 0x90D8, "GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS" },
+   { 0x90D9, "GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS" },
+   { 0x90DA, "GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS" },
+   { 0x90DB, "GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS" },
+   { 0x90DC, "GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS" },
+   { 0x90DD, "GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS" },
+   { 0x90DE, "GL_MAX_SHADER_STORAGE_BLOCK_SIZE" },
+   { 0x90DF, "GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT" },
    { 0x9100, "GL_TEXTURE_2D_MULTISAMPLE" },
    { 0x9101, "GL_PROXY_TEXTURE_2D_MULTISAMPLE" },
    { 0x9102, "GL_TEXTURE_2D_MULTISAMPLE_ARRAY" },
-- 
cgit v1.2.3


From 91191af6d6b5579dd1bc1a63b42f9ca416623c7e Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Mon, 20 Apr 2015 16:10:44 +0200
Subject: glapi: add ARB_shader_storage_block_buffer_object
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 .../glapi/gen/ARB_shader_storage_buffer_object.xml | 36 ++++++++++++++++++++++
 src/mapi/glapi/gen/GL4x.xml                        | 18 ++++++++++-
 src/mapi/glapi/gen/Makefile.am                     |  1 +
 src/mapi/glapi/gen/gl_API.xml                      |  6 +++-
 4 files changed, 59 insertions(+), 2 deletions(-)
 create mode 100644 src/mapi/glapi/gen/ARB_shader_storage_buffer_object.xml

(limited to 'src')

diff --git a/src/mapi/glapi/gen/ARB_shader_storage_buffer_object.xml b/src/mapi/glapi/gen/ARB_shader_storage_buffer_object.xml
new file mode 100644
index 00000000000..6901bdf6d0e
--- /dev/null
+++ b/src/mapi/glapi/gen/ARB_shader_storage_buffer_object.xml
@@ -0,0 +1,36 @@
+<?xml version="1.0"?>
+<!DOCTYPE OpenGLAPI SYSTEM "gl_API.dtd">
+
+<OpenGLAPI>
+
+<category name="GL_ARB_shader_storage_buffer_object" number="137">
+
+<enum name="SHADER_STORAGE_BUFFER" value="0x90D2" />
+<enum name="SHADER_STORAGE_BUFFER_BINDING" value="0x90D3" />
+<enum name="SHADER_STORAGE_BUFFER_START" value="0x90D4" />
+<enum name="SHADER_STORAGE_BUFFER_SIZE" value="0x90D5" />
+<enum name="MAX_VERTEX_SHADER_STORAGE_BLOCKS" value="0x90D6" />
+<enum name="MAX_GEOMETRY_SHADER_STORAGE_BLOCKS" value="0x90D7" />
+<enum name="MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS" value="0x90D8" />
+<enum name="MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS" value="0x90D9" />
+<enum name="MAX_FRAGMENT_SHADER_STORAGE_BLOCKS" value="0x90DA" />
+<enum name="MAX_COMPUTE_SHADER_STORAGE_BLOCKS" value="0x90DB" />
+<enum name="MAX_COMBINED_SHADER_STORAGE_BLOCKS" value="0x90DC" />
+<enum name="MAX_SHADER_STORAGE_BUFFER_BINDINGS" value="0x90DD" />
+<enum name="MAX_SHADER_STORAGE_BLOCK_SIZE" value="0x90DE" />
+<enum name="SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT" value="0x90DF" />
+<enum name="SHADER_STORAGE_BARRIER_BIT" value="0x2000" />
+<enum name="MAX_COMBINED_SHADER_OUTPUT_RESOURCES" value="0x8F39" />
+
+<!-- Duplicated with GL3x.xml: BindBufferRange, BindBufferBase,
+     GetIntegeri_v -->
+
+<function name="ShaderStorageBlockBinding">
+    <param name="program" type="GLuint" />
+    <param name="shaderStorageBlockIndex" type="GLuint" />
+    <param name="shaderStorageBlockBinding" type="GLuint" />
+</function>
+
+</category>
+
+</OpenGLAPI>
diff --git a/src/mapi/glapi/gen/GL4x.xml b/src/mapi/glapi/gen/GL4x.xml
index dee50275a57..dd48c8336f6 100644
--- a/src/mapi/glapi/gen/GL4x.xml
+++ b/src/mapi/glapi/gen/GL4x.xml
@@ -41,7 +41,23 @@
 </category>
 
 <category name="4.3">
-  <enum name="DEPTH_STENCIL_TEXTURE_MODE"              value="0x90EA"/>
+  <enum name="SHADER_STORAGE_BARRIER_BIT"                value="0x2000" />
+  <enum name="MAX_COMBINED_SHADER_OUTPUT_RESOURCES"      value="0x8F39" />
+  <enum name="SHADER_STORAGE_BUFFER"                     value="0x90D2"/>
+  <enum name="SHADER_STORAGE_BUFFER_BINDING"             value="0x90D3"/>
+  <enum name="SHADER_STORAGE_BUFFER_START"               value="0x90D4"/>
+  <enum name="SHADER_STORAGE_BUFFER_SIZE"                value="0x90D5"/>
+  <enum name="MAX_VERTEX_SHADER_STORAGE_BLOCKS"          value="0x90D6" />
+  <enum name="MAX_GEOMETRY_SHADER_STORAGE_BLOCKS"        value="0x90D7" />
+  <enum name="MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS"    value="0x90D8" />
+  <enum name="MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS" value="0x90D9" />
+  <enum name="MAX_FRAGMENT_SHADER_STORAGE_BLOCKS"        value="0x90DA" />
+  <enum name="MAX_COMPUTE_SHADER_STORAGE_BLOCKS"         value="0x90DB" />
+  <enum name="MAX_COMBINED_SHADER_STORAGE_BLOCKS"        value="0x90DC" />
+  <enum name="MAX_SHADER_STORAGE_BUFFER_BINDINGS"        value="0x90DD" />
+  <enum name="MAX_SHADER_STORAGE_BLOCK_SIZE"             value="0x90DE" />
+  <enum name="SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT"    value="0x90DF" />
+  <enum name="DEPTH_STENCIL_TEXTURE_MODE"                value="0x90EA"/>
 </category>
 
 <category name="4.5">
diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am
index 7d8dfcb7e2e..a5a26a652ee 100644
--- a/src/mapi/glapi/gen/Makefile.am
+++ b/src/mapi/glapi/gen/Makefile.am
@@ -153,6 +153,7 @@ API_XML = \
 	ARB_shader_atomic_counters.xml \
 	ARB_shader_image_load_store.xml \
 	ARB_shader_subroutine.xml \
+	ARB_shader_storage_buffer_object.xml \
 	ARB_sync.xml \
 	ARB_tessellation_shader.xml \
 	ARB_texture_barrier.xml \
diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index f0dcdca2aee..ec83cd43d16 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -8211,7 +8211,11 @@
 
 <xi:include href="ARB_program_interface_query.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
 
-<!-- ARB extensions #135...#138 -->
+<!-- ARB extensions #135...#136 -->
+
+<xi:include href="ARB_shader_storage_buffer_object.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
+
+<!-- ARB extensions #138 -->
 
 <xi:include href="ARB_texture_buffer_range.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
 
-- 
cgit v1.2.3


From 10b5c6491f60d7cf119554f7d8e797b896707335 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Wed, 22 Apr 2015 16:11:13 +0200
Subject: mesa: Add getters for the GL_ARB_shader_storage_buffer_object max
 constants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2:
- Add tessellation shader constants support

v3:
- Add GLES 3.1 support.

v4:
- Move the getters to the proper place

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/main/get.c              |  7 +++++++
 src/mesa/main/get_hash_params.py | 16 ++++++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'src')

diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 77184ee7986..539c41148dd 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -369,6 +369,12 @@ static const int extra_ARB_compute_shader_es31[] = {
    EXTRA_END
 };
 
+static const int extra_ARB_shader_storage_buffer_object_es31[] = {
+   EXT(ARB_shader_storage_buffer_object),
+   EXTRA_API_ES31,
+   EXTRA_END
+};
+
 EXTRA_EXT(ARB_texture_cube_map);
 EXTRA_EXT(EXT_texture_array);
 EXTRA_EXT(NV_fog_distance);
@@ -417,6 +423,7 @@ EXTRA_EXT(EXT_polygon_offset_clamp);
 EXTRA_EXT(ARB_framebuffer_no_attachments);
 EXTRA_EXT(ARB_tessellation_shader);
 EXTRA_EXT(ARB_shader_subroutine);
+EXTRA_EXT(ARB_shader_storage_buffer_object);
 
 static const int
 extra_ARB_color_buffer_float_or_glcore[] = {
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index 805becb26b7..38b08b02a65 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -469,6 +469,17 @@ descriptor=[
 
 # GL 4.4 / GLES 3.1
   [ "MAX_VERTEX_ATTRIB_STRIDE", "CONTEXT_ENUM(Const.MaxVertexAttribStride), NO_EXTRA" ],
+
+  # GL_ARB_shader_storage_buffer_object / GLES 3.1
+  [ "MAX_VERTEX_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "MAX_FRAGMENT_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "MAX_COMPUTE_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "MAX_COMBINED_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.MaxCombinedShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "MAX_SHADER_STORAGE_BLOCK_SIZE", "CONTEXT_INT(Const.MaxShaderStorageBlockSize), extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "MAX_SHADER_STORAGE_BUFFER_BINDINGS", "CONTEXT_INT(Const.MaxShaderStorageBufferBindings), extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "MAX_COMBINED_SHADER_OUTPUT_RESOURCES", "CONTEXT_INT(Const.MaxCombinedShaderOutputResources), extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT", "CONTEXT_INT(Const.ShaderStorageBufferOffsetAlignment), extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "SHADER_STORAGE_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_shader_storage_buffer_object_es31" ],
 ]},
 
 # Enums in OpenGL Core profile and ES 3.1
@@ -821,6 +832,11 @@ descriptor=[
 
 # GL_EXT_polygon_offset_clamp
   [ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ],
+
+# GL_ARB_shader_storage_buffer_object (per-stage limits; each reads its own stage)
+  [ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
+  [ "MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
+  [ "MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
 ]},
 
 # Enums restricted to OpenGL Core profile
-- 
cgit v1.2.3


From 5b080e3ddfa9e2241ecff3b0220785fbfe39fb08 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Tue, 14 Jul 2015 12:23:42 +0200
Subject: mesa: enable ARB_shader_storage_buffer_object extension for GLES 3.1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/glsl_parser_extras.cpp | 2 +-
 src/glsl/glsl_parser_extras.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 223596be798..f5542415d4a 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -608,7 +608,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
    EXT(ARB_shader_image_size,            true,  false,     ARB_shader_image_size),
    EXT(ARB_shader_precision,             true,  false,     ARB_shader_precision),
    EXT(ARB_shader_stencil_export,        true,  false,     ARB_shader_stencil_export),
-   EXT(ARB_shader_storage_buffer_object, true,  false,     ARB_shader_storage_buffer_object),
+   EXT(ARB_shader_storage_buffer_object, true,  true,      ARB_shader_storage_buffer_object),
    EXT(ARB_shader_subroutine,            true,  false,     ARB_shader_subroutine),
    EXT(ARB_shader_texture_image_samples, true,  false,     ARB_shader_texture_image_samples),
    EXT(ARB_shader_texture_lod,           true,  false,     ARB_shader_texture_lod),
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index 5b757d3716f..7fee43ece52 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -217,7 +217,7 @@ struct _mesa_glsl_parse_state {
 
    bool has_shader_storage_buffer_objects() const
    {
-      return ARB_shader_storage_buffer_object_enable || is_version(430, 0);
+      return ARB_shader_storage_buffer_object_enable || is_version(430, 310);
    }
 
    bool has_separate_shader_objects() const
-- 
cgit v1.2.3


From 614b5307fd485a62205b0ceb05657dd862c1f036 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Wed, 18 Mar 2015 07:36:24 +0100
Subject: i965: Enable ARB_shader_storage_buffer_object extension for gen7+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index 1873827cacf..3c77f4773c6 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -329,6 +329,7 @@ intelInitExtensions(struct gl_context *ctx)
       ctx->Extensions.ARB_shader_texture_image_samples = true;
       ctx->Extensions.ARB_texture_compression_bptc = true;
       ctx->Extensions.ARB_texture_view = true;
+      ctx->Extensions.ARB_shader_storage_buffer_object = true;
 
       if (can_do_pipelined_register_writes(brw)) {
          ctx->Extensions.ARB_draw_indirect = true;
-- 
cgit v1.2.3


From e92c35a8724efd36a35ac9106e5977c5ec2cb332 Mon Sep 17 00:00:00 2001
From: Antia Puentes <apuentes@igalia.com>
Date: Wed, 29 Jul 2015 16:01:24 +0200
Subject: glsl: Mark as active all elements of shared/std140 block arrays
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 1ca25ab (glsl: Do not eliminate 'shared' or 'std140' blocks
or block members) considered as active 'shared' and 'std140' uniform
blocks and uniform block arrays, but did not include the block array
elements. Because of that, it was possible to have an active uniform
block array without any elements marked as used, making the assertion
   ((b->num_array_elements > 0) == b->type->is_array())
in link_uniform_blocks() fail.

Fixes the following 5 dEQP tests:

 * dEQP-GLES3.functional.ubo.random.nested_structs_instance_arrays.18
 * dEQP-GLES3.functional.ubo.random.nested_structs_instance_arrays.24
 * dEQP-GLES3.functional.ubo.random.nested_structs_arrays_instance_arrays.19
 * dEQP-GLES3.functional.ubo.random.all_per_block_buffers.49
 * dEQP-GLES3.functional.ubo.random.all_shared_buffer.36

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=83508
Tested-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/link_uniform_block_active_visitor.cpp | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'src')

diff --git a/src/glsl/link_uniform_block_active_visitor.cpp b/src/glsl/link_uniform_block_active_visitor.cpp
index 72e07828a6d..bcf17fef758 100644
--- a/src/glsl/link_uniform_block_active_visitor.cpp
+++ b/src/glsl/link_uniform_block_active_visitor.cpp
@@ -104,6 +104,22 @@ link_uniform_block_active_visitor::visit(ir_variable *var)
    assert(b->num_array_elements == 0);
    assert(b->array_elements == NULL);
    assert(b->type != NULL);
+   assert(!b->type->is_array() || b->has_instance_name);
+
+   /* For uniform block arrays declared with a shared or std140 layout
+    * qualifier, mark all its instances as used.
+    */
+   if (b->type->is_array() && b->type->length > 0) {
+      b->num_array_elements = b->type->length;
+      b->array_elements = reralloc(this->mem_ctx,
+                                   b->array_elements,
+                                   unsigned,
+                                   b->num_array_elements);
+
+      for (unsigned i = 0; i < b->num_array_elements; i++) {
+         b->array_elements[i] = i;
+      }
+   }
 
    return visit_continue;
 }
@@ -145,6 +161,14 @@ link_uniform_block_active_visitor::visit_enter(ir_dereference_array *ir)
    assert((b->num_array_elements == 0) == (b->array_elements == NULL));
    assert(b->type != NULL);
 
+   /* If the block array was declared with a shared or
+    * std140 layout qualifier, all its instances have been already marked
+    * as used in link_uniform_block_active_visitor::visit(ir_variable *).
+    */
+   if (var->get_interface_type()->interface_packing !=
+       GLSL_INTERFACE_PACKING_PACKED)
+      return visit_continue_with_parent;
+
    ir_constant *c = ir->array_index->as_constant();
 
    if (c) {
-- 
cgit v1.2.3


From 266d05a3a0651ac954c91aea12c870940e8a9820 Mon Sep 17 00:00:00 2001
From: Tapani Pälli <tapani.palli@intel.com>
Date: Fri, 25 Sep 2015 09:56:39 +0300
Subject: glsl: fix packed varyings interface type and add default case
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fixes Piglit test:
   arb_program_interface_query/linker/query-varyings.shader_test

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/glsl/linker.cpp | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src')

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 9d419ac9d39..75396fb3936 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -3319,8 +3319,12 @@ add_packed_varyings(struct gl_shader_program *shProg, int stage)
          switch (var->data.mode) {
          case ir_var_shader_in:
             iface = GL_PROGRAM_INPUT;
+            break;
          case ir_var_shader_out:
             iface = GL_PROGRAM_OUTPUT;
+            break;
+         default:
+            unreachable("unexpected type");
          }
          if (!add_program_resource(shProg, iface, var,
                                    build_stageref(shProg, var->name,
-- 
cgit v1.2.3


From d4b0e0b717b698682700bf1cd9d448043a57701d Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Wed, 10 Jun 2015 13:31:06 -0700
Subject: mesa: Remove debugging code from _mesa_reference_*.

Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/main/arrayobj.c     |  8 --------
 src/mesa/main/bufferobj.c    | 17 -----------------
 src/mesa/main/renderbuffer.c |  2 --
 src/mesa/main/samplerobj.c   |  8 --------
 src/mesa/main/shaderobj.c    | 12 ------------
 src/mesa/program/program.c   | 14 --------------
 6 files changed, 61 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c
index 7c4004043de..28851434133 100644
--- a/src/mesa/main/arrayobj.c
+++ b/src/mesa/main/arrayobj.c
@@ -200,10 +200,6 @@ _mesa_reference_vao_(struct gl_context *ctx,
       mtx_lock(&oldObj->Mutex);
       assert(oldObj->RefCount > 0);
       oldObj->RefCount--;
-#if 0
-      printf("ArrayObj %p %d DECR to %d\n",
-             (void *) oldObj, oldObj->Name, oldObj->RefCount);
-#endif
       deleteFlag = (oldObj->RefCount == 0);
       mtx_unlock(&oldObj->Mutex);
 
@@ -227,10 +223,6 @@ _mesa_reference_vao_(struct gl_context *ctx,
       }
       else {
          vao->RefCount++;
-#if 0
-         printf("ArrayObj %p %d INCR to %d\n",
-                (void *) vao, vao->Name, vao->RefCount);
-#endif
          *ptr = vao;
       }
       mtx_unlock(&vao->Mutex);
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 20963bda8f2..f985982c801 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -450,23 +450,10 @@ _mesa_reference_buffer_object_(struct gl_context *ctx,
       mtx_lock(&oldObj->Mutex);
       assert(oldObj->RefCount > 0);
       oldObj->RefCount--;
-#if 0
-      printf("BufferObj %p %d DECR to %d\n",
-             (void *) oldObj, oldObj->Name, oldObj->RefCount);
-#endif
       deleteFlag = (oldObj->RefCount == 0);
       mtx_unlock(&oldObj->Mutex);
 
       if (deleteFlag) {
-
-         /* some sanity checking: don't delete a buffer still in use */
-#if 0
-         /* unfortunately, these tests are invalid during context tear-down */
-	 assert(ctx->Array.ArrayBufferObj != bufObj);
-	 assert(ctx->Array.VAO->IndexBufferObj != bufObj);
-	 assert(ctx->Array.VAO->Vertex.BufferObj != bufObj);
-#endif
-
 	 assert(ctx->Driver.DeleteBuffer);
          ctx->Driver.DeleteBuffer(ctx, oldObj);
       }
@@ -486,10 +473,6 @@ _mesa_reference_buffer_object_(struct gl_context *ctx,
       }
       else {
          bufObj->RefCount++;
-#if 0
-         printf("BufferObj %p %d INCR to %d\n",
-                (void *) bufObj, bufObj->Name, bufObj->RefCount);
-#endif
          *ptr = bufObj;
       }
       mtx_unlock(&bufObj->Mutex);
diff --git a/src/mesa/main/renderbuffer.c b/src/mesa/main/renderbuffer.c
index e9d129a1dbb..b0d4c8cca26 100644
--- a/src/mesa/main/renderbuffer.c
+++ b/src/mesa/main/renderbuffer.c
@@ -174,7 +174,6 @@ _mesa_reference_renderbuffer_(struct gl_renderbuffer **ptr,
       mtx_lock(&oldRb->Mutex);
       assert(oldRb->RefCount > 0);
       oldRb->RefCount--;
-      /*printf("RB DECR %p (%d) to %d\n", (void*) oldRb, oldRb->Name, oldRb->RefCount);*/
       deleteFlag = (oldRb->RefCount == 0);
       mtx_unlock(&oldRb->Mutex);
 
@@ -191,7 +190,6 @@ _mesa_reference_renderbuffer_(struct gl_renderbuffer **ptr,
       /* reference new renderbuffer */
       mtx_lock(&rb->Mutex);
       rb->RefCount++;
-      /*printf("RB INCR %p (%d) to %d\n", (void*) rb, rb->Name, rb->RefCount);*/
       mtx_unlock(&rb->Mutex);
       *ptr = rb;
    }
diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c
index 32180fb1ba2..241b5cb31fd 100644
--- a/src/mesa/main/samplerobj.c
+++ b/src/mesa/main/samplerobj.c
@@ -91,10 +91,6 @@ _mesa_reference_sampler_object_(struct gl_context *ctx,
       /*mtx_lock(&oldSamp->Mutex);*/
       assert(oldSamp->RefCount > 0);
       oldSamp->RefCount--;
-#if 0
-      printf("SamplerObj %p %d DECR to %d\n",
-             (void *) oldSamp, oldSamp->Name, oldSamp->RefCount);
-#endif
       deleteFlag = (oldSamp->RefCount == 0);
       /*mtx_unlock(&oldSamp->Mutex);*/
 
@@ -118,10 +114,6 @@ _mesa_reference_sampler_object_(struct gl_context *ctx,
       }
       else {
          samp->RefCount++;
-#if 0
-         printf("SamplerObj %p %d INCR to %d\n",
-                (void *) samp, samp->Name, samp->RefCount);
-#endif
          *ptr = samp;
       }
       /*mtx_unlock(&samp->Mutex);*/
diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
index 110a18e1e2c..71d4ed69285 100644
--- a/src/mesa/main/shaderobj.c
+++ b/src/mesa/main/shaderobj.c
@@ -69,8 +69,6 @@ _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
 
       assert(old->RefCount > 0);
       old->RefCount--;
-      /*printf("SHADER DECR %p (%d) to %d\n",
-        (void*) old, old->Name, old->RefCount);*/
       deleteFlag = (old->RefCount == 0);
 
       if (deleteFlag) {
@@ -86,8 +84,6 @@ _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
    if (sh) {
       /* reference new */
       sh->RefCount++;
-      /*printf("SHADER INCR %p (%d) to %d\n",
-        (void*) sh, sh->Name, sh->RefCount);*/
       *ptr = sh;
    }
 }
@@ -209,10 +205,6 @@ _mesa_reference_shader_program_(struct gl_context *ctx,
 
       assert(old->RefCount > 0);
       old->RefCount--;
-#if 0
-      printf("ShaderProgram %p ID=%u  RefCount-- to %d\n",
-             (void *) old, old->Name, old->RefCount);
-#endif
       deleteFlag = (old->RefCount == 0);
 
       if (deleteFlag) {
@@ -227,10 +219,6 @@ _mesa_reference_shader_program_(struct gl_context *ctx,
 
    if (shProg) {
       shProg->RefCount++;
-#if 0
-      printf("ShaderProgram %p ID=%u  RefCount++ to %d\n",
-             (void *) shProg, shProg->Name, shProg->RefCount);
-#endif
       *ptr = shProg;
    }
 }
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index 2d03bba3d12..18d6754a99a 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -465,13 +465,6 @@ _mesa_reference_program_(struct gl_context *ctx,
       GLboolean deleteFlag;
 
       /*mtx_lock(&(*ptr)->Mutex);*/
-#if 0
-      printf("Program %p ID=%u Target=%s  Refcount-- to %d\n",
-             *ptr, (*ptr)->Id,
-             ((*ptr)->Target == GL_VERTEX_PROGRAM_ARB ? "VP" :
-              ((*ptr)->Target == GL_GEOMETRY_PROGRAM_NV ? "GP" : "FP")),
-             (*ptr)->RefCount - 1);
-#endif
       assert((*ptr)->RefCount > 0);
       (*ptr)->RefCount--;
 
@@ -490,13 +483,6 @@ _mesa_reference_program_(struct gl_context *ctx,
    if (prog) {
       /*mtx_lock(&prog->Mutex);*/
       prog->RefCount++;
-#if 0
-      printf("Program %p ID=%u Target=%s  Refcount++ to %d\n",
-             prog, prog->Id,
-             (prog->Target == GL_VERTEX_PROGRAM_ARB ? "VP" :
-              (prog->Target == GL_GEOMETRY_PROGRAM_NV ? "GP" : "FP")),
-             prog->RefCount);
-#endif
       /*mtx_unlock(&prog->Mutex);*/
    }
 
-- 
cgit v1.2.3


From 3c57a102ebd2efc427a59bf8eb441b4e5dec8553 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Fri, 12 Jun 2015 09:07:09 -0700
Subject: mesa: Add locking to sampler objects.

Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/main/mtypes.h     |  1 +
 src/mesa/main/samplerobj.c | 10 ++++++----
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index d53d99ad843..5445d39c560 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -944,6 +944,7 @@ typedef enum
  */
 struct gl_sampler_object
 {
+   mtx_t Mutex;
    GLuint Name;
    GLint RefCount;
    GLchar *Label;               /**< GL_KHR_debug */
diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c
index 241b5cb31fd..c7b96664c21 100644
--- a/src/mesa/main/samplerobj.c
+++ b/src/mesa/main/samplerobj.c
@@ -88,11 +88,11 @@ _mesa_reference_sampler_object_(struct gl_context *ctx,
       GLboolean deleteFlag = GL_FALSE;
       struct gl_sampler_object *oldSamp = *ptr;
 
-      /*mtx_lock(&oldSamp->Mutex);*/
+      mtx_lock(&oldSamp->Mutex);
       assert(oldSamp->RefCount > 0);
       oldSamp->RefCount--;
       deleteFlag = (oldSamp->RefCount == 0);
-      /*mtx_unlock(&oldSamp->Mutex);*/
+      mtx_unlock(&oldSamp->Mutex);
 
       if (deleteFlag) {
 	 assert(ctx->Driver.DeleteSamplerObject);
@@ -105,7 +105,7 @@ _mesa_reference_sampler_object_(struct gl_context *ctx,
 
    if (samp) {
       /* reference new sampler */
-      /*mtx_lock(&samp->Mutex);*/
+      mtx_lock(&samp->Mutex);
       if (samp->RefCount == 0) {
          /* this sampler's being deleted (look just above) */
          /* Not sure this can every really happen.  Warn if it does. */
@@ -116,7 +116,7 @@ _mesa_reference_sampler_object_(struct gl_context *ctx,
          samp->RefCount++;
          *ptr = samp;
       }
-      /*mtx_unlock(&samp->Mutex);*/
+      mtx_unlock(&samp->Mutex);
    }
 }
 
@@ -127,6 +127,7 @@ _mesa_reference_sampler_object_(struct gl_context *ctx,
 static void
 _mesa_init_sampler_object(struct gl_sampler_object *sampObj, GLuint name)
 {
+   mtx_init(&sampObj->Mutex, mtx_plain);
    sampObj->Name = name;
    sampObj->RefCount = 1;
    sampObj->WrapS = GL_REPEAT;
@@ -169,6 +170,7 @@ static void
 _mesa_delete_sampler_object(struct gl_context *ctx,
                             struct gl_sampler_object *sampObj)
 {
+   mtx_destroy(&sampObj->Mutex);
    free(sampObj->Label);
    free(sampObj);
 }
-- 
cgit v1.2.3


From 1dd943d7fb5bd374e871436ccf6546327687c761 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Thu, 30 Jul 2015 11:25:27 -0700
Subject: mesa: Add locking to programs.

Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/main/mtypes.h     |  1 +
 src/mesa/program/program.c | 19 +++++++++++--------
 2 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 5445d39c560..a872942d2a8 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1888,6 +1888,7 @@ enum gl_frag_depth_layout
  */
 struct gl_program
 {
+   mtx_t Mutex;
    GLuint Id;
    GLint RefCount;
    GLubyte *String;  /**< Null-terminated program text */
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index 18d6754a99a..e94c1021258 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -226,6 +226,7 @@ init_program_struct(struct gl_program *prog, GLenum target, GLuint id)
    assert(prog);
 
    memset(prog, 0, sizeof(*prog));
+   mtx_init(&prog->Mutex, mtx_plain);
    prog->Id = id;
    prog->Target = target;
    prog->RefCount = 1;
@@ -418,6 +419,7 @@ _mesa_delete_program(struct gl_context *ctx, struct gl_program *prog)
       ralloc_free(prog->nir);
    }
 
+   mtx_destroy(&prog->Mutex);
    free(prog);
 }
 
@@ -463,17 +465,18 @@ _mesa_reference_program_(struct gl_context *ctx,
 
    if (*ptr) {
       GLboolean deleteFlag;
+      struct gl_program *oldProg = *ptr;
 
-      /*mtx_lock(&(*ptr)->Mutex);*/
-      assert((*ptr)->RefCount > 0);
-      (*ptr)->RefCount--;
+      mtx_lock(&oldProg->Mutex);
+      assert(oldProg->RefCount > 0);
+      oldProg->RefCount--;
 
-      deleteFlag = ((*ptr)->RefCount == 0);
-      /*mtx_lock(&(*ptr)->Mutex);*/
+      deleteFlag = (oldProg->RefCount == 0);
+      mtx_unlock(&oldProg->Mutex);
 
       if (deleteFlag) {
          assert(ctx);
-         ctx->Driver.DeleteProgram(ctx, *ptr);
+         ctx->Driver.DeleteProgram(ctx, oldProg);
       }
 
       *ptr = NULL;
@@ -481,9 +484,9 @@ _mesa_reference_program_(struct gl_context *ctx,
 
    assert(!*ptr);
    if (prog) {
-      /*mtx_lock(&prog->Mutex);*/
+      mtx_lock(&prog->Mutex);
       prog->RefCount++;
-      /*mtx_unlock(&prog->Mutex);*/
+      mtx_unlock(&prog->Mutex);
    }
 
    *ptr = prog;
-- 
cgit v1.2.3


From 1e97b41893a4f53a71ee141a5e8a046fed7b49cd Mon Sep 17 00:00:00 2001
From: Leo Liu <leo.liu@amd.com>
Date: Fri, 18 Sep 2015 15:51:26 -0400
Subject: radeon/vce: fix vui time_scale zero error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the app passes 0 as frame_rate_num, it should not be encoded into the VUI.

Signed-off-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/radeon/radeon_vce_40_2_2.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src')

diff --git a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
index e64fbc7afb0..c0056590427 100644
--- a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
+++ b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
@@ -233,6 +233,9 @@ static void vui(struct rvce_encoder *enc)
 {
 	int i;
 
+	if (!enc->pic.rate_ctrl.frame_rate_num)
+		return;
+
 	RVCE_BEGIN(0x04000009); // vui
 	RVCE_CS(0x00000000); //aspectRatioInfoPresentFlag
 	RVCE_CS(0x00000000); //aspectRatioInfo.aspectRatioIdc
-- 
cgit v1.2.3


From 9932142192f848ae9dbc644551653547640346b4 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Thu, 24 Sep 2015 23:50:01 +0200
Subject: radeonsi: add scratch buffer to the buffer list when it's
 re-allocated
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 56f868d81db..b5e14ead160 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1246,6 +1246,7 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
 	                                PIPE_USAGE_DEFAULT, scratch_needed_size);
 			if (!sctx->scratch_buffer)
 				return false;
+			sctx->emit_scratch_reloc = true;
 		}
 
 		/* Update the shaders, so they are using the latest scratch.  The
-- 
cgit v1.2.3


From 776a3845d6325578d51eea6e7d91ffb475862fc8 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <t_arceri@yahoo.com.au>
Date: Sat, 18 Jul 2015 10:51:51 +1000
Subject: glsl: clean-up link uniform code

These changes are also needed to allow linking of
struct and interface arrays of arrays.

Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
---
 src/glsl/link_uniforms.cpp | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

(limited to 'src')

diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index 1c901e2cecb..94d7287221a 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -77,6 +77,7 @@ program_resource_visitor::process(ir_variable *var)
 {
    unsigned record_array_count = 1;
    const glsl_type *t = var->type;
+   const glsl_type *t_without_array = var->type->without_array();
    const bool row_major =
       var->data.matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;
 
@@ -153,13 +154,8 @@ program_resource_visitor::process(ir_variable *var)
       recursion(var->type, &name, strlen(name), row_major, NULL, packing,
                 false, record_array_count);
       ralloc_free(name);
-   } else if (t->is_interface()) {
-      char *name = ralloc_strdup(NULL, var->type->name);
-      recursion(var->type, &name, strlen(name), row_major, NULL, packing,
-                false, record_array_count);
-      ralloc_free(name);
-   } else if (t->is_array() && t->fields.array->is_interface()) {
-      char *name = ralloc_strdup(NULL, var->type->fields.array->name);
+   } else if (t_without_array->is_interface()) {
+      char *name = ralloc_strdup(NULL, t_without_array->name);
       recursion(var->type, &name, strlen(name), row_major, NULL, packing,
                 false, record_array_count);
       ralloc_free(name);
@@ -234,8 +230,8 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
          (*name)[name_length] = '\0';
          this->leave_record(t, *name, row_major, packing);
       }
-   } else if (t->is_array() && (t->fields.array->is_record()
-                                || t->fields.array->is_interface())) {
+   } else if (t->without_array()->is_record() ||
+              t->without_array()->is_interface()) {
       if (record_type == NULL && t->fields.array->is_record())
          record_type = t->fields.array;
 
@@ -974,8 +970,7 @@ link_update_uniform_buffer_variables(struct gl_shader *shader)
 
       if (var->type->is_record()) {
          sentinel = '.';
-      } else if (var->type->is_array()
-                 && var->type->fields.array->is_record()) {
+      } else if (var->type->without_array()->is_record()) {
          sentinel = '[';
       }
 
-- 
cgit v1.2.3


From 9bad7afbc2ca6003da9a19c486b81d6ed0b8b0df Mon Sep 17 00:00:00 2001
From: Timothy Arceri <t_arceri@yahoo.com.au>
Date: Wed, 27 May 2015 21:31:59 +1000
Subject: glsl: add helper for calculating size of AoA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

V2: return 0 if not array rather than -1

Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/glsl/glsl_types.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'src')

diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h
index 23ada15b854..3ec764219de 100644
--- a/src/glsl/glsl_types.h
+++ b/src/glsl/glsl_types.h
@@ -577,6 +577,25 @@ struct glsl_type {
       return t;
    }
 
+   /**
+    * Return the total number of elements in an array including the elements
+    * in arrays of arrays.
+    */
+   unsigned arrays_of_arrays_size() const
+   {
+      if (!is_array())
+         return 0;
+
+      unsigned size = length;
+      const glsl_type *base_type = fields.array;
+
+      while (base_type->is_array()) {
+         size = size * base_type->length;
+         base_type = base_type->fields.array;
+      }
+      return size;
+   }
+
    /**
     * Return the amount of atomic counter storage required for a type.
     */
-- 
cgit v1.2.3


From 1d401f9ce48f85b4340fcd4a591222aa165910f1 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <t_arceri@yahoo.com.au>
Date: Wed, 27 May 2015 21:33:45 +1000
Subject: glsl: validate binding qualifier for AoA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/glsl/ast_to_hir.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 6b2e140cf43..b8d66dd0609 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2179,7 +2179,7 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
    }
 
    const struct gl_context *const ctx = state->ctx;
-   unsigned elements = type->is_array() ? type->length : 1;
+   unsigned elements = type->is_array() ? type->arrays_of_arrays_size() : 1;
    unsigned max_index = qual->binding + elements - 1;
    const glsl_type *base_type = type->without_array();
 
-- 
cgit v1.2.3


From abdab88b30ab2eb78cf20fd91361f820e1f3e06b Mon Sep 17 00:00:00 2001
From: Timothy Arceri <t_arceri@yahoo.com.au>
Date: Sun, 4 May 2014 21:13:25 +1000
Subject: glsl: calculate component size for arrays of arrays when varying
 packing disabled

Signed-off-by: Timothy Arceri <t_arceri@yahoo.com.au>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/glsl/link_varyings.cpp | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp
index f7a7b8c4c5b..0d343d64a2e 100644
--- a/src/glsl/link_varyings.cpp
+++ b/src/glsl/link_varyings.cpp
@@ -956,9 +956,16 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
          type = type->fields.array;
       }
 
-      slots = (type->is_array()
-            ? (type->length * type->fields.array->matrix_columns)
-            : type->matrix_columns);
+      if (type->is_array()) {
+         slots = 1;
+         while (type->is_array()) {
+            slots *= type->length;
+            type = type->fields.array;
+         }
+         slots *= type->matrix_columns;
+      } else {
+         slots = var->type->matrix_columns;
+      }
       this->matches[this->num_matches].num_components = 4 * slots;
    } else {
       this->matches[this->num_matches].num_components
-- 
cgit v1.2.3


From 511a86383b9d6490788a15cca2df791c6d63bfd7 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Tue, 15 Sep 2015 08:18:39 +0200
Subject: main/tests: Enable glShaderStorageBlockBinding() check in
 dispatch_sanity test

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/main/tests/dispatch_sanity.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp
index 0ddda59410e..b19c6d74bc0 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -844,7 +844,7 @@ const struct function common_desktop_functions_possible[] = {
    { "glGetProgramResourceiv", 43, -1 },
    { "glGetProgramResourceLocation", 43, -1 },
    { "glGetProgramResourceLocationIndex", 43, -1 },
-// { "glShaderStorageBlockBinding", 43, -1 },           // XXX: Add to xml
+   { "glShaderStorageBlockBinding", 43, -1 },
 // { "glTextureBufferRangeEXT", 43, -1 },               // XXX: Add to xml
    { "glTexStorage2DMultisample", 43, -1 },
    { "glTexStorage3DMultisample", 43, -1 },
-- 
cgit v1.2.3


From 39d4b553a87228fd93bfdd7100d0687e55aec4b7 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Wed, 9 Sep 2015 20:23:04 -0700
Subject: i965: Fix BRW_VARYING_SLOT_PAD handling in the scalar VS backend.

We can't just break for padding slots.  Instead, treat them like
unwritten output variables, so we handle flushing and incrementing
urb_offset correctly.

Paul introduced the concept of padding slots back in 2011, but we've
never actually used them for anything.  So it's unsurprising that the
scalar VS backend didn't handle them quite right.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 5bf6449fa13..47d7ae4f57c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -939,9 +939,6 @@ fs_visitor::emit_urb_writes()
          unreachable("unexpected scalar vs output");
          break;
 
-      case BRW_VARYING_SLOT_PAD:
-         break;
-
       default:
          /* gl_Position is always in the vue map, but isn't always written by
           * the shader.  Other varyings (clip distances) get added to the vue
@@ -951,7 +948,8 @@ fs_visitor::emit_urb_writes()
           * slot for writing we flush a mlen 5 urb write, otherwise we just
           * advance the urb_offset.
           */
-         if (this->outputs[varying].file == BAD_FILE) {
+         if (varying == BRW_VARYING_SLOT_PAD ||
+             this->outputs[varying].file == BAD_FILE) {
             if (length > 0)
                flush = true;
             else
-- 
cgit v1.2.3


From 268008f98c3810b9f276df985dc93efc0c49f33e Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Wed, 9 Sep 2015 20:21:59 -0700
Subject: i965: Initialize unused VUE map slots to BRW_VARYING_SLOT_PAD.

Nothing actually relies on unused slots being initialized to
BRW_VARYING_SLOT_COUNT.  Soon, we're going to have VUE maps with holes
in them, at which point pre-filling with BRW_VARYING_SLOT_PAD makes a lot
more sense.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_vue_map.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vue_map.c b/src/mesa/drivers/dri/i965/brw_vue_map.c
index 76875789ba8..9d828289afc 100644
--- a/src/mesa/drivers/dri/i965/brw_vue_map.c
+++ b/src/mesa/drivers/dri/i965/brw_vue_map.c
@@ -80,7 +80,7 @@ brw_compute_vue_map(const struct brw_device_info *devinfo,
    vue_map->num_slots = 0;
    for (i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) {
       vue_map->varying_to_slot[i] = -1;
-      vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_COUNT;
+      vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_PAD;
    }
 
    /* VUE header: format depends on chip generation and whether clipping is
-- 
cgit v1.2.3


From 1e5180316cd62c8c8aa46399ab6a17ad9604d48f Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Wed, 9 Sep 2015 16:09:35 -0700
Subject: i965/vue: Make assign_vue_slot() take an explicit slot.

Our plan of assigning consecutive slots doesn't work properly for
separate shader objects - at least, if we want to avoid recompiling them
whenever the interface changes.

As a first step, make assign_vue_slot take an explicit slot parameter,
rather than implicitly incrementing it.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_vue_map.c | 35 ++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 16 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vue_map.c b/src/mesa/drivers/dri/i965/brw_vue_map.c
index 9d828289afc..1ef52143cc5 100644
--- a/src/mesa/drivers/dri/i965/brw_vue_map.c
+++ b/src/mesa/drivers/dri/i965/brw_vue_map.c
@@ -44,13 +44,13 @@
 #include "brw_context.h"
 
 static inline void
-assign_vue_slot(struct brw_vue_map *vue_map, int varying)
+assign_vue_slot(struct brw_vue_map *vue_map, int varying, int slot)
 {
    /* Make sure this varying hasn't been assigned a slot already */
    assert (vue_map->varying_to_slot[varying] == -1);
 
-   vue_map->varying_to_slot[varying] = vue_map->num_slots;
-   vue_map->slot_to_varying[vue_map->num_slots++] = varying;
+   vue_map->varying_to_slot[varying] = slot;
+   vue_map->slot_to_varying[slot] = varying;
 }
 
 /**
@@ -77,12 +77,13 @@ brw_compute_vue_map(const struct brw_device_info *devinfo,
     */
    STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 127);
 
-   vue_map->num_slots = 0;
    for (i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) {
       vue_map->varying_to_slot[i] = -1;
       vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_PAD;
    }
 
+   int slot = 0;
+
    /* VUE header: format depends on chip generation and whether clipping is
     * enabled.
     *
@@ -98,9 +99,9 @@ brw_compute_vue_map(const struct brw_device_info *devinfo,
        * On Ironlake the VUE header is nominally 20 dwords, but the hardware
        * will accept the same header layout as Gen4 [and should be a bit faster]
        */
-      assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
-      assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC);
-      assign_vue_slot(vue_map, VARYING_SLOT_POS);
+      assign_vue_slot(vue_map, VARYING_SLOT_PSIZ, slot++);
+      assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC, slot++);
+      assign_vue_slot(vue_map, VARYING_SLOT_POS, slot++);
    } else {
       /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
        * dword 0-3 of the header is indices, point width, clip flags.
@@ -109,25 +110,25 @@ brw_compute_vue_map(const struct brw_device_info *devinfo,
        * enabled.
        * dword 8-11 or 16-19 is the first vertex element data we fill.
        */
-      assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
-      assign_vue_slot(vue_map, VARYING_SLOT_POS);
+      assign_vue_slot(vue_map, VARYING_SLOT_PSIZ, slot++);
+      assign_vue_slot(vue_map, VARYING_SLOT_POS, slot++);
       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0))
-         assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0);
+         assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0, slot++);
       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1))
-         assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1);
+         assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1, slot++);
 
       /* front and back colors need to be consecutive so that we can use
        * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing
        * two-sided color.
        */
       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL0))
-         assign_vue_slot(vue_map, VARYING_SLOT_COL0);
+         assign_vue_slot(vue_map, VARYING_SLOT_COL0, slot++);
       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC0))
-         assign_vue_slot(vue_map, VARYING_SLOT_BFC0);
+         assign_vue_slot(vue_map, VARYING_SLOT_BFC0, slot++);
       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL1))
-         assign_vue_slot(vue_map, VARYING_SLOT_COL1);
+         assign_vue_slot(vue_map, VARYING_SLOT_COL1, slot++);
       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC1))
-         assign_vue_slot(vue_map, VARYING_SLOT_BFC1);
+         assign_vue_slot(vue_map, VARYING_SLOT_BFC1, slot++);
    }
 
    /* The hardware doesn't care about the rest of the vertex outputs, so just
@@ -142,7 +143,9 @@ brw_compute_vue_map(const struct brw_device_info *devinfo,
    for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
       if ((slots_valid & BITFIELD64_BIT(i)) &&
           vue_map->varying_to_slot[i] == -1) {
-         assign_vue_slot(vue_map, i);
+         assign_vue_slot(vue_map, i, slot++);
       }
    }
+
+   vue_map->num_slots = slot;
 }
-- 
cgit v1.2.3


From 99df02ca26f6127c8fa24d38a8a069ac6159356a Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Wed, 9 Sep 2015 16:21:56 -0700
Subject: i965: Don't re-layout varyings for separate shader programs.

Previously, our VUE map code always assigned slots to varyings
sequentially, in one contiguous block.

This was a bad fit for separate shaders - the GS input layout depended
on the VS output layout, so if we swapped out vertex shaders, we might
have to recompile the GS on the fly - which rather defeats the point of
using separate shader objects.  (Tessellation would suffer from this
as well - we could have to recompile the HS, DS, and GS.)

Instead, this patch makes the VUE map for separate shaders use a fixed
layout, based on the input/output variable's location field.  (This is
either specified by layout(location = ...) or assigned by the linker.)
Corresponding inputs/outputs will match up by location; if there's a
mismatch, we're allowed to have undefined behavior.

This may be less efficient - depending what locations were chosen, we
may have empty padding slots in the VUE.  But applications presumably
use small consecutive integers for locations, so it hopefully won't be
much worse in practice.

3% of Dota 2 Reborn shaders are hurt, but only by 2 instructions.
This seems like a small price to pay for avoiding recompiles.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_context.h | 14 ++++++++-
 src/mesa/drivers/dri/i965/brw_fs.cpp    |  3 +-
 src/mesa/drivers/dri/i965/brw_gs.c      | 10 +++++--
 src/mesa/drivers/dri/i965/brw_vs.c      |  7 +++--
 src/mesa/drivers/dri/i965/brw_vue_map.c | 51 ++++++++++++++++++++++++++-------
 5 files changed, 67 insertions(+), 18 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 144d3e327d4..a7b612ad545 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -540,6 +540,17 @@ struct brw_vue_map {
     */
    GLbitfield64 slots_valid;
 
+   /**
+    * Is this VUE map for a separate shader pipeline?
+    *
+    * Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched
+    * without the linker having a chance to dead code eliminate unused varyings.
+    *
+    * This means that we have to use a fixed slot layout, based on the output's
+    * location field, rather than assigning slots in a compact contiguous block.
+    */
+   bool separate;
+
    /**
     * Map from gl_varying_slot value to VUE slot.  For gl_varying_slots that are
     * not stored in a slot (because they are not written, or because
@@ -585,7 +596,8 @@ static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map,
 
 void brw_compute_vue_map(const struct brw_device_info *devinfo,
                          struct brw_vue_map *vue_map,
-                         GLbitfield64 slots_valid);
+                         GLbitfield64 slots_valid,
+                         bool separate_shader);
 
 
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index a8f5520fb94..49dc7f65b48 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1440,7 +1440,8 @@ fs_visitor::calculate_urb_setup()
           */
          struct brw_vue_map prev_stage_vue_map;
          brw_compute_vue_map(devinfo, &prev_stage_vue_map,
-                             key->input_slots_valid);
+                             key->input_slots_valid,
+                             shader_prog->SeparateShader);
          int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
          assert(prev_stage_vue_map.num_slots <= first_slot + 32);
          for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 16ea6846285..38b3e3a5cd9 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -120,7 +120,8 @@ brw_codegen_gs_prog(struct brw_context *brw,
    GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
 
    brw_compute_vue_map(brw->intelScreen->devinfo,
-                       &c.prog_data.base.vue_map, outputs_written);
+                       &c.prog_data.base.vue_map, outputs_written,
+                       prog ? prog->SeparateShader : false);
 
    /* Compute the output vertex size.
     *
@@ -243,7 +244,8 @@ brw_codegen_gs_prog(struct brw_context *brw,
       get_hw_prim_for_gl_prim(gp->program.OutputType);
 
    brw_compute_vue_map(brw->intelScreen->devinfo,
-                       &c.input_vue_map, c.key.input_varyings);
+                       &c.input_vue_map, c.key.input_varyings,
+                       prog->SeparateShader);
 
    /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
     * need to program a URB read length of ceiling(num_slots / 2).
@@ -357,7 +359,9 @@ brw_upload_gs_prog(struct brw_context *brw)
    brw->gs.base.prog_data = &brw->gs.prog_data->base.base;
 
    if (brw->gs.prog_data->base.vue_map.slots_valid !=
-       brw->vue_map_geom_out.slots_valid) {
+       brw->vue_map_geom_out.slots_valid ||
+       brw->gs.prog_data->base.vue_map.separate !=
+       brw->vue_map_geom_out.separate) {
       brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
       brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
    }
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 465e78f4c74..b1ec9637c32 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -180,7 +180,8 @@ brw_codegen_vs_prog(struct brw_context *brw,
    }
 
    brw_compute_vue_map(brw->intelScreen->devinfo,
-                       &prog_data.base.vue_map, outputs_written);
+                       &prog_data.base.vue_map, outputs_written,
+                       prog ? prog->SeparateShader : false);
 
    if (0) {
       _mesa_fprint_program_opt(stderr, &vp->program.Base, PROG_PRINT_DEBUG,
@@ -388,7 +389,9 @@ brw_upload_vs_prog(struct brw_context *brw)
    brw->vs.base.prog_data = &brw->vs.prog_data->base.base;
 
    if (brw->vs.prog_data->base.vue_map.slots_valid !=
-       brw->vue_map_geom_out.slots_valid) {
+       brw->vue_map_geom_out.slots_valid ||
+       brw->vs.prog_data->base.vue_map.separate !=
+       brw->vue_map_geom_out.separate) {
       brw->vue_map_vs = brw->vs.prog_data->base.vue_map;
       brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_VS;
       if (brw->gen < 6) {
diff --git a/src/mesa/drivers/dri/i965/brw_vue_map.c b/src/mesa/drivers/dri/i965/brw_vue_map.c
index 1ef52143cc5..45662bd5afc 100644
--- a/src/mesa/drivers/dri/i965/brw_vue_map.c
+++ b/src/mesa/drivers/dri/i965/brw_vue_map.c
@@ -59,10 +59,18 @@ assign_vue_slot(struct brw_vue_map *vue_map, int varying, int slot)
 void
 brw_compute_vue_map(const struct brw_device_info *devinfo,
                     struct brw_vue_map *vue_map,
-                    GLbitfield64 slots_valid)
+                    GLbitfield64 slots_valid,
+                    bool separate)
 {
+   /* Keep using the packed/contiguous layout on old hardware - we only need
+    * the SSO layout when using geometry/tessellation shaders or 32 FS input
+    * varyings, which only exist on Gen >= 6.  It's also a bit more efficient.
+    */
+   if (devinfo->gen < 6)
+      separate = false;
+
    vue_map->slots_valid = slots_valid;
-   int i;
+   vue_map->separate = separate;
 
    /* gl_Layer and gl_ViewportIndex don't get their own varying slots -- they
     * are stored in the first VUE slot (VARYING_SLOT_PSIZ).
@@ -77,7 +85,7 @@ brw_compute_vue_map(const struct brw_device_info *devinfo,
     */
    STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 127);
 
-   for (i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) {
+   for (int i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) {
       vue_map->varying_to_slot[i] = -1;
       vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_PAD;
    }
@@ -131,21 +139,42 @@ brw_compute_vue_map(const struct brw_device_info *devinfo,
          assign_vue_slot(vue_map, VARYING_SLOT_BFC1, slot++);
    }
 
-   /* The hardware doesn't care about the rest of the vertex outputs, so just
-    * assign them contiguously.  Don't reassign outputs that already have a
-    * slot.
+   /* The hardware doesn't care about the rest of the vertex outputs, so we
+    * can assign them however we like.  For normal programs, we simply assign
+    * them contiguously.
+    *
+    * For separate shader pipelines, we first assign built-in varyings
+    * contiguous slots.  This works because ARB_separate_shader_objects
+    * requires that all shaders have matching built-in varying interface
+    * blocks.  Next, we assign generic varyings based on their location
+    * (either explicit or linker assigned).  This guarantees a fixed layout.
     *
     * We generally don't need to assign a slot for VARYING_SLOT_CLIP_VERTEX,
     * since it's encoded as the clip distances by emit_clip_distances().
     * However, it may be output by transform feedback, and we'd rather not
     * recompute state when TF changes, so we just always include it.
     */
-   for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
-      if ((slots_valid & BITFIELD64_BIT(i)) &&
-          vue_map->varying_to_slot[i] == -1) {
-         assign_vue_slot(vue_map, i, slot++);
+   GLbitfield64 builtins = slots_valid & BITFIELD64_MASK(VARYING_SLOT_VAR0);
+   while (builtins != 0) {
+      const int varying = ffsll(builtins) - 1;
+      if (vue_map->varying_to_slot[varying] == -1) {
+         assign_vue_slot(vue_map, varying, slot++);
+      }
+      builtins &= ~BITFIELD64_BIT(varying);
+   }
+
+   const int first_generic_slot = slot;
+   GLbitfield64 generics = slots_valid & ~BITFIELD64_MASK(VARYING_SLOT_VAR0);
+   while (generics != 0) {
+      const int varying = ffsll(generics) - 1;
+      if (separate) {
+         slot = first_generic_slot + varying - VARYING_SLOT_VAR0;
+         assign_vue_slot(vue_map, varying, slot);
+      } else {
+         assign_vue_slot(vue_map, varying, slot++);
       }
+      generics &= ~BITFIELD64_BIT(varying);
    }
 
-   vue_map->num_slots = slot;
+   vue_map->num_slots = separate ? slot + 1 : slot;
 }
-- 
cgit v1.2.3


From 6301af22bb80b2c177539074e3b2c68e65c15d41 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Sat, 29 Aug 2015 00:01:15 -0700
Subject: i965/gs: Remove the dependency on the VS VUE map.

Because we only support geometry shaders in core profile, we can safely
ignore any driver-extending of VS outputs.

Those are:
- Legacy userclipping (doesn't exist in core profile)
- Edgeflag copying (Gen4-5 only, no GS support)
- Point coord replacement (Gen4-5 only, no GS support)
- front/back color hacks (Gen4-5 only, no GS support)

v2: Rebase; leave a comment about why SSO works.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_gs.c      | 23 ++++++++++++++---------
 src/mesa/drivers/dri/i965/brw_program.h |  2 --
 2 files changed, 14 insertions(+), 11 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 38b3e3a5cd9..77be9d9beda 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -243,8 +243,21 @@ brw_codegen_gs_prog(struct brw_context *brw,
    c.prog_data.output_topology =
       get_hw_prim_for_gl_prim(gp->program.OutputType);
 
+   /* The GLSL linker will have already matched up GS inputs and the outputs
+    * of prior stages.  The driver does extend VS outputs in some cases, but
+    * only for legacy OpenGL or Gen4-5 hardware, neither of which offer
+    * geometry shader support.  So we can safely ignore that.
+    *
+    * For SSO pipelines, we use a fixed VUE map layout based on variable
+    * locations, so we can rely on rendezvous-by-location making this work.
+    *
+    * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
+    * written by previous stages and shows up via payload magic.
+    */
+   GLbitfield64 inputs_read =
+      gp->program.Base.InputsRead & ~VARYING_BIT_PRIMITIVE_ID;
    brw_compute_vue_map(brw->intelScreen->devinfo,
-                       &c.input_vue_map, c.key.input_varyings,
+                       &c.input_vue_map, inputs_read,
                        prog->SeparateShader);
 
    /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
@@ -305,9 +318,6 @@ brw_gs_populate_key(struct brw_context *brw,
    /* _NEW_TEXTURE */
    brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
                                       &key->tex);
-
-   /* BRW_NEW_VUE_MAP_VS */
-   key->input_varyings = brw->vue_map_vs.slots_valid;
 }
 
 void
@@ -386,11 +396,6 @@ brw_gs_precompile(struct gl_context *ctx,
    brw_setup_tex_for_precompile(brw, &key.tex, prog);
    key.program_string_id = bgp->id;
 
-   /* Assume that the set of varyings coming in from the vertex shader exactly
-    * matches what the geometry shader requires.
-    */
-   key.input_varyings = gp->Base.InputsRead;
-
    success = brw_codegen_gs_prog(brw, shader_prog, bgp, &key);
 
    brw->gs.base.prog_offset = old_prog_offset;
diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h
index 00e8f3f370e..72d68d869fc 100644
--- a/src/mesa/drivers/dri/i965/brw_program.h
+++ b/src/mesa/drivers/dri/i965/brw_program.h
@@ -118,8 +118,6 @@ struct brw_gs_prog_key
 {
    unsigned program_string_id;
 
-   uint64_t input_varyings;
-
    struct brw_sampler_prog_key_data tex;
 };
 
-- 
cgit v1.2.3


From df221f65e26199a74bc259d3f94e70637b843afa Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Sat, 29 Aug 2015 00:33:10 -0700
Subject: i965: Simplify handling of VUE map changes.

The old code was disastrously complex - spread across multiple atoms
which may not even run, inspecting the dirty bits to try and decide
whether it was necessary to do checks...storing VS information in
brw_context...extra flagging...

This code tripped me and Carl up very badly when working on the
shader cache code.  It's very fragile and hard to maintain.

Now that geometry shaders only depend on their inputs and don't have
to worry about the VS VUE map, we can dramatically simplify this:
just compute the VUE map coming out of the geometry shader stage
in brw_upload_programs.  If it changes, flag it.  Done.

v2: Also check vue_map.separate.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_context.h      | 12 +-----------
 src/mesa/drivers/dri/i965/brw_gs.c           | 16 +---------------
 src/mesa/drivers/dri/i965/brw_state_upload.c | 16 +++++++++++++++-
 src/mesa/drivers/dri/i965/brw_vs.c           | 15 ---------------
 4 files changed, 17 insertions(+), 42 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index a7b612ad545..d36891cd310 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -194,7 +194,6 @@ enum brw_state_id {
    BRW_STATE_GS_CONSTBUF,
    BRW_STATE_PROGRAM_CACHE,
    BRW_STATE_STATE_BASE_ADDRESS,
-   BRW_STATE_VUE_MAP_VS,
    BRW_STATE_VUE_MAP_GEOM_OUT,
    BRW_STATE_TRANSFORM_FEEDBACK,
    BRW_STATE_RASTERIZER_DISCARD,
@@ -276,7 +275,6 @@ enum brw_state_id {
 #define BRW_NEW_GS_CONSTBUF             (1ull << BRW_STATE_GS_CONSTBUF)
 #define BRW_NEW_PROGRAM_CACHE           (1ull << BRW_STATE_PROGRAM_CACHE)
 #define BRW_NEW_STATE_BASE_ADDRESS      (1ull << BRW_STATE_STATE_BASE_ADDRESS)
-#define BRW_NEW_VUE_MAP_VS              (1ull << BRW_STATE_VUE_MAP_VS)
 #define BRW_NEW_VUE_MAP_GEOM_OUT        (1ull << BRW_STATE_VUE_MAP_GEOM_OUT)
 #define BRW_NEW_TRANSFORM_FEEDBACK      (1ull << BRW_STATE_TRANSFORM_FEEDBACK)
 #define BRW_NEW_RASTERIZER_DISCARD      (1ull << BRW_STATE_RASTERIZER_DISCARD)
@@ -1374,17 +1372,9 @@ struct brw_context
       GLuint curbe_offset;
    } curbe;
 
-   /**
-    * Layout of vertex data exiting the vertex shader.
-    *
-    * BRW_NEW_VUE_MAP_VS is flagged when this VUE map changes.
-    */
-   struct brw_vue_map vue_map_vs;
-
    /**
     * Layout of vertex data exiting the geometry portion of the pipleine.
-    * This comes from the geometry shader if one exists, otherwise from the
-    * vertex shader.
+    * This comes from the last enabled shader stage (GS, DS, or VS).
     *
     * BRW_NEW_VUE_MAP_GEOM_OUT is flagged when the VUE map changes.
     */
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 77be9d9beda..1f219c0eac6 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -297,8 +297,7 @@ brw_gs_state_dirty(struct brw_context *brw)
    return brw_state_dirty(brw,
                           _NEW_TEXTURE,
                           BRW_NEW_GEOMETRY_PROGRAM |
-                          BRW_NEW_TRANSFORM_FEEDBACK |
-                          BRW_NEW_VUE_MAP_VS);
+                          BRW_NEW_TRANSFORM_FEEDBACK);
 }
 
 static void
@@ -336,11 +335,6 @@ brw_upload_gs_prog(struct brw_context *brw)
 
    if (gp == NULL) {
       /* No geometry shader.  Vertex data just passes straight through. */
-      if (brw->ctx.NewDriverState & BRW_NEW_VUE_MAP_VS) {
-         brw->vue_map_geom_out = brw->vue_map_vs;
-         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
-      }
-
       if (brw->gen == 6 &&
           (brw->ctx.NewDriverState & BRW_NEW_TRANSFORM_FEEDBACK)) {
          gen6_brw_upload_ff_gs_prog(brw);
@@ -367,14 +361,6 @@ brw_upload_gs_prog(struct brw_context *brw)
       (void)success;
    }
    brw->gs.base.prog_data = &brw->gs.prog_data->base.base;
-
-   if (brw->gs.prog_data->base.vue_map.slots_valid !=
-       brw->vue_map_geom_out.slots_valid ||
-       brw->gs.prog_data->base.vue_map.separate !=
-       brw->vue_map_geom_out.separate) {
-      brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
-      brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
-   }
 }
 
 bool
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index bf06ed38bd7..2e8a0b3de9b 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -594,7 +594,6 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
    DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
    DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
-   DEFINE_BIT(BRW_NEW_VUE_MAP_VS),
    DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
    DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
    DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
@@ -649,6 +648,21 @@ brw_upload_programs(struct brw_context *brw,
       else
          brw_upload_gs_prog(brw);
 
+      /* Update the VUE map for data exiting the GS stage of the pipeline.
+       * This comes from the last enabled shader stage.
+       */
+      GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
+      bool old_separate = brw->vue_map_geom_out.separate;
+      if (brw->geometry_program)
+         brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
+      else
+         brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;
+
+      /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
+      if (old_slots != brw->vue_map_geom_out.slots_valid ||
+          old_separate != brw->vue_map_geom_out.separate)
+         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
+
       brw_upload_wm_prog(brw);
    } else if (pipeline == BRW_COMPUTE_PIPELINE) {
       brw_upload_cs_prog(brw);
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index b1ec9637c32..0c60bde511e 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -387,21 +387,6 @@ brw_upload_vs_prog(struct brw_context *brw)
       assert(success);
    }
    brw->vs.base.prog_data = &brw->vs.prog_data->base.base;
-
-   if (brw->vs.prog_data->base.vue_map.slots_valid !=
-       brw->vue_map_geom_out.slots_valid ||
-       brw->vs.prog_data->base.vue_map.separate !=
-       brw->vue_map_geom_out.separate) {
-      brw->vue_map_vs = brw->vs.prog_data->base.vue_map;
-      brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_VS;
-      if (brw->gen < 6) {
-         /* No geometry shader support, so the VS VUE map is the VUE map for
-          * the output of the "geometry" portion of the pipeline.
-          */
-         brw->vue_map_geom_out = brw->vue_map_vs;
-         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
-      }
-   }
 }
 
 bool
-- 
cgit v1.2.3


From 02530c5dc5dc88078f41fb134c7e0e3833c9f772 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 24 Sep 2015 17:01:23 -0700
Subject: nir: Add a function to count the number of vertices a GS emits.

Some hardware (such as Broadwell) can run geometry shaders more
efficiently when the number of vertices emitted is statically known.

This pass provides a way to obtain the constant vertex count, or
-1 indicating that the vertex count is unknown/non-constant.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
---
 src/glsl/Makefile.sources            |  1 +
 src/glsl/nir/nir.h                   |  2 +
 src/glsl/nir/nir_gs_count_vertices.c | 93 ++++++++++++++++++++++++++++++++++++
 3 files changed, 96 insertions(+)
 create mode 100644 src/glsl/nir/nir_gs_count_vertices.c

(limited to 'src')

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index a8f4994cf34..32b6dba2e91 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -30,6 +30,7 @@ NIR_FILES = \
 	nir/nir_control_flow_private.h \
 	nir/nir_dominance.c \
 	nir/nir_from_ssa.c \
+	nir/nir_gs_count_vertices.c \
 	nir/nir_intrinsics.c \
 	nir/nir_intrinsics.h \
 	nir/nir_live_variables.c \
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 4f45770e02d..d0c7b04a49f 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1804,6 +1804,8 @@ void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);
 void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
 void nir_dump_cfg(nir_shader *shader, FILE *fp);
 
+int nir_gs_count_vertices(nir_shader *shader);
+
 bool nir_split_var_copies(nir_shader *shader);
 
 void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx);
diff --git a/src/glsl/nir/nir_gs_count_vertices.c b/src/glsl/nir/nir_gs_count_vertices.c
new file mode 100644
index 00000000000..e0bdf170d22
--- /dev/null
+++ b/src/glsl/nir/nir_gs_count_vertices.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+static nir_intrinsic_instr *
+as_intrinsic(nir_instr *instr, nir_intrinsic_op op)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return NULL;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   if (intrin->intrinsic != op)
+      return NULL;
+
+   return intrin;
+}
+
+static nir_intrinsic_instr *
+as_set_vertex_count(nir_instr *instr)
+{
+   return as_intrinsic(instr, nir_intrinsic_set_vertex_count);
+}
+
+/**
+ * If a geometry shader emits a constant number of vertices, return the
+ * number of vertices.  Otherwise, return -1 (unknown).
+ *
+ * This only works if you've used nir_lower_gs_intrinsics() to do vertex
+ * counting at the NIR level.
+ */
+int
+nir_gs_count_vertices(nir_shader *shader)
+{
+   int count = -1;
+
+   nir_foreach_overload(shader, overload) {
+      if (!overload->impl)
+         continue;
+
+      /* set_vertex_count intrinsics only appear in predecessors of the
+       * end block.  So we don't need to walk all of them.
+       */
+      struct set_entry *entry;
+      set_foreach(overload->impl->end_block->predecessors, entry) {
+         nir_block *block = (nir_block *) entry->key;
+
+         nir_foreach_instr_reverse(block, instr) {
+            nir_intrinsic_instr *intrin = as_set_vertex_count(instr);
+            if (!intrin)
+               continue;
+
+            nir_const_value *val = nir_src_as_const_value(intrin->src[0]);
+            /* We've found a non-constant value.  Bail. */
+            if (!val)
+               return -1;
+
+            if (count == -1)
+               count = val->i[0];
+
+            /* We've found contradictory set_vertex_count intrinsics.
+             * This can happen if there are early-returns in main() and
+             * different paths emit different numbers of vertices.
+             */
+            if (count != val->i[0])
+               return -1;
+         }
+      }
+   }
+
+   return count;
+}
-- 
cgit v1.2.3


From bcef2abad7cf255b6ac112b9ebf0ff75e491c968 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 24 Sep 2015 20:58:05 -0700
Subject: i965: Move GS_THREAD_END mlen calculations out of the generator.

The visitor was setting a mlen that was wrong for Broadwell, but the
generator was ignoring it and doing the right thing regardless.  We may
as well move the logic fully into the visitor.  This will be useful in
the next commit as well.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
---
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp  | 2 +-
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index c7546070bf9..50f96632f7f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -505,7 +505,7 @@ vec4_generator::generate_gs_thread_end(vec4_instruction *inst)
                  inst->base_mrf, /* starting mrf reg nr */
                  src,
                  BRW_URB_WRITE_EOT | inst->urb_write_flags,
-                 devinfo->gen >= 8 ? 2 : 1,/* message len */
+                 inst->mlen,
                  0,              /* response len */
                  0,              /* urb destination offset */
                  BRW_URB_SWIZZLE_INTERLEAVE);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 3cb1b4c8793..ff5bd989cc3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -244,7 +244,7 @@ vec4_gs_visitor::emit_thread_end()
       emit_shader_time_end();
    inst = emit(GS_OPCODE_THREAD_END);
    inst->base_mrf = base_mrf;
-   inst->mlen = 1;
+   inst->mlen = devinfo->gen >= 8 ? 2 : 1;
 }
 
 
-- 
cgit v1.2.3


From f0a618ee7c26a3dd54292fbc2bfd914b0d680ed9 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 24 Sep 2015 18:21:59 -0700
Subject: i965: Implement "Static Vertex Count" geometry shader optimization.

Broadwell's 3DSTATE_GS contains new "Static Output" and "Static Vertex
Count" fields, which control a new optimization.  Normally, geometry
shaders can output arbitrary numbers of vertices, which means that
resource allocation has to be done on the fly.  However, if the number
of vertices is statically known, the hardware can pre-allocate resources
up front, which is more efficient.

Thanks to the new NIR GS intrinsics, this is easy.  We just call the
function introduced in the previous commit to get the vertex count.
If it obtains a count, we stop emitting the extra 32-bit "Vertex Count"
field in the VUE, and instead fill out the 3DSTATE_GS fields.

Improves performance of Gl32GSCloth by 5.16347% +/- 0.12611% (n=91)
on my Lenovo X250 laptop (Broadwell GT2) at 1024x768.

shader-db statistics for geometry shaders only:

total instructions in shared programs: 3227 -> 3207 (-0.62%)
instructions in affected programs:     242 -> 222 (-8.26%)
helped:                                10

v2: Don't break non-NIR paths (just skip this optimization).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
---
 src/mesa/drivers/dri/i965/brw_context.h           |  5 +++++
 src/mesa/drivers/dri/i965/brw_defines.h           |  5 +++++
 src/mesa/drivers/dri/i965/brw_gs.c                |  5 +++++
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 11 +++++++----
 src/mesa/drivers/dri/i965/gen8_gs_state.c         |  6 ++++++
 5 files changed, 28 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index d36891cd310..27e80ea3bd6 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -792,6 +792,11 @@ struct brw_gs_prog_data
 
    bool include_primitive_id;
 
+   /**
+    * The number of vertices emitted, if constant - otherwise -1.
+    */
+   int static_vertex_count;
+
    int invocations;
 
    /**
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 7045d2ce043..393f17ac98c 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1964,6 +1964,11 @@ enum brw_message_target {
 # define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK		INTEL_MASK(25, 16)
 # define GEN6_GS_ENABLE					(1 << 15)
 
+/* Gen8+ DW8 */
+# define GEN8_GS_STATIC_OUTPUT                          (1 << 30)
+# define GEN8_GS_STATIC_VERTEX_COUNT_SHIFT              16
+# define GEN8_GS_STATIC_VERTEX_COUNT_MASK               INTEL_MASK(26, 16)
+
 /* Gen8+ DW9 */
 # define GEN8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT          21
 # define GEN8_GS_URB_OUTPUT_LENGTH_SHIFT                16
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 1f219c0eac6..0de36cc9af1 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -73,6 +73,11 @@ brw_codegen_gs_prog(struct brw_context *brw,
    c.prog_data.base.base.nr_params = param_count;
    c.prog_data.base.base.nr_image_params = gs->NumImages;
 
+   if (brw->gen >= 8) {
+      c.prog_data.static_vertex_count = !gp->program.Base.nir ? -1 :
+         nir_gs_count_vertices(gp->program.Base.nir);
+   }
+
    if (brw->gen >= 7) {
       if (gp->program.OutputType == GL_POINTS) {
          /* When the output type is points, the geometry shader may output data
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index ff5bd989cc3..acf0501399c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -234,17 +234,20 @@ vec4_gs_visitor::emit_thread_end()
     */
    int base_mrf = 1;
 
+   bool static_vertex_count = c->prog_data.static_vertex_count != -1;
+
    current_annotation = "thread end";
    dst_reg mrf_reg(MRF, base_mrf);
    src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
    vec4_instruction *inst = emit(MOV(mrf_reg, r0));
    inst->force_writemask_all = true;
-   emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
+   if (devinfo->gen < 8 || !static_vertex_count)
+      emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
       emit_shader_time_end();
    inst = emit(GS_OPCODE_THREAD_END);
    inst->base_mrf = base_mrf;
-   inst->mlen = devinfo->gen >= 8 ? 2 : 1;
+   inst->mlen = devinfo->gen >= 8 && !static_vertex_count ? 2 : 1;
 }
 
 
@@ -284,7 +287,7 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete)
    /* We need to increment Global Offset by 1 to make room for Broadwell's
     * extra "Vertex Count" payload at the beginning of the URB entry.
     */
-   if (devinfo->gen >= 8)
+   if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
       inst->offset++;
 
    inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
@@ -421,7 +424,7 @@ vec4_gs_visitor::emit_control_data_bits()
     * URB entry.  Since this is an OWord message, Global Offset is counted
     * in 128-bit units, so we must set it to 2.
     */
-   if (devinfo->gen >= 8)
+   if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
       inst->offset = 2;
    inst->base_mrf = base_mrf;
    inst->mlen = 2;
diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c
index 81bd3b21778..4195f4cf4a7 100644
--- a/src/mesa/drivers/dri/i965/gen8_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c
@@ -90,6 +90,12 @@ gen8_upload_gs_state(struct brw_context *brw)
       uint32_t dw8 = brw->gs.prog_data->control_data_format <<
                      HSW_GS_CONTROL_DATA_FORMAT_SHIFT;
 
+      if (brw->gs.prog_data->static_vertex_count != -1) {
+         dw8 |= GEN8_GS_STATIC_OUTPUT |
+                SET_FIELD(brw->gs.prog_data->static_vertex_count,
+                          GEN8_GS_STATIC_VERTEX_COUNT);
+      }
+
       if (brw->gen < 9)
          dw7 |= (brw->max_gs_threads / 2 - 1) << HSW_GS_MAX_THREADS_SHIFT;
       else
-- 
cgit v1.2.3


From 08fe5799e61e9251dec163d000709ff33434216d Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 24 Sep 2015 23:47:29 -0700
Subject: i965/gs: Allow src0 immediates in GS_OPCODE_SET_WRITE_OFFSET.

GS_OPCODE_SET_WRITE_OFFSET is a MUL with a constant src[1] and special
strides.  We can easily make the generator handle constant src[0]
arguments by instead generating a MOV with the product of both operands.

This isn't necessarily a win in and of itself - instead of a MUL, we
generate a MOV, which should be basically the same cost.  However, we
can probably avoid the earlier MOV to put src[0] into a register.

shader-db statistics for geometry shaders only:

total instructions in shared programs: 3207 -> 3173 (-1.06%)
instructions in affected programs:     3207 -> 3173 (-1.06%)
helped:                                11

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp | 7 +++++++
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp        | 9 +++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 5b6444e3210..610caef7dce 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -202,6 +202,13 @@ try_constant_propagate(const struct brw_device_info *devinfo,
 	 return true;
       }
       break;
+   case GS_OPCODE_SET_WRITE_OFFSET:
+      /* This is just a multiply by a constant with special strides.
+       * The generator will handle immediates in both arguments (generating
+       * a single MOV of the product).  So feel free to propagate in src0.
+       */
+      inst->src[arg] = value;
+      return true;
 
    case BRW_OPCODE_CMP:
       if (arg == 1) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 50f96632f7f..dcacc900540 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -541,8 +541,13 @@ vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
           src1.file == BRW_IMMEDIATE_VALUE &&
           src1.type == BRW_REGISTER_TYPE_UD &&
           src1.dw1.ud <= USHRT_MAX);
-   brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
-           retype(src1, BRW_REGISTER_TYPE_UW));
+   if (src0.file == IMM) {
+      brw_MOV(p, suboffset(stride(dst, 2, 2, 1), 3),
+              brw_imm_ud(src0.dw1.ud * src1.dw1.ud));
+   } else {
+      brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
+              retype(src1, BRW_REGISTER_TYPE_UW));
+   }
    brw_set_default_access_mode(p, BRW_ALIGN_16);
    brw_pop_insn_state(p);
 }
-- 
cgit v1.2.3


From d6a41b5f70b9071cca8959afab66a6504e7cb7ce Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 25 Sep 2015 08:21:57 -0700
Subject: i965/gs: Optimize away the EOT write on Gen8+ with static vertex
 count.

With static vertex counts, the final EOT write doesn't actually write
any data - it's just there to end the thread.  Typically, the last
thing before ending the thread will be an EmitVertex() call, resulting
in a URB write.  We can just set EOT on that.

Note that this isn't always possible - there might be an intervening
SSBO write/image store, or the URB write may have been in a loop.

shader-db statistics for geometry shaders only:

total instructions in shared programs: 3173 -> 3149 (-0.76%)
instructions in affected programs:     176 -> 152 (-13.64%)
helped:                                8

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index acf0501399c..d2edc5782fd 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -236,6 +236,21 @@ vec4_gs_visitor::emit_thread_end()
 
    bool static_vertex_count = c->prog_data.static_vertex_count != -1;
 
+   /* If the previous instruction was a URB write, we don't need to issue
+    * a second one - we can just set the EOT bit on the previous write.
+    *
+    * Skip this on Gen8+ unless there's a static vertex count, as we also
+    * need to write the vertex count out, and combining the two may not be
+    * possible (or at least not straightforward).
+    */
+   vec4_instruction *last = (vec4_instruction *) instructions.get_tail();
+   if (last && last->opcode == GS_OPCODE_URB_WRITE &&
+       !(INTEL_DEBUG & DEBUG_SHADER_TIME) &&
+       devinfo->gen >= 8 && static_vertex_count) {
+      last->urb_write_flags = BRW_URB_WRITE_EOT | last->urb_write_flags;
+      return;
+   }
+
    current_annotation = "thread end";
    dst_reg mrf_reg(MRF, base_mrf);
    src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
-- 
cgit v1.2.3


From e413d2fbc4b37eb552ee42940fb792e504233e51 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <t_arceri@yahoo.com.au>
Date: Mon, 28 Sep 2015 11:03:19 +1000
Subject: glsl: fix component size calculation for tessellation and geom
 shaders

Broken in commit abdab88b30ab when adding arrays of arrays support

Reviewed-by: Dave Airlie <airlied@redhat.com>
---
 src/glsl/link_varyings.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp
index 0d343d64a2e..7e77a675db1 100644
--- a/src/glsl/link_varyings.cpp
+++ b/src/glsl/link_varyings.cpp
@@ -964,7 +964,7 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
          }
          slots *= type->matrix_columns;
       } else {
-         slots = var->type->matrix_columns;
+         slots = type->matrix_columns;
       }
       this->matches[this->num_matches].num_components = 4 * slots;
    } else {
-- 
cgit v1.2.3


From 6dd059fefe2c8c8a5337a646ab8772ebad8f811b Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 26 Sep 2015 13:37:02 -0400
Subject: mesa: don't leak interface_name

Found by Coverity

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
---
 src/mesa/main/shader_query.cpp | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index 99d9e1088d0..e020dce288a 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -995,6 +995,7 @@ program_resource_top_level_array_stride(struct gl_shader_program *shProg,
       }
    }
 found_top_level_array_size:
+   free(interface_name);
    free(var_name);
    return array_stride;
 }
-- 
cgit v1.2.3


From 5bff12ecb403356d7848dc02696c618b68a324f1 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 26 Sep 2015 13:45:28 -0400
Subject: gallium/util: avoid unreferencing random memory on buffer alloc
 failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Found by Coverity

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Albert Freeman <albertwdfreeman@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/auxiliary/util/u_blitter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 2fbf69c67d2..b7b1ece389b 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -2065,7 +2065,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,
    struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
    struct pipe_context *pipe = ctx->base.pipe;
    struct pipe_vertex_buffer vb = {0};
-   struct pipe_stream_output_target *so_target;
+   struct pipe_stream_output_target *so_target = NULL;
    unsigned offsets[PIPE_MAX_SO_BUFFERS] = {0};
 
    assert(num_channels >= 1);
-- 
cgit v1.2.3


From e7ae6d9e141489281295352e43facbc1aeb137d9 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Mon, 28 Sep 2015 13:56:28 +0200
Subject: glsl: revert "glsl: atomic counters can be declared as
 buffer-qualified variables"

This reverts commit 586142658e2927a68c.

The specs are not explicit about any restrictions related to the types allowed
on buffer variables; however, the description of opaque types (like atomic
counters) is in conflict with the purpose of buffer variables:

"The opaque types declare variables that are effectively opaque
 handles to other objects. These objects are
 accessed through built-in functions, not through direct reading or
 writing of the declared variable.
 (...)
 Opaque variables cannot be treated as l-values;(...)"

Also, Mesa is already disallowing opaque types in interface blocks anyway, so
that commit was not really achieving anything.

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
---
 src/glsl/ast_to_hir.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index b8d66dd0609..5e69bccd9a4 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2854,7 +2854,7 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
    }
 
    if (var->type->contains_atomic()) {
-      if (var->data.mode == ir_var_uniform || var->data.mode == ir_var_shader_storage) {
+      if (var->data.mode == ir_var_uniform) {
          if (var->data.explicit_binding) {
             unsigned *offset =
                &state->atomic_counter_offsets[var->data.binding];
@@ -2872,8 +2872,8 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
          }
       } else if (var->data.mode != ir_var_function_in) {
          _mesa_glsl_error(loc, state, "atomic counters may only be declared as "
-                          "function parameters, uniform-qualified or "
-                          "buffer-qualified global variables");
+                          "function parameters or uniform-qualified "
+                          "global variables");
       }
    }
 
-- 
cgit v1.2.3


From 92666b90c0db8bb51fd54bcf22db3d288a5556e7 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Fri, 18 Sep 2015 18:42:55 +0200
Subject: clover: Move down canonicalization of memory object flags into
 validate_flags().

This will be used to share the same logic between buffer and image
creation.

v2: Make memory flag set constants local to validate_flags. (Serge
    Martin)
---
 src/gallium/state_trackers/clover/api/memory.cpp | 79 ++++++++++++------------
 1 file changed, 40 insertions(+), 39 deletions(-)

(limited to 'src')

diff --git a/src/gallium/state_trackers/clover/api/memory.cpp b/src/gallium/state_trackers/clover/api/memory.cpp
index 1efb95b5ce7..ffd1d9d7e3d 100644
--- a/src/gallium/state_trackers/clover/api/memory.cpp
+++ b/src/gallium/state_trackers/clover/api/memory.cpp
@@ -28,37 +28,53 @@
 using namespace clover;
 
 namespace {
-   const cl_mem_flags dev_access_flags =
-      CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY;
-   const cl_mem_flags host_ptr_flags =
-      CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR;
-   const cl_mem_flags host_access_flags =
-      CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
-   const cl_mem_flags all_mem_flags =
-      dev_access_flags | host_ptr_flags | host_access_flags;
-
-   void
-   validate_flags(cl_mem_flags flags, cl_mem_flags valid) {
-      if ((flags & ~valid) ||
-          util_bitcount(flags & dev_access_flags) > 1 ||
-          util_bitcount(flags & host_access_flags) > 1)
+   cl_mem_flags
+   validate_flags(cl_mem d_parent, cl_mem_flags d_flags) {
+      const cl_mem_flags dev_access_flags =
+         CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY;
+      const cl_mem_flags host_ptr_flags =
+         CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR;
+      const cl_mem_flags host_access_flags =
+         CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
+      const cl_mem_flags valid_flags =
+         dev_access_flags | host_access_flags | (d_parent ? 0 : host_ptr_flags);
+
+      if ((d_flags & ~valid_flags) ||
+          util_bitcount(d_flags & dev_access_flags) > 1 ||
+          util_bitcount(d_flags & host_access_flags) > 1)
          throw error(CL_INVALID_VALUE);
 
-      if ((flags & CL_MEM_USE_HOST_PTR) &&
-          (flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)))
+      if ((d_flags & CL_MEM_USE_HOST_PTR) &&
+          (d_flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)))
          throw error(CL_INVALID_VALUE);
+
+      if (d_parent) {
+         const auto &parent = obj(d_parent);
+         const cl_mem_flags flags = (d_flags |
+                                     (d_flags & dev_access_flags ? 0 :
+                                      parent.flags() & dev_access_flags) |
+                                     (d_flags & host_access_flags ? 0 :
+                                      parent.flags() & host_access_flags) |
+                                     (parent.flags() & host_ptr_flags));
+
+         if (~flags & parent.flags() &
+             ((dev_access_flags & ~CL_MEM_READ_WRITE) | host_access_flags))
+            throw error(CL_INVALID_VALUE);
+
+         return flags;
+
+      } else {
+         return d_flags | (d_flags & dev_access_flags ? 0 : CL_MEM_READ_WRITE);
+      }
    }
 }
 
 CLOVER_API cl_mem
 clCreateBuffer(cl_context d_ctx, cl_mem_flags d_flags, size_t size,
                void *host_ptr, cl_int *r_errcode) try {
-   const cl_mem_flags flags = d_flags |
-      (d_flags & dev_access_flags ? 0 : CL_MEM_READ_WRITE);
+   const cl_mem_flags flags = validate_flags(NULL, d_flags);
    auto &ctx = obj(d_ctx);
 
-   validate_flags(d_flags, all_mem_flags);
-
    if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
                                        CL_MEM_COPY_HOST_PTR)))
       throw error(CL_INVALID_HOST_PTR);
@@ -82,16 +98,7 @@ clCreateSubBuffer(cl_mem d_mem, cl_mem_flags d_flags,
                   cl_buffer_create_type op,
                   const void *op_info, cl_int *r_errcode) try {
    auto &parent = obj<root_buffer>(d_mem);
-   const cl_mem_flags flags = d_flags |
-      (d_flags & dev_access_flags ? 0 : parent.flags() & dev_access_flags) |
-      (d_flags & host_access_flags ? 0 : parent.flags() & host_access_flags) |
-      (parent.flags() & host_ptr_flags);
-
-   validate_flags(d_flags, dev_access_flags | host_access_flags);
-
-   if (~flags & parent.flags() &
-       ((dev_access_flags & ~CL_MEM_READ_WRITE) | host_access_flags))
-      throw error(CL_INVALID_VALUE);
+   const cl_mem_flags flags = validate_flags(d_mem, d_flags);
 
    if (op == CL_BUFFER_CREATE_TYPE_REGION) {
       auto reg = reinterpret_cast<const cl_buffer_region *>(op_info);
@@ -121,12 +128,9 @@ clCreateImage2D(cl_context d_ctx, cl_mem_flags d_flags,
                 const cl_image_format *format,
                 size_t width, size_t height, size_t row_pitch,
                 void *host_ptr, cl_int *r_errcode) try {
-   const cl_mem_flags flags = d_flags |
-      (d_flags & dev_access_flags ? 0 : CL_MEM_READ_WRITE);
+   const cl_mem_flags flags = validate_flags(NULL, d_flags);
    auto &ctx = obj(d_ctx);
 
-   validate_flags(d_flags, all_mem_flags);
-
    if (!any_of(std::mem_fn(&device::image_support), ctx.devices()))
       throw error(CL_INVALID_OPERATION);
 
@@ -158,12 +162,9 @@ clCreateImage3D(cl_context d_ctx, cl_mem_flags d_flags,
                 size_t width, size_t height, size_t depth,
                 size_t row_pitch, size_t slice_pitch,
                 void *host_ptr, cl_int *r_errcode) try {
-   const cl_mem_flags flags = d_flags |
-      (d_flags & dev_access_flags ? 0 : CL_MEM_READ_WRITE);
+   const cl_mem_flags flags = validate_flags(NULL, d_flags);
    auto &ctx = obj(d_ctx);
 
-   validate_flags(d_flags, all_mem_flags);
-
    if (!any_of(std::mem_fn(&device::image_support), ctx.devices()))
       throw error(CL_INVALID_OPERATION);
 
@@ -196,7 +197,7 @@ clGetSupportedImageFormats(cl_context d_ctx, cl_mem_flags flags,
    auto &ctx = obj(d_ctx);
    auto formats = supported_formats(ctx, type);
 
-   validate_flags(flags, all_mem_flags);
+   validate_flags(NULL, flags);
 
    if (r_buf && !r_count)
       throw error(CL_INVALID_VALUE);
-- 
cgit v1.2.3


From f2c52e392bdaa9ce9b9075996eb5efafde142030 Mon Sep 17 00:00:00 2001
From: Serge Martin <edb+mesa@sigluy.net>
Date: Sat, 19 Sep 2015 23:16:09 +0200
Subject: clover: Implement CL1.2 clCreateImage().

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
---
 src/gallium/state_trackers/clover/api/memory.cpp | 101 ++++++++++++++++++++---
 1 file changed, 91 insertions(+), 10 deletions(-)

(limited to 'src')

diff --git a/src/gallium/state_trackers/clover/api/memory.cpp b/src/gallium/state_trackers/clover/api/memory.cpp
index ffd1d9d7e3d..de48908d878 100644
--- a/src/gallium/state_trackers/clover/api/memory.cpp
+++ b/src/gallium/state_trackers/clover/api/memory.cpp
@@ -123,6 +123,97 @@ clCreateSubBuffer(cl_mem d_mem, cl_mem_flags d_flags,
    return NULL;
 }
 
+CLOVER_API cl_mem
+clCreateImage(cl_context d_ctx, cl_mem_flags d_flags,
+              const cl_image_format *format,
+              const cl_image_desc *desc,
+              void *host_ptr, cl_int *r_errcode) try {
+   auto &ctx = obj(d_ctx);
+
+   if (!any_of(std::mem_fn(&device::image_support), ctx.devices()))
+      throw error(CL_INVALID_OPERATION);
+
+   if (!format)
+      throw error(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
+
+   if (!desc)
+      throw error(CL_INVALID_IMAGE_DESCRIPTOR);
+
+   if (desc->image_array_size == 0 &&
+       (desc->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
+        desc->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY))
+      throw error(CL_INVALID_IMAGE_DESCRIPTOR);
+
+   if (!host_ptr &&
+       (desc->image_row_pitch || desc->image_slice_pitch))
+      throw error(CL_INVALID_IMAGE_DESCRIPTOR);
+
+   if (desc->num_mip_levels || desc->num_samples)
+      throw error(CL_INVALID_IMAGE_DESCRIPTOR);
+
+   if (bool(desc->buffer) != (desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER))
+      throw error(CL_INVALID_IMAGE_DESCRIPTOR);
+
+   if (bool(host_ptr) != bool(d_flags & (CL_MEM_USE_HOST_PTR |
+                                         CL_MEM_COPY_HOST_PTR)))
+      throw error(CL_INVALID_HOST_PTR);
+
+   const cl_mem_flags flags = validate_flags(desc->buffer, d_flags);
+
+   if (!supported_formats(ctx, desc->image_type).count(*format))
+      throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
+
+   ret_error(r_errcode, CL_SUCCESS);
+
+   switch (desc->image_type) {
+   case CL_MEM_OBJECT_IMAGE2D:
+      if (!desc->image_width || !desc->image_height)
+         throw error(CL_INVALID_IMAGE_SIZE);
+
+      if (all_of([=](const device &dev) {
+               const size_t max = 1 << dev.max_image_levels_2d();
+               return (desc->image_width > max ||
+                       desc->image_height > max);
+            }, ctx.devices()))
+         throw error(CL_INVALID_IMAGE_SIZE);
+
+      return new image2d(ctx, flags, format,
+                         desc->image_width, desc->image_height,
+                         desc->image_row_pitch, host_ptr);
+
+   case CL_MEM_OBJECT_IMAGE3D:
+      if (!desc->image_width || !desc->image_height || !desc->image_depth)
+         throw error(CL_INVALID_IMAGE_SIZE);
+
+      if (all_of([=](const device &dev) {
+               const size_t max = 1 << dev.max_image_levels_3d();
+               return (desc->image_width > max ||
+                       desc->image_height > max ||
+                       desc->image_depth > max);
+            }, ctx.devices()))
+         throw error(CL_INVALID_IMAGE_SIZE);
+
+      return new image3d(ctx, flags, format,
+                         desc->image_width, desc->image_height,
+                         desc->image_depth, desc->image_row_pitch,
+                         desc->image_slice_pitch, host_ptr);
+
+   case CL_MEM_OBJECT_IMAGE1D:
+   case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+   case CL_MEM_OBJECT_IMAGE1D_BUFFER:
+   case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+      // XXX - Not implemented.
+      throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
+
+   default:
+      throw error(CL_INVALID_IMAGE_DESCRIPTOR);
+   }
+
+} catch (error &e) {
+   ret_error(r_errcode, e);
+   return NULL;
+}
+
 CLOVER_API cl_mem
 clCreateImage2D(cl_context d_ctx, cl_mem_flags d_flags,
                 const cl_image_format *format,
@@ -353,16 +444,6 @@ clSetMemObjectDestructorCallback(cl_mem d_mem,
    return e.get();
 }
 
-CLOVER_API cl_mem
-clCreateImage(cl_context d_ctx, cl_mem_flags flags,
-              const cl_image_format *format,
-              const cl_image_desc *image_desc,
-              void *host_ptr, cl_int *r_errcode) {
-   CLOVER_NOT_SUPPORTED_UNTIL("1.2");
-   ret_error(r_errcode, CL_INVALID_OPERATION);
-   return NULL;
-}
-
 CLOVER_API cl_int
 clEnqueueFillBuffer(cl_command_queue command_queue, cl_mem buffer,
                     const void *pattern, size_t pattern_size,
-- 
cgit v1.2.3


From 2518645f63e2f66d3638180f44a007541928319c Mon Sep 17 00:00:00 2001
From: Serge Martin <edb+mesa@sigluy.net>
Date: Sat, 19 Sep 2015 23:16:10 +0200
Subject: clover: Implement clCreateImage?D w/ clCreateImage.

Replace the clCreateImage2D and clCreateImage3D implementations with calls
to clCreateImage.

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
---
 src/gallium/state_trackers/clover/api/memory.cpp | 60 ++++--------------------
 1 file changed, 8 insertions(+), 52 deletions(-)

(limited to 'src')

diff --git a/src/gallium/state_trackers/clover/api/memory.cpp b/src/gallium/state_trackers/clover/api/memory.cpp
index de48908d878..9b3cd8b1f5c 100644
--- a/src/gallium/state_trackers/clover/api/memory.cpp
+++ b/src/gallium/state_trackers/clover/api/memory.cpp
@@ -218,33 +218,11 @@ CLOVER_API cl_mem
 clCreateImage2D(cl_context d_ctx, cl_mem_flags d_flags,
                 const cl_image_format *format,
                 size_t width, size_t height, size_t row_pitch,
-                void *host_ptr, cl_int *r_errcode) try {
-   const cl_mem_flags flags = validate_flags(NULL, d_flags);
-   auto &ctx = obj(d_ctx);
-
-   if (!any_of(std::mem_fn(&device::image_support), ctx.devices()))
-      throw error(CL_INVALID_OPERATION);
-
-   if (!format)
-      throw error(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
-
-   if (width < 1 || height < 1)
-      throw error(CL_INVALID_IMAGE_SIZE);
-
-   if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
-                                       CL_MEM_COPY_HOST_PTR)))
-      throw error(CL_INVALID_HOST_PTR);
-
-   if (!supported_formats(ctx, CL_MEM_OBJECT_IMAGE2D).count(*format))
-      throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
+                void *host_ptr, cl_int *r_errcode) {
+   const cl_image_desc desc = { CL_MEM_OBJECT_IMAGE2D, width, height, 0, 0,
+                                row_pitch, 0, 0, 0, NULL };
 
-   ret_error(r_errcode, CL_SUCCESS);
-   return new image2d(ctx, flags, format, width, height,
-                      row_pitch, host_ptr);
-
-} catch (error &e) {
-   ret_error(r_errcode, e);
-   return NULL;
+   return clCreateImage(d_ctx, d_flags, format, &desc, host_ptr, r_errcode);
 }
 
 CLOVER_API cl_mem
@@ -252,33 +230,11 @@ clCreateImage3D(cl_context d_ctx, cl_mem_flags d_flags,
                 const cl_image_format *format,
                 size_t width, size_t height, size_t depth,
                 size_t row_pitch, size_t slice_pitch,
-                void *host_ptr, cl_int *r_errcode) try {
-   const cl_mem_flags flags = validate_flags(NULL, d_flags);
-   auto &ctx = obj(d_ctx);
-
-   if (!any_of(std::mem_fn(&device::image_support), ctx.devices()))
-      throw error(CL_INVALID_OPERATION);
-
-   if (!format)
-      throw error(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
-
-   if (width < 1 || height < 1 || depth < 2)
-      throw error(CL_INVALID_IMAGE_SIZE);
-
-   if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
-                                       CL_MEM_COPY_HOST_PTR)))
-      throw error(CL_INVALID_HOST_PTR);
-
-   if (!supported_formats(ctx, CL_MEM_OBJECT_IMAGE3D).count(*format))
-      throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
+                void *host_ptr, cl_int *r_errcode) {
+   const cl_image_desc desc = { CL_MEM_OBJECT_IMAGE3D, width, height, depth, 0,
+                                row_pitch, slice_pitch, 0, 0, NULL };
 
-   ret_error(r_errcode, CL_SUCCESS);
-   return new image3d(ctx, flags, format, width, height, depth,
-                      row_pitch, slice_pitch, host_ptr);
-
-} catch (error &e) {
-   ret_error(r_errcode, e);
-   return NULL;
+   return clCreateImage(d_ctx, d_flags, format, &desc, host_ptr, r_errcode);
 }
 
 CLOVER_API cl_int
-- 
cgit v1.2.3


From b61292296bd7e1876fdb64725a783a7e96f6c4c1 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Wed, 26 Aug 2015 21:59:46 +0300
Subject: i965/fs: Fix hang on IVB and VLV with image format mismatch.

IVB and VLV hang sporadically when an untyped surface read or write
message is used to access a surface of format other than RAW, as may
happen when there is a mismatch between the format qualifier of the
image uniform and the format of the actual image bound to the
pipeline.  According to the spec this condition gives undefined
results but may not lead to program termination (which is one of the
possible outcomes of the hang).  Fix it by checking at runtime whether
the surface is of the right type.

Fixes the "arb_shader_image_load_store.invalid/format mismatch" piglit
subtest.

Reported-by: Mark Janes <mark.a.janes@intel.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91718
CC: mesa-stable@lists.freedesktop.org
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 .../drivers/dri/i965/brw_fs_surface_builder.cpp    | 42 +++++++++++++++++++---
 1 file changed, 38 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
index 88f22fa9c7f..534d8490cdf 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
@@ -312,13 +312,43 @@ namespace {
    }
 
    namespace image_validity {
+      /**
+       * Check whether the bound image is suitable for untyped access.
+       */
+      brw_predicate
+      emit_untyped_image_check(const fs_builder &bld, const fs_reg &image,
+                               brw_predicate pred)
+      {
+         const brw_device_info *devinfo = bld.shader->devinfo;
+         const fs_reg stride = offset(image, bld, BRW_IMAGE_PARAM_STRIDE_OFFSET);
+
+         if (devinfo->gen == 7 && !devinfo->is_haswell) {
+            /* Check whether the first stride component (i.e. the Bpp value)
+             * is greater than four, what on Gen7 indicates that a surface of
+             * type RAW has been bound for untyped access.  Reading or writing
+             * to a surface of type other than RAW using untyped surface
+             * messages causes a hang on IVB and VLV.
+             */
+            set_predicate(pred,
+                          bld.CMP(bld.null_reg_ud(), stride, fs_reg(4),
+                                  BRW_CONDITIONAL_G));
+
+            return BRW_PREDICATE_NORMAL;
+         } else {
+            /* More recent generations handle the format mismatch
+             * gracefully.
+             */
+            return pred;
+         }
+      }
+
       /**
        * Check whether there is an image bound at the given index and write
        * the comparison result to f0.0.  Returns an appropriate predication
        * mode to use on subsequent image operations.
        */
       brw_predicate
-      emit_surface_check(const fs_builder &bld, const fs_reg &image)
+      emit_typed_atomic_check(const fs_builder &bld, const fs_reg &image)
       {
          const brw_device_info *devinfo = bld.shader->devinfo;
          const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET);
@@ -895,7 +925,9 @@ namespace brw {
              * surface read on the result,
              */
             const brw_predicate pred =
-               emit_bounds_check(bld, image, saddr, dims);
+               emit_untyped_image_check(bld, image,
+                                        emit_bounds_check(bld, image,
+                                                          saddr, dims));
 
             /* and they don't know about surface coordinates, we need to
              * convert them to a raw memory offset.
@@ -1041,7 +1073,9 @@ namespace brw {
                 * the surface write on the result,
                 */
                const brw_predicate pred =
-                  emit_bounds_check(bld, image, saddr, dims);
+                  emit_untyped_image_check(bld, image,
+                                           emit_bounds_check(bld, image,
+                                                             saddr, dims));
 
                /* and, phew, they don't know about surface coordinates, we
                 * need to convert them to a raw memory offset.
@@ -1072,7 +1106,7 @@ namespace brw {
          using namespace image_coordinates;
          using namespace surface_access;
          /* Avoid performing an atomic operation on an unbound surface. */
-         const brw_predicate pred = emit_surface_check(bld, image);
+         const brw_predicate pred = emit_typed_atomic_check(bld, image);
 
          /* Transform the image coordinates into actual surface coordinates. */
          const fs_reg saddr =
-- 
cgit v1.2.3


From 255a225265bb29fab828e8f0d1b10246718895c2 Mon Sep 17 00:00:00 2001
From: Alexander von Gluck IV <kallisti5@unixzen.com>
Date: Sat, 26 Sep 2015 11:39:09 -0500
Subject: state_trackers/hgl: Fix missing include path

---
 src/gallium/state_trackers/hgl/SConscript | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/gallium/state_trackers/hgl/SConscript b/src/gallium/state_trackers/hgl/SConscript
index 82a0ce0a34e..d2389c80802 100644
--- a/src/gallium/state_trackers/hgl/SConscript
+++ b/src/gallium/state_trackers/hgl/SConscript
@@ -9,6 +9,7 @@ env.Append(CPPPATH = [
     '#/src',
     '#/src/mapi',
     '#/src/mesa',
+    '#/include/HaikuGL',
 ])
 
 sources = [
-- 
cgit v1.2.3


From 7cdd818d2ae82f0cc81ebbc58ae6c95e6d649ba2 Mon Sep 17 00:00:00 2001
From: Alexander von Gluck IV <kallisti5@unixzen.com>
Date: Sat, 26 Sep 2015 11:49:36 -0500
Subject: egl: Fix missing Haiku include path

---
 src/egl/SConscript | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/egl/SConscript b/src/egl/SConscript
index f8102db6f3a..8f8b11a61ce 100644
--- a/src/egl/SConscript
+++ b/src/egl/SConscript
@@ -8,6 +8,7 @@ env = env.Clone()
 
 env.Append(CPPPATH = [
     '#/include',
+    '#/include/HaikuGL',
     '#/src/egl/main',
     '#/src',
 ])
-- 
cgit v1.2.3


From 15ab968f62dd322ecda6d70b1069f52616fe39bb Mon Sep 17 00:00:00 2001
From: Eduardo Lima Mitev <elima@igalia.com>
Date: Thu, 24 Sep 2015 10:57:41 +0200
Subject: mesa: Fix order of format+type and internal format checks for
 glTexImageXD ops

The more specific GLES constraints should be checked after the general
validation performed by _mesa_error_check_format_and_type(). This is also
for consistency with the error-check order of the glTexSubImage ops.

v3: The change of order uncovered a bug that regresses a couple of piglit
tests written against OpenGL-ES 1.1 spec, which expects an INVALID_VALUE
instead of the INVALID_ENUM returned by _mesa_error_check_format_and_type()
when an invalid format is passed to glTexImage2D. This version of the patch
accounts for those cases.

Fixes 1 dEQP test:
* dEQP-GLES3.functional.negative_api.texture.teximage2d

Cc: "11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Tested-by: Mark Janes <mark.a.janes@intel.com>
---
 src/mesa/main/teximage.c | 41 +++++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 16 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 9bc176acf04..a0dbb48a928 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -2164,15 +2164,23 @@ texture_error_check( struct gl_context *ctx,
       return GL_TRUE;
    }
 
-   /* OpenGL ES 1.x and OpenGL ES 2.0 impose additional restrictions on the
-    * combinations of format, internalFormat, and type that can be used.
-    * Formats and types that require additional extensions (e.g., GL_FLOAT
-    * requires GL_OES_texture_float) are filtered elsewhere.
-    */
-   if (_mesa_is_gles(ctx) &&
-       texture_format_error_check_gles(ctx, format, type, internalFormat,
-                                       dimensions, "glTexImage%dD")) {
-     return GL_TRUE;
+   /* Check incoming image format and type */
+   err = _mesa_error_check_format_and_type(ctx, format, type);
+   if (err != GL_NO_ERROR) {
+      /* Prior to OpenGL-ES 2.0, an INVALID_VALUE is expected instead of
+       * INVALID_ENUM. From page 73 OpenGL ES 1.1 spec:
+       *
+       *     "Specifying a value for internalformat that is not one of the
+       *      above (acceptable) values generates the error INVALID VALUE."
+       */
+      if (err == GL_INVALID_ENUM && _mesa_is_gles(ctx) && ctx->Version < 20)
+         err = GL_INVALID_VALUE;
+
+      _mesa_error(ctx, err,
+                  "glTexImage%dD(incompatible format = %s, type = %s)",
+                  dimensions, _mesa_enum_to_string(format),
+                  _mesa_enum_to_string(type));
+      return GL_TRUE;
    }
 
    /* Check internalFormat */
@@ -2183,13 +2191,14 @@ texture_error_check( struct gl_context *ctx,
       return GL_TRUE;
    }
 
-   /* Check incoming image format and type */
-   err = _mesa_error_check_format_and_type(ctx, format, type);
-   if (err != GL_NO_ERROR) {
-      _mesa_error(ctx, err,
-                  "glTexImage%dD(incompatible format = %s, type = %s)",
-                  dimensions, _mesa_enum_to_string(format),
-                  _mesa_enum_to_string(type));
+   /* OpenGL ES 1.x and OpenGL ES 2.0 impose additional restrictions on the
+    * combinations of format, internalFormat, and type that can be used.
+    * Formats and types that require additional extensions (e.g., GL_FLOAT
+    * requires GL_OES_texture_float) are filtered elsewhere.
+    */
+   if (_mesa_is_gles(ctx) &&
+       texture_format_error_check_gles(ctx, format, type, internalFormat,
+                                       dimensions, "glTexImage%dD")) {
       return GL_TRUE;
    }
 
-- 
cgit v1.2.3


From c6bf1cd1467ea5d5370394ba99366dd8a59a385c Mon Sep 17 00:00:00 2001
From: Eduardo Lima Mitev <elima@igalia.com>
Date: Thu, 24 Sep 2015 10:57:42 +0200
Subject: mesa: Move _mesa_base_tex_format() from teximage to glformats files

This function will be needed as part of validating the combination of format,
type and internal format of texture pixel operations, which happens in
glformats files. Specifically, we want to be able to obtain the base format
of a resolved effective internal format, to compare it with the original
internal format passed.

Also, since this function deals solely with GL formats, it fits better in
glformats where the rest of similar format functionality rests.

The function is moved as-is, without any modification.

Cc: "11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Tested-by: Mark Janes <mark.a.janes@intel.com>
---
 src/mesa/main/glformats.c | 505 ++++++++++++++++++++++++++++++++++++++++++++++
 src/mesa/main/glformats.h |   2 +
 src/mesa/main/teximage.c  | 374 ----------------------------------
 src/mesa/main/teximage.h  |   4 -
 4 files changed, 507 insertions(+), 378 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index 6cfffdb4d51..515b06e14eb 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -2139,6 +2139,511 @@ _mesa_es_error_check_format_and_type(GLenum format, GLenum type,
    return type_valid ? GL_NO_ERROR : GL_INVALID_OPERATION;
 }
 
+/**
+ * Return the simple base format for a given internal texture format.
+ * For example, given GL_LUMINANCE12_ALPHA4, return GL_LUMINANCE_ALPHA.
+ *
+ * \param ctx GL context.
+ * \param internalFormat the internal texture format token or 1, 2, 3, or 4.
+ *
+ * \return the corresponding \u base internal format (GL_ALPHA, GL_LUMINANCE,
+ * GL_LUMANCE_ALPHA, GL_INTENSITY, GL_RGB, or GL_RGBA), or -1 if invalid enum.
+ *
+ * This is the format which is used during texture application (i.e. the
+ * texture format and env mode determine the arithmetic used.
+ */
+GLint
+_mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat)
+{
+   switch (internalFormat) {
+   case GL_ALPHA:
+   case GL_ALPHA4:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+      return (ctx->API != API_OPENGL_CORE) ? GL_ALPHA : -1;
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+      return (ctx->API != API_OPENGL_CORE) ? GL_LUMINANCE : -1;
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+      return (ctx->API != API_OPENGL_CORE) ? GL_LUMINANCE_ALPHA : -1;
+   case GL_INTENSITY:
+   case GL_INTENSITY4:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+      return (ctx->API != API_OPENGL_CORE) ? GL_INTENSITY : -1;
+   case 3:
+      return (ctx->API != API_OPENGL_CORE) ? GL_RGB : -1;
+   case GL_RGB:
+   case GL_R3_G3_B2:
+   case GL_RGB4:
+   case GL_RGB5:
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      return GL_RGB;
+   case 4:
+      return (ctx->API != API_OPENGL_CORE) ? GL_RGBA : -1;
+   case GL_RGBA:
+   case GL_RGBA2:
+   case GL_RGBA4:
+   case GL_RGB5_A1:
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      return GL_RGBA;
+   default:
+      ; /* fallthrough */
+   }
+
+   /* GL_BGRA can be an internal format *only* in OpenGL ES (1.x or 2.0).
+    */
+   if (_mesa_is_gles(ctx)) {
+      switch (internalFormat) {
+      case GL_BGRA:
+         return GL_RGBA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ARB_ES2_compatibility) {
+      switch (internalFormat) {
+      case GL_RGB565:
+         return GL_RGB;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ARB_depth_texture) {
+      switch (internalFormat) {
+      case GL_DEPTH_COMPONENT:
+      case GL_DEPTH_COMPONENT16:
+      case GL_DEPTH_COMPONENT24:
+      case GL_DEPTH_COMPONENT32:
+         return GL_DEPTH_COMPONENT;
+      case GL_DEPTH_STENCIL:
+      case GL_DEPTH24_STENCIL8:
+         return GL_DEPTH_STENCIL;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ARB_texture_stencil8) {
+      switch (internalFormat) {
+      case GL_STENCIL_INDEX:
+      case GL_STENCIL_INDEX1:
+      case GL_STENCIL_INDEX4:
+      case GL_STENCIL_INDEX8:
+      case GL_STENCIL_INDEX16:
+         return GL_STENCIL_INDEX;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   switch (internalFormat) {
+   case GL_COMPRESSED_ALPHA:
+      return GL_ALPHA;
+   case GL_COMPRESSED_LUMINANCE:
+      return GL_LUMINANCE;
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+      return GL_LUMINANCE_ALPHA;
+   case GL_COMPRESSED_INTENSITY:
+      return GL_INTENSITY;
+   case GL_COMPRESSED_RGB:
+      return GL_RGB;
+   case GL_COMPRESSED_RGBA:
+      return GL_RGBA;
+   default:
+      ; /* fallthrough */
+   }
+
+   if (ctx->Extensions.TDFX_texture_compression_FXT1) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_RGB_FXT1_3DFX:
+         return GL_RGB;
+      case GL_COMPRESSED_RGBA_FXT1_3DFX:
+         return GL_RGBA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   /* Assume that the ANGLE flag will always be set if the EXT flag is set.
+    */
+   if (ctx->Extensions.ANGLE_texture_compression_dxt) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+         return GL_RGB;
+      case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+      case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+      case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+         return GL_RGBA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (_mesa_is_desktop_gl(ctx)
+       && ctx->Extensions.ANGLE_texture_compression_dxt) {
+      switch (internalFormat) {
+      case GL_RGB_S3TC:
+      case GL_RGB4_S3TC:
+         return GL_RGB;
+      case GL_RGBA_S3TC:
+      case GL_RGBA4_S3TC:
+         return GL_RGBA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.MESA_ycbcr_texture) {
+      if (internalFormat == GL_YCBCR_MESA)
+         return GL_YCBCR_MESA;
+   }
+
+   if (ctx->Extensions.ARB_texture_float) {
+      switch (internalFormat) {
+      case GL_ALPHA16F_ARB:
+      case GL_ALPHA32F_ARB:
+         return GL_ALPHA;
+      case GL_RGBA16F_ARB:
+      case GL_RGBA32F_ARB:
+         return GL_RGBA;
+      case GL_RGB16F_ARB:
+      case GL_RGB32F_ARB:
+         return GL_RGB;
+      case GL_INTENSITY16F_ARB:
+      case GL_INTENSITY32F_ARB:
+         return GL_INTENSITY;
+      case GL_LUMINANCE16F_ARB:
+      case GL_LUMINANCE32F_ARB:
+         return GL_LUMINANCE;
+      case GL_LUMINANCE_ALPHA16F_ARB:
+      case GL_LUMINANCE_ALPHA32F_ARB:
+         return GL_LUMINANCE_ALPHA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.EXT_texture_snorm) {
+      switch (internalFormat) {
+      case GL_RED_SNORM:
+      case GL_R8_SNORM:
+      case GL_R16_SNORM:
+         return GL_RED;
+      case GL_RG_SNORM:
+      case GL_RG8_SNORM:
+      case GL_RG16_SNORM:
+         return GL_RG;
+      case GL_RGB_SNORM:
+      case GL_RGB8_SNORM:
+      case GL_RGB16_SNORM:
+         return GL_RGB;
+      case GL_RGBA_SNORM:
+      case GL_RGBA8_SNORM:
+      case GL_RGBA16_SNORM:
+         return GL_RGBA;
+      case GL_ALPHA_SNORM:
+      case GL_ALPHA8_SNORM:
+      case GL_ALPHA16_SNORM:
+         return GL_ALPHA;
+      case GL_LUMINANCE_SNORM:
+      case GL_LUMINANCE8_SNORM:
+      case GL_LUMINANCE16_SNORM:
+         return GL_LUMINANCE;
+      case GL_LUMINANCE_ALPHA_SNORM:
+      case GL_LUMINANCE8_ALPHA8_SNORM:
+      case GL_LUMINANCE16_ALPHA16_SNORM:
+         return GL_LUMINANCE_ALPHA;
+      case GL_INTENSITY_SNORM:
+      case GL_INTENSITY8_SNORM:
+      case GL_INTENSITY16_SNORM:
+         return GL_INTENSITY;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.EXT_texture_sRGB) {
+      switch (internalFormat) {
+      case GL_SRGB_EXT:
+      case GL_SRGB8_EXT:
+      case GL_COMPRESSED_SRGB_EXT:
+         return GL_RGB;
+      case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
+         return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGB : -1;
+      case GL_SRGB_ALPHA_EXT:
+      case GL_SRGB8_ALPHA8_EXT:
+      case GL_COMPRESSED_SRGB_ALPHA_EXT:
+         return GL_RGBA;
+      case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
+      case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
+      case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
+         return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGBA : -1;
+      case GL_SLUMINANCE_ALPHA_EXT:
+      case GL_SLUMINANCE8_ALPHA8_EXT:
+      case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
+         return GL_LUMINANCE_ALPHA;
+      case GL_SLUMINANCE_EXT:
+      case GL_SLUMINANCE8_EXT:
+      case GL_COMPRESSED_SLUMINANCE_EXT:
+         return GL_LUMINANCE;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Version >= 30 ||
+       ctx->Extensions.EXT_texture_integer) {
+      switch (internalFormat) {
+      case GL_RGBA8UI_EXT:
+      case GL_RGBA16UI_EXT:
+      case GL_RGBA32UI_EXT:
+      case GL_RGBA8I_EXT:
+      case GL_RGBA16I_EXT:
+      case GL_RGBA32I_EXT:
+      case GL_RGB10_A2UI:
+         return GL_RGBA;
+      case GL_RGB8UI_EXT:
+      case GL_RGB16UI_EXT:
+      case GL_RGB32UI_EXT:
+      case GL_RGB8I_EXT:
+      case GL_RGB16I_EXT:
+      case GL_RGB32I_EXT:
+         return GL_RGB;
+      }
+   }
+
+   if (ctx->Extensions.EXT_texture_integer) {
+      switch (internalFormat) {
+      case GL_ALPHA8UI_EXT:
+      case GL_ALPHA16UI_EXT:
+      case GL_ALPHA32UI_EXT:
+      case GL_ALPHA8I_EXT:
+      case GL_ALPHA16I_EXT:
+      case GL_ALPHA32I_EXT:
+         return GL_ALPHA;
+      case GL_INTENSITY8UI_EXT:
+      case GL_INTENSITY16UI_EXT:
+      case GL_INTENSITY32UI_EXT:
+      case GL_INTENSITY8I_EXT:
+      case GL_INTENSITY16I_EXT:
+      case GL_INTENSITY32I_EXT:
+         return GL_INTENSITY;
+      case GL_LUMINANCE8UI_EXT:
+      case GL_LUMINANCE16UI_EXT:
+      case GL_LUMINANCE32UI_EXT:
+      case GL_LUMINANCE8I_EXT:
+      case GL_LUMINANCE16I_EXT:
+      case GL_LUMINANCE32I_EXT:
+         return GL_LUMINANCE;
+      case GL_LUMINANCE_ALPHA8UI_EXT:
+      case GL_LUMINANCE_ALPHA16UI_EXT:
+      case GL_LUMINANCE_ALPHA32UI_EXT:
+      case GL_LUMINANCE_ALPHA8I_EXT:
+      case GL_LUMINANCE_ALPHA16I_EXT:
+      case GL_LUMINANCE_ALPHA32I_EXT:
+         return GL_LUMINANCE_ALPHA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ARB_texture_rg) {
+      switch (internalFormat) {
+      case GL_R16F:
+      case GL_R32F:
+	 if (!ctx->Extensions.ARB_texture_float)
+	    break;
+         return GL_RED;
+      case GL_R8I:
+      case GL_R8UI:
+      case GL_R16I:
+      case GL_R16UI:
+      case GL_R32I:
+      case GL_R32UI:
+	 if (ctx->Version < 30 && !ctx->Extensions.EXT_texture_integer)
+	    break;
+	 /* FALLTHROUGH */
+      case GL_R8:
+      case GL_R16:
+      case GL_RED:
+      case GL_COMPRESSED_RED:
+         return GL_RED;
+
+      case GL_RG16F:
+      case GL_RG32F:
+	 if (!ctx->Extensions.ARB_texture_float)
+	    break;
+         return GL_RG;
+      case GL_RG8I:
+      case GL_RG8UI:
+      case GL_RG16I:
+      case GL_RG16UI:
+      case GL_RG32I:
+      case GL_RG32UI:
+	 if (ctx->Version < 30 && !ctx->Extensions.EXT_texture_integer)
+	    break;
+	 /* FALLTHROUGH */
+      case GL_RG:
+      case GL_RG8:
+      case GL_RG16:
+      case GL_COMPRESSED_RG:
+         return GL_RG;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.EXT_texture_shared_exponent) {
+      switch (internalFormat) {
+      case GL_RGB9_E5_EXT:
+         return GL_RGB;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.EXT_packed_float) {
+      switch (internalFormat) {
+      case GL_R11F_G11F_B10F_EXT:
+         return GL_RGB;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ARB_depth_buffer_float) {
+      switch (internalFormat) {
+      case GL_DEPTH_COMPONENT32F:
+         return GL_DEPTH_COMPONENT;
+      case GL_DEPTH32F_STENCIL8:
+         return GL_DEPTH_STENCIL;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ARB_texture_compression_rgtc) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_RED_RGTC1:
+      case GL_COMPRESSED_SIGNED_RED_RGTC1:
+         return GL_RED;
+      case GL_COMPRESSED_RG_RGTC2:
+      case GL_COMPRESSED_SIGNED_RG_RGTC2:
+         return GL_RG;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.EXT_texture_compression_latc) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
+      case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
+         return GL_LUMINANCE;
+      case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT:
+      case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT:
+         return GL_LUMINANCE_ALPHA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ATI_texture_compression_3dc) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
+         return GL_LUMINANCE_ALPHA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.OES_compressed_ETC1_RGB8_texture) {
+      switch (internalFormat) {
+      case GL_ETC1_RGB8_OES:
+         return GL_RGB;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_RGB8_ETC2:
+      case GL_COMPRESSED_SRGB8_ETC2:
+         return GL_RGB;
+      case GL_COMPRESSED_RGBA8_ETC2_EAC:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
+      case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
+      case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
+         return GL_RGBA;
+      case GL_COMPRESSED_R11_EAC:
+      case GL_COMPRESSED_SIGNED_R11_EAC:
+         return GL_RED;
+      case GL_COMPRESSED_RG11_EAC:
+      case GL_COMPRESSED_SIGNED_RG11_EAC:
+         return GL_RG;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (_mesa_is_desktop_gl(ctx) &&
+       ctx->Extensions.ARB_texture_compression_bptc) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_RGBA_BPTC_UNORM:
+      case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
+         return GL_RGBA;
+      case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
+      case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
+         return GL_RGB;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->API == API_OPENGLES) {
+      switch (internalFormat) {
+      case GL_PALETTE4_RGB8_OES:
+      case GL_PALETTE4_R5_G6_B5_OES:
+      case GL_PALETTE8_RGB8_OES:
+      case GL_PALETTE8_R5_G6_B5_OES:
+	 return GL_RGB;
+      case GL_PALETTE4_RGBA8_OES:
+      case GL_PALETTE8_RGB5_A1_OES:
+      case GL_PALETTE4_RGBA4_OES:
+      case GL_PALETTE4_RGB5_A1_OES:
+      case GL_PALETTE8_RGBA8_OES:
+      case GL_PALETTE8_RGBA4_OES:
+	 return GL_RGBA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   return -1; /* error */
+}
 
 /**
  * Do error checking of format/type combinations for OpenGL ES 3
diff --git a/src/mesa/main/glformats.h b/src/mesa/main/glformats.h
index aec905d2342..92f4bc6864e 100644
--- a/src/mesa/main/glformats.h
+++ b/src/mesa/main/glformats.h
@@ -134,6 +134,8 @@ extern GLenum
 _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
                                       GLenum format, GLenum type,
                                       GLenum internalFormat);
+extern GLint
+_mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat );
 
 extern uint32_t
 _mesa_format_from_format_and_type(GLenum format, GLenum type);
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index a0dbb48a928..d9453e3a281 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -116,380 +116,6 @@ adjust_for_oes_float_texture(GLenum format, GLenum type)
    return format;
 }
 
-/**
- * Return the simple base format for a given internal texture format.
- * For example, given GL_LUMINANCE12_ALPHA4, return GL_LUMINANCE_ALPHA.
- *
- * \param ctx GL context.
- * \param internalFormat the internal texture format token or 1, 2, 3, or 4.
- *
- * \return the corresponding \u base internal format (GL_ALPHA, GL_LUMINANCE,
- * GL_LUMANCE_ALPHA, GL_INTENSITY, GL_RGB, or GL_RGBA), or -1 if invalid enum.
- *
- * This is the format which is used during texture application (i.e. the
- * texture format and env mode determine the arithmetic used.
- */
-GLint
-_mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat)
-{
-   switch (internalFormat) {
-   case GL_ALPHA:
-   case GL_ALPHA4:
-   case GL_ALPHA8:
-   case GL_ALPHA12:
-   case GL_ALPHA16:
-      return (ctx->API != API_OPENGL_CORE) ? GL_ALPHA : -1;
-   case 1:
-   case GL_LUMINANCE:
-   case GL_LUMINANCE4:
-   case GL_LUMINANCE8:
-   case GL_LUMINANCE12:
-   case GL_LUMINANCE16:
-      return (ctx->API != API_OPENGL_CORE) ? GL_LUMINANCE : -1;
-   case 2:
-   case GL_LUMINANCE_ALPHA:
-   case GL_LUMINANCE4_ALPHA4:
-   case GL_LUMINANCE6_ALPHA2:
-   case GL_LUMINANCE8_ALPHA8:
-   case GL_LUMINANCE12_ALPHA4:
-   case GL_LUMINANCE12_ALPHA12:
-   case GL_LUMINANCE16_ALPHA16:
-      return (ctx->API != API_OPENGL_CORE) ? GL_LUMINANCE_ALPHA : -1;
-   case GL_INTENSITY:
-   case GL_INTENSITY4:
-   case GL_INTENSITY8:
-   case GL_INTENSITY12:
-   case GL_INTENSITY16:
-      return (ctx->API != API_OPENGL_CORE) ? GL_INTENSITY : -1;
-   case 3:
-      return (ctx->API != API_OPENGL_CORE) ? GL_RGB : -1;
-   case GL_RGB:
-   case GL_R3_G3_B2:
-   case GL_RGB4:
-   case GL_RGB5:
-   case GL_RGB8:
-   case GL_RGB10:
-   case GL_RGB12:
-   case GL_RGB16:
-      return GL_RGB;
-   case 4:
-      return (ctx->API != API_OPENGL_CORE) ? GL_RGBA : -1;
-   case GL_RGBA:
-   case GL_RGBA2:
-   case GL_RGBA4:
-   case GL_RGB5_A1:
-   case GL_RGBA8:
-   case GL_RGB10_A2:
-   case GL_RGBA12:
-   case GL_RGBA16:
-      return GL_RGBA;
-   default:
-      ; /* fallthrough */
-   }
-
-   /* GL_BGRA can be an internal format *only* in OpenGL ES (1.x or 2.0).
-    */
-   if (_mesa_is_gles(ctx)) {
-      switch (internalFormat) {
-      case GL_BGRA:
-         return GL_RGBA;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.ARB_ES2_compatibility) {
-      switch (internalFormat) {
-      case GL_RGB565:
-         return GL_RGB;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.ARB_depth_texture) {
-      switch (internalFormat) {
-      case GL_DEPTH_COMPONENT:
-      case GL_DEPTH_COMPONENT16:
-      case GL_DEPTH_COMPONENT24:
-      case GL_DEPTH_COMPONENT32:
-         return GL_DEPTH_COMPONENT;
-      case GL_DEPTH_STENCIL:
-      case GL_DEPTH24_STENCIL8:
-         return GL_DEPTH_STENCIL;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.ARB_texture_stencil8) {
-      switch (internalFormat) {
-      case GL_STENCIL_INDEX:
-      case GL_STENCIL_INDEX1:
-      case GL_STENCIL_INDEX4:
-      case GL_STENCIL_INDEX8:
-      case GL_STENCIL_INDEX16:
-         return GL_STENCIL_INDEX;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   switch (internalFormat) {
-   case GL_COMPRESSED_ALPHA:
-      return GL_ALPHA;
-   case GL_COMPRESSED_LUMINANCE:
-      return GL_LUMINANCE;
-   case GL_COMPRESSED_LUMINANCE_ALPHA:
-      return GL_LUMINANCE_ALPHA;
-   case GL_COMPRESSED_INTENSITY:
-      return GL_INTENSITY;
-   case GL_COMPRESSED_RGB:
-      return GL_RGB;
-   case GL_COMPRESSED_RGBA:
-      return GL_RGBA;
-   default:
-      ; /* fallthrough */
-   }
-
-   if (_mesa_is_compressed_format(ctx, internalFormat)) {
-      GLenum base_compressed =
-         _mesa_gl_compressed_format_base_format(internalFormat);
-      if (base_compressed)
-            return base_compressed;
-   }
-
-   if (ctx->Extensions.MESA_ycbcr_texture) {
-      if (internalFormat == GL_YCBCR_MESA)
-         return GL_YCBCR_MESA;
-   }
-
-   if (ctx->Extensions.ARB_texture_float) {
-      switch (internalFormat) {
-      case GL_ALPHA16F_ARB:
-      case GL_ALPHA32F_ARB:
-         return GL_ALPHA;
-      case GL_RGBA16F_ARB:
-      case GL_RGBA32F_ARB:
-         return GL_RGBA;
-      case GL_RGB16F_ARB:
-      case GL_RGB32F_ARB:
-         return GL_RGB;
-      case GL_INTENSITY16F_ARB:
-      case GL_INTENSITY32F_ARB:
-         return GL_INTENSITY;
-      case GL_LUMINANCE16F_ARB:
-      case GL_LUMINANCE32F_ARB:
-         return GL_LUMINANCE;
-      case GL_LUMINANCE_ALPHA16F_ARB:
-      case GL_LUMINANCE_ALPHA32F_ARB:
-         return GL_LUMINANCE_ALPHA;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.EXT_texture_snorm) {
-      switch (internalFormat) {
-      case GL_RED_SNORM:
-      case GL_R8_SNORM:
-      case GL_R16_SNORM:
-         return GL_RED;
-      case GL_RG_SNORM:
-      case GL_RG8_SNORM:
-      case GL_RG16_SNORM:
-         return GL_RG;
-      case GL_RGB_SNORM:
-      case GL_RGB8_SNORM:
-      case GL_RGB16_SNORM:
-         return GL_RGB;
-      case GL_RGBA_SNORM:
-      case GL_RGBA8_SNORM:
-      case GL_RGBA16_SNORM:
-         return GL_RGBA;
-      case GL_ALPHA_SNORM:
-      case GL_ALPHA8_SNORM:
-      case GL_ALPHA16_SNORM:
-         return GL_ALPHA;
-      case GL_LUMINANCE_SNORM:
-      case GL_LUMINANCE8_SNORM:
-      case GL_LUMINANCE16_SNORM:
-         return GL_LUMINANCE;
-      case GL_LUMINANCE_ALPHA_SNORM:
-      case GL_LUMINANCE8_ALPHA8_SNORM:
-      case GL_LUMINANCE16_ALPHA16_SNORM:
-         return GL_LUMINANCE_ALPHA;
-      case GL_INTENSITY_SNORM:
-      case GL_INTENSITY8_SNORM:
-      case GL_INTENSITY16_SNORM:
-         return GL_INTENSITY;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.EXT_texture_sRGB) {
-      switch (internalFormat) {
-      case GL_SRGB_EXT:
-      case GL_SRGB8_EXT:
-      case GL_COMPRESSED_SRGB_EXT:
-         return GL_RGB;
-      case GL_SRGB_ALPHA_EXT:
-      case GL_SRGB8_ALPHA8_EXT:
-      case GL_COMPRESSED_SRGB_ALPHA_EXT:
-         return GL_RGBA;
-      case GL_SLUMINANCE_ALPHA_EXT:
-      case GL_SLUMINANCE8_ALPHA8_EXT:
-      case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
-         return GL_LUMINANCE_ALPHA;
-      case GL_SLUMINANCE_EXT:
-      case GL_SLUMINANCE8_EXT:
-      case GL_COMPRESSED_SLUMINANCE_EXT:
-         return GL_LUMINANCE;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Version >= 30 ||
-       ctx->Extensions.EXT_texture_integer) {
-      switch (internalFormat) {
-      case GL_RGBA8UI_EXT:
-      case GL_RGBA16UI_EXT:
-      case GL_RGBA32UI_EXT:
-      case GL_RGBA8I_EXT:
-      case GL_RGBA16I_EXT:
-      case GL_RGBA32I_EXT:
-      case GL_RGB10_A2UI:
-         return GL_RGBA;
-      case GL_RGB8UI_EXT:
-      case GL_RGB16UI_EXT:
-      case GL_RGB32UI_EXT:
-      case GL_RGB8I_EXT:
-      case GL_RGB16I_EXT:
-      case GL_RGB32I_EXT:
-         return GL_RGB;
-      }
-   }
-
-   if (ctx->Extensions.EXT_texture_integer) {
-      switch (internalFormat) {
-      case GL_ALPHA8UI_EXT:
-      case GL_ALPHA16UI_EXT:
-      case GL_ALPHA32UI_EXT:
-      case GL_ALPHA8I_EXT:
-      case GL_ALPHA16I_EXT:
-      case GL_ALPHA32I_EXT:
-         return GL_ALPHA;
-      case GL_INTENSITY8UI_EXT:
-      case GL_INTENSITY16UI_EXT:
-      case GL_INTENSITY32UI_EXT:
-      case GL_INTENSITY8I_EXT:
-      case GL_INTENSITY16I_EXT:
-      case GL_INTENSITY32I_EXT:
-         return GL_INTENSITY;
-      case GL_LUMINANCE8UI_EXT:
-      case GL_LUMINANCE16UI_EXT:
-      case GL_LUMINANCE32UI_EXT:
-      case GL_LUMINANCE8I_EXT:
-      case GL_LUMINANCE16I_EXT:
-      case GL_LUMINANCE32I_EXT:
-         return GL_LUMINANCE;
-      case GL_LUMINANCE_ALPHA8UI_EXT:
-      case GL_LUMINANCE_ALPHA16UI_EXT:
-      case GL_LUMINANCE_ALPHA32UI_EXT:
-      case GL_LUMINANCE_ALPHA8I_EXT:
-      case GL_LUMINANCE_ALPHA16I_EXT:
-      case GL_LUMINANCE_ALPHA32I_EXT:
-         return GL_LUMINANCE_ALPHA;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.ARB_texture_rg) {
-      switch (internalFormat) {
-      case GL_R16F:
-      case GL_R32F:
-	 if (!ctx->Extensions.ARB_texture_float)
-	    break;
-         return GL_RED;
-      case GL_R8I:
-      case GL_R8UI:
-      case GL_R16I:
-      case GL_R16UI:
-      case GL_R32I:
-      case GL_R32UI:
-	 if (ctx->Version < 30 && !ctx->Extensions.EXT_texture_integer)
-	    break;
-	 /* FALLTHROUGH */
-      case GL_R8:
-      case GL_R16:
-      case GL_RED:
-      case GL_COMPRESSED_RED:
-         return GL_RED;
-
-      case GL_RG16F:
-      case GL_RG32F:
-	 if (!ctx->Extensions.ARB_texture_float)
-	    break;
-         return GL_RG;
-      case GL_RG8I:
-      case GL_RG8UI:
-      case GL_RG16I:
-      case GL_RG16UI:
-      case GL_RG32I:
-      case GL_RG32UI:
-	 if (ctx->Version < 30 && !ctx->Extensions.EXT_texture_integer)
-	    break;
-	 /* FALLTHROUGH */
-      case GL_RG:
-      case GL_RG8:
-      case GL_RG16:
-      case GL_COMPRESSED_RG:
-         return GL_RG;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.EXT_texture_shared_exponent) {
-      switch (internalFormat) {
-      case GL_RGB9_E5_EXT:
-         return GL_RGB;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.EXT_packed_float) {
-      switch (internalFormat) {
-      case GL_R11F_G11F_B10F_EXT:
-         return GL_RGB;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.ARB_depth_buffer_float) {
-      switch (internalFormat) {
-      case GL_DEPTH_COMPONENT32F:
-         return GL_DEPTH_COMPONENT;
-      case GL_DEPTH32F_STENCIL8:
-         return GL_DEPTH_STENCIL;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.KHR_texture_compression_astc_ldr &&
-       _mesa_is_astc_format(internalFormat))
-         return GL_RGBA;
-
-   return -1; /* error */
-}
-
-
 /**
  * For cube map faces, return a face index in [0,5].
  * For other targets return 0;
diff --git a/src/mesa/main/teximage.h b/src/mesa/main/teximage.h
index a4347204962..5df36c59a28 100644
--- a/src/mesa/main/teximage.h
+++ b/src/mesa/main/teximage.h
@@ -59,10 +59,6 @@ _mesa_is_zero_size_texture(const struct gl_texture_image *texImage)
 /** \name Internal functions */
 /*@{*/
 
-extern GLint
-_mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat);
-
-
 extern GLboolean
 _mesa_is_proxy_texture(GLenum target);
 
-- 
cgit v1.2.3


From 5edd9961c15a80d557ba42f48c97a471b23d9c5e Mon Sep 17 00:00:00 2001
From: Eduardo Lima Mitev <elima@igalia.com>
Date: Thu, 24 Sep 2015 10:57:43 +0200
Subject: mesa: Use the effective internal format instead for validation

When validating format+type+internalFormat for texture pixel operations
on GLES3, the effective internal format should be used if the one
specified is an unsized internal format. Page 127, section "3.8 Texturing"
of the GLES 3.0.4 spec says:

    "if internalformat is a base internal format, the effective internal
     format is a sized internal format that is derived from the format and
     type for internal use by the GL. Table 3.12 specifies the mapping of
     format and type to effective internal formats. The effective internal
     format is used by the GL for purposes such as texture completeness or
     type checks for CopyTex* commands. In these cases, the GL is required
     to operate as if the effective internal format was used as the
     internalformat when specifying the texture data."

v2: Per the spec, Luminance8Alpha8, Luminance8 and Alpha8 should not be
considered sized internal formats. Return the corresponding unsized format
instead.

v4: * Improved comments in
      _mesa_es3_effective_internal_format_for_format_and_type().
    * Split the patch to separate out the chunk about reordering the
      error_check_subtexture_dimensions() error check, which is not directly
      related to this patch.
v5: Dropped the split-out patch because it was actually a workaround for 3
    dEQP tests that are buggy:

    dEQP-GLES2.functional.negative_api.texture.texsubimage2d_neg_offset
    dEQP-GLES2.functional.negative_api.texture.texsubimage2d_offset_allowed
    dEQP-GLES2.functional.negative_api.texture.texsubimage2d_neg_wdt_hgt

Cc: "11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Tested-by: Mark Janes <mark.a.janes@intel.com>
---
 src/mesa/main/glformats.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)

(limited to 'src')

diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index 515b06e14eb..7dab33c0051 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -2645,6 +2645,127 @@ _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat)
    return -1; /* error */
 }
 
+/**
+ * Returns the effective internal format from a texture format and type.
+ * This is used by texture image operations internally for validation, when
+ * the specified internal format is a base (unsized) format.
+ *
+ * This method will only return a valid effective internal format if the
+ * combination of format, type and internal format in base form, is acceptable.
+ *
+ * If a single sized internal format is defined in the spec (OpenGL-ES 3.0.4) or
+ * in extensions, to unambiguously correspond to the given base format, then
+ * that internal format is returned as the effective. Otherwise, if the
+ * combination is accepted but a single effective format is not defined, the
+ * passed base format will be returned instead.
+ *
+ * \param format the texture format
+ * \param type the texture type
+ */
+static GLenum
+_mesa_es3_effective_internal_format_for_format_and_type(GLenum format,
+                                                        GLenum type)
+{
+   switch (type) {
+   case GL_UNSIGNED_BYTE:
+      switch (format) {
+      case GL_RGBA:
+         return GL_RGBA8;
+      case GL_RGB:
+         return GL_RGB8;
+      /* Although LUMINANCE_ALPHA, LUMINANCE and ALPHA appear in table 3.12,
+       * (section 3.8 Texturing, page 128 of the OpenGL-ES 3.0.4) as effective
+       * internal formats, they do not correspond to GL constants, so the base
+       * format is returned instead.
+       */
+      case GL_LUMINANCE_ALPHA:
+      case GL_LUMINANCE:
+      case GL_ALPHA:
+         return format;
+      }
+      break;
+
+   case GL_UNSIGNED_SHORT_4_4_4_4:
+      if (format == GL_RGBA)
+         return GL_RGBA4;
+      break;
+
+   case GL_UNSIGNED_SHORT_5_5_5_1:
+      if (format == GL_RGBA)
+         return GL_RGB5_A1;
+      break;
+
+   case GL_UNSIGNED_SHORT_5_6_5:
+      if (format == GL_RGB)
+         return GL_RGB565;
+      break;
+
+   /* OES_packed_depth_stencil */
+   case GL_UNSIGNED_INT_24_8:
+      if (format == GL_DEPTH_STENCIL)
+         return GL_DEPTH24_STENCIL8;
+      break;
+
+   case GL_FLOAT_32_UNSIGNED_INT_24_8_REV:
+      if (format == GL_DEPTH_STENCIL)
+         return GL_DEPTH32F_STENCIL8;
+      break;
+
+   case GL_UNSIGNED_SHORT:
+      if (format == GL_DEPTH_COMPONENT)
+         return GL_DEPTH_COMPONENT16;
+      break;
+
+   case GL_UNSIGNED_INT:
+      /* It can be DEPTH_COMPONENT16 or DEPTH_COMPONENT24, so just return
+       * the format.
+       */
+      if (format == GL_DEPTH_COMPONENT)
+         return format;
+      break;
+
+   /* OES_texture_float and OES_texture_half_float */
+   case GL_FLOAT:
+      if (format == GL_DEPTH_COMPONENT)
+         return GL_DEPTH_COMPONENT32F;
+      /* fall through */
+   case GL_HALF_FLOAT_OES:
+      switch (format) {
+      case GL_RGBA:
+      case GL_RGB:
+      case GL_LUMINANCE_ALPHA:
+      case GL_LUMINANCE:
+      case GL_ALPHA:
+      case GL_RED:
+      case GL_RG:
+         return format;
+      }
+      break;
+   case GL_HALF_FLOAT:
+      switch (format) {
+      case GL_RG:
+      case GL_RED:
+         return format;
+      }
+      break;
+
+   /* GL_EXT_texture_type_2_10_10_10_REV */
+   case GL_UNSIGNED_INT_2_10_10_10_REV:
+      switch (format) {
+      case GL_RGBA:
+      case GL_RGB:
+         return format;
+      }
+      break;
+
+   default:
+      /* fall through and return NONE */
+      break;
+   }
+
+   return GL_NONE;
+}
+
 /**
  * Do error checking of format/type combinations for OpenGL ES 3
  * glTex[Sub]Image.
@@ -2655,6 +2776,36 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
                                       GLenum format, GLenum type,
                                       GLenum internalFormat)
 {
+   /* If internalFormat is an unsized format, then the effective internal
+    * format derived from format and type should be used instead. Page 127,
+    * section "3.8 Texturing" of the GLES 3.0.4 spec states:
+    *
+    *    "if internalformat is a base internal format, the effective
+    *     internal format is a sized internal format that is derived
+    *     from the format and type for internal use by the GL.
+    *     Table 3.12 specifies the mapping of format and type to effective
+    *     internal formats. The effective internal format is used by the GL
+    *     for purposes such as texture completeness or type checks for
+    *     CopyTex* commands. In these cases, the GL is required to operate
+    *     as if the effective internal format was used as the internalformat
+    *     when specifying the texture data."
+    */
+   if (_mesa_is_enum_format_unsized(internalFormat)) {
+      GLenum effectiveInternalFormat =
+         _mesa_es3_effective_internal_format_for_format_and_type(format, type);
+
+      if (effectiveInternalFormat == GL_NONE)
+         return GL_INVALID_OPERATION;
+
+      GLenum baseInternalFormat =
+         _mesa_base_tex_format(ctx, effectiveInternalFormat);
+
+      if (internalFormat != baseInternalFormat)
+         return GL_INVALID_OPERATION;
+
+      internalFormat = effectiveInternalFormat;
+   }
+
    switch (format) {
    case GL_RGBA:
       switch (type) {
-- 
cgit v1.2.3


From 21fdc59d349eb396b48f0919dfd1a8dc234b96b1 Mon Sep 17 00:00:00 2001
From: Anuj Phogat <anuj.phogat@gmail.com>
Date: Tue, 18 Aug 2015 15:48:56 -0700
Subject: i965: Add a helper function intel_get_tile_dims()

V2:
- Do the tile width/height computations in the new helper
  function and use it later in intel_miptree_get_tile_masks().
- Change the name to intel_get_tile_dims().

V3: Return the tile_h in number of rows in place of bytes.
    Document the units of tile_w, tile_h parameters.

Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 82 ++++++++++++++++++++-------
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  4 ++
 2 files changed, 64 insertions(+), 22 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 2150708fc04..ee5904d6a48 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -557,35 +557,15 @@ static unsigned long
 intel_get_yf_ys_bo_size(struct intel_mipmap_tree *mt, unsigned *alignment,
                         unsigned long *pitch)
 {
-   const uint32_t bpp = mt->cpp * 8;
-   const uint32_t aspect_ratio = (bpp == 16 || bpp == 64) ? 2 : 1;
    uint32_t tile_width, tile_height;
    unsigned long stride, size, aligned_y;
 
    assert(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE);
-
-   switch (bpp) {
-   case 8:
-      tile_height = 64;
-      break;
-   case 16:
-   case 32:
-      tile_height = 32;
-      break;
-   case 64:
-   case 128:
-      tile_height = 16;
-      break;
-   default:
-      unreachable("not reached");
-   }
-
-   if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YS)
-      tile_height *= 4;
+   intel_get_tile_dims(mt->tiling, mt->tr_mode, mt->cpp,
+                       &tile_width, &tile_height);
 
    aligned_y = ALIGN(mt->total_height, tile_height);
    stride = mt->total_width * mt->cpp;
-   tile_width = tile_height * mt->cpp * aspect_ratio;
    stride = ALIGN(stride, tile_width);
    size = stride * aligned_y;
 
@@ -1075,6 +1055,64 @@ intel_miptree_get_image_offset(const struct intel_mipmap_tree *mt,
    *y = mt->level[level].slice[slice].y_offset;
 }
 
+
+/**
+ * This function computes the tile_w (in bytes) and tile_h (in rows) of
+ * different tiling patterns. If the BO is untiled, tile_w is set to cpp
+ * and tile_h is set to 1.
+ */
+void
+intel_get_tile_dims(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
+                    uint32_t *tile_w, uint32_t *tile_h)
+{
+   if (tr_mode == INTEL_MIPTREE_TRMODE_NONE) {
+      switch (tiling) {
+      case I915_TILING_X:
+         *tile_w = 512;
+         *tile_h = 8;
+         break;
+      case I915_TILING_Y:
+         *tile_w = 128;
+         *tile_h = 32;
+         break;
+      case I915_TILING_NONE:
+         *tile_w = cpp;
+         *tile_h = 1;
+         break;
+      default:
+         unreachable("not reached");
+      }
+   } else {
+      uint32_t aspect_ratio = 1;
+      assert(_mesa_is_pow_two(cpp));
+
+      switch (cpp) {
+      case 1:
+         *tile_h = 64;
+         break;
+      case 2:
+      case 4:
+         *tile_h = 32;
+         break;
+      case 8:
+      case 16:
+         *tile_h = 16;
+         break;
+      default:
+         unreachable("not reached");
+      }
+
+      if (cpp == 2 || cpp == 8)
+         aspect_ratio = 2;
+
+      if (tr_mode == INTEL_MIPTREE_TRMODE_YS)
+         *tile_h *= 4;
+
+      *tile_w = *tile_h * aspect_ratio * cpp;
+   }
+}
+
+
 /**
  * This function computes masks that may be used to select the bits of the X
  * and Y coordinates that indicate the offset within a tile.  If the BO is
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index bcf6d00691a..294e3018520 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -627,6 +627,10 @@ intel_miptree_get_tile_masks(const struct intel_mipmap_tree *mt,
                              uint32_t *mask_x, uint32_t *mask_y,
                              bool map_stencil_as_y_tiled);
 
+void
+intel_get_tile_dims(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
+                    uint32_t *tile_w, uint32_t *tile_h);
+
 uint32_t
 intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt,
                                GLuint level, GLuint slice,
-- 
cgit v1.2.3


From 1dc41be9ebd07825836b0ca4b98e00ffc7ecc0ec Mon Sep 17 00:00:00 2001
From: Anuj Phogat <anuj.phogat@gmail.com>
Date: Tue, 18 Aug 2015 15:47:13 -0700
Subject: i965: Use intel_get_tile_dims() to get tile masks

This requires a change in the parameters passed to
intel_miptree_get_tile_masks().

V2: Rearrange the order of parameters. (Ben)
    Change the name to intel_get_tile_masks(). (Topi)

V3: Use temporary variables in intel_get_tile_masks()
    for clarity. Fix mask_y computation.

Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/drivers/dri/i965/brw_blorp.cpp       |  4 +++-
 src/mesa/drivers/dri/i965/brw_misc_state.c    | 20 +++++++++++------
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 31 ++++++++-------------------
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  6 +++---
 4 files changed, 28 insertions(+), 33 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
index eac1f005496..df2969dbb76 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
@@ -144,7 +144,9 @@ brw_blorp_surface_info::compute_tile_offsets(uint32_t *tile_x,
 {
    uint32_t mask_x, mask_y;
 
-   intel_miptree_get_tile_masks(mt, &mask_x, &mask_y, map_stencil_as_y_tiled);
+   intel_get_tile_masks(mt->tiling, mt->tr_mode, mt->cpp,
+                        map_stencil_as_y_tiled,
+                        &mask_x, &mask_y);
 
    *tile_x = x_offset & mask_x;
    *tile_y = y_offset & mask_y;
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 27511525bff..7d17edb9023 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -174,13 +174,17 @@ brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
    uint32_t tile_mask_x = 0, tile_mask_y = 0;
 
    if (depth_mt) {
-      intel_miptree_get_tile_masks(depth_mt, &tile_mask_x, &tile_mask_y, false);
+      intel_get_tile_masks(depth_mt->tiling, depth_mt->tr_mode,
+                           depth_mt->cpp, false,
+                           &tile_mask_x, &tile_mask_y);
 
       if (intel_miptree_level_has_hiz(depth_mt, depth_level)) {
          uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
-         intel_miptree_get_tile_masks(depth_mt->hiz_buf->mt,
-                                      &hiz_tile_mask_x, &hiz_tile_mask_y,
-                                      false);
+         intel_get_tile_masks(depth_mt->hiz_buf->mt->tiling,
+                              depth_mt->hiz_buf->mt->tr_mode,
+                              depth_mt->hiz_buf->mt->cpp,
+                              false, &hiz_tile_mask_x,
+                              &hiz_tile_mask_y);
 
          /* Each HiZ row represents 2 rows of pixels */
          hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;
@@ -200,9 +204,11 @@ brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
          tile_mask_y |= 63;
       } else {
          uint32_t stencil_tile_mask_x, stencil_tile_mask_y;
-         intel_miptree_get_tile_masks(stencil_mt,
-                                      &stencil_tile_mask_x,
-                                      &stencil_tile_mask_y, false);
+         intel_get_tile_masks(stencil_mt->tiling,
+                              stencil_mt->tr_mode,
+                              stencil_mt->cpp,
+                              false, &stencil_tile_mask_x,
+                              &stencil_tile_mask_y);
 
          tile_mask_x |= stencil_tile_mask_x;
          tile_mask_y |= stencil_tile_mask_y;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index ee5904d6a48..4e8418e72d7 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1119,31 +1119,18 @@ intel_get_tile_dims(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
  * untiled, the masks are set to 0.
  */
 void
-intel_miptree_get_tile_masks(const struct intel_mipmap_tree *mt,
-                             uint32_t *mask_x, uint32_t *mask_y,
-                             bool map_stencil_as_y_tiled)
+intel_get_tile_masks(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
+                     bool map_stencil_as_y_tiled,
+                     uint32_t *mask_x, uint32_t *mask_y)
 {
-   int cpp = mt->cpp;
-   uint32_t tiling = mt->tiling;
-
+   uint32_t tile_w_bytes, tile_h;
    if (map_stencil_as_y_tiled)
       tiling = I915_TILING_Y;
 
-   switch (tiling) {
-   default:
-      unreachable("not reached");
-   case I915_TILING_NONE:
-      *mask_x = *mask_y = 0;
-      break;
-   case I915_TILING_X:
-      *mask_x = 512 / cpp - 1;
-      *mask_y = 7;
-      break;
-   case I915_TILING_Y:
-      *mask_x = 128 / cpp - 1;
-      *mask_y = 31;
-      break;
-   }
+   intel_get_tile_dims(tiling, tr_mode, cpp, &tile_w_bytes, &tile_h);
+
+   *mask_x = tile_w_bytes / cpp - 1;
+   *mask_y = tile_h - 1;
 }
 
 /**
@@ -1208,7 +1195,7 @@ intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt,
    uint32_t x, y;
    uint32_t mask_x, mask_y;
 
-   intel_miptree_get_tile_masks(mt, &mask_x, &mask_y, false);
+   intel_get_tile_masks(mt->tiling, mt->tr_mode, mt->cpp, false, &mask_x, &mask_y);
    intel_miptree_get_image_offset(mt, level, slice, &x, &y);
 
    *tile_x = x & mask_x;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 294e3018520..bd77f950cc8 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -623,9 +623,9 @@ intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
                                        int *width, int *height, int *depth);
 
 void
-intel_miptree_get_tile_masks(const struct intel_mipmap_tree *mt,
-                             uint32_t *mask_x, uint32_t *mask_y,
-                             bool map_stencil_as_y_tiled);
+intel_get_tile_masks(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
+                     bool map_stencil_as_y_tiled,
+                     uint32_t *mask_x, uint32_t *mask_y);
 
 void
 intel_get_tile_dims(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
-- 
cgit v1.2.3


From bbbc9fd8e5e2a4434c8630eacd7943d59aa73c47 Mon Sep 17 00:00:00 2001
From: Anuj Phogat <anuj.phogat@gmail.com>
Date: Thu, 9 Jul 2015 10:47:17 -0700
Subject: i965: Use helper function intel_get_tile_dims() in surface setup

It takes care of using the correct tile width if we later use other
tiling patterns for aux miptree.

V2: Remove the comment about using Yf for aux miptree.

Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/drivers/dri/i965/gen8_surface_state.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index d2f333fd4dd..c5f1bae8ecb 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -276,8 +276,13 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
    }
 
    if (aux_mt) {
+      uint32_t tile_w, tile_h;
+      assert(aux_mt->tiling == I915_TILING_Y);
+      intel_get_tile_dims(aux_mt->tiling, aux_mt->tr_mode,
+                          aux_mt->cpp, &tile_w, &tile_h);
       surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
-                SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) |
+                SET_FIELD((aux_mt->pitch / tile_w) - 1,
+                          GEN8_SURFACE_AUX_PITCH) |
                 aux_mode;
    } else {
       surf[6] = 0;
@@ -501,8 +506,13 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
    }
 
    if (aux_mt) {
+      uint32_t tile_w, tile_h;
+      assert(aux_mt->tiling == I915_TILING_Y);
+      intel_get_tile_dims(aux_mt->tiling, aux_mt->tr_mode,
+                          aux_mt->cpp, &tile_w, &tile_h);
       surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
-                SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) |
+                SET_FIELD((aux_mt->pitch / tile_w) - 1,
+                          GEN8_SURFACE_AUX_PITCH) |
                 aux_mode;
    } else {
       surf[6] = 0;
-- 
cgit v1.2.3


From 485285498f39aa9590d430dee6d52c796531b8c0 Mon Sep 17 00:00:00 2001
From: Anuj Phogat <anuj.phogat@gmail.com>
Date: Tue, 11 Aug 2015 16:26:39 -0700
Subject: i965: Delete temporary variable 'src_pitch'

Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/drivers/dri/i965/intel_blit.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 3a2a1818bc7..8ddb313a491 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -327,10 +327,6 @@ intel_miptree_blit(struct brw_context *brw,
    if (dst_flip)
       dst_y = minify(dst_mt->physical_height0, dst_level - dst_mt->first_level) - dst_y - height;
 
-   int src_pitch = src_mt->pitch;
-   if (src_flip != dst_flip)
-      src_pitch = -src_pitch;
-
    uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
    intel_miptree_get_image_offset(src_mt, src_level, src_slice,
                                   &src_image_x, &src_image_y);
@@ -353,7 +349,7 @@ intel_miptree_blit(struct brw_context *brw,
 
    if (!intelEmitCopyBlit(brw,
                           src_mt->cpp,
-                          src_pitch,
+                          src_flip == dst_flip ? src_mt->pitch : -src_mt->pitch,
                           src_mt->bo, src_mt->offset,
                           src_mt->tiling,
                           src_mt->tr_mode,
-- 
cgit v1.2.3


From e83b07aa7b124184decd68a64d970e8f408f8725 Mon Sep 17 00:00:00 2001
From: Anuj Phogat <anuj.phogat@gmail.com>
Date: Tue, 11 Aug 2015 16:30:34 -0700
Subject: i965: Move conversion of {src, dst}_pitch to dwords outside if/else

Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/drivers/dri/i965/intel_blit.c | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 8ddb313a491..8bb41207930 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -587,15 +587,6 @@ intelEmitCopyBlit(struct brw_context *brw,
                         dst_tiling, dst_tr_mode,
                         cpp, use_fast_copy_blit);
 
-      /* For tiled source and destination, pitch value should be specified
-       * as a number of Dwords.
-       */
-      if (dst_tiling != I915_TILING_NONE)
-         dst_pitch /= 4;
-
-      if (src_tiling != I915_TILING_NONE)
-         src_pitch /= 4;
-
    } else {
       assert(!dst_y_tiled || (dst_pitch % 128) == 0);
       assert(!src_y_tiled || (src_pitch % 128) == 0);
@@ -636,17 +627,19 @@ intelEmitCopyBlit(struct brw_context *brw,
       CMD = xy_blit_cmd(src_tiling, src_tr_mode,
                         dst_tiling, dst_tr_mode,
                         cpp, use_fast_copy_blit);
+   }
 
-      if (dst_tiling != I915_TILING_NONE)
-         dst_pitch /= 4;
+   /* For tiled source and destination, pitch value should be specified
+    * as a number of Dwords.
+    */
+   if (dst_tiling != I915_TILING_NONE)
+      dst_pitch /= 4;
 
-      if (src_tiling != I915_TILING_NONE)
-         src_pitch /= 4;
-   }
+   if (src_tiling != I915_TILING_NONE)
+      src_pitch /= 4;
 
-   if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
+   if (dst_y2 <= dst_y || dst_x2 <= dst_x)
       return true;
-   }
 
    assert(dst_x < dst_x2);
    assert(dst_y < dst_y2);
-- 
cgit v1.2.3


From 0fa39bff19dc2fbd3c184bd0e1267c86bd5040d9 Mon Sep 17 00:00:00 2001
From: Anuj Phogat <anuj.phogat@gmail.com>
Date: Tue, 11 Aug 2015 16:31:39 -0700
Subject: i965: Fix {src, dst}_pitch alignment check for XY_SRC_COPY_BLT

Current code checks the alignment restrictions only for Y tiling.
From Broadwell PRM vol 10:

 "pitch is of 512Byte granularity for Tile-X: This means the tiled-x
  surface pitch can be (512, 1024, 1536, 2048...)/4 (in Dwords)."

This patch adds the restriction for X tiling as well.

Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/drivers/dri/i965/intel_blit.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 8bb41207930..0cd2a203cb3 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -522,6 +522,8 @@ intelEmitCopyBlit(struct brw_context *brw,
    bool dst_y_tiled = dst_tiling == I915_TILING_Y;
    bool src_y_tiled = src_tiling == I915_TILING_Y;
    bool use_fast_copy_blit = false;
+   uint32_t src_tile_w, src_tile_h;
+   uint32_t dst_tile_w, dst_tile_h;
 
    if ((dst_y_tiled || src_y_tiled) && brw->gen < 6)
       return false;
@@ -550,6 +552,9 @@ intelEmitCopyBlit(struct brw_context *brw,
        src_buffer, src_pitch, src_offset, src_x, src_y,
        dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
 
+   intel_get_tile_dims(src_tiling, src_tr_mode, cpp, &src_tile_w, &src_tile_h);
+   intel_get_tile_dims(dst_tiling, dst_tr_mode, cpp, &dst_tile_w, &dst_tile_h);
+
    use_fast_copy_blit = can_fast_copy_blit(brw,
                                            src_buffer,
                                            src_x, src_y,
@@ -588,8 +593,8 @@ intelEmitCopyBlit(struct brw_context *brw,
                         cpp, use_fast_copy_blit);
 
    } else {
-      assert(!dst_y_tiled || (dst_pitch % 128) == 0);
-      assert(!src_y_tiled || (src_pitch % 128) == 0);
+      assert(src_tiling == I915_TILING_NONE || (src_pitch % src_tile_w) == 0);
+      assert(dst_tiling == I915_TILING_NONE || (dst_pitch % dst_tile_w) == 0);
 
       /* For big formats (such as floating point), do the copy using 16 or
        * 32bpp and multiply the coordinates.
-- 
cgit v1.2.3


From 0bfd914f9f02c85a4ad8e6892f1a31e37c14f2b2 Mon Sep 17 00:00:00 2001
From: Anuj Phogat <anuj.phogat@gmail.com>
Date: Tue, 11 Aug 2015 16:36:44 -0700
Subject: i965/gen9: Fix {src, dst}_pitch alignment check for XY_FAST_COPY_BLT

I misinterpreted the alignment restriction in XY_FAST_COPY_BLT earlier.
Instead of checking the pitch for 64-byte alignment we need to check it for
tile width alignment.

Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/drivers/dri/i965/intel_blit.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 0cd2a203cb3..9184ad62e34 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -439,14 +439,6 @@ can_fast_copy_blit(struct brw_context *brw,
        (dst_tiling_none && dst_pitch % 16 != 0))
       return false;
 
-   /* For Tiled surfaces, the pitch has to be a multiple of the Tile width
-    * (X direction width of the Tile). This means the pitch value will
-    * always be Cache Line aligned (64byte multiple).
-    */
-   if ((!dst_tiling_none && dst_pitch % 64 != 0) ||
-       (!src_tiling_none && src_pitch % 64 != 0))
-      return false;
-
    return true;
 }
 
@@ -555,6 +547,13 @@ intelEmitCopyBlit(struct brw_context *brw,
    intel_get_tile_dims(src_tiling, src_tr_mode, cpp, &src_tile_w, &src_tile_h);
    intel_get_tile_dims(dst_tiling, dst_tr_mode, cpp, &dst_tile_w, &dst_tile_h);
 
+   /* For Tiled surfaces, the pitch has to be a multiple of the Tile width
+    * (X direction width of the Tile). This is ensured while allocating the
+    * buffer object.
+    */
+   assert(src_tiling == I915_TILING_NONE || (src_pitch % src_tile_w) == 0);
+   assert(dst_tiling == I915_TILING_NONE || (dst_pitch % dst_tile_w) == 0);
+
    use_fast_copy_blit = can_fast_copy_blit(brw,
                                            src_buffer,
                                            src_x, src_y,
@@ -593,9 +592,6 @@ intelEmitCopyBlit(struct brw_context *brw,
                         cpp, use_fast_copy_blit);
 
    } else {
-      assert(src_tiling == I915_TILING_NONE || (src_pitch % src_tile_w) == 0);
-      assert(dst_tiling == I915_TILING_NONE || (dst_pitch % dst_tile_w) == 0);
-
       /* For big formats (such as floating point), do the copy using 16 or
        * 32bpp and multiply the coordinates.
        */
-- 
cgit v1.2.3


From 4c5308bbf4fa5c6ac274f6e1ab7c8846a2866976 Mon Sep 17 00:00:00 2001
From: Anuj Phogat <anuj.phogat@gmail.com>
Date: Wed, 19 Aug 2015 16:26:59 -0700
Subject: i965: Rename intel_miptree_get_dimensions_for_image()

This function isn't specific to miptrees. So, drop the "miptree"
from the function name.

V3: Add a comment explaining how the 1D Array texture height and
    depth are interpreted by Intel hardware.

Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/drivers/dri/i965/intel_fbo.c          |  2 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c  | 12 +++++++++---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h  |  4 ++--
 src/mesa/drivers/dri/i965/intel_tex_image.c    |  3 +--
 src/mesa/drivers/dri/i965/intel_tex_validate.c |  3 +--
 5 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index b2160552d33..6b2349e8b69 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -1027,7 +1027,7 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
    uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
                            MIPTREE_LAYOUT_TILING_ANY;
 
-   intel_miptree_get_dimensions_for_image(rb->TexImage, &width, &height, &depth);
+   intel_get_image_dims(rb->TexImage, &width, &height, &depth);
 
    new_mt = intel_miptree_create(brw, rb->TexImage->TexObject->Target,
                                  intel_image->base.Base.TexFormat,
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 4e8418e72d7..9c0304c7684 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -921,12 +921,18 @@ intel_miptree_release(struct intel_mipmap_tree **mt)
    *mt = NULL;
 }
 
+
 void
-intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
-                                       int *width, int *height, int *depth)
+intel_get_image_dims(struct gl_texture_image *image,
+                     int *width, int *height, int *depth)
 {
    switch (image->TexObject->Target) {
    case GL_TEXTURE_1D_ARRAY:
+      /* For a 1D Array texture the OpenGL API will treat the image height as
+       * the number of array slices. For Intel hardware, we treat the 1D array
+       * as a 2D Array with a height of 1. So, here we want to swap image
+       * height and depth.
+       */
       *width = image->Width;
       *height = 1;
       *depth = image->Height;
@@ -970,7 +976,7 @@ intel_miptree_match_image(struct intel_mipmap_tree *mt,
    if (image->TexFormat != mt_format)
       return false;
 
-   intel_miptree_get_dimensions_for_image(image, &width, &height, &depth);
+   intel_get_image_dims(image, &width, &height, &depth);
 
    if (mt->target == GL_TEXTURE_CUBE_MAP)
       depth = 6;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index bd77f950cc8..7610d754451 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -619,8 +619,8 @@ intel_miptree_get_image_offset(const struct intel_mipmap_tree *mt,
 			       GLuint *x, GLuint *y);
 
 void
-intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
-                                       int *width, int *height, int *depth);
+intel_get_image_dims(struct gl_texture_image *image,
+                     int *width, int *height, int *depth);
 
 void
 intel_get_tile_masks(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
index 93a8cdee0cb..34b91e823e0 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -42,8 +42,7 @@ intel_miptree_create_for_teximage(struct brw_context *brw,
    int width, height, depth;
    GLuint i;
 
-   intel_miptree_get_dimensions_for_image(&intelImage->base.Base,
-                                          &width, &height, &depth);
+   intel_get_image_dims(&intelImage->base.Base, &width, &height, &depth);
 
    DBG("%s\n", __func__);
 
diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c
index d3fb252b5d5..2cf9c13be33 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c
@@ -128,8 +128,7 @@ intel_finalize_mipmap_tree(struct brw_context *brw, GLuint unit)
    /* May need to create a new tree:
     */
    if (!intelObj->mt) {
-      intel_miptree_get_dimensions_for_image(&firstImage->base.Base,
-					     &width, &height, &depth);
+      intel_get_image_dims(&firstImage->base.Base, &width, &height, &depth);
 
       perf_debug("Creating new %s %dx%dx%d %d-level miptree to handle "
                  "finalized texture miptree.\n",
-- 
cgit v1.2.3


From d4e650b07bc80075f0d088e7d85df9efa45e11bd Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 25 Sep 2015 16:34:07 -0400
Subject: nouveau: be more careful about freeing temporary transfer buffers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Deleting a buffer does not flush the command stream. Make sure that we
wait for the copies to finish before deleting the temporary bo.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
Tested-by: Marcin Ślusarz <marcin.slusarz@gmail.com>
---
 src/gallium/drivers/nouveau/nouveau_fence.c      | 8 ++++++++
 src/gallium/drivers/nouveau/nouveau_fence.h      | 3 +++
 src/gallium/drivers/nouveau/nv30/nv30_miptree.c  | 9 +++++++--
 src/gallium/drivers/nouveau/nv50/nv50_transfer.c | 7 ++++++-
 src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c | 7 ++++++-
 5 files changed, 30 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nouveau_fence.c b/src/gallium/drivers/nouveau/nouveau_fence.c
index abcdb479954..ee4e08dd520 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.c
+++ b/src/gallium/drivers/nouveau/nouveau_fence.c
@@ -231,3 +231,11 @@ nouveau_fence_next(struct nouveau_screen *screen)
 
    nouveau_fence_new(screen, &screen->fence.current, false);
 }
+
+void
+nouveau_fence_unref_bo(void *data)
+{
+   struct nouveau_bo *bo = data; /* the bo handed to nouveau_fence_work() */
+
+   nouveau_bo_ref(NULL, &bo); /* release the reference carried by data */
+}
diff --git a/src/gallium/drivers/nouveau/nouveau_fence.h b/src/gallium/drivers/nouveau/nouveau_fence.h
index a1587051b0f..2efcab2172d 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.h
+++ b/src/gallium/drivers/nouveau/nouveau_fence.h
@@ -37,6 +37,9 @@ void nouveau_fence_next(struct nouveau_screen *);
 bool nouveau_fence_wait(struct nouveau_fence *);
 bool nouveau_fence_signalled(struct nouveau_fence *);
 
+void nouveau_fence_unref_bo(void *data); /* generic unref bo callback */
+
+
 static inline void
 nouveau_fence_ref(struct nouveau_fence *fence, struct nouveau_fence **ref)
 {
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
index 735c718c4b3..c6f69650068 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
@@ -339,10 +339,15 @@ nv30_miptree_transfer_unmap(struct pipe_context *pipe,
    struct nv30_context *nv30 = nv30_context(pipe);
    struct nv30_transfer *tx = nv30_transfer(ptx);
 
-   if (ptx->usage & PIPE_TRANSFER_WRITE)
+   if (ptx->usage & PIPE_TRANSFER_WRITE) {
       nv30_transfer_rect(nv30, NEAREST, &tx->tmp, &tx->img);
 
-   nouveau_bo_ref(NULL, &tx->tmp.bo);
+      /* Allow the copies above to finish executing before freeing the source */
+      nouveau_fence_work(nv30->screen->base.fence.current,
+                         nouveau_fence_unref_bo, tx->tmp.bo);
+   } else {
+      nouveau_bo_ref(NULL, &tx->tmp.bo);
+   }
    pipe_resource_reference(&ptx->resource, NULL);
    FREE(tx);
 }
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
index fc6b24aac28..be514077d32 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
@@ -365,9 +365,14 @@ nv50_miptree_transfer_unmap(struct pipe_context *pctx,
             tx->rect[0].base += mt->layer_stride;
          tx->rect[1].base += tx->nblocksy * tx->base.stride;
       }
+
+      /* Allow the copies above to finish executing before freeing the source */
+      nouveau_fence_work(nv50->screen->base.fence.current,
+                         nouveau_fence_unref_bo, tx->rect[1].bo);
+   } else {
+      nouveau_bo_ref(NULL, &tx->rect[1].bo);
    }
 
-   nouveau_bo_ref(NULL, &tx->rect[1].bo);
    pipe_resource_reference(&transfer->resource, NULL);
 
    FREE(tx);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
index d4831bfd9d4..aaec60a5ac2 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -495,11 +495,16 @@ nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
          tx->rect[1].base += tx->nblocksy * tx->base.stride;
       }
       NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_wr, 1);
+
+      /* Allow the copies above to finish executing before freeing the source */
+      nouveau_fence_work(nvc0->screen->base.fence.current,
+                         nouveau_fence_unref_bo, tx->rect[1].bo);
+   } else {
+      nouveau_bo_ref(NULL, &tx->rect[1].bo);
    }
    if (tx->base.usage & PIPE_TRANSFER_READ)
       NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_rd, 1);
 
-   nouveau_bo_ref(NULL, &tx->rect[1].bo);
    pipe_resource_reference(&transfer->resource, NULL);
 
    FREE(tx);
-- 
cgit v1.2.3


From 3a6b9a7830c3df14ffcfbbf57c82ea08bd59ef04 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 25 Sep 2015 19:05:14 -0400
Subject: nouveau: delay deleting buffer with unflushed fence
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If there is an unflushed fence on the bo, then the resource may still be
used in commands built up in the local pushbuf. Flushing can cause all
sorts of unwanted effects, so just free the bo when the relevant fence
is hit.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
Tested-by: Marcin Ślusarz <marcin.slusarz@gmail.com>
---
 src/gallium/drivers/nouveau/nouveau_buffer.c    | 7 ++++++-
 src/gallium/drivers/nouveau/nv50/nv50_miptree.c | 5 ++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 4937dae8b06..310e9dbcfd4 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -80,7 +80,12 @@ release_allocation(struct nouveau_mm_allocation **mm,
 inline void
 nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
 {
-   nouveau_bo_ref(NULL, &buf->bo);
+   if (buf->fence && buf->fence->state < NOUVEAU_FENCE_STATE_FLUSHED) {
+      nouveau_fence_work(buf->fence, nouveau_fence_unref_bo, buf->bo);
+      buf->bo = NULL;
+   } else {
+      nouveau_bo_ref(NULL, &buf->bo);
+   }
 
    if (buf->mm)
       release_allocation(&buf->mm, buf->fence);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
index 92d49e49ff2..812d10ce667 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
@@ -163,7 +163,10 @@ nv50_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt)
 {
    struct nv50_miptree *mt = nv50_miptree(pt);
 
-   nouveau_bo_ref(NULL, &mt->base.bo);
+   if (mt->base.fence && mt->base.fence->state < NOUVEAU_FENCE_STATE_FLUSHED)
+      nouveau_fence_work(mt->base.fence, nouveau_fence_unref_bo, mt->base.bo);
+   else
+      nouveau_bo_ref(NULL, &mt->base.bo);
 
    nouveau_fence_ref(NULL, &mt->base.fence);
    nouveau_fence_ref(NULL, &mt->base.fence_wr);
-- 
cgit v1.2.3


From 1d8cba9b51b7a6e7dbf3f0d3f53b5c232fd0b5b2 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Mon, 28 Sep 2015 15:18:08 -0400
Subject: nouveau: wait to unref the transfer's bo until it's no longer used
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bo will often come from a slab in which case it doesn't matter. But
for larger allocations this will be in its own bo, and we have to make
sure to wait until it's no longer used in order for it to be freed.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
Tested-by: Marcin Ślusarz <marcin.slusarz@gmail.com>
---
 src/gallium/drivers/nouveau/nouveau_buffer.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 310e9dbcfd4..72e070b5f06 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -286,7 +286,8 @@ nouveau_buffer_transfer_del(struct nouveau_context *nv,
 {
    if (tx->map) {
       if (likely(tx->bo)) {
-         nouveau_bo_ref(NULL, &tx->bo);
+         nouveau_fence_work(nv->screen->fence.current,
+                            nouveau_fence_unref_bo, tx->bo);
          if (tx->mm)
             release_allocation(&tx->mm, nv->screen->fence.current);
       } else {
@@ -787,7 +788,7 @@ nouveau_buffer_migrate(struct nouveau_context *nv,
       nv->copy_data(nv, buf->bo, buf->offset, new_domain,
                     bo, offset, old_domain, buf->base.width0);
 
-      nouveau_bo_ref(NULL, &bo);
+      nouveau_fence_work(screen->fence.current, nouveau_fence_unref_bo, bo);
       if (mm)
          release_allocation(&mm, screen->fence.current);
    } else
-- 
cgit v1.2.3


From 945592f92ca91e21307d10b657835d512bacf8b8 Mon Sep 17 00:00:00 2001
From: Anuj Phogat <anuj.phogat@gmail.com>
Date: Fri, 24 Jul 2015 14:52:01 -0700
Subject: i965/gen9: Add a condition for starting pixel in fast copy blit

This condition restricts the use of fast copy blit to cases
where starting pixel of src and dst is oword (16 byte) aligned.

Many piglit tests (if using fast copy blit in Mesa) failed earlier
because I missed adding this condition. Fast copy blit is currently
enabled for use only with Yf/Ys tiling.

Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/drivers/dri/i965/intel_blit.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 9184ad62e34..46fccc8d6ce 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -420,6 +420,10 @@ can_fast_copy_blit(struct brw_context *brw,
        dst_tr_mode == INTEL_MIPTREE_TRMODE_NONE)
       return false;
 
+   /* The start pixel for Fast Copy blit should be on an OWord boundary. */
+   if ((dst_x * cpp | src_x * cpp) & 15)
+      return false;
+
    /* For all surface types buffers must be cacheline-aligned. */
    if ((dst_offset | src_offset) & 63)
       return false;
-- 
cgit v1.2.3


From 38004eb17ce9663a4343bae4e783e97e73596e9c Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Fri, 11 Sep 2015 10:02:56 +0200
Subject: main: fix ACTIVE_UNIFORM_BLOCKS value

NumUniformBlocks also counts shader storage blocks.
NumUniformBlocks variable will be renamed in a later patch to avoid
misunderstandings.

v2:

- Modify the condition to use !IsShaderStorage and the list of
  uniform blocks (Timothy)

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
---
 src/mesa/main/shaderapi.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index edc23bcefe3..00c51e2bd3d 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -729,7 +729,11 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname,
       if (!has_ubo)
          break;
 
-      *params = shProg->NumUniformBlocks;
+      *params = 0;
+      for (unsigned i = 0; i < shProg->NumUniformBlocks; i++) {
+         if (!shProg->UniformBlocks[i].IsShaderStorage)
+            (*params)++;
+      }
       return;
    case GL_PROGRAM_BINARY_RETRIEVABLE_HINT:
       /* This enum isn't part of the OES extension for OpenGL ES 2.0.  It is
-- 
cgit v1.2.3


From 6668eb5a451c43ac78a784711cf239fdf7ca75ef Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Fri, 11 Sep 2015 12:29:37 +0200
Subject: mesa: rename gl_shader_program's NumUniformBlocks to
 NumBufferInterfaceBlocks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Because it counts shader storage blocks too.

v2:
- Use NumBufferInterfaceBlocks instead (Jordan).

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/link_uniform_initializers.cpp         |  2 +-
 src/glsl/link_uniforms.cpp                     |  4 ++--
 src/glsl/linker.cpp                            | 10 +++++-----
 src/glsl/standalone_scaffolding.cpp            |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp       |  8 ++++----
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp     |  8 ++++----
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |  2 +-
 src/mesa/main/mtypes.h                         |  2 +-
 src/mesa/main/shaderapi.c                      |  4 ++--
 src/mesa/main/shaderobj.c                      |  2 +-
 src/mesa/main/uniforms.c                       |  8 ++++----
 11 files changed, 26 insertions(+), 26 deletions(-)

(limited to 'src')

diff --git a/src/glsl/link_uniform_initializers.cpp b/src/glsl/link_uniform_initializers.cpp
index 05000fc39ef..34830829b4a 100644
--- a/src/glsl/link_uniform_initializers.cpp
+++ b/src/glsl/link_uniform_initializers.cpp
@@ -48,7 +48,7 @@ static unsigned
 get_uniform_block_index(const gl_shader_program *shProg,
                         const char *uniformBlockName)
 {
-   for (unsigned i = 0; i < shProg->NumUniformBlocks; i++) {
+   for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
       if (!strcmp(shProg->UniformBlocks[i].Name, uniformBlockName))
 	 return i;
    }
diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index 94d7287221a..47d49c84e65 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -500,7 +500,7 @@ public:
          if (var->is_interface_instance() && var->type->is_array()) {
             unsigned l = strlen(var->get_interface_type()->name);
 
-            for (unsigned i = 0; i < prog->NumUniformBlocks; i++) {
+            for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
                if (strncmp(var->get_interface_type()->name,
                            prog->UniformBlocks[i].Name,
                            l) == 0
@@ -510,7 +510,7 @@ public:
                }
             }
          } else {
-            for (unsigned i = 0; i < prog->NumUniformBlocks; i++) {
+            for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
                if (strcmp(var->get_interface_type()->name,
                           prog->UniformBlocks[i].Name) == 0) {
                   ubo_block_index = i;
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 75396fb3936..9bacd22b9d4 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1187,7 +1187,7 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
       for (unsigned int j = 0; j < sh->NumUniformBlocks; j++) {
 	 int index = link_cross_validate_uniform_block(prog,
 						       &prog->UniformBlocks,
-						       &prog->NumUniformBlocks,
+						       &prog->NumBufferInterfaceBlocks,
 						       &sh->UniformBlocks[j]);
 
 	 if (index == -1) {
@@ -2802,7 +2802,7 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
    unsigned shader_blocks[MESA_SHADER_STAGES] = {0};
    unsigned total_shader_storage_blocks = 0;
 
-   for (unsigned i = 0; i < prog->NumUniformBlocks; i++) {
+   for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
       /* Don't check SSBOs for Uniform Block Size */
       if (!prog->UniformBlocks[i].IsShaderStorage &&
           prog->UniformBlocks[i].UniformBufferSize > ctx->Const.MaxUniformBlockSize) {
@@ -2836,7 +2836,7 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
 
       if (total_uniform_blocks > ctx->Const.MaxCombinedUniformBlocks) {
 	 linker_error(prog, "Too many combined uniform blocks (%d/%d)\n",
-		      prog->NumUniformBlocks,
+		      prog->NumBufferInterfaceBlocks,
 		      ctx->Const.MaxCombinedUniformBlocks);
       } else {
 	 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
@@ -2939,7 +2939,7 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog)
 
          total_image_units += sh->NumImages;
 
-         for (unsigned j = 0; j < prog->NumUniformBlocks; j++) {
+         for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) {
             int stage_index = prog->UniformBlockStageIndex[i][j];
             if (stage_index != -1 && sh->UniformBlocks[stage_index].IsShaderStorage)
                total_shader_storage_blocks++;
@@ -3418,7 +3418,7 @@ build_program_resource_list(struct gl_shader_program *shProg)
    }
 
    /* Add program uniform blocks and shader storage blocks. */
-   for (unsigned i = 0; i < shProg->NumUniformBlocks; i++) {
+   for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
       bool is_shader_storage = shProg->UniformBlocks[i].IsShaderStorage;
       GLenum type = is_shader_storage ? GL_SHADER_STORAGE_BLOCK : GL_UNIFORM_BLOCK;
       if (!add_program_resource(shProg, type,
diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp
index e52869f288e..ea9334fd7b7 100644
--- a/src/glsl/standalone_scaffolding.cpp
+++ b/src/glsl/standalone_scaffolding.cpp
@@ -101,7 +101,7 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
 
    ralloc_free(shProg->UniformBlocks);
    shProg->UniformBlocks = NULL;
-   shProg->NumUniformBlocks = 0;
+   shProg->NumBufferInterfaceBlocks = 0;
    for (i = 0; i < MESA_SHADER_STAGES; i++) {
       ralloc_free(shProg->UniformBlockStageIndex[i]);
       shProg->UniformBlockStageIndex[i] = NULL;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index a2bc5c64e4e..cf7e74256fc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1530,7 +1530,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
           */
          brw_mark_surface_used(prog_data,
                                stage_prog_data->binding_table.ubo_start +
-                               shader_prog->NumUniformBlocks - 1);
+                               shader_prog->NumBufferInterfaceBlocks - 1);
       }
 
       if (has_indirect) {
@@ -1593,7 +1593,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
           */
          brw_mark_surface_used(prog_data,
                                stage_prog_data->binding_table.ubo_start +
-                               shader_prog->NumUniformBlocks - 1);
+                               shader_prog->NumBufferInterfaceBlocks - 1);
       }
 
       /* Get the offset to read from */
@@ -1786,7 +1786,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
          brw_mark_surface_used(prog_data,
                                stage_prog_data->binding_table.ubo_start +
-                               shader_prog->NumUniformBlocks - 1);
+                               shader_prog->NumBufferInterfaceBlocks - 1);
       }
 
       /* Offset */
@@ -1952,7 +1952,7 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
        */
       brw_mark_surface_used(prog_data,
                             stage_prog_data->binding_table.ubo_start +
-                            shader_prog->NumUniformBlocks - 1);
+                            shader_prog->NumBufferInterfaceBlocks - 1);
    }
 
    fs_reg offset = get_nir_src(instr->src[1]);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 2d2e5753eff..94906d2e705 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -592,7 +592,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
 
          brw_mark_surface_used(&prog_data->base,
                                prog_data->base.binding_table.ubo_start +
-                               shader_prog->NumUniformBlocks - 1);
+                               shader_prog->NumBufferInterfaceBlocks - 1);
       }
 
       /* Offset */
@@ -739,7 +739,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
           */
          brw_mark_surface_used(&prog_data->base,
                                prog_data->base.binding_table.ubo_start +
-                               shader_prog->NumUniformBlocks - 1);
+                               shader_prog->NumBufferInterfaceBlocks - 1);
       }
 
       src_reg offset_reg = src_reg(this, glsl_type::uint_type);
@@ -889,7 +889,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
           */
          brw_mark_surface_used(&prog_data->base,
                                prog_data->base.binding_table.ubo_start +
-                               shader_prog->NumUniformBlocks - 1);
+                               shader_prog->NumBufferInterfaceBlocks - 1);
       }
 
       unsigned const_offset = instr->const_index[0];
@@ -950,7 +950,7 @@ vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr)
        */
       brw_mark_surface_used(&prog_data->base,
                             prog_data->base.binding_table.ubo_start +
-                            shader_prog->NumUniformBlocks - 1);
+                            shader_prog->NumBufferInterfaceBlocks - 1);
    }
 
    src_reg offset = get_nir_src(instr->src[1], 1);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 3443e5cb759..3cb783ece1c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1872,7 +1872,7 @@ vec4_visitor::visit(ir_expression *ir)
           */
          brw_mark_surface_used(&prog_data->base,
                                prog_data->base.binding_table.ubo_start +
-                               shader_prog->NumUniformBlocks - 1);
+                               shader_prog->NumBufferInterfaceBlocks - 1);
       }
 
       if (const_offset_ir) {
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index a872942d2a8..288d75790a4 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2689,7 +2689,7 @@ struct gl_shader_program
     */
    unsigned LastClipDistanceArraySize;
 
-   unsigned NumUniformBlocks;
+   unsigned NumBufferInterfaceBlocks;
    struct gl_uniform_block *UniformBlocks;
 
    /**
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 00c51e2bd3d..418121dcb99 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -713,7 +713,7 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname,
       if (!has_ubo)
          break;
 
-      for (i = 0; i < shProg->NumUniformBlocks; i++) {
+      for (i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
 	 /* Add one for the terminating NUL character.
 	  */
 	 const GLint len = strlen(shProg->UniformBlocks[i].Name) + 1;
@@ -730,7 +730,7 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname,
          break;
 
       *params = 0;
-      for (unsigned i = 0; i < shProg->NumUniformBlocks; i++) {
+      for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
          if (!shProg->UniformBlocks[i].IsShaderStorage)
             (*params)++;
       }
diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
index 71d4ed69285..5cd37d7e4c4 100644
--- a/src/mesa/main/shaderobj.c
+++ b/src/mesa/main/shaderobj.c
@@ -294,7 +294,7 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
 
    ralloc_free(shProg->UniformBlocks);
    shProg->UniformBlocks = NULL;
-   shProg->NumUniformBlocks = 0;
+   shProg->NumBufferInterfaceBlocks = 0;
    for (i = 0; i < MESA_SHADER_STAGES; i++) {
       ralloc_free(shProg->UniformBlockStageIndex[i]);
       shProg->UniformBlockStageIndex[i] = NULL;
diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index c491707e937..04cc81f9809 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -1002,10 +1002,10 @@ _mesa_UniformBlockBinding(GLuint program,
    if (!shProg)
       return;
 
-   if (uniformBlockIndex >= shProg->NumUniformBlocks) {
+   if (uniformBlockIndex >= shProg->NumBufferInterfaceBlocks) {
       _mesa_error(ctx, GL_INVALID_VALUE,
 		  "glUniformBlockBinding(block index %u >= %u)",
-		  uniformBlockIndex, shProg->NumUniformBlocks);
+		  uniformBlockIndex, shProg->NumBufferInterfaceBlocks);
       return;
    }
 
@@ -1054,10 +1054,10 @@ _mesa_ShaderStorageBlockBinding(GLuint program,
    if (!shProg)
       return;
 
-   if (shaderStorageBlockIndex >= shProg->NumUniformBlocks) {
+   if (shaderStorageBlockIndex >= shProg->NumBufferInterfaceBlocks) {
       _mesa_error(ctx, GL_INVALID_VALUE,
 		  "glShaderStorageBlockBinding(block index %u >= %u)",
-		  shaderStorageBlockIndex, shProg->NumUniformBlocks);
+		  shaderStorageBlockIndex, shProg->NumBufferInterfaceBlocks);
       return;
    }
 
-- 
cgit v1.2.3


From 7efb2350199f7ea5acbadb76a1a19eda75b45a45 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Fri, 11 Sep 2015 12:31:10 +0200
Subject: glsl: use correct number of uniform blocks in error message

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
---
 src/glsl/linker.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 9bacd22b9d4..87c7d4b087b 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2836,7 +2836,7 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
 
       if (total_uniform_blocks > ctx->Const.MaxCombinedUniformBlocks) {
 	 linker_error(prog, "Too many combined uniform blocks (%d/%d)\n",
-		      prog->NumBufferInterfaceBlocks,
+		      total_uniform_blocks,
 		      ctx->Const.MaxCombinedUniformBlocks);
       } else {
 	 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
-- 
cgit v1.2.3


From bea66d22f2c0e97242c15652376ae4e82779f343 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Date: Mon, 28 Sep 2015 14:23:34 +0200
Subject: util: implement strndup for WIN32

v2:
- Add strndup.h to Makefile.sources (Emil)
- Use calloc instead of malloc (Emil).
- Check if allocation fails (Emil, Jose)
- Add '#pragma once' and include stdlib.h to strndup.h (Jose)

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92124
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
---
 src/mesa/main/shader_query.cpp |  1 +
 src/util/Makefile.sources      |  2 ++
 src/util/strndup.c             | 49 ++++++++++++++++++++++++++++++++++++++++++
 src/util/strndup.h             | 32 +++++++++++++++++++++++++++
 4 files changed, 84 insertions(+)
 create mode 100644 src/util/strndup.c
 create mode 100644 src/util/strndup.h

(limited to 'src')

diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index e020dce288a..0cada5024bd 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -37,6 +37,7 @@
 #include "../glsl/program.h"
 #include "uniforms.h"
 #include "main/enums.h"
+#include "util/strndup.h"
 
 extern "C" {
 #include "shaderapi.h"
diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources
index afdd0cb3b5c..ef38b5ac7d1 100644
--- a/src/util/Makefile.sources
+++ b/src/util/Makefile.sources
@@ -17,6 +17,8 @@ MESA_UTIL_FILES :=	\
 	set.c \
 	set.h \
 	simple_list.h \
+	strndup.c \
+	strndup.h \
 	strtod.c \
 	strtod.h \
 	texcompress_rgtc_tmp.h \
diff --git a/src/util/strndup.c b/src/util/strndup.c
new file mode 100644
index 00000000000..2c24d3789c2
--- /dev/null
+++ b/src/util/strndup.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#if defined(_WIN32)
+#include <stdlib.h>
+#include <string.h>
+#include "strndup.h"
+
+/**
+ * Windows replacement for POSIX strndup().
+ *
+ * \param str  source buffer; may be NULL, and need not be NUL-terminated
+ *             as long as at least \p max bytes are readable
+ * \param max  maximum number of bytes to copy from \p str
+ *
+ * \return a newly allocated NUL-terminated copy that the caller releases
+ *         with free(), or NULL if \p str is NULL or the allocation fails
+ */
+char *
+strndup(const char *str, size_t max)
+{
+   const char *end;
+   size_t n;
+   char *ptr;
+
+   if (!str)
+      return NULL;
+
+   /* Bounded scan: strlen() would read past max on unterminated input. */
+   end = memchr(str, '\0', max);
+   n = end ? (size_t) (end - str) : max;
+
+   /* calloc() zero-fills, so the terminating NUL is already in place. */
+   ptr = (char *) calloc(n + 1, sizeof(char));
+   if (!ptr)
+      return NULL;
+
+   memcpy(ptr, str, n);
+   return ptr;
+}
+
+#endif
diff --git a/src/util/strndup.h b/src/util/strndup.h
new file mode 100644
index 00000000000..54346823596
--- /dev/null
+++ b/src/util/strndup.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <stdlib.h> // size_t
+
+#if defined(_WIN32)
+
+extern char *strndup(const char *str, size_t max);
+
+#endif
-- 
cgit v1.2.3


From 6bf718fec22f605702c7d15503d4dbc3c2be35e6 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Mon, 28 Sep 2015 12:59:33 +0200
Subject: glsl: Fix unused value warning reported by Coverity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We don't use param in this part of the code, so there is no point in
advancing the pointer:

>>>     CID 1324983:  Code maintainability issues  (UNUSED_VALUE)
>>>     Assigning value from "param->get_next()" to "param" here, but that stored value is overwritten before it can be used.

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/lower_ubo_reference.cpp | 2 --
 1 file changed, 2 deletions(-)

(limited to 'src')

diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index 31885cd3dec..4e09b080e62 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -1099,13 +1099,11 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
 
    const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
       glsl_type::int_type : glsl_type::uint_type;
-   param = param->get_next();
    sig_param = new(mem_ctx)
          ir_variable(type, "data1", ir_var_function_in);
    sig_params.push_tail(sig_param);
 
    if (param_count == 3) {
-      param = param->get_next();
       sig_param = new(mem_ctx)
             ir_variable(type, "data2", ir_var_function_in);
       sig_params.push_tail(sig_param);
-- 
cgit v1.2.3


From 1dc2db7a4dfb0e88a51a27c2234b6a01dead80bf Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Mon, 28 Sep 2015 12:59:34 +0200
Subject: glsl: Fix null return coverity warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add an assert on the result of as_dereference() not being NULL:

>>>     CID 1324978:  Null pointer dereferences  (NULL_RETURNS)
>>>     Dereferencing a null pointer "deref_record->record->as_dereference()".

Since we are introducing a new variable to hold the result of
as_dereference(), take the opportunity to rename deref_record_type to
interface_type and just name the new variable interface_deref, which is
less confusing.

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/lower_ubo_reference.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index 4e09b080e62..e581306019b 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -922,12 +922,14 @@ lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *dere
    case ir_type_dereference_record:
    {
       ir_dereference_record *deref_record = (ir_dereference_record *) deref;
-      const struct glsl_type *deref_record_type =
-         deref_record->record->as_dereference()->type;
-      unsigned record_length = deref_record_type->length;
+      ir_dereference *interface_deref =
+         deref_record->record->as_dereference();
+      assert(interface_deref != NULL);
+      const struct glsl_type *interface_type = interface_deref->type;
+      unsigned record_length = interface_type->length;
       /* Unsized array is always the last element of the interface */
       const struct glsl_type *unsized_array_type =
-         deref_record_type->fields.structure[record_length - 1].type->fields.array;
+         interface_type->fields.structure[record_length - 1].type->fields.array;
 
       const bool array_row_major =
          is_dereferenced_thing_row_major(deref_record);
-- 
cgit v1.2.3


From 12d510ab742baad88d624585938c723ffedbcae7 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Mon, 28 Sep 2015 12:59:35 +0200
Subject: glsl: Fix forward NULL dereference coverity warning

The comment says that it should be impossible for decl_type to be NULL
here, so don't try to handle the case where it is; simply add an assert.

>>>     CID 1324977:  Null pointer dereferences  (FORWARD_NULL)
>>>     Comparing "decl_type" to null implies that "decl_type" might be null.

No piglit regressions observed.

Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
---
 src/glsl/ast_to_hir.cpp | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 5e69bccd9a4..6899a554f2c 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -5732,17 +5732,16 @@ ast_process_structure_or_interface_block(exec_list *instructions,
           * is_interface case, will have resulted in compilation having
           * already halted due to a syntax error.
           */
-         const struct glsl_type *field_type =
-            decl_type != NULL ? decl_type : glsl_type::error_type;
+         assert(decl_type);
 
-         if (is_interface && field_type->contains_opaque()) {
+         if (is_interface && decl_type->contains_opaque()) {
             YYLTYPE loc = decl_list->get_location();
             _mesa_glsl_error(&loc, state,
                              "uniform/buffer in non-default interface block contains "
                              "opaque variable");
          }
 
-         if (field_type->contains_atomic()) {
+         if (decl_type->contains_atomic()) {
             /* From section 4.1.7.3 of the GLSL 4.40 spec:
              *
              *    "Members of structures cannot be declared as atomic counter
@@ -5753,7 +5752,7 @@ ast_process_structure_or_interface_block(exec_list *instructions,
                              "shader storage block or uniform block");
          }
 
-         if (field_type->contains_image()) {
+         if (decl_type->contains_image()) {
             /* FINISHME: Same problem as with atomic counters.
              * FINISHME: Request clarification from Khronos and add
              * FINISHME: spec quotation here.
@@ -5784,8 +5783,8 @@ ast_process_structure_or_interface_block(exec_list *instructions,
                              "to struct or interface block members");
          }
 
-         field_type = process_array_type(&loc, decl_type,
-                                         decl->array_specifier, state);
+         const struct glsl_type *field_type =
+            process_array_type(&loc, decl_type, decl->array_specifier, state);
          fields[i].type = field_type;
          fields[i].name = decl->identifier;
          fields[i].location = -1;
-- 
cgit v1.2.3


From c0722be9f58ef89dae98d8c459ec4f9589f97748 Mon Sep 17 00:00:00 2001
From: Tapani Pälli <tapani.palli@intel.com>
Date: Mon, 3 Aug 2015 08:58:20 +0300
Subject: mesa: fix ARRAY_SIZE query for GetProgramResourceiv
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The patch also refactors the name length queries, which were using the
array size in their computation. This has to be done at the same time to
avoid a regression in the arb_program_interface_query-resource-query
Piglit test.

Fixes the rest of the failures with
   ES31-CTS.program_interface_query.no-locations

v2: make additional check only for GS inputs
v3: create helper function for resource name length
    so that it gets calculated only in one place

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Martin Peres <martin.peres@linux.intel.com>
---
 src/mesa/main/program_resource.c |  8 ++--
 src/mesa/main/shader_query.cpp   | 94 ++++++++++++++++++++++++----------------
 src/mesa/main/shaderapi.h        |  3 ++
 3 files changed, 62 insertions(+), 43 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/program_resource.c b/src/mesa/main/program_resource.c
index c609abeed45..eb71fdde703 100644
--- a/src/mesa/main/program_resource.c
+++ b/src/mesa/main/program_resource.c
@@ -111,11 +111,9 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface,
       for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) {
          if (shProg->ProgramResourceList[i].Type != programInterface)
             continue;
-         const char *name =
-            _mesa_program_resource_name(&shProg->ProgramResourceList[i]);
-         unsigned array_size =
-            _mesa_program_resource_array_size(&shProg->ProgramResourceList[i]);
-         *params = MAX2(*params, strlen(name) + (array_size ? 3 : 0) + 1);
+         unsigned len =
+            _mesa_program_resource_name_len(&shProg->ProgramResourceList[i]);
+         *params = MAX2(*params, len + 1);
       }
       break;
    case GL_MAX_NUM_ACTIVE_VARIABLES:
diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index 0cada5024bd..b6d36776562 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -479,7 +479,7 @@ _mesa_program_resource_array_size(struct gl_program_resource *res)
              RESOURCE_XFB(res)->Size : 0;
    case GL_PROGRAM_INPUT:
    case GL_PROGRAM_OUTPUT:
-      return RESOURCE_VAR(res)->data.max_array_access;
+      return RESOURCE_VAR(res)->type->length;
    case GL_UNIFORM:
    case GL_VERTEX_SUBROUTINE_UNIFORM:
    case GL_GEOMETRY_SUBROUTINE_UNIFORM:
@@ -671,6 +671,57 @@ _mesa_program_resource_find_index(struct gl_shader_program *shProg,
    return NULL;
 }
 
+/* Function returns if resource name is expected to have index
+ * appended into it.
+ *
+ *
+ * Page 61 (page 73 of the PDF) in section 2.11 of the OpenGL ES 3.0
+ * spec says:
+ *
+ *     "If the active uniform is an array, the uniform name returned in
+ *     name will always be the name of the uniform array appended with
+ *     "[0]"."
+ *
+ * The same text also appears in the OpenGL 4.2 spec.  It does not,
+ * however, appear in any previous spec.  Previous specifications are
+ * ambiguous in this regard.  However, either name can later be passed
+ * to glGetUniformLocation (and related APIs), so there shouldn't be any
+ * harm in always appending "[0]" to uniform array names.
+ *
+ * Geometry shader stage has different naming convention where the 'normal'
+ * condition is an array, therefore for variables referenced in geometry
+ * stage we do not add '[0]'.
+ *
+ * Note, that TCS outputs and TES inputs should not have index appended
+ * either.
+ */
+static bool
+add_index_to_name(struct gl_program_resource *res)
+{
+   bool add_index = !(((res->Type == GL_PROGRAM_INPUT) &&
+                       res->StageReferences & (1 << MESA_SHADER_GEOMETRY)));
+
+   /* Transform feedback varyings have array index already appended
+    * in their names.
+    */
+   if (res->Type == GL_TRANSFORM_FEEDBACK_VARYING)
+      add_index = false;
+
+   return add_index;
+}
+
+/* Get name length of a program resource. This consists of
+ * base name + 3 for '[0]' if resource is an array.
+ */
+extern unsigned
+_mesa_program_resource_name_len(struct gl_program_resource *res)
+{
+   unsigned length = strlen(_mesa_program_resource_name(res));
+   if (_mesa_program_resource_array_size(res) && add_index_to_name(res))
+      length += 3;
+   return length;
+}
+
 /* Get full name of a program resource.
  */
 bool
@@ -706,36 +757,7 @@ _mesa_get_program_resource_name(struct gl_shader_program *shProg,
 
    _mesa_copy_string(name, bufSize, length, _mesa_program_resource_name(res));
 
-   /* Page 61 (page 73 of the PDF) in section 2.11 of the OpenGL ES 3.0
-    * spec says:
-    *
-    *     "If the active uniform is an array, the uniform name returned in
-    *     name will always be the name of the uniform array appended with
-    *     "[0]"."
-    *
-    * The same text also appears in the OpenGL 4.2 spec.  It does not,
-    * however, appear in any previous spec.  Previous specifications are
-    * ambiguous in this regard.  However, either name can later be passed
-    * to glGetUniformLocation (and related APIs), so there shouldn't be any
-    * harm in always appending "[0]" to uniform array names.
-    *
-    * Geometry shader stage has different naming convention where the 'normal'
-    * condition is an array, therefore for variables referenced in geometry
-    * stage we do not add '[0]'.
-    *
-    * Note, that TCS outputs and TES inputs should not have index appended
-    * either.
-    */
-   bool add_index = !(((programInterface == GL_PROGRAM_INPUT) &&
-                       res->StageReferences & (1 << MESA_SHADER_GEOMETRY)));
-
-   /* Transform feedback varyings have array index already appended
-    * in their names.
-    */
-   if (programInterface == GL_TRANSFORM_FEEDBACK_VARYING)
-      add_index = false;
-
-   if (add_index && _mesa_program_resource_array_size(res)) {
+   if (_mesa_program_resource_array_size(res) && add_index_to_name(res)) {
       int i;
 
       /* The comparison is strange because *length does *NOT* include the
@@ -1207,13 +1229,9 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
       switch (res->Type) {
       case GL_ATOMIC_COUNTER_BUFFER:
          goto invalid_operation;
-      case GL_TRANSFORM_FEEDBACK_VARYING:
-         *val = strlen(_mesa_program_resource_name(res)) + 1;
-         break;
       default:
-         /* Base name +3 if array '[0]' + terminator. */
-         *val = strlen(_mesa_program_resource_name(res)) +
-            (_mesa_program_resource_array_size(res) > 0 ? 3 : 0) + 1;
+         /* Resource name length + terminator. */
+         *val = _mesa_program_resource_name_len(res) + 1;
       }
       return 1;
    case GL_TYPE:
@@ -1240,7 +1258,7 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
             return 1;
       case GL_PROGRAM_INPUT:
       case GL_PROGRAM_OUTPUT:
-         *val = MAX2(RESOURCE_VAR(res)->type->length, 1);
+         *val = MAX2(_mesa_program_resource_array_size(res), 1);
          return 1;
       case GL_TRANSFORM_FEEDBACK_VARYING:
          *val = MAX2(RESOURCE_XFB(res)->Size, 1);
diff --git a/src/mesa/main/shaderapi.h b/src/mesa/main/shaderapi.h
index 0a10191684f..fba767bf4c1 100644
--- a/src/mesa/main/shaderapi.h
+++ b/src/mesa/main/shaderapi.h
@@ -245,6 +245,9 @@ _mesa_get_program_resource_name(struct gl_shader_program *shProg,
                                 GLsizei bufSize, GLsizei *length,
                                 GLchar *name, const char *caller);
 
+extern unsigned
+_mesa_program_resource_name_len(struct gl_program_resource *res);
+
 extern GLint
 _mesa_program_resource_location(struct gl_shader_program *shProg,
                                 GLenum programInterface, const char *name);
-- 
cgit v1.2.3


From 952366a60e0743ae025edaf370a233d0633edf7b Mon Sep 17 00:00:00 2001
From: Jose Fonseca <jfonseca@vmware.com>
Date: Tue, 29 Sep 2015 16:01:22 +0100
Subject: util: Fix strndup prototype on C++.

Trivial.
---
 src/util/strndup.c |  1 +
 src/util/strndup.h | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/util/strndup.c b/src/util/strndup.c
index 2c24d3789c2..ca1c6f53b57 100644
--- a/src/util/strndup.c
+++ b/src/util/strndup.c
@@ -23,6 +23,7 @@
 
 #if defined(_WIN32)
 #include <stdlib.h>
+#include <string.h>
 #include "strndup.h"
 
 char *
diff --git a/src/util/strndup.h b/src/util/strndup.h
index 54346823596..c5ed7a8c8d2 100644
--- a/src/util/strndup.h
+++ b/src/util/strndup.h
@@ -27,6 +27,14 @@
 
 #if defined(_WIN32)
 
-extern char *strndup(const char *str, size_t max);
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+char *strndup(const char *str, size_t max);
+
+#ifdef __cplusplus
+}
+#endif
 
 #endif
-- 
cgit v1.2.3


From 1665d29ee3125743fd6daf3c43fc715f543d5669 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Thu, 24 Sep 2015 00:19:58 -0700
Subject: mesa/cs: Add MESA_VERBOSE=api support in DispatchCompute*
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/main/compute.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'src')

diff --git a/src/mesa/main/compute.c b/src/mesa/main/compute.c
index a0120cf0c64..8bc3bcd25a0 100644
--- a/src/mesa/main/compute.c
+++ b/src/mesa/main/compute.c
@@ -34,6 +34,10 @@ _mesa_DispatchCompute(GLuint num_groups_x,
    GET_CURRENT_CONTEXT(ctx);
    const GLuint num_groups[3] = { num_groups_x, num_groups_y, num_groups_z };
 
+   if (MESA_VERBOSE & VERBOSE_API)
+      _mesa_debug(ctx, "glDispatchCompute(%d, %d, %d)\n",
+                  num_groups_x, num_groups_y, num_groups_z);
+
    if (!_mesa_validate_DispatchCompute(ctx, num_groups))
       return;
 
@@ -45,6 +49,9 @@ _mesa_DispatchComputeIndirect(GLintptr indirect)
 {
    GET_CURRENT_CONTEXT(ctx);
 
+   if (MESA_VERBOSE & VERBOSE_API)
+      _mesa_debug(ctx, "glDispatchComputeIndirect(%d)\n", indirect);
+
    if (!_mesa_validate_DispatchComputeIndirect(ctx, indirect))
       return;
 
-- 
cgit v1.2.3


From 2ec5f3e1d5616f9e5c223a9f48d4b1a0d2568225 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Thu, 24 Sep 2015 00:25:27 -0700
Subject: i965/cs: Re-emit push constants and cs_state on new batches
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We need to re-emit push constants when a new batch is started since
the push constants are stored in the batch. We also need to re-emit
the MEDIA_INTERFACE_DESCRIPTOR (in brw_cs_state) since it is stored in
the batch.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/gen7_cs_state.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c
index d9561cfce61..c53aa233819 100644
--- a/src/mesa/drivers/dri/i965/gen7_cs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c
@@ -181,7 +181,8 @@ brw_upload_cs_state(struct brw_context *brw)
 const struct brw_tracked_state brw_cs_state = {
    .dirty = {
       .mesa = _NEW_PROGRAM_CONSTANTS,
-      .brw = BRW_NEW_CS_PROG_DATA |
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_CS_PROG_DATA |
              BRW_NEW_PUSH_CONSTANT_ALLOCATION,
    },
    .emit = brw_upload_cs_state
@@ -340,7 +341,8 @@ gen7_upload_cs_push_constants(struct brw_context *brw)
 const struct brw_tracked_state gen7_cs_push_constants = {
    .dirty = {
       .mesa = _NEW_PROGRAM_CONSTANTS,
-      .brw = BRW_NEW_COMPUTE_PROGRAM |
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_COMPUTE_PROGRAM |
              BRW_NEW_PUSH_CONSTANT_ALLOCATION,
    },
    .emit = gen7_upload_cs_push_constants,
-- 
cgit v1.2.3


From 60cf84dea7dfe2749d774b487079684d6a613afc Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Thu, 24 Sep 2015 00:26:59 -0700
Subject: i965/cs: Re-emit cs_state when surfaces have changed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Unlike rendering (BINDING_TABLE_POINTERS_*S), compute doesn't have a
binding table pointers command. Instead it is part of the
MEDIA_INTERFACE_DESCRIPTOR structure loaded by the brw_cs_state atom.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/gen7_cs_state.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c
index c53aa233819..0b88b2c0e71 100644
--- a/src/mesa/drivers/dri/i965/gen7_cs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c
@@ -183,7 +183,8 @@ const struct brw_tracked_state brw_cs_state = {
       .mesa = _NEW_PROGRAM_CONSTANTS,
       .brw = BRW_NEW_BATCH |
              BRW_NEW_CS_PROG_DATA |
-             BRW_NEW_PUSH_CONSTANT_ALLOCATION,
+             BRW_NEW_PUSH_CONSTANT_ALLOCATION |
+             BRW_NEW_SURFACES,
    },
    .emit = brw_upload_cs_state
 };
-- 
cgit v1.2.3


From d57a85f32b2242e19c2d20e91af0218d41e1e9fe Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Thu, 24 Sep 2015 01:06:56 -0700
Subject: i965/cs: Store compute invocation information in brw context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We will need this in an atom to set up a surface to read the
gl_NumWorkGroups values from.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_compute.c | 50 +++++++++++++++++----------------
 src/mesa/drivers/dri/i965/brw_context.h | 11 ++++++++
 2 files changed, 37 insertions(+), 24 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
index c392152e48d..505023d2851 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -36,27 +36,18 @@
 
 
 static void
-brw_emit_gpgpu_walker(struct brw_context *brw,
-                      const void *compute_param,
-                      bool indirect)
+brw_emit_gpgpu_walker(struct brw_context *brw)
 {
    const struct brw_cs_prog_data *prog_data = brw->cs.prog_data;
 
-   const GLuint *num_groups;
+   const GLuint *num_groups = brw->compute.num_work_groups;
    uint32_t indirect_flag;
 
-   if (!indirect) {
-      num_groups = (const GLuint *)compute_param;
+   if (brw->compute.num_work_groups_bo == NULL) {
       indirect_flag = 0;
    } else {
-      GLintptr indirect_offset = (GLintptr)compute_param;
-      static const GLuint indirect_group_counts[3] = { 0, 0, 0 };
-      num_groups = indirect_group_counts;
-
-      struct gl_buffer_object *indirect_buffer = brw->ctx.DispatchIndirectBuffer;
-      drm_intel_bo *bo = intel_bufferobj_buffer(brw,
-            intel_buffer_object(indirect_buffer),
-            indirect_offset, 3 * sizeof(GLuint));
+      GLintptr indirect_offset = brw->compute.num_work_groups_offset;
+      drm_intel_bo *bo = brw->compute.num_work_groups_bo;
 
       indirect_flag = GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE;
 
@@ -115,9 +106,7 @@ brw_emit_gpgpu_walker(struct brw_context *brw,
 
 
 static void
-brw_dispatch_compute_common(struct gl_context *ctx,
-                            const void *compute_param,
-                            bool indirect)
+brw_dispatch_compute_common(struct gl_context *ctx)
 {
    struct brw_context *brw = brw_context(ctx);
    int estimated_buffer_space_needed;
@@ -151,7 +140,7 @@ brw_dispatch_compute_common(struct gl_context *ctx,
    brw->no_batch_wrap = true;
    brw_upload_compute_state(brw);
 
-   brw_emit_gpgpu_walker(brw, compute_param, indirect);
+   brw_emit_gpgpu_walker(brw);
 
    brw->no_batch_wrap = false;
 
@@ -191,17 +180,30 @@ brw_dispatch_compute_common(struct gl_context *ctx,
 
 static void
 brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) {
-   brw_dispatch_compute_common(ctx,
-                               num_groups,
-                               false);
+   struct brw_context *brw = brw_context(ctx);
+
+   brw->compute.num_work_groups_bo = NULL;
+   brw->compute.num_work_groups = num_groups;
+
+   brw_dispatch_compute_common(ctx);
 }
 
 static void
 brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect)
 {
-   brw_dispatch_compute_common(ctx,
-                               (void *)indirect,
-                               true);
+   struct brw_context *brw = brw_context(ctx);
+   static const GLuint indirect_group_counts[3] = { 0, 0, 0 };
+   struct gl_buffer_object *indirect_buffer = ctx->DispatchIndirectBuffer;
+   drm_intel_bo *bo =
+      intel_bufferobj_buffer(brw,
+                             intel_buffer_object(indirect_buffer),
+                             indirect, 3 * sizeof(GLuint));
+
+   brw->compute.num_work_groups_bo = bo;
+   brw->compute.num_work_groups_offset = indirect;
+   brw->compute.num_work_groups = indirect_group_counts;
+
+   brw_dispatch_compute_common(ctx);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 27e80ea3bd6..a65cac10d98 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1251,6 +1251,17 @@ struct brw_context
       uint32_t draw_params_offset;
    } draw;
 
+   struct {
+      /**
+       * For gl_NumWorkGroups: If num_work_groups_bo is non NULL, then it is
+       * an indirect call, and num_work_groups_offset is valid. Otherwise,
+       * num_work_groups is set based on glDispatchCompute.
+       */
+      drm_intel_bo *num_work_groups_bo;
+      GLintptr num_work_groups_offset;
+      const GLuint *num_work_groups;
+   } compute;
+
    struct {
       struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
       struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
-- 
cgit v1.2.3


From d1be9d21265cf4e344a5d78b17cea7ee2c8408a1 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Thu, 24 Sep 2015 01:20:55 -0700
Subject: i965/cs: Add a binding table entry for gl_NumWorkGroups
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If glDispatchComputeIndirect is used, then the value for this variable
must be read from the indirect BO.

To allow the same generated code to support indirect and
glDispatchCompute, we will also set up a BO for the number of work
groups using the intel_upload_data mechanism. This will only be
required if the gl_NumWorkGroups variable is accessed.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_context.h | 11 ++++++++++-
 src/mesa/drivers/dri/i965/brw_fs.cpp    | 20 +++++++++++++++++---
 src/mesa/drivers/dri/i965/brw_fs.h      |  3 ++-
 3 files changed, 29 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index a65cac10d98..e407dec8eab 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -497,6 +497,14 @@ struct brw_cs_prog_data {
    unsigned local_size[3];
    unsigned simd_size;
    bool uses_barrier;
+
+   struct {
+      /** @{
+       * surface indices the CS-specific surfaces
+       */
+      uint32_t work_groups_start;
+      /** @} */
+   } binding_table;
 };
 
 /**
@@ -758,7 +766,8 @@ struct brw_vs_prog_data {
                             12 + /* ubo */                              \
                             BRW_MAX_ABO +                               \
                             BRW_MAX_IMAGES +                            \
-                            2 /* shader time, pull constants */)
+                            2 + /* shader time, pull constants */       \
+                            1 /* cs num work groups */)
 
 #define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 49dc7f65b48..64215ae5a6a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -4750,7 +4750,7 @@ fs_visitor::setup_cs_payload()
 }
 
 void
-fs_visitor::assign_binding_table_offsets()
+fs_visitor::assign_fs_binding_table_offsets()
 {
    assert(stage == MESA_SHADER_FRAGMENT);
    brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
@@ -4766,6 +4766,20 @@ fs_visitor::assign_binding_table_offsets()
    assign_common_binding_table_offsets(next_binding_table_offset);
 }
 
+void
+fs_visitor::assign_cs_binding_table_offsets()
+{
+   assert(stage == MESA_SHADER_COMPUTE);
+   brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
+   uint32_t next_binding_table_offset = 0;
+
+   /* May not be used if the gl_NumWorkGroups variable is not accessed. */
+   prog_data->binding_table.work_groups_start = next_binding_table_offset;
+   next_binding_table_offset++;
+
+   assign_common_binding_table_offsets(next_binding_table_offset);
+}
+
 void
 fs_visitor::calculate_register_pressure()
 {
@@ -5020,7 +5034,7 @@ fs_visitor::run_fs(bool do_rep_send)
 
    sanity_param_count = prog->Parameters->NumParameters;
 
-   assign_binding_table_offsets();
+   assign_fs_binding_table_offsets();
 
    if (devinfo->gen >= 6)
       setup_payload_gen6();
@@ -5108,7 +5122,7 @@ fs_visitor::run_cs()
 
    sanity_param_count = prog->Parameters->NumParameters;
 
-   assign_common_binding_table_offsets(0);
+   assign_cs_binding_table_offsets();
 
    setup_cs_payload();
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 50ce4155f16..a8b6726b9f0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -128,7 +128,8 @@ public:
    bool run_cs();
    void optimize();
    void allocate_registers();
-   void assign_binding_table_offsets();
+   void assign_fs_binding_table_offsets();
+   void assign_cs_binding_table_offsets();
    void setup_payload_gen4();
    void setup_payload_gen6();
    void setup_vs_payload();
-- 
cgit v1.2.3


From 63d7b33f516815ce3f9b3bb1b26a39f1545a4446 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Thu, 24 Sep 2015 01:29:32 -0700
Subject: i965/cs: Setup surface binding for gl_NumWorkGroups
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This will only be set up when the prog_data uses_num_work_groups
boolean is set.

At this point nothing will set uses_num_work_groups, but soon code
will set it when emitting code for the intrinsic that loads
gl_NumWorkGroups.

We can't emit this surface information earlier at the start of the
DispatchCompute* call because we may not have generated the program
yet. Until we generate the program, we don't know if the
gl_NumWorkGroups variable is accessed.

We also can't emit the surface as part of the brw_cs_state atom,
because we might not need the surface if gl_NumWorkGroups is not used
by the program.

Lastly, we cannot emit the surface later (after state upload) in the
DispatchCompute* call, because it needs to be run before the
brw_cs_state atom is emitted, since it changes the surface state.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_compute.c          |  2 ++
 src/mesa/drivers/dri/i965/brw_context.h          |  5 ++-
 src/mesa/drivers/dri/i965/brw_state.h            |  1 +
 src/mesa/drivers/dri/i965/brw_state_upload.c     |  3 ++
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 43 ++++++++++++++++++++++++
 5 files changed, 53 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
index 505023d2851..fe991a46153 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -184,6 +184,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) {
 
    brw->compute.num_work_groups_bo = NULL;
    brw->compute.num_work_groups = num_groups;
+   ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
 
    brw_dispatch_compute_common(ctx);
 }
@@ -202,6 +203,7 @@ brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect)
    brw->compute.num_work_groups_bo = bo;
    brw->compute.num_work_groups_offset = indirect;
    brw->compute.num_work_groups = indirect_group_counts;
+   ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
 
    brw_dispatch_compute_common(ctx);
 }
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index e407dec8eab..8b790fe0bca 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -213,6 +213,7 @@ enum brw_state_id {
    BRW_STATE_SAMPLER_STATE_TABLE,
    BRW_STATE_VS_ATTRIB_WORKAROUNDS,
    BRW_STATE_COMPUTE_PROGRAM,
+   BRW_STATE_CS_WORK_GROUPS,
    BRW_NUM_STATE_BITS
 };
 
@@ -294,6 +295,7 @@ enum brw_state_id {
 #define BRW_NEW_SAMPLER_STATE_TABLE     (1ull << BRW_STATE_SAMPLER_STATE_TABLE)
 #define BRW_NEW_VS_ATTRIB_WORKAROUNDS   (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS)
 #define BRW_NEW_COMPUTE_PROGRAM         (1ull << BRW_STATE_COMPUTE_PROGRAM)
+#define BRW_NEW_CS_WORK_GROUPS          (1ull << BRW_STATE_CS_WORK_GROUPS)
 
 struct brw_state_flags {
    /** State update flags signalled by mesa internals */
@@ -497,6 +499,7 @@ struct brw_cs_prog_data {
    unsigned local_size[3];
    unsigned simd_size;
    bool uses_barrier;
+   bool uses_num_work_groups;
 
    struct {
       /** @{
@@ -1545,7 +1548,7 @@ struct brw_context
 
    int num_atoms[BRW_NUM_PIPELINES];
    const struct brw_tracked_state render_atoms[60];
-   const struct brw_tracked_state compute_atoms[6];
+   const struct brw_tracked_state compute_atoms[7];
 
    /* If (INTEL_DEBUG & DEBUG_BATCH) */
    struct {
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index afce8ad3b27..3b7a4330c7a 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -158,6 +158,7 @@ extern const struct brw_tracked_state gen8_sf_clip_viewport;
 extern const struct brw_tracked_state gen8_vertices;
 extern const struct brw_tracked_state gen8_vf_topology;
 extern const struct brw_tracked_state gen8_vs_state;
+extern const struct brw_tracked_state brw_cs_work_groups_surface;
 
 static inline bool
 brw_state_dirty(struct brw_context *brw, GLuint mesa_flags, uint64_t brw_flags)
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 2e8a0b3de9b..46687e342d3 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -261,6 +261,7 @@ static const struct brw_tracked_state *gen7_compute_atoms[] =
    &gen7_cs_push_constants,
    &brw_cs_abo_surfaces,
    &brw_texture_surfaces,
+   &brw_cs_work_groups_surface,
    &brw_cs_state,
 };
 
@@ -353,6 +354,7 @@ static const struct brw_tracked_state *gen8_compute_atoms[] =
    &gen7_cs_push_constants,
    &brw_cs_abo_surfaces,
    &brw_texture_surfaces,
+   &brw_cs_work_groups_surface,
    &brw_cs_state,
 };
 
@@ -613,6 +615,7 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
    DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
    DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
+   DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
    {0, 0, 0}
 };
 
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index f2aaa0b178e..c9316963840 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1336,3 +1336,46 @@ gen4_init_vtable_surface_functions(struct brw_context *brw)
    brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
    brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
 }
+
+static void
+brw_upload_cs_work_groups_surface(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->ctx;
+   /* _NEW_PROGRAM */
+   struct gl_shader_program *prog =
+      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+
+   if (prog && brw->cs.prog_data->uses_num_work_groups) {
+      const unsigned surf_idx =
+         brw->cs.prog_data->binding_table.work_groups_start;
+      uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
+      drm_intel_bo *bo;
+      uint32_t bo_offset;
+
+      if (brw->compute.num_work_groups_bo == NULL) {
+         bo = NULL;
+         intel_upload_data(brw,
+                           (void *)brw->compute.num_work_groups,
+                           3 * sizeof(GLuint),
+                           sizeof(GLuint),
+                           &bo,
+                           &bo_offset);
+      } else {
+         bo = brw->compute.num_work_groups_bo;
+         bo_offset = brw->compute.num_work_groups_offset;
+      }
+
+      brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
+                                          bo, bo_offset,
+                                          BRW_SURFACEFORMAT_RAW,
+                                          3 * sizeof(GLuint), 1, true);
+      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
+   }
+}
+
+const struct brw_tracked_state brw_cs_work_groups_surface = {
+   .dirty = {
+      .brw = BRW_NEW_CS_WORK_GROUPS
+   },
+   .emit = brw_upload_cs_work_groups_surface,
+};
-- 
cgit v1.2.3


From f6ae91406966c0fdcfcd628539e34964ae0ab1a6 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Thu, 20 Aug 2015 15:56:53 -0700
Subject: glsl/cs: Add gl_NumWorkGroups as a system value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/builtin_variables.cpp | 2 +-
 src/glsl/shader_enums.h        | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index 0722051c08e..a6ad1050552 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -1058,9 +1058,9 @@ builtin_variable_generator::generate_cs_special_vars()
    add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_ID, uvec3_t,
                     "gl_LocalInvocationID");
    add_system_value(SYSTEM_VALUE_WORK_GROUP_ID, uvec3_t, "gl_WorkGroupID");
+   add_system_value(SYSTEM_VALUE_NUM_WORK_GROUPS, uvec3_t, "gl_NumWorkGroups");
    add_variable("gl_GlobalInvocationID", uvec3_t, ir_var_auto, 0);
    add_variable("gl_LocalInvocationIndex", uint_t, ir_var_auto, 0);
-   /* TODO: finish this. */
 }
 
 
diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h
index 39780076e74..99acc640496 100644
--- a/src/glsl/shader_enums.h
+++ b/src/glsl/shader_enums.h
@@ -410,6 +410,7 @@ typedef enum
    /*@{*/
    SYSTEM_VALUE_LOCAL_INVOCATION_ID,
    SYSTEM_VALUE_WORK_GROUP_ID,
+   SYSTEM_VALUE_NUM_WORK_GROUPS,
    /*@}*/
 
    /**
-- 
cgit v1.2.3


From 4c6ddd3397f9ec363342831eab710003996e77a8 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Sat, 22 Aug 2015 18:51:08 -0700
Subject: nir: Convert SYSTEM_VALUE_NUM_WORK_GROUPS to a nir intrinsic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/nir/nir.c            | 4 ++++
 src/glsl/nir/nir_intrinsics.h | 1 +
 2 files changed, 5 insertions(+)

(limited to 'src')

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 1206bb4dfcb..57fd959c931 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1487,6 +1487,8 @@ nir_intrinsic_from_system_value(gl_system_value val)
       return nir_intrinsic_load_local_invocation_id;
    case SYSTEM_VALUE_WORK_GROUP_ID:
       return nir_intrinsic_load_work_group_id;
+   case SYSTEM_VALUE_NUM_WORK_GROUPS:
+      return nir_intrinsic_load_num_work_groups;
    /* FINISHME: Add tessellation intrinsics.
    case SYSTEM_VALUE_TESS_COORD:
    case SYSTEM_VALUE_VERTICES_IN:
@@ -1523,6 +1525,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
       return SYSTEM_VALUE_SAMPLE_MASK_IN;
    case nir_intrinsic_load_local_invocation_id:
       return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
+   case nir_intrinsic_load_num_work_groups:
+      return SYSTEM_VALUE_NUM_WORK_GROUPS;
    case nir_intrinsic_load_work_group_id:
       return SYSTEM_VALUE_WORK_GROUP_ID;
    /* FINISHME: Add tessellation intrinsics.
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index 06f1b02383a..649312fec51 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -198,6 +198,7 @@ SYSTEM_VALUE(invocation_id, 1, 0)
 SYSTEM_VALUE(local_invocation_id, 3, 0)
 SYSTEM_VALUE(work_group_id, 3, 0)
 SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */
+SYSTEM_VALUE(num_work_groups, 3, 0)
 
 /*
  * The format of the indices depends on the type of the load.  For uniforms,
-- 
cgit v1.2.3


From 681b4badaedec5c9503887c4afb32485ce22c30e Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Thu, 24 Sep 2015 01:45:40 -0700
Subject: i965/cs: Generate code to load gl_NumWorkGroups
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This code also sets cs_prog_data->uses_num_work_groups which is later
used by state setup to indicate that the gl_NumWorkGroups surface
needs to be set up.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'src')

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index cf7e74256fc..7a965cd5b73 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1922,6 +1922,34 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_load_num_work_groups: {
+      assert(devinfo->gen >= 7);
+      assert(stage == MESA_SHADER_COMPUTE);
+
+      struct brw_cs_prog_data *cs_prog_data =
+         (struct brw_cs_prog_data *) prog_data;
+      const unsigned surface =
+         cs_prog_data->binding_table.work_groups_start;
+
+      cs_prog_data->uses_num_work_groups = true;
+
+      fs_reg surf_index = fs_reg(surface);
+      brw_mark_surface_used(prog_data, surface);
+
+      /* Read the 3 GLuint components of gl_NumWorkGroups */
+      for (unsigned i = 0; i < 3; i++) {
+         fs_reg read_result =
+            emit_untyped_read(bld, surf_index,
+                              fs_reg(i << 2),
+                              1 /* dims */, 1 /* size */,
+                              BRW_PREDICATE_NONE);
+         read_result.type = dest.type;
+         bld.MOV(dest, read_result);
+         dest = offset(dest, bld, 1);
+      }
+      break;
+   }
+
    default:
       unreachable("unknown intrinsic");
    }
-- 
cgit v1.2.3


From 3bbff1e26ee8bbf63f7a954c3f24d514486939e5 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Sat, 26 Sep 2015 20:14:57 -0600
Subject: mesa: remove an extern "C" wrapper in shader_query.cpp

The shaderapi.h header already has the extern "C" wrapper.

Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/main/shader_query.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index b6d36776562..16b43e840c7 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -32,6 +32,7 @@
 #include "main/core.h"
 #include "glsl_symbol_table.h"
 #include "ir.h"
+#include "shaderapi.h"
 #include "shaderobj.h"
 #include "program/hash_table.h"
 #include "../glsl/program.h"
@@ -39,9 +40,6 @@
 #include "main/enums.h"
 #include "util/strndup.h"
 
-extern "C" {
-#include "shaderapi.h"
-}
 
 static GLint
 program_resource_location(struct gl_shader_program *shProg,
-- 
cgit v1.2.3


From b24c6d3fefabf456e19b26a0a581ae53eb3d2d19 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Sat, 26 Sep 2015 20:19:40 -0600
Subject: mesa: clean up the #includes in shader_query.cpp

Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/main/shader_query.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index 16b43e840c7..73dee85cc4d 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -30,14 +30,14 @@
 
 #include "main/context.h"
 #include "main/core.h"
-#include "glsl_symbol_table.h"
-#include "ir.h"
-#include "shaderapi.h"
-#include "shaderobj.h"
-#include "program/hash_table.h"
-#include "../glsl/program.h"
-#include "uniforms.h"
 #include "main/enums.h"
+#include "main/shaderapi.h"
+#include "main/shaderobj.h"
+#include "main/uniforms.h"
+#include "glsl/glsl_symbol_table.h"
+#include "glsl/ir.h"
+#include "glsl/program.h"
+#include "program/hash_table.h"
 #include "util/strndup.h"
 
 
-- 
cgit v1.2.3


From 7147f7098e55e3d2e62516a54c1ff8e77c9052d0 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Sat, 26 Sep 2015 20:22:21 -0600
Subject: mesa: clean up #includes in shaderapi.c

Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/main/shaderapi.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 418121dcb99..82a1ec37130 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -37,6 +37,7 @@
  */
 
 
+#include <stdbool.h>
 #include "main/glheader.h"
 #include "main/context.h"
 #include "main/dispatch.h"
@@ -48,17 +49,16 @@
 #include "main/shaderobj.h"
 #include "main/transformfeedback.h"
 #include "main/uniforms.h"
+#include "glsl/glsl_parser_extras.h"
+#include "glsl/ir.h"
+#include "glsl/ir_uniform.h"
+#include "glsl/program.h"
 #include "program/program.h"
 #include "program/prog_print.h"
 #include "program/prog_parameter.h"
 #include "util/ralloc.h"
 #include "util/hash_table.h"
 #include "util/mesa-sha1.h"
-#include <stdbool.h>
-#include "../glsl/glsl_parser_extras.h"
-#include "../glsl/ir.h"
-#include "../glsl/ir_uniform.h"
-#include "../glsl/program.h"
 
 
 /**
-- 
cgit v1.2.3


From daf23bd4cb944e1d78664b6780a78d021c0cec79 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 24 Sep 2015 09:34:48 -0600
Subject: st/mesa: add some debugging code in st_ChooseTextureFormat()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I've temporarily added code like this many times.  Wrap it in a
conditional that can be enabled when needed.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/mesa/state_tracker/st_format.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index e3fb761aa98..0c9442862d4 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -34,6 +34,8 @@
 
 #include "main/imports.h"
 #include "main/context.h"
+#include "main/enums.h"
+#include "main/formats.h"
 #include "main/glformats.h"
 #include "main/texgetimage.h"
 #include "main/teximage.h"
@@ -1938,6 +1940,7 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target,
 {
    struct st_context *st = st_context(ctx);
    enum pipe_format pFormat;
+   mesa_format mFormat;
    unsigned bindings;
    enum pipe_texture_target pTarget = gl_target_to_pipe(target);
 
@@ -2010,7 +2013,20 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target,
       return MESA_FORMAT_NONE;
    }
 
-   return st_pipe_format_to_mesa_format(pFormat);
+   mFormat = st_pipe_format_to_mesa_format(pFormat);
+
+   /* Debugging aid */
+   if (0) {
+      debug_printf("%s(intFormat=%s, format=%s, type=%s) -> %s, %s\n",
+                   __func__,
+                   _mesa_enum_to_string(internalFormat),
+                   _mesa_enum_to_string(format),
+                   _mesa_enum_to_string(type),
+                   util_format_name(pFormat),
+                   _mesa_get_format_name(mFormat));
+   }
+
+   return mFormat;
 }
 
 
-- 
cgit v1.2.3


From cb758b892a7e62ff1f6187f2ca9ac543ff70a096 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 24 Sep 2015 09:36:44 -0600
Subject: st/mesa: try PIPE_BIND_RENDER_TARGET when choosing float texture
 formats
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For 8-bit RGB(A) texture formats we set the PIPE_BIND_RENDER_TARGET flag
to try to get a hardware format which also supports rendering (for FBO
textures).  Do the same thing for floating point formats.

This allows the Redway3D Flat demo to run.

Cc: 10.6 11.0 <mesa-stable@lists.freedesktop.org>

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/mesa/state_tracker/st_format.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index 0c9442862d4..144b7d6f659 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -1963,7 +1963,11 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target,
    else if (internalFormat == 3 || internalFormat == 4 ||
             internalFormat == GL_RGB || internalFormat == GL_RGBA ||
             internalFormat == GL_RGB8 || internalFormat == GL_RGBA8 ||
-            internalFormat == GL_BGRA)
+            internalFormat == GL_BGRA ||
+            internalFormat == GL_RGB16F ||
+            internalFormat == GL_RGBA16F ||
+            internalFormat == GL_RGB32F ||
+            internalFormat == GL_RGBA32F)
 	 bindings |= PIPE_BIND_RENDER_TARGET;
 
    /* GLES allows the driver to choose any format which matches
-- 
cgit v1.2.3