From 54e0a8ed5dcaaa0ef483d5960ae86f88e0bf8990 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Fri, 12 Jun 2015 15:08:02 +0800
Subject: ilo: add ilo_state_ps to ilo_shader_cso

---
 .../drivers/ilo/core/ilo_builder_3d_bottom.h       | 190 +++-----------
 src/gallium/drivers/ilo/core/ilo_state_3d.h        |  19 --
 src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c | 280 ---------------------
 src/gallium/drivers/ilo/ilo_blitter.h              |   1 +
 src/gallium/drivers/ilo/ilo_blitter_rectlist.c     |   1 +
 src/gallium/drivers/ilo/ilo_render_gen6.c          |  11 +-
 src/gallium/drivers/ilo/ilo_render_gen7.c          |  20 +-
 src/gallium/drivers/ilo/ilo_render_gen8.c          |  11 +-
 src/gallium/drivers/ilo/ilo_render_surface.c       |   1 +
 src/gallium/drivers/ilo/ilo_shader.c               |  88 ++++++-
 src/gallium/drivers/ilo/ilo_shader.h               |  15 ++
 src/gallium/drivers/ilo/ilo_state.c                |   2 +-
 .../drivers/ilo/shader/ilo_shader_internal.h       |   2 +
 13 files changed, 160 insertions(+), 481 deletions(-)

diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
index 68461fff09d..88ed6ea054c 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
@@ -29,7 +29,6 @@
 #define ILO_BUILDER_3D_BOTTOM_H
 
 #include "genhw/genhw.h"
-#include "../ilo_shader.h"
 #include "intel_winsys.h"
 
 #include "ilo_core.h"
@@ -38,6 +37,7 @@
 #include "ilo_state_cc.h"
 #include "ilo_state_raster.h"
 #include "ilo_state_sbe.h"
+#include "ilo_state_shader.h"
 #include "ilo_state_viewport.h"
 #include "ilo_builder.h"
 #include "ilo_builder_3d_top.h"
@@ -200,56 +200,24 @@ gen8_3DSTATE_RASTER(struct ilo_builder *builder,
 static inline void
 gen6_3DSTATE_WM(struct ilo_builder *builder,
                 const struct ilo_state_raster *rs,
-                const struct ilo_shader_state *fs,
-                bool dual_blend, bool cc_may_kill)
+                const struct ilo_state_ps *ps,
+                uint32_t kernel_offset)
 {
    const uint8_t cmd_len = 9;
-   const bool multisample = false;
-   const int num_samples = 1;
-   uint32_t dw2, dw4, dw5, dw6, *dw;
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 6, 6);
 
-   dw2 = 0;
-   /* see raster_set_gen6_3dstate_wm() */
-   dw4 = rs->raster[0];
-   dw5 = rs->raster[1];
-   dw6 = rs->raster[2];
-
-   if (fs) {
-      const union ilo_shader_cso *cso;
-
-      cso = ilo_shader_get_kernel_cso(fs);
-      /* see fs_init_cso_gen6() */
-      dw2 |= cso->ps_payload[0];
-      dw4 |= cso->ps_payload[1];
-      dw5 |= cso->ps_payload[2];
-      dw6 |= cso->ps_payload[3];
-   } else {
-      const int max_threads = (builder->dev->gt == 2) ? 80 : 40;
-
-      /* honor the valid range even if dispatching is disabled */
-      dw5 |= (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
-   }
-
-   if (cc_may_kill)
-      dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL | GEN6_WM_DW5_PS_DISPATCH_ENABLE;
-
-   if (dual_blend)
-      dw5 |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
-
-   if (multisample && num_samples > 1)
-      dw6 |= GEN6_WM_DW6_MSDISPMODE_PERPIXEL;
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
-   dw[1] = ilo_shader_get_kernel_offset(fs);
-   dw[2] = dw2;
-   dw[3] = 0; /* scratch */
-   dw[4] = dw4;
-   dw[5] = dw5;
-   dw[6] = dw6;
+   dw[1] = kernel_offset;
+   /* see raster_set_gen6_3dstate_wm() and ps_set_gen6_3dstate_wm() */
+   dw[2] = ps->ps[0];
+   dw[3] = ps->ps[1];
+   dw[4] = rs->wm[0] | ps->ps[2];
+   dw[5] = rs->wm[1] | ps->ps[3];
+   dw[6] = rs->wm[2] | ps->ps[4];
    dw[7] = 0; /* kernel 1 */
    dw[8] = 0; /* kernel 2 */
 }
@@ -257,39 +225,19 @@ gen6_3DSTATE_WM(struct ilo_builder *builder,
 static inline void
 gen7_3DSTATE_WM(struct ilo_builder *builder,
                 const struct ilo_state_raster *rs,
-                const struct ilo_shader_state *fs,
-                bool cc_may_kill)
+                const struct ilo_state_ps *ps)
 {
    const uint8_t cmd_len = 3;
-   const bool multisample = false;
-   const int num_samples = 1;
-   uint32_t dw1, dw2, *dw;
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 7, 7.5);
 
-   /* see raster_set_gen8_3DSTATE_WM() */
-   dw1 = rs->wm[0];
-
-   if (fs) {
-      const union ilo_shader_cso *cso;
-
-      cso = ilo_shader_get_kernel_cso(fs);
-      /* see fs_init_cso_gen7() */
-      dw1 |= cso->ps_payload[3];
-   }
-
-   if (cc_may_kill)
-      dw1 |= GEN7_WM_DW1_PS_DISPATCH_ENABLE | GEN7_WM_DW1_PS_KILL_PIXEL;
-
-   dw2 = 0;
-   if (multisample && num_samples > 1)
-      dw2 |= GEN7_WM_DW2_MSDISPMODE_PERPIXEL;
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
-   dw[1] = dw1;
-   dw[2] = dw2;
+   /* see raster_set_gen8_3DSTATE_WM() and ps_set_gen7_3dstate_wm() */
+   dw[1] = rs->wm[0] | ps->ps[0];
+   dw[2] = ps->ps[1];
 }
 
 static inline void
@@ -379,100 +327,48 @@ gen8_3DSTATE_WM_CHROMAKEY(struct ilo_builder *builder)
 
 static inline void
 gen7_3DSTATE_PS(struct ilo_builder *builder,
-                const struct ilo_shader_state *fs,
-                bool dual_blend)
+                const struct ilo_state_ps *ps,
+                uint32_t kernel_offset)
 {
    const uint8_t cmd_len = 8;
-   const union ilo_shader_cso *cso;
-   uint32_t dw2, dw4, dw5, *dw;
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 7, 7.5);
 
-   /* see fs_init_cso_gen7() */
-   cso = ilo_shader_get_kernel_cso(fs);
-   dw2 = cso->ps_payload[0];
-   dw4 = cso->ps_payload[1];
-   dw5 = cso->ps_payload[2];
-
-   if (dual_blend)
-      dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
-   dw[1] = ilo_shader_get_kernel_offset(fs);
-   dw[2] = dw2;
-   dw[3] = 0; /* scratch */
-   dw[4] = dw4;
-   dw[5] = dw5;
+   dw[1] = kernel_offset;
+   /* see ps_set_gen7_3DSTATE_PS() */
+   dw[2] = ps->ps[2];
+   dw[3] = ps->ps[3];
+   dw[4] = ps->ps[4];
+   dw[5] = ps->ps[5];
    dw[6] = 0; /* kernel 1 */
    dw[7] = 0; /* kernel 2 */
 }
 
-static inline void
-gen7_disable_3DSTATE_PS(struct ilo_builder *builder)
-{
-   const uint8_t cmd_len = 8;
-   int max_threads;
-   uint32_t dw4, *dw;
-
-   ILO_DEV_ASSERT(builder->dev, 7, 7.5);
-
-   /* GPU hangs if none of the dispatch enable bits is set */
-   dw4 = GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
-
-   /* see brwCreateContext() */
-   switch (ilo_dev_gen(builder->dev)) {
-   case ILO_GEN(7.5):
-      max_threads = (builder->dev->gt == 3) ? 408 :
-                    (builder->dev->gt == 2) ? 204 : 102;
-      dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
-      break;
-   case ILO_GEN(7):
-   default:
-      max_threads = (builder->dev->gt == 2) ? 172 : 48;
-      dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
-      break;
-   }
-
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
-
-   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
-   dw[1] = 0;
-   dw[2] = 0;
-   dw[3] = 0;
-   dw[4] = dw4;
-   dw[5] = 0;
-   dw[6] = 0;
-   dw[7] = 0;
-}
-
 static inline void
 gen8_3DSTATE_PS(struct ilo_builder *builder,
-                const struct ilo_shader_state *fs)
+                const struct ilo_state_ps *ps,
+                uint32_t kernel_offset)
 {
    const uint8_t cmd_len = 12;
-   const union ilo_shader_cso *cso;
-   uint32_t dw3, dw6, dw7, *dw;
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 8, 8);
 
-   /* see fs_init_cso_gen8() */
-   cso = ilo_shader_get_kernel_cso(fs);
-   dw3 = cso->ps_payload[0];
-   dw6 = cso->ps_payload[1];
-   dw7 = cso->ps_payload[2];
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
-   dw[1] = ilo_shader_get_kernel_offset(fs);
+   dw[1] = kernel_offset;
    dw[2] = 0;
-   dw[3] = dw3;
-   dw[4] = 0; /* scratch */
+   /* see ps_set_gen8_3DSTATE_PS() */
+   dw[3] = ps->ps[0];
+   dw[4] = ps->ps[1];
    dw[5] = 0;
-   dw[6] = dw6;
-   dw[7] = dw7;
+   dw[6] = ps->ps[2];
+   dw[7] = ps->ps[3];
    dw[8] = 0; /* kernel 1 */
    dw[9] = 0;
    dw[10] = 0; /* kernel 2 */
@@ -481,28 +377,18 @@ gen8_3DSTATE_PS(struct ilo_builder *builder,
 
 static inline void
 gen8_3DSTATE_PS_EXTRA(struct ilo_builder *builder,
-                      const struct ilo_shader_state *fs,
-                      bool cc_may_kill, bool per_sample)
+                      const struct ilo_state_ps *ps)
 {
    const uint8_t cmd_len = 2;
-   const union ilo_shader_cso *cso;
-   uint32_t dw1, *dw;
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 8, 8);
 
-   /* see fs_init_cso_gen8() */
-   cso = ilo_shader_get_kernel_cso(fs);
-   dw1 = cso->ps_payload[3];
-
-   if (cc_may_kill)
-      dw1 |= GEN8_PSX_DW1_VALID | GEN8_PSX_DW1_KILL_PIXEL;
-   if (per_sample)
-      dw1 |= GEN8_PSX_DW1_PER_SAMPLE;
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_PS_EXTRA) | (cmd_len - 2);
-   dw[1] = dw1;
+   /* see ps_set_gen8_3DSTATE_PS_EXTRA() */
+   dw[1] = ps->ps[4];
 }
 
 static inline void
diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d.h b/src/gallium/drivers/ilo/core/ilo_state_3d.h
index b2087df3470..dcc94bfc88c 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_3d.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_3d.h
@@ -114,25 +114,6 @@ struct ilo_fb_state {
    enum gen_depth_format depth_offset_format;
 };
 
-union ilo_shader_cso {
-   struct ilo_state_vs vs;
-   struct ilo_state_hs hs;
-   struct ilo_state_ds ds;
-   struct ilo_state_gs gs;
-
-   uint32_t ps_payload[5];
-
-   struct {
-      struct ilo_state_vs vs;
-      struct ilo_state_gs sol;
-   } vs_sol;
-};
-
-void
-ilo_gpe_init_fs_cso(const struct ilo_dev *dev,
-                    const struct ilo_shader_state *fs,
-                    union ilo_shader_cso *cso);
-
 void
 ilo_gpe_set_fb(const struct ilo_dev *dev,
                const struct pipe_framebuffer_state *state,
diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c
index 004904fcd08..8734aff44da 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c
@@ -32,286 +32,6 @@
 #include "ilo_format.h"
 #include "ilo_image.h"
 #include "ilo_state_3d.h"
-#include "../ilo_shader.h"
-
-static void
-fs_init_cso_gen6(const struct ilo_dev *dev,
-                 const struct ilo_shader_state *fs,
-                 union ilo_shader_cso *cso)
-{
-   int start_grf, input_count, sampler_count, max_threads;
-   uint32_t dw2, dw4, dw5, dw6;
-
-   ILO_DEV_ASSERT(dev, 6, 6);
-
-   start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
-   input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
-   sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
-
-   /* see brwCreateContext() */
-   max_threads = (dev->gt == 2) ? 80 : 40;
-
-   dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
-   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
-   dw4 = start_grf << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
-         0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
-         0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT;
-
-   dw5 = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
-    *
-    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
-    *      PS kernel or color calculator has the ability to kill (discard)
-    *      pixels or samples, other than due to depth or stencil testing.
-    *      This bit is required to be ENABLED in the following situations:
-    *
-    *      The API pixel shader program contains "killpix" or "discard"
-    *      instructions, or other code in the pixel shader kernel that can
-    *      cause the final pixel mask to differ from the pixel mask received
-    *      on dispatch.
-    *
-    *      A sampler with chroma key enabled with kill pixel mode is used by
-    *      the pixel shader.
-    *
-    *      Any render target has Alpha Test Enable or AlphaToCoverage Enable
-    *      enabled.
-    *
-    *      The pixel shader kernel generates and outputs oMask.
-    *
-    *      Note: As ClipDistance clipping is fully supported in hardware and
-    *      therefore not via PS instructions, there should be no need to
-    *      ENABLE this bit due to ClipDistance clipping."
-    */
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
-      dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL;
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
-    *
-    *     "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
-    *      field must be set to disabled."
-    *
-    * TODO This is not checked yet.
-    */
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
-      dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
-      dw5 |= GEN6_WM_DW5_PS_USE_DEPTH;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
-      dw5 |= GEN6_WM_DW5_PS_USE_W;
-
-   /*
-    * TODO set this bit only when
-    *
-    *  a) fs writes colors and color is not masked, or
-    *  b) fs writes depth, or
-    *  c) fs or cc kills
-    */
-   if (true)
-      dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;
-
-   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
-   dw5 |= GEN6_PS_DISPATCH_8 << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT;
-
-   dw6 = input_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT |
-         GEN6_POSOFFSET_NONE << GEN6_WM_DW6_PS_POSOFFSET__SHIFT;
-
-   STATIC_ASSERT(Elements(cso->ps_payload) >= 4);
-   cso->ps_payload[0] = dw2;
-   cso->ps_payload[1] = dw4;
-   cso->ps_payload[2] = dw5;
-   cso->ps_payload[3] = dw6;
-}
-
-static uint32_t
-fs_get_wm_gen7(const struct ilo_dev *dev,
-               const struct ilo_shader_state *fs)
-{
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 7, 7.5);
-
-   dw = 0;
-
-   /*
-    * TODO set this bit only when
-    *
-    *  a) fs writes colors and color is not masked, or
-    *  b) fs writes depth, or
-    *  c) fs or cc kills
-    */
-   dw |= GEN7_WM_DW1_PS_DISPATCH_ENABLE;
-
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 278:
-    *
-    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
-    *      the PS kernel or color calculator has the ability to kill
-    *      (discard) pixels or samples, other than due to depth or stencil
-    *      testing. This bit is required to be ENABLED in the following
-    *      situations:
-    *
-    *      - The API pixel shader program contains "killpix" or "discard"
-    *        instructions, or other code in the pixel shader kernel that
-    *        can cause the final pixel mask to differ from the pixel mask
-    *        received on dispatch.
-    *
-    *      - A sampler with chroma key enabled with kill pixel mode is used
-    *        by the pixel shader.
-    *
-    *      - Any render target has Alpha Test Enable or AlphaToCoverage
-    *        Enable enabled.
-    *
-    *      - The pixel shader kernel generates and outputs oMask.
-    *
-    *      Note: As ClipDistance clipping is fully supported in hardware
-    *      and therefore not via PS instructions, there should be no need
-    *      to ENABLE this bit due to ClipDistance clipping."
-    */
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
-      dw |= GEN7_WM_DW1_PS_KILL_PIXEL;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
-      dw |= GEN7_PSCDEPTH_ON << GEN7_WM_DW1_PSCDEPTH__SHIFT;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
-      dw |= GEN7_WM_DW1_PS_USE_DEPTH;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
-      dw |= GEN7_WM_DW1_PS_USE_W;
-
-   return dw;
-}
-
-static void
-fs_init_cso_gen7(const struct ilo_dev *dev,
-                 const struct ilo_shader_state *fs,
-                 union ilo_shader_cso *cso)
-{
-   int start_grf, sampler_count, max_threads;
-   uint32_t dw2, dw4, dw5;
-
-   ILO_DEV_ASSERT(dev, 7, 7.5);
-
-   start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
-   sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
-
-   dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
-   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
-   dw4 = GEN6_POSOFFSET_NONE << GEN7_PS_DW4_POSOFFSET__SHIFT;
-
-   /* see brwCreateContext() */
-   switch (ilo_dev_gen(dev)) {
-   case ILO_GEN(7.5):
-      max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102;
-      dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
-      dw4 |= 1 << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
-      break;
-   case ILO_GEN(7):
-   default:
-      max_threads = (dev->gt == 2) ? 172 : 48;
-      dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
-      break;
-   }
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE))
-      dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
-      dw4 |= GEN7_PS_DW4_ATTR_ENABLE;
-
-   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
-   dw4 |= GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
-
-   dw5 = start_grf << GEN7_PS_DW5_URB_GRF_START0__SHIFT |
-         0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT |
-         0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT;
-
-   STATIC_ASSERT(Elements(cso->ps_payload) >= 4);
-   cso->ps_payload[0] = dw2;
-   cso->ps_payload[1] = dw4;
-   cso->ps_payload[2] = dw5;
-   cso->ps_payload[3] = fs_get_wm_gen7(dev, fs);
-}
-
-static uint32_t
-fs_get_psx_gen8(const struct ilo_dev *dev,
-                const struct ilo_shader_state *fs)
-{
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   dw = GEN8_PSX_DW1_VALID;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
-      dw |= GEN8_PSX_DW1_KILL_PIXEL;
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
-      dw |= GEN7_PSCDEPTH_ON << GEN8_PSX_DW1_PSCDEPTH__SHIFT;
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
-      dw |= GEN8_PSX_DW1_USE_DEPTH;
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
-      dw |= GEN8_PSX_DW1_USE_W;
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
-      dw |= GEN8_PSX_DW1_ATTR_ENABLE;
-
-   return dw;
-}
-
-static void
-fs_init_cso_gen8(const struct ilo_dev *dev,
-                 const struct ilo_shader_state *fs,
-                 union ilo_shader_cso *cso)
-{
-   int start_grf, sampler_count;
-   uint32_t dw3, dw6, dw7;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
-   sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
-
-   dw3 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
-   dw3 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
-   /* always 64? */
-   dw6 = (64 - 2) << GEN8_PS_DW6_MAX_THREADS__SHIFT |
-         GEN6_POSOFFSET_NONE << GEN8_PS_DW6_POSOFFSET__SHIFT;
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE))
-      dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE;
-
-   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
-   dw6 |= GEN6_PS_DISPATCH_8 << GEN8_PS_DW6_DISPATCH_MODE__SHIFT;
-
-   dw7 = start_grf << GEN8_PS_DW7_URB_GRF_START0__SHIFT |
-         0 << GEN8_PS_DW7_URB_GRF_START1__SHIFT |
-         0 << GEN8_PS_DW7_URB_GRF_START2__SHIFT;
-
-   STATIC_ASSERT(Elements(cso->ps_payload) >= 4);
-   cso->ps_payload[0] = dw3;
-   cso->ps_payload[1] = dw6;
-   cso->ps_payload[2] = dw7;
-   cso->ps_payload[3] = fs_get_psx_gen8(dev, fs);
-}
-
-void
-ilo_gpe_init_fs_cso(const struct ilo_dev *dev,
-                    const struct ilo_shader_state *fs,
-                    union ilo_shader_cso *cso)
-{
-   if (ilo_dev_gen(dev) >= ILO_GEN(8))
-      fs_init_cso_gen8(dev, fs, cso);
-   else if (ilo_dev_gen(dev) >= ILO_GEN(7))
-      fs_init_cso_gen7(dev, fs, cso);
-   else
-      fs_init_cso_gen6(dev, fs, cso);
-}
 
 static void
 fb_set_blend_caps(const struct ilo_dev *dev,
diff --git a/src/gallium/drivers/ilo/ilo_blitter.h b/src/gallium/drivers/ilo/ilo_blitter.h
index 392f784a503..08690f30378 100644
--- a/src/gallium/drivers/ilo/ilo_blitter.h
+++ b/src/gallium/drivers/ilo/ilo_blitter.h
@@ -74,6 +74,7 @@ struct ilo_blitter {
    uint32_t vp_data[20];
 
    struct ilo_state_sbe sbe;
+   struct ilo_state_ps ps;
    struct ilo_state_cc cc;
 
    uint32_t depth_clear_value;
diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
index 9cc57f86f68..a4c8dead4a5 100644
--- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
+++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
@@ -78,6 +78,7 @@ ilo_blitter_set_invariants(struct ilo_blitter *blitter)
          blitter->vp_data, sizeof(blitter->vp_data));
 
    ilo_state_sbe_init_for_rectlist(&blitter->sbe, blitter->ilo->dev, 0, 0);
+   ilo_state_ps_init_disabled(&blitter->ps, blitter->ilo->dev);
 
    ilo_state_urb_init_for_rectlist(&blitter->urb, blitter->ilo->dev,
          ilo_state_vf_get_attr_count(&blitter->vf));
diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c
index 2f6743c8b28..30abead0cdc 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen6.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen6.c
@@ -666,14 +666,17 @@ gen6_draw_wm(struct ilo_render *r,
    }
 
    /* 3DSTATE_WM */
-   if (DIRTY(FS) || DIRTY(BLEND) ||
+   if (DIRTY(FS) ||
        (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM) ||
        r->instruction_bo_changed) {
+      const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->fs);
+      const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->fs);
+
       if (ilo_dev_gen(r->dev) == ILO_GEN(6) && r->hw_ctx_changed)
          gen6_wa_pre_3dstate_wm_max_threads(r);
 
-      gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs, vec->fs,
-            vec->blend->dual_blend, vec->blend->alpha_may_kill);
+      gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs,
+            &cso->ps, kernel_offset);
    }
 }
 
@@ -840,7 +843,7 @@ gen6_rectlist_wm(struct ilo_render *r,
    gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
 
    gen6_wa_pre_3dstate_wm_max_threads(r);
-   gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, NULL, false, false);
+   gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0);
 }
 
 static void
diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c
index 04da1c41261..0b2245c80da 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen7.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen7.c
@@ -499,12 +499,12 @@ gen7_draw_wm(struct ilo_render *r,
              const struct ilo_state_vector *vec,
              struct ilo_render_draw_session *session)
 {
+   const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->fs);
+   const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->fs);
+
    /* 3DSTATE_WM */
-   if (DIRTY(FS) || DIRTY(BLEND) ||
-       (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM)) {
-      gen7_3DSTATE_WM(r->builder, &vec->rasterizer->rs, vec->fs,
-            vec->blend->alpha_may_kill);
-   }
+   if (DIRTY(FS) || (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM))
+      gen7_3DSTATE_WM(r->builder, &vec->rasterizer->rs, &cso->ps);
 
    /* 3DSTATE_BINDING_TABLE_POINTERS_PS */
    if (session->binding_table_fs_changed) {
@@ -527,13 +527,11 @@ gen7_draw_wm(struct ilo_render *r,
    }
 
    /* 3DSTATE_PS */
-   if (DIRTY(FS) || DIRTY(BLEND) || r->instruction_bo_changed) {
-      const bool dual_blend = vec->blend->dual_blend;
-
+   if (DIRTY(FS) || r->instruction_bo_changed) {
       if (r->hw_ctx_changed)
          gen7_wa_pre_3dstate_ps_max_threads(r);
 
-      gen7_3DSTATE_PS(r->builder, vec->fs, dual_blend);
+      gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
    }
 
    /* 3DSTATE_SCISSOR_STATE_POINTERS */
@@ -714,12 +712,12 @@ static void
 gen7_rectlist_wm(struct ilo_render *r,
                  const struct ilo_blitter *blitter)
 {
-   gen7_3DSTATE_WM(r->builder, &blitter->fb.rs, NULL, false);
+   gen7_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps);
 
    gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
 
    gen7_wa_pre_3dstate_ps_max_threads(r);
-   gen7_disable_3DSTATE_PS(r->builder);
+   gen7_3DSTATE_PS(r->builder, &blitter->ps, 0);
 }
 
 static void
diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c
index 691c378c864..2ce71fb161e 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen8.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen8.c
@@ -86,6 +86,9 @@ gen8_draw_wm(struct ilo_render *r,
              const struct ilo_state_vector *vec,
              struct ilo_render_draw_session *session)
 {
+   const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->fs);
+   const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->fs);
+
    /* 3DSTATE_WM */
    if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM)
       gen8_3DSTATE_WM(r->builder, &vec->rasterizer->rs);
@@ -121,13 +124,11 @@ gen8_draw_wm(struct ilo_render *r,
 
    /* 3DSTATE_PS */
    if (DIRTY(FS) || r->instruction_bo_changed)
-      gen8_3DSTATE_PS(r->builder, vec->fs);
+      gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
 
    /* 3DSTATE_PS_EXTRA */
-   if (DIRTY(FS) || DIRTY(BLEND)) {
-      gen8_3DSTATE_PS_EXTRA(r->builder, vec->fs,
-            vec->blend->alpha_may_kill, false);
-   }
+   if (DIRTY(FS))
+      gen8_3DSTATE_PS_EXTRA(r->builder, &cso->ps);
 
    /* 3DSTATE_PS_BLEND */
    if (session->cc_delta.dirty & ILO_STATE_CC_3DSTATE_PS_BLEND)
diff --git a/src/gallium/drivers/ilo/ilo_render_surface.c b/src/gallium/drivers/ilo/ilo_render_surface.c
index 729cb604eaf..bbdd5fe7a0a 100644
--- a/src/gallium/drivers/ilo/ilo_render_surface.c
+++ b/src/gallium/drivers/ilo/ilo_render_surface.c
@@ -29,6 +29,7 @@
 
 #include "ilo_common.h"
 #include "ilo_blitter.h"
+#include "ilo_shader.h"
 #include "ilo_state.h"
 #include "ilo_render_gen.h"
 
diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c
index 29c2bf5f8f4..93a26268a29 100644
--- a/src/gallium/drivers/ilo/ilo_shader.c
+++ b/src/gallium/drivers/ilo/ilo_shader.c
@@ -27,7 +27,6 @@
 
 #include "genhw/genhw.h" /* for SBE setup */
 #include "core/ilo_builder.h"
-#include "core/ilo_state_3d.h"
 #include "core/intel_winsys.h"
 #include "shader/ilo_shader_internal.h"
 #include "tgsi/tgsi_parse.h"
@@ -654,6 +653,60 @@ init_gs(struct ilo_shader *kernel,
    ilo_state_gs_init(&kernel->cso.gs, state->info.dev, &info);
 }
 
+static void
+init_ps(struct ilo_shader *kernel,
+        const struct ilo_shader_state *state)
+{
+   struct ilo_state_ps_info info;
+
+   memset(&info, 0, sizeof(info));
+
+   init_shader_kernel(kernel, state, &info.kernel_8);
+   init_shader_resource(kernel, state, &info.resource);
+
+   info.io.has_rt_write = true;
+   info.io.posoffset = GEN6_POSOFFSET_NONE;
+   info.io.attr_count = kernel->in.count;
+   info.io.use_z = kernel->in.has_pos;
+   info.io.use_w = kernel->in.has_pos;
+   info.io.use_coverage_mask = false;
+   info.io.pscdepth = (kernel->out.has_pos) ?
+      GEN7_PSCDEPTH_ON : GEN7_PSCDEPTH_OFF;
+   info.io.write_pixel_mask = kernel->has_kill;
+   info.io.write_omask = false;
+
+   info.params.sample_mask = 0x1;
+   info.params.earlyz_control_psexec = false;
+   info.params.alpha_may_kill = false;
+   info.params.dual_source_blending = false;
+   info.params.has_writeable_rt = true;
+
+   info.valid_kernels = GEN6_PS_DISPATCH_8;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 284:
+    *
+    *     "(MSDISPMODE_PERSAMPLE) This is the high-quality multisample mode
+    *      where (over and above PERPIXEL mode) the PS is run for each covered
+    *      sample. This mode is also used for "normal" non-multisample
+    *      rendering (aka 1X), given Number of Multisamples is programmed to
+    *      NUMSAMPLES_1."
+    */
+   info.per_sample_dispatch = true;
+
+   info.rt_clear_enable = false;
+   info.rt_resolve_enable = false;
+   info.cv_per_sample_interp = false;
+   info.cv_has_earlyz_op = false;
+   info.sample_count_one = true;
+   info.cv_has_depth_buffer = true;
+
+   ilo_state_ps_init(&kernel->cso.ps, state->info.dev, &info);
+
+   /* remember current parameters */
+   kernel->ps_params = info.params;
+}
+
 static void
 init_sol(struct ilo_shader *kernel,
          const struct ilo_dev *dev,
@@ -837,7 +890,7 @@ ilo_shader_state_use_variant(struct ilo_shader_state *state,
          init_gs(sh, state);
          break;
       case PIPE_SHADER_FRAGMENT:
-         ilo_gpe_init_fs_cso(state->info.dev, state, &sh->cso);
+         init_ps(sh, state);
          break;
       default:
          break;
@@ -955,16 +1008,33 @@ ilo_shader_select_kernel(struct ilo_shader_state *shader,
                          const struct ilo_state_vector *vec,
                          uint32_t dirty)
 {
-   const struct ilo_shader * const cur = shader->shader;
    struct ilo_shader_variant variant;
+   bool changed = false;
 
-   if (!(shader->info.non_orthogonal_states & dirty))
-      return false;
+   if (shader->info.non_orthogonal_states & dirty) {
+      const struct ilo_shader * const old = shader->shader;
+
+      ilo_shader_variant_init(&variant, &shader->info, vec);
+      ilo_shader_state_use_variant(shader, &variant);
+      changed = (shader->shader != old);
+   }
+
+   if (shader->info.type == PIPE_SHADER_FRAGMENT) {
+      struct ilo_shader *kernel = shader->shader;
+
+      if (kernel->ps_params.sample_mask != vec->sample_mask ||
+          kernel->ps_params.alpha_may_kill != vec->blend->alpha_may_kill) {
+         kernel->ps_params.sample_mask = vec->sample_mask;
+         kernel->ps_params.alpha_may_kill = vec->blend->alpha_may_kill;
 
-   ilo_shader_variant_init(&variant, &shader->info, vec);
-   ilo_shader_state_use_variant(shader, &variant);
+         ilo_state_ps_set_params(&kernel->cso.ps, shader->info.dev,
+               &kernel->ps_params);
 
-   return (shader->shader != cur);
+         changed = true;
+      }
+   }
+
+   return changed;
 }
 
 static int
@@ -1063,8 +1133,8 @@ ilo_shader_select_kernel_sbe(struct ilo_shader_state *shader,
    assert(kernel->in.count <= Elements(swizzles));
    dst_len = MIN2(kernel->in.count, Elements(swizzles));
 
-   memset(&info, 0, sizeof(info));
    memset(&swizzles, 0, sizeof(swizzles));
+   memset(&info, 0, sizeof(info));
 
    info.attr_count = dst_len;
    info.cv_vue_attr_count = src_skip + src_len;
diff --git a/src/gallium/drivers/ilo/ilo_shader.h b/src/gallium/drivers/ilo/ilo_shader.h
index 0f20877f83d..d9f02a4746a 100644
--- a/src/gallium/drivers/ilo/ilo_shader.h
+++ b/src/gallium/drivers/ilo/ilo_shader.h
@@ -28,6 +28,8 @@
 #ifndef ILO_SHADER_H
 #define ILO_SHADER_H
 
+#include "core/ilo_state_shader.h"
+
 #include "ilo_common.h"
 
 enum ilo_kernel_param {
@@ -90,6 +92,19 @@ struct ilo_state_sbe;
 struct ilo_state_sol;
 struct ilo_state_vector;
 
+union ilo_shader_cso {
+   struct ilo_state_vs vs;
+   struct ilo_state_hs hs;
+   struct ilo_state_ds ds;
+   struct ilo_state_gs gs;
+   struct ilo_state_ps ps;
+
+   struct {
+      struct ilo_state_vs vs;
+      struct ilo_state_gs sol;
+   } vs_sol;
+};
+
 struct ilo_shader_cache *
 ilo_shader_cache_create(void);
 
diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c
index 902b6d3d1de..917839fa23e 100644
--- a/src/gallium/drivers/ilo/ilo_state.c
+++ b/src/gallium/drivers/ilo/ilo_state.c
@@ -698,6 +698,7 @@ ilo_finalize_3d_states(struct ilo_context *ilo,
 {
    ilo->state_vector.draw = draw;
 
+   finalize_blend(ilo);
    finalize_shader_states(&ilo->state_vector);
    finalize_constant_buffers(ilo);
    finalize_index_buffer(ilo);
@@ -706,7 +707,6 @@ ilo_finalize_3d_states(struct ilo_context *ilo,
    finalize_urb(ilo);
    finalize_rasterizer(ilo);
    finalize_viewport(ilo);
-   finalize_blend(ilo);
 
    u_upload_unmap(ilo->uploader);
 }
diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
index 9c17ec0e58d..01c86675202 100644
--- a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
+++ b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
@@ -77,6 +77,7 @@ struct ilo_shader_variant {
 
 struct ilo_kernel_routing {
    bool initialized;
+
    bool is_point;
    bool light_twoside;
    uint32_t sprite_coord_enable;
@@ -140,6 +141,7 @@ struct ilo_shader {
    int kernel_size;
 
    struct ilo_kernel_routing routing;
+   struct ilo_state_ps_params_info ps_params;
 
    /* what does the push constant buffer consist of? */
    struct {
-- 
cgit v1.2.3