summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChia-I Wu <[email protected]>2014-09-12 23:44:19 +0800
committerChia-I Wu <[email protected]>2014-09-13 09:31:08 +0800
commitea8e7a8d4a32ff8d3eea2dce871cfbd6b833cc87 (patch)
tree1b65ca57942ed18f57bace7ae07476bb385f765e
parentaec8521166d8acc9211db864a24ec087d7d2e7f2 (diff)
ilo: move 3D functions to ilo_builder_3d*.h
Move functions for the 3D pipeline to the new headers. We artificially split the functions into top (vertex processing) and bottom (pixel processing), to keep the headers at reasonable sizes.
-rw-r--r--src/gallium/drivers/ilo/Makefile.sources4
-rw-r--r--src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c3
-rw-r--r--src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c2
-rw-r--r--src/gallium/drivers/ilo/ilo_blitter_rectlist.c6
-rw-r--r--src/gallium/drivers/ilo/ilo_builder_3d.h125
-rw-r--r--src/gallium/drivers/ilo/ilo_builder_3d_bottom.h1334
-rw-r--r--src/gallium/drivers/ilo/ilo_builder_3d_top.h (renamed from src/gallium/drivers/ilo/ilo_gpe_gen7.h)1796
-rw-r--r--src/gallium/drivers/ilo/ilo_gpe_gen6.h1879
-rw-r--r--src/gallium/drivers/ilo/ilo_gpe_gen7.c3
9 files changed, 2614 insertions, 2538 deletions
diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources
index d086025e248..0a631e809af 100644
--- a/src/gallium/drivers/ilo/Makefile.sources
+++ b/src/gallium/drivers/ilo/Makefile.sources
@@ -16,6 +16,9 @@ C_SOURCES := \
ilo_blitter_rectlist.c \
ilo_builder.c \
ilo_builder.h \
+ ilo_builder_3d.h \
+ ilo_builder_3d_bottom.h \
+ ilo_builder_3d_top.h \
ilo_blitter_blt.h \
ilo_builder_decode.c \
ilo_builder_media.h \
@@ -31,7 +34,6 @@ C_SOURCES := \
ilo_gpe_gen6.c \
ilo_gpe_gen6.h \
ilo_gpe_gen7.c \
- ilo_gpe_gen7.h \
ilo_gpe.h \
ilo_gpgpu.c \
ilo_gpgpu.h \
diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c
index 88dd3ae67fd..3e37c68f3f9 100644
--- a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c
+++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c
@@ -31,12 +31,11 @@
#include "ilo_3d.h"
#include "ilo_blitter.h"
+#include "ilo_builder_3d.h"
#include "ilo_builder_mi.h"
#include "ilo_builder_render.h"
#include "ilo_context.h"
#include "ilo_cp.h"
-#include "ilo_gpe_gen6.h"
-#include "ilo_gpe_gen7.h"
#include "ilo_shader.h"
#include "ilo_state.h"
#include "ilo_3d_pipeline.h"
diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c
index fd1daf50d7d..14c0823df63 100644
--- a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c
+++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c
@@ -29,10 +29,10 @@
#include "util/u_dual_blend.h"
#include "ilo_blitter.h"
+#include "ilo_builder_3d.h"
#include "ilo_builder_render.h"
#include "ilo_context.h"
#include "ilo_cp.h"
-#include "ilo_gpe_gen7.h"
#include "ilo_shader.h"
#include "ilo_state.h"
#include "ilo_3d_pipeline.h"
diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
index 009780a1cd9..5dd0b1ec5a9 100644
--- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
+++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
@@ -28,13 +28,13 @@
#include "util/u_draw.h"
#include "util/u_pack_color.h"
-#include "ilo_blitter.h"
#include "ilo_3d.h"
#include "ilo_3d_pipeline.h"
+#include "ilo_builder_3d_top.h" /* for ve_init_cso_with_components() */
+#include "ilo_gpe_gen6.h" /* for zs_align_surface() */
#include "ilo_blit.h"
#include "ilo_gpe.h"
-#include "ilo_gpe_gen6.h" /* for ve_init_cso_with_components and
- zs_align_surface */
+#include "ilo_blitter.h"
/**
* Set the states that are invariant between all ops.
diff --git a/src/gallium/drivers/ilo/ilo_builder_3d.h b/src/gallium/drivers/ilo/ilo_builder_3d.h
new file mode 100644
index 00000000000..c94fd718ee3
--- /dev/null
+++ b/src/gallium/drivers/ilo/ilo_builder_3d.h
@@ -0,0 +1,125 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2014 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#ifndef ILO_BUILDER_3D_H
+#define ILO_BUILDER_3D_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_common.h"
+#include "ilo_builder_3d_top.h"
+#include "ilo_builder_3d_bottom.h"
+
+/**
+ * Translate a pipe primitive type to the matching hardware primitive type.
+ *
+ * \param prim  a PIPE_PRIM_x value; it must be below PIPE_PRIM_MAX and have a
+ *              non-zero entry in the table below (both are asserted)
+ * \return the GEN6_3DPRIM_x value that corresponds to \p prim
+ */
+static inline int
+ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
+{
+   static const int prim_mapping[PIPE_PRIM_MAX] = {
+      [PIPE_PRIM_POINTS]                     = GEN6_3DPRIM_POINTLIST,
+      [PIPE_PRIM_LINES]                      = GEN6_3DPRIM_LINELIST,
+      [PIPE_PRIM_LINE_LOOP]                  = GEN6_3DPRIM_LINELOOP,
+      [PIPE_PRIM_LINE_STRIP]                 = GEN6_3DPRIM_LINESTRIP,
+      [PIPE_PRIM_TRIANGLES]                  = GEN6_3DPRIM_TRILIST,
+      [PIPE_PRIM_TRIANGLE_STRIP]             = GEN6_3DPRIM_TRISTRIP,
+      [PIPE_PRIM_TRIANGLE_FAN]               = GEN6_3DPRIM_TRIFAN,
+      [PIPE_PRIM_QUADS]                      = GEN6_3DPRIM_QUADLIST,
+      [PIPE_PRIM_QUAD_STRIP]                 = GEN6_3DPRIM_QUADSTRIP,
+      [PIPE_PRIM_POLYGON]                    = GEN6_3DPRIM_POLYGON,
+      [PIPE_PRIM_LINES_ADJACENCY]            = GEN6_3DPRIM_LINELIST_ADJ,
+      [PIPE_PRIM_LINE_STRIP_ADJACENCY]       = GEN6_3DPRIM_LINESTRIP_ADJ,
+      [PIPE_PRIM_TRIANGLES_ADJACENCY]        = GEN6_3DPRIM_TRILIST_ADJ,
+      [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY]   = GEN6_3DPRIM_TRISTRIP_ADJ,
+   };
+
+   /* check the range first so that the lookup never reads past the table */
+   assert(prim < PIPE_PRIM_MAX);
+
+   /* entries without a designated initializer are left at zero */
+   assert(prim_mapping[prim]);
+
+   return prim_mapping[prim];
+}
+
+/**
+ * Emit the six-dword 3DPRIMITIVE command of GEN6.
+ *
+ * The primitive topology is RECTLIST when \p rectlist is set, and is
+ * translated from info->mode otherwise.  For indexed draws, the random
+ * access mode is selected and ib->draw_start_offset is added to info->start;
+ * \p ib is not read for non-indexed draws.
+ */
+static inline void
+gen6_3DPRIMITIVE(struct ilo_builder *builder,
+                 const struct pipe_draw_info *info,
+                 const struct ilo_ib_state *ib,
+                 bool rectlist)
+{
+   const uint8_t cmd_len = 6;
+   uint32_t header, first, *cmd;
+   int topology, access;
+
+   if (rectlist)
+      topology = GEN6_3DPRIM_RECTLIST;
+   else
+      topology = ilo_gpe_gen6_translate_pipe_prim(info->mode);
+
+   if (info->indexed) {
+      access = GEN6_3DPRIM_DW0_ACCESS_RANDOM;
+      first = info->start + ib->draw_start_offset;
+   }
+   else {
+      access = GEN6_3DPRIM_DW0_ACCESS_SEQUENTIAL;
+      first = info->start;
+   }
+
+   ILO_DEV_ASSERT(builder->dev, 6, 6);
+
+   /* on GEN6, the access mode and the topology live in the header */
+   header = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) |
+            access |
+            topology << GEN6_3DPRIM_DW0_TYPE__SHIFT |
+            (cmd_len - 2);
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = header;
+   cmd[1] = info->count;
+   cmd[2] = first;
+   cmd[3] = info->instance_count;
+   cmd[4] = info->start_instance;
+   cmd[5] = info->index_bias;
+}
+
+/**
+ * Emit the seven-dword 3DPRIMITIVE command of GEN7 and GEN7.5.
+ *
+ * The access mode and the primitive topology are written to DW1 here; the
+ * remaining draw parameters follow in DW2 to DW6.  \p ib is only read for
+ * indexed draws.
+ */
+static inline void
+gen7_3DPRIMITIVE(struct ilo_builder *builder,
+                 const struct pipe_draw_info *info,
+                 const struct ilo_ib_state *ib,
+                 bool rectlist)
+{
+   const uint8_t cmd_len = 7;
+   const uint32_t header = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | (cmd_len - 2);
+   uint32_t first, *cmd;
+   int topology, access;
+
+   if (rectlist)
+      topology = GEN6_3DPRIM_RECTLIST;
+   else
+      topology = ilo_gpe_gen6_translate_pipe_prim(info->mode);
+
+   if (info->indexed) {
+      access = GEN7_3DPRIM_DW1_ACCESS_RANDOM;
+      first = info->start + ib->draw_start_offset;
+   }
+   else {
+      access = GEN7_3DPRIM_DW1_ACCESS_SEQUENTIAL;
+      first = info->start;
+   }
+
+   ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = header;
+   cmd[1] = access | topology;
+   cmd[2] = info->count;
+   cmd[3] = first;
+   cmd[4] = info->instance_count;
+   cmd[5] = info->start_instance;
+   cmd[6] = info->index_bias;
+}
+
+#endif /* ILO_BUILDER_3D_H */
diff --git a/src/gallium/drivers/ilo/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/ilo_builder_3d_bottom.h
new file mode 100644
index 00000000000..6427228a64c
--- /dev/null
+++ b/src/gallium/drivers/ilo/ilo_builder_3d_bottom.h
@@ -0,0 +1,1334 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2014 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#ifndef ILO_BUILDER_3D_BOTTOM_H
+#define ILO_BUILDER_3D_BOTTOM_H
+
+#include "genhw/genhw.h"
+#include "intel_winsys.h"
+
+#include "ilo_common.h"
+#include "ilo_format.h"
+#include "ilo_shader.h"
+#include "ilo_builder.h"
+#include "ilo_builder_3d_top.h"
+
+/**
+ * Emit the four-dword 3DSTATE_CLIP command.
+ *
+ * With a rasterizer state, DW1 to DW3 start from rasterizer->clip.payload
+ * and are amended with the guardband test enable, the non-perspective
+ * barycentric enable (derived from the interpolations used by \p fs), the
+ * forced-zero RTA index, and the viewport count minus one.  Without a
+ * rasterizer state, DW1 to DW3 are all zeros.
+ */
+static inline void
+gen6_3DSTATE_CLIP(struct ilo_builder *builder,
+                  const struct ilo_rasterizer_state *rasterizer,
+                  const struct ilo_shader_state *fs,
+                  bool enable_guardband,
+                  int num_viewports)
+{
+   const uint8_t cmd_len = 4;
+   uint32_t dw1 = 0, dw2 = 0, dw3 = 0, *cmd;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+
+   if (rasterizer) {
+      int interps = 0;
+
+      if (fs) {
+         interps = ilo_shader_get_kernel_param(fs,
+               ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
+      }
+
+      dw1 = rasterizer->clip.payload[0];
+      dw2 = rasterizer->clip.payload[1];
+      dw3 = rasterizer->clip.payload[2] |
+            GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO |
+            (num_viewports - 1);
+
+      /* both the caller and the rasterizer state must allow the guardband */
+      if (enable_guardband && rasterizer->clip.can_enable_guardband)
+         dw2 |= GEN6_CLIP_DW2_GB_TEST_ENABLE;
+
+      if (interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL |
+                     GEN6_INTERP_NONPERSPECTIVE_CENTROID |
+                     GEN6_INTERP_NONPERSPECTIVE_SAMPLE))
+         dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE;
+   }
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (cmd_len - 2);
+   cmd[1] = dw1;
+   cmd[2] = dw2;
+   cmd[3] = dw3;
+}
+
+/**
+ * Fill in DW2 to DW7 of 3DSTATE_SF.
+ *
+ * The six dwords are copied from rasterizer->sf.payload when a rasterizer
+ * state is given, with sf->dw_msaa OR'ed into the second dword when
+ * \p num_samples is greater than one.  Without a rasterizer state, they are
+ * zeroed except for the multisample rasterization mode of the second dword.
+ * On GEN7 and later, the hardware depth format derived from \p depth_format
+ * is additionally OR'ed into the first dword.
+ *
+ * gen6_3DSTATE_SF() copies the result to DW2..DW7 of its command, while
+ * gen7_3DSTATE_SF() copies it to DW1..DW6.
+ *
+ * \param payload      receives the dwords
+ * \param payload_len  number of dwords in \p payload; asserted to equal
+ *                     Elements(rasterizer->sf.payload)
+ */
+static inline void
+ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
+ const struct ilo_rasterizer_state *rasterizer,
+ int num_samples,
+ enum pipe_format depth_format,
+ uint32_t *payload, unsigned payload_len)
+{
+ assert(payload_len == Elements(rasterizer->sf.payload)); /* presumably sizeof-based, so a NULL rasterizer is not dereferenced here -- TODO confirm */
+
+ if (rasterizer) {
+ const struct ilo_rasterizer_sf *sf = &rasterizer->sf;
+
+ memcpy(payload, sf->payload, sizeof(sf->payload));
+ if (num_samples > 1)
+ payload[1] |= sf->dw_msaa;
+ }
+ else {
+ payload[0] = 0;
+ payload[1] = (num_samples > 1) ? GEN7_SF_DW2_MSRASTMODE_ON_PATTERN : 0;
+ payload[2] = 0;
+ payload[3] = 0;
+ payload[4] = 0;
+ payload[5] = 0;
+ }
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+ int format;
+
+ /*
+ * separate stencil: pipe formats with a stencil component map to the
+ * depth-only hardware format of the same depth precision
+ */
+ switch (depth_format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ format = GEN6_ZFORMAT_D16_UNORM;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT:
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ format = GEN6_ZFORMAT_D32_FLOAT;
+ break;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ format = GEN6_ZFORMAT_D24_UNORM_X8_UINT;
+ break;
+ default:
+ /* FLOAT surface is assumed when there is no depth buffer */
+ format = GEN6_ZFORMAT_D32_FLOAT;
+ break;
+ }
+
+ payload[0] |= format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT; /* payload[0] is DW1 of the GEN7 command */
+ }
+}
+
+/**
+ * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
+ *
+ * \p dw receives 13 dwords (asserted through \p num_dwords): dw[0] holds the
+ * attribute count, the URB read length and offset, the swizzle enable, and
+ * the point sprite texture coordinate origin; dw[1..8] hold the attribute
+ * swizzles; dw[9] the point sprite enables; dw[10] the constant
+ * interpolation enables; dw[11..12] the WrapShortest enables (always zero).
+ * gen7_3DSTATE_SBE() emits the same 13 dwords as DW1..DW13 of its command.
+ *
+ * Without \p fs, everything is zeroed except for a URB read length of 1.
+ *
+ * NOTE(review): \p rasterizer is dereferenced unconditionally once \p fs is
+ * non-NULL -- confirm that callers never pass a NULL rasterizer together
+ * with a valid fragment shader.
+ */
+static inline void
+ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
+ const struct ilo_rasterizer_state *rasterizer,
+ const struct ilo_shader_state *fs,
+ uint32_t *dw, int num_dwords)
+{
+ int output_count, vue_offset, vue_len;
+ const struct ilo_kernel_routing *routing;
+
+ ILO_DEV_ASSERT(dev, 6, 7.5);
+ assert(num_dwords == 13);
+
+ if (!fs) {
+ memset(dw, 0, sizeof(dw[0]) * num_dwords);
+ dw[0] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;
+ return;
+ }
+
+ output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); /* the FS input count is programmed as the attribute count */
+ assert(output_count <= 32);
+
+ routing = ilo_shader_get_kernel_routing(fs);
+
+ vue_offset = routing->source_skip;
+ assert(vue_offset % 2 == 0);
+ vue_offset /= 2; /* source_skip is halved to get the read offset */
+
+ vue_len = (routing->source_len + 1) / 2; /* halved as well, rounding up */
+ if (!vue_len)
+ vue_len = 1; /* never program a read length of zero */
+
+ dw[0] = output_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT |
+ vue_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT |
+ vue_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT;
+ if (routing->swizzle_enable)
+ dw[0] |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE;
+
+ switch (rasterizer->state.sprite_coord_mode) {
+ case PIPE_SPRITE_COORD_UPPER_LEFT:
+ dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT;
+ break;
+ case PIPE_SPRITE_COORD_LOWER_LEFT:
+ dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT;
+ break;
+ }
+
+ STATIC_ASSERT(Elements(routing->swizzles) >= 16);
+ memcpy(&dw[1], routing->swizzles, 2 * 16); /* 32 bytes, i.e. dw[1] to dw[8]; presumably 16 swizzles of 2 bytes each */
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 268:
+ *
+ * "This field (Point Sprite Texture Coordinate Enable) must be
+ * programmed to 0 when non-point primitives are rendered."
+ *
+ * TODO We do not check that yet.
+ */
+ dw[9] = routing->point_sprite_enable;
+
+ dw[10] = routing->const_interp_enable;
+
+ /* WrapShortest enables */
+ dw[11] = 0;
+ dw[12] = 0;
+}
+
+/**
+ * Emit the twenty-dword 3DSTATE_SF command of GEN6.
+ *
+ * DW1 and DW8..DW19 come from ilo_gpe_gen6_fill_3dstate_sf_sbe(), and
+ * DW2..DW7 come from ilo_gpe_gen6_fill_3dstate_sf_raster(), which is called
+ * with a sample count of one and PIPE_FORMAT_NONE as the depth format.
+ */
+static inline void
+gen6_3DSTATE_SF(struct ilo_builder *builder,
+                const struct ilo_rasterizer_state *rasterizer,
+                const struct ilo_shader_state *fs)
+{
+   const uint8_t cmd_len = 20;
+   uint32_t raster[6], sbe[13], *cmd;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 6);
+
+   ilo_gpe_gen6_fill_3dstate_sf_raster(builder->dev, rasterizer,
+         1, PIPE_FORMAT_NONE, raster, Elements(raster));
+   ilo_gpe_gen6_fill_3dstate_sf_sbe(builder->dev, rasterizer,
+         fs, sbe, Elements(sbe));
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2);
+   cmd[1] = sbe[0];
+   memcpy(&cmd[2], raster, sizeof(raster));
+   /* the first SBE dword went to DW1; the other twelve follow the raster */
+   memcpy(&cmd[8], &sbe[1], sizeof(sbe) - sizeof(sbe[0]));
+}
+
+/**
+ * Emit the seven-dword 3DSTATE_SF command of GEN7 and GEN7.5.
+ *
+ * DW1..DW6 come from ilo_gpe_gen6_fill_3dstate_sf_raster(), which is called
+ * with a sample count of one and \p zs_format as the depth format.
+ */
+static inline void
+gen7_3DSTATE_SF(struct ilo_builder *builder,
+                const struct ilo_rasterizer_state *rasterizer,
+                enum pipe_format zs_format)
+{
+   const uint8_t cmd_len = 7;
+   const int num_samples = 1;
+   uint32_t raster[6], *cmd;
+
+   ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+
+   ilo_gpe_gen6_fill_3dstate_sf_raster(builder->dev, rasterizer,
+         num_samples, zs_format, raster, Elements(raster));
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2);
+   memcpy(&cmd[1], raster, sizeof(raster));
+}
+
+/**
+ * Emit the fourteen-dword 3DSTATE_SBE command of GEN7 and GEN7.5.
+ *
+ * DW1..DW13 are the thirteen dwords produced by
+ * ilo_gpe_gen6_fill_3dstate_sf_sbe().
+ */
+static inline void
+gen7_3DSTATE_SBE(struct ilo_builder *builder,
+                 const struct ilo_rasterizer_state *rasterizer,
+                 const struct ilo_shader_state *fs)
+{
+   const uint8_t cmd_len = 14;
+   uint32_t sbe[13], *cmd;
+
+   ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+
+   ilo_gpe_gen6_fill_3dstate_sf_sbe(builder->dev, rasterizer,
+         fs, sbe, Elements(sbe));
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2);
+   memcpy(&cmd[1], sbe, sizeof(sbe));
+}
+
+/**
+ * Emit the nine-dword 3DSTATE_WM command of GEN6.
+ *
+ * Without \p fs, only \p hiz_op (DW4) and the maximum thread count (DW5) are
+ * programmed.  With \p fs, DW2 and DW4..DW6 start from the payload of the
+ * kernel CSO and are combined with the sampler count, the statistics enable,
+ * the kill and dual-source-blend bits, and rasterizer->wm.payload; \p hiz_op
+ * is asserted to be zero in that case.
+ */
+static inline void
+gen6_3DSTATE_WM(struct ilo_builder *builder,
+                const struct ilo_shader_state *fs,
+                int num_samplers,
+                const struct ilo_rasterizer_state *rasterizer,
+                bool dual_blend, bool cc_may_kill,
+                uint32_t hiz_op)
+{
+   const uint8_t cmd_len = 9;
+   const int num_samples = 1;
+   uint32_t dw2, dw4, dw5, dw6, *cmd;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 6);
+
+   if (fs) {
+      const struct ilo_shader_cso *cso = ilo_shader_get_kernel_cso(fs);
+
+      dw2 = cso->payload[0] |
+            ((num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT);
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 248:
+       *
+       *     "This bit (Statistics Enable) must be disabled if either of these
+       *      bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve
+       *      Enable or Depth Buffer Resolve Enable."
+       */
+      assert(!hiz_op);
+      dw4 = cso->payload[1] | GEN6_WM_DW4_STATISTICS;
+
+      dw5 = cso->payload[2] | rasterizer->wm.payload[0];
+      if (cc_may_kill)
+         dw5 |= GEN6_WM_DW5_PS_KILL | GEN6_WM_DW5_PS_ENABLE;
+      if (dual_blend)
+         dw5 |= GEN6_WM_DW5_DUAL_SOURCE_BLEND;
+
+      dw6 = cso->payload[3] | rasterizer->wm.payload[1];
+      if (num_samples > 1) {
+         dw6 |= rasterizer->wm.dw_msaa_rast |
+                rasterizer->wm.dw_msaa_disp;
+      }
+   }
+   else {
+      /* see brwCreateContext() */
+      const int max_threads = (builder->dev->gt == 2) ? 80 : 40;
+
+      dw2 = 0;
+      dw4 = hiz_op;
+      /* honor the valid range even if dispatching is disabled */
+      dw5 = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
+      dw6 = 0;
+   }
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
+   cmd[1] = (fs) ? ilo_shader_get_kernel_offset(fs) : 0;
+   cmd[2] = dw2;
+   cmd[3] = 0; /* scratch */
+   cmd[4] = dw4;
+   cmd[5] = dw5;
+   cmd[6] = dw6;
+   cmd[7] = 0; /* kernel 1 */
+   cmd[8] = 0; /* kernel 2 */
+}
+
+/**
+ * Emit the three-dword 3DSTATE_WM command of GEN7 and GEN7.5.
+ *
+ * With a rasterizer state, DW1 and DW2 start from rasterizer->wm.payload,
+ * statistics are enabled, and \p hiz_op is asserted to be zero.  Without
+ * one, DW1 starts from \p hiz_op and DW2 from zero.  The fourth payload
+ * dword of the kernel CSO and the kill bits are then OR'ed into DW1.
+ */
+static inline void
+gen7_3DSTATE_WM(struct ilo_builder *builder,
+                const struct ilo_shader_state *fs,
+                const struct ilo_rasterizer_state *rasterizer,
+                bool cc_may_kill, uint32_t hiz_op)
+{
+   const uint8_t cmd_len = 3;
+   const int num_samples = 1;
+   uint32_t dw1 = hiz_op, dw2 = 0, *cmd;
+
+   ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+
+   /* see ilo_gpe_init_rasterizer_wm() */
+   if (rasterizer) {
+      assert(!hiz_op);
+
+      dw1 = rasterizer->wm.payload[0] | GEN7_WM_DW1_STATISTICS;
+      dw2 = rasterizer->wm.payload[1];
+   }
+
+   if (fs)
+      dw1 |= ilo_shader_get_kernel_cso(fs)->payload[3];
+
+   if (cc_may_kill)
+      dw1 |= GEN7_WM_DW1_PS_ENABLE | GEN7_WM_DW1_PS_KILL;
+
+   if (num_samples > 1) {
+      dw1 |= rasterizer->wm.dw_msaa_rast;
+      dw2 |= rasterizer->wm.dw_msaa_disp;
+   }
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
+   cmd[1] = dw1;
+   cmd[2] = dw2;
+}
+
+/**
+ * Emit the eight-dword 3DSTATE_PS command of GEN7 and GEN7.5.
+ *
+ * Without \p fs, only DW4 is non-zero: it enables 8-pixel dispatch and
+ * programs the maximum thread count of the device.  With \p fs, DW2, DW4,
+ * and DW5 start from the payload of the kernel CSO, with the sampler count
+ * added to DW2 and the dual-source-blend bit to DW4.
+ */
+static inline void
+gen7_3DSTATE_PS(struct ilo_builder *builder,
+                const struct ilo_shader_state *fs,
+                int num_samplers, bool dual_blend)
+{
+   const uint8_t cmd_len = 8;
+   uint32_t dw2, dw4, dw5, *cmd;
+
+   ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+
+   if (fs) {
+      const struct ilo_shader_cso *cso = ilo_shader_get_kernel_cso(fs);
+
+      dw2 = cso->payload[0] |
+            ((num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT);
+
+      dw4 = cso->payload[1];
+      if (dual_blend)
+         dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
+
+      dw5 = cso->payload[2];
+   }
+   else {
+      int max_threads;
+
+      dw2 = 0;
+      dw5 = 0;
+
+      /* GPU hangs if none of the dispatch enable bits is set */
+      dw4 = GEN7_PS_DW4_8_PIXEL_DISPATCH;
+
+      /* see brwCreateContext() */
+      if (ilo_dev_gen(builder->dev) == ILO_GEN(7.5)) {
+         max_threads = (builder->dev->gt == 3) ? 408 :
+                       (builder->dev->gt == 2) ? 204 : 102;
+         dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
+      }
+      else {
+         max_threads = (builder->dev->gt == 2) ? 172 : 48;
+         dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
+      }
+   }
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
+   cmd[1] = (fs) ? ilo_shader_get_kernel_offset(fs) : 0;
+   cmd[2] = dw2;
+   cmd[3] = 0; /* scratch */
+   cmd[4] = dw4;
+   cmd[5] = dw5;
+   cmd[6] = 0; /* kernel 1 */
+   cmd[7] = 0; /* kernel 2 */
+}
+
+/**
+ * Emit the five-dword 3DSTATE_CONSTANT_PS command of GEN6.
+ *
+ * DW1..DW4 and the buffer enables come from gen6_fill_3dstate_constant();
+ * the enables are placed at bit 12 of the header.  At most four buffers may
+ * be given (asserted).
+ */
+static inline void
+gen6_3DSTATE_CONSTANT_PS(struct ilo_builder *builder,
+                         const uint32_t *bufs, const int *sizes,
+                         int num_bufs)
+{
+   const uint8_t cmd_len = 5;
+   uint32_t payload[4], enables, *cmd;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 6);
+   assert(num_bufs <= 4);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
+    *
+    *     "The sum of all four read length fields (each incremented to
+    *      represent the actual read length) must be less than or equal to 64"
+    */
+   enables = gen6_fill_3dstate_constant(builder->dev,
+         bufs, sizes, num_bufs, 64, payload, Elements(payload));
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_PS) |
+            enables << 12 |
+            (cmd_len - 2);
+   memcpy(&cmd[1], payload, sizeof(payload));
+}
+
+/**
+ * Thin wrapper of gen7_3dstate_constant() that passes
+ * GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_PS along with the given buffers.
+ */
+static inline void
+gen7_3DSTATE_CONSTANT_PS(struct ilo_builder *builder,
+                         const uint32_t *bufs, const int *sizes,
+                         int num_bufs)
+{
+   const int opcode = GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_PS;
+
+   gen7_3dstate_constant(builder, opcode, bufs, sizes, num_bufs);
+}
+
+/**
+ * Thin wrapper of gen7_3dstate_pointer() that passes
+ * GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_PS and \p binding_table.
+ */
+static inline void
+gen7_3DSTATE_BINDING_TABLE_POINTERS_PS(struct ilo_builder *builder,
+                                       uint32_t binding_table)
+{
+   const int opcode = GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_PS;
+
+   gen7_3dstate_pointer(builder, opcode, binding_table);
+}
+
+/**
+ * Thin wrapper of gen7_3dstate_pointer() that passes
+ * GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_PS and \p sampler_state.
+ */
+static inline void
+gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS(struct ilo_builder *builder,
+                                       uint32_t sampler_state)
+{
+   const int opcode = GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_PS;
+
+   gen7_3dstate_pointer(builder, opcode, sampler_state);
+}
+
+/**
+ * Emit 3DSTATE_MULTISAMPLE.
+ *
+ * The command is three dwords long on GEN6 and four dwords long on GEN7 and
+ * later, where the additional DW3 carries packed_sample_pos[1].
+ *
+ * \param num_samples  0, 1, 4, or 8; 8 is asserted to be used only on GEN7
+ *                     and later, and any other value is asserted against and
+ *                     treated as a single sample
+ * \param packed_sample_pos  packed sample positions; element 0 is read for 4
+ *                           and 8 samples, element 1 only for 8 samples
+ * \param pixel_location_center  select the pixel center, rather than the
+ *                               upper-left corner, as the pixel location
+ */
+static inline void
+gen6_3DSTATE_MULTISAMPLE(struct ilo_builder *builder,
+                         int num_samples,
+                         const uint32_t *packed_sample_pos,
+                         bool pixel_location_center)
+{
+   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 3;
+   const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) |
+                        (cmd_len - 2);
+   uint32_t dw1, dw2, dw3, *dw;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+
+   dw1 = (pixel_location_center) ?
+      GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER : GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER;
+
+   switch (num_samples) {
+   case 0:
+   case 1:
+      dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
+      dw2 = 0;
+      dw3 = 0;
+      break;
+   case 4:
+      dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4;
+      dw2 = packed_sample_pos[0];
+      dw3 = 0;
+      break;
+   case 8:
+      assert(ilo_dev_gen(builder->dev) >= ILO_GEN(7));
+      dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8;
+      dw2 = packed_sample_pos[0];
+      dw3 = packed_sample_pos[1];
+      break;
+   default:
+      assert(!"unsupported sample count");
+      dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
+      dw2 = 0;
+      dw3 = 0;
+      break;
+   }
+
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = dw1;
+   dw[2] = dw2;
+   /*
+    * DW3 exists only in the four-dword form of the command.  It must not
+    * overwrite DW2, which holds the first set of sample positions.
+    */
+   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
+      dw[3] = dw3;
+}
+
+/**
+ * Emit the two-dword 3DSTATE_SAMPLE_MASK command of GEN6.
+ *
+ * Only the lowest four bits of \p sample_mask are programmed.
+ */
+static inline void
+gen6_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder,
+                         unsigned sample_mask)
+{
+   const uint8_t cmd_len = 2;
+   const unsigned valid_mask = 0xf;
+   uint32_t *cmd;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 6);
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | (cmd_len - 2);
+   cmd[1] = sample_mask & valid_mask;
+}
+
+/**
+ * Emit the two-dword 3DSTATE_SAMPLE_MASK command of GEN7 and GEN7.5.
+ *
+ * \p sample_mask is limited to the lowest \p num_samples bits, and bit 0 is
+ * always allowed through.
+ */
+static inline void
+gen7_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder,
+                         unsigned sample_mask,
+                         int num_samples)
+{
+   const uint8_t cmd_len = 2;
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 294:
+    *
+    *     "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
+    *      (Sample Mask) must be zero.
+    *
+    *      If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
+    *      must be zero."
+    */
+   const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1;
+   uint32_t *cmd;
+
+   ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | (cmd_len - 2);
+   cmd[1] = sample_mask & valid_mask;
+}
+
+/**
+ * Emit the four-dword 3DSTATE_DRAWING_RECTANGLE command.
+ *
+ * The rectangle is given by its origin and size; the inclusive maximum
+ * coordinates are derived from them.  All four coordinates are clamped to
+ * 16383 on GEN7 and later, and to 8191 on GEN6, where \p y is additionally
+ * asserted to be even.  The drawing rectangle origin (DW3) is always zero.
+ */
+static inline void
+gen6_3DSTATE_DRAWING_RECTANGLE(struct ilo_builder *builder,
+                               unsigned x, unsigned y,
+                               unsigned width, unsigned height)
+{
+   const uint8_t cmd_len = 4;
+   unsigned xmax = x + width - 1;
+   unsigned ymax = y + height - 1;
+   unsigned limit;
+   uint32_t *cmd;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+
+   if (ilo_dev_gen(builder->dev) < ILO_GEN(7)) {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 230:
+       *
+       *     "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
+       *      must be an even number"
+       */
+      assert(y % 2 == 0);
+
+      limit = 8191;
+   }
+   else {
+      limit = 16383;
+   }
+
+   x = (x > limit) ? limit : x;
+   y = (y > limit) ? limit : y;
+   xmax = (xmax > limit) ? limit : xmax;
+   ymax = (ymax > limit) ? limit : ymax;
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN6_RENDER_CMD(3D, 3DSTATE_DRAWING_RECTANGLE) | (cmd_len - 2);
+   cmd[1] = y << 16 | x;
+   cmd[2] = ymax << 16 | xmax;
+   /*
+    * There is no need to set the origin.  It is intended to support front
+    * buffer rendering.
+    */
+   cmd[3] = 0;
+}
+
+/**
+ * Emit the two-dword 3DSTATE_POLY_STIPPLE_OFFSET command.
+ *
+ * Both offsets are asserted to be in [0, 31]; \p x_offset is placed at bit 8
+ * of DW1 and \p y_offset at bit 0.
+ */
+static inline void
+gen6_3DSTATE_POLY_STIPPLE_OFFSET(struct ilo_builder *builder,
+                                 int x_offset, int y_offset)
+{
+   const uint8_t cmd_len = 2;
+   uint32_t *cmd;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+   assert(x_offset >= 0 && x_offset <= 31);
+   assert(y_offset >= 0 && y_offset <= 31);
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_OFFSET) | (cmd_len - 2);
+   cmd[1] = x_offset << 8 | y_offset;
+}
+
+/**
+ * Emit the 33-dword 3DSTATE_POLY_STIPPLE_PATTERN command: the header
+ * followed by the 32 entries of pattern->stipple.
+ */
+static inline void
+gen6_3DSTATE_POLY_STIPPLE_PATTERN(struct ilo_builder *builder,
+                                  const struct pipe_poly_stipple *pattern)
+{
+   const uint8_t cmd_len = 33;
+   uint32_t *cmd;
+   int row;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+   STATIC_ASSERT(Elements(pattern->stipple) == 32);
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_PATTERN) | (cmd_len - 2);
+
+   for (row = 0; row < 32; row++)
+      cmd[1 + row] = pattern->stipple[row];
+}
+
+/**
+ * Emit the three-dword 3DSTATE_LINE_STIPPLE command.
+ *
+ * \p pattern is asserted to fit in 16 bits and \p factor to be in [1, 256].
+ * DW2 combines \p factor with its inverse, which is in U1.16 and placed at
+ * bit 15 on GEN7 and later, and in U1.13 and placed at bit 16 on GEN6.
+ */
+static inline void
+gen6_3DSTATE_LINE_STIPPLE(struct ilo_builder *builder,
+                          unsigned pattern, unsigned factor)
+{
+   const uint8_t cmd_len = 3;
+   unsigned inverse;
+   uint32_t dw2, *cmd;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+   assert((pattern & 0xffff) == pattern);
+   assert(factor >= 1 && factor <= 256);
+
+   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
+      /* in U1.16 */
+      inverse = (unsigned) (65536.0f / factor);
+      dw2 = inverse << 15 | factor;
+   }
+   else {
+      /* in U1.13 */
+      inverse = (unsigned) (8192.0f / factor);
+      dw2 = inverse << 16 | factor;
+   }
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN6_RENDER_CMD(3D, 3DSTATE_LINE_STIPPLE) | (cmd_len - 2);
+   cmd[1] = pattern;
+   cmd[2] = dw2;
+}
+
+/**
+ * Emit the three-dword 3DSTATE_AA_LINE_PARAMETERS command with both
+ * parameter dwords set to zero.
+ */
+static inline void
+gen6_3DSTATE_AA_LINE_PARAMETERS(struct ilo_builder *builder)
+{
+   const uint8_t cmd_len = 3;
+   uint32_t *cmd;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN6_RENDER_CMD(3D, 3DSTATE_AA_LINE_PARAMETERS) | (cmd_len - 2);
+   cmd[1] = 0;
+   cmd[2] = 0;
+}
+
+/**
+ * Emit the seven-dword 3DSTATE_DEPTH_BUFFER command from the payload of
+ * \p zs.
+ *
+ * The GEN7 opcode is used on GEN7 and later, and the GEN6 one otherwise.
+ * DW2 is a write relocation against zs->bo (with zs->payload[1] passed
+ * along) when there is a bo, and zero when there is none.
+ */
+static inline void
+gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder,
+                          const struct ilo_zs_surface *zs)
+{
+   const uint8_t cmd_len = 7;
+   uint32_t header, *cmd;
+   unsigned pos;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+
+   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
+      header = GEN7_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER);
+   else
+      header = GEN6_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER);
+   header |= (cmd_len - 2);
+
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = header;
+   cmd[1] = zs->payload[0];
+
+   if (!zs->bo) {
+      cmd[2] = 0;
+   } else {
+      ilo_builder_batch_reloc(builder, pos + 2,
+            zs->bo, zs->payload[1], INTEL_RELOC_WRITE);
+   }
+
+   cmd[3] = zs->payload[2];
+   cmd[4] = zs->payload[3];
+   cmd[5] = zs->payload[4];
+   cmd[6] = zs->payload[5];
+}
+
+/**
+ * Emit the three-dword 3DSTATE_STENCIL_BUFFER command from the payload of
+ * \p zs.
+ *
+ * The GEN7 opcode is used on GEN7 and later, and the GEN6 one otherwise.
+ * DW2 is a write relocation against zs->separate_s8_bo (with zs->payload[7]
+ * passed along) when there is such a bo, and zero when there is none.
+ */
+static inline void
+gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder,
+                            const struct ilo_zs_surface *zs)
+{
+   const uint8_t cmd_len = 3;
+   uint32_t header, *cmd;
+   unsigned pos;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+
+   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
+      header = GEN7_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER);
+   else
+      header = GEN6_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER);
+   header |= (cmd_len - 2);
+
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = header;
+   /* see ilo_gpe_init_zs_surface() */
+   cmd[1] = zs->payload[6];
+
+   if (!zs->separate_s8_bo) {
+      cmd[2] = 0;
+   } else {
+      ilo_builder_batch_reloc(builder, pos + 2,
+            zs->separate_s8_bo, zs->payload[7], INTEL_RELOC_WRITE);
+   }
+}
+
+/**
+ * Emit the three-dword 3DSTATE_HIER_DEPTH_BUFFER command from the payload
+ * of \p zs.
+ *
+ * The GEN7 opcode is used on GEN7 and later, and the GEN6 one otherwise.
+ * DW2 is a write relocation against zs->hiz_bo (with zs->payload[9] passed
+ * along) when there is such a bo, and zero when there is none.
+ */
+static inline void
+gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder,
+                               const struct ilo_zs_surface *zs)
+{
+   const uint8_t cmd_len = 3;
+   uint32_t header, *cmd;
+   unsigned pos;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+
+   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
+      header = GEN7_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER);
+   else
+      header = GEN6_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER);
+   header |= (cmd_len - 2);
+
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = header;
+   /* see ilo_gpe_init_zs_surface() */
+   cmd[1] = zs->payload[8];
+
+   if (!zs->hiz_bo) {
+      cmd[2] = 0;
+   } else {
+      ilo_builder_batch_reloc(builder, pos + 2,
+            zs->hiz_bo, zs->payload[9], INTEL_RELOC_WRITE);
+   }
+}
+
+/**
+ * Emit the two-dword 3DSTATE_CLEAR_PARAMS command of GEN6.
+ *
+ * The valid bit is set in the header, and \p clear_val is written to DW1.
+ */
+static inline void
+gen6_3DSTATE_CLEAR_PARAMS(struct ilo_builder *builder,
+                          uint32_t clear_val)
+{
+   const uint8_t cmd_len = 2;
+   uint32_t *cmd;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 6);
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) |
+            GEN6_CLEAR_PARAMS_DW0_VALID |
+            (cmd_len - 2);
+   cmd[1] = clear_val;
+}
+
+/**
+ * Emit the three-dword 3DSTATE_CLEAR_PARAMS command of GEN7 and GEN7.5.
+ *
+ * \p clear_val is written to DW1, and DW2 is set to 1.
+ */
+static inline void
+gen7_3DSTATE_CLEAR_PARAMS(struct ilo_builder *builder,
+                          uint32_t clear_val)
+{
+   const uint8_t cmd_len = 3;
+   uint32_t *cmd;
+
+   ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN7_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) | (cmd_len - 2);
+   cmd[1] = clear_val;
+   cmd[2] = 1;
+}
+
+/**
+ * Emit the four-dword 3DSTATE_VIEWPORT_STATE_POINTERS command of GEN6.
+ *
+ * All three "changed" bits are set in the header; the CLIP, SF, and CC
+ * viewport values are written to DW1, DW2, and DW3 respectively.
+ */
+static inline void
+gen6_3DSTATE_VIEWPORT_STATE_POINTERS(struct ilo_builder *builder,
+                                     uint32_t clip_viewport,
+                                     uint32_t sf_viewport,
+                                     uint32_t cc_viewport)
+{
+   const uint8_t cmd_len = 4;
+   uint32_t *cmd;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 6);
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VIEWPORT_STATE_POINTERS) |
+            GEN6_PTR_VP_DW0_CLIP_CHANGED |
+            GEN6_PTR_VP_DW0_SF_CHANGED |
+            GEN6_PTR_VP_DW0_CC_CHANGED |
+            (cmd_len - 2);
+   cmd[1] = clip_viewport;
+   cmd[2] = sf_viewport;
+   cmd[3] = cc_viewport;
+}
+
+/**
+ * Emit the two-dword 3DSTATE_SCISSOR_STATE_POINTERS command, with
+ * \p scissor_rect written to DW1.
+ */
+static inline void
+gen6_3DSTATE_SCISSOR_STATE_POINTERS(struct ilo_builder *builder,
+                                    uint32_t scissor_rect)
+{
+   const uint8_t cmd_len = 2;
+   uint32_t *cmd;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SCISSOR_STATE_POINTERS) |
+            (cmd_len - 2);
+   cmd[1] = scissor_rect;
+}
+
+/**
+ * Emit the four-dword 3DSTATE_CC_STATE_POINTERS command of GEN6.
+ *
+ * The blend, depth/stencil, and color calc values are written to DW1, DW2,
+ * and DW3 respectively, each with bit 0 set.
+ */
+static inline void
+gen6_3DSTATE_CC_STATE_POINTERS(struct ilo_builder *builder,
+                               uint32_t blend_state,
+                               uint32_t depth_stencil_state,
+                               uint32_t color_calc_state)
+{
+   const uint8_t cmd_len = 4;
+   uint32_t *cmd;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 6);
+
+   ilo_builder_batch_pointer(builder, cmd_len, &cmd);
+   cmd[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CC_STATE_POINTERS) | (cmd_len - 2);
+   cmd[1] = blend_state | 1;
+   cmd[2] = depth_stencil_state | 1;
+   cmd[3] = color_calc_state | 1;
+}
+
+/*
+ * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP by handing sf_clip_viewport
+ * to gen7_3dstate_pointer() together with the matching opcode.
+ */
+static inline void
+gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(struct ilo_builder *builder,
+                                             uint32_t sf_clip_viewport)
+{
+   const int subop =
+      GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP;
+
+   gen7_3dstate_pointer(builder, subop, sf_clip_viewport);
+}
+
+/*
+ * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_CC by handing cc_viewport to
+ * gen7_3dstate_pointer() together with the matching opcode.
+ */
+static inline void
+gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(struct ilo_builder *builder,
+                                        uint32_t cc_viewport)
+{
+   const int subop = GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_CC;
+
+   gen7_3dstate_pointer(builder, subop, cc_viewport);
+}
+
+/*
+ * Emit the GEN7 form of 3DSTATE_CC_STATE_POINTERS by handing
+ * color_calc_state to gen7_3dstate_pointer().  The opcode constant used is
+ * the GEN6-prefixed one.
+ */
+static inline void
+gen7_3DSTATE_CC_STATE_POINTERS(struct ilo_builder *builder,
+                               uint32_t color_calc_state)
+{
+   const int subop = GEN6_RENDER_OPCODE_3DSTATE_CC_STATE_POINTERS;
+
+   gen7_3dstate_pointer(builder, subop, color_calc_state);
+}
+
+/*
+ * Emit 3DSTATE_DEPTH_STENCIL_STATE_POINTERS by handing depth_stencil_state
+ * to gen7_3dstate_pointer() together with the matching opcode.
+ */
+static inline void
+gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(struct ilo_builder *builder,
+                                          uint32_t depth_stencil_state)
+{
+   const int subop = GEN7_RENDER_OPCODE_3DSTATE_DEPTH_STENCIL_STATE_POINTERS;
+
+   gen7_3dstate_pointer(builder, subop, depth_stencil_state);
+}
+
+/*
+ * Emit 3DSTATE_BLEND_STATE_POINTERS by handing blend_state to
+ * gen7_3dstate_pointer() together with the matching opcode.
+ */
+static inline void
+gen7_3DSTATE_BLEND_STATE_POINTERS(struct ilo_builder *builder,
+                                  uint32_t blend_state)
+{
+   const int subop = GEN7_RENDER_OPCODE_3DSTATE_BLEND_STATE_POINTERS;
+
+   gen7_3dstate_pointer(builder, subop, blend_state);
+}
+
+/*
+ * Write a CLIP_VIEWPORT array (four dwords per viewport) through
+ * ilo_builder_state_pointer() and return the offset that call reports.
+ * GEN6 only.
+ *
+ * Each element holds the raw float bits of min_gbx, max_gbx, min_gby and
+ * max_gby, in that order.  num_viewports must be in [1, 16].
+ */
+static inline uint32_t
+gen6_CLIP_VIEWPORT(struct ilo_builder *builder,
+                   const struct ilo_viewport_cso *viewports,
+                   unsigned num_viewports)
+{
+   const int state_align = 32;
+   const int state_len = 4 * num_viewports;
+   uint32_t offset, *base;
+   unsigned idx;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 6);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 193:
+    *
+    *     "The viewport-related state is stored as an array of up to 16
+    *      elements..."
+    */
+   assert(num_viewports && num_viewports <= 16);
+
+   offset = ilo_builder_state_pointer(builder,
+         ILO_BUILDER_ITEM_CLIP_VIEWPORT, state_align, state_len, &base);
+
+   for (idx = 0; idx < num_viewports; idx++) {
+      const struct ilo_viewport_cso *src = viewports + idx;
+      uint32_t *elem = base + 4 * idx;
+
+      elem[0] = fui(src->min_gbx);
+      elem[1] = fui(src->max_gbx);
+      elem[2] = fui(src->min_gby);
+      elem[3] = fui(src->max_gby);
+   }
+
+   return offset;
+}
+
+/*
+ * Write an SF_VIEWPORT array (eight dwords per viewport) through
+ * ilo_builder_state_pointer() and return the offset that call reports.
+ * GEN6 only.
+ *
+ * Each element holds the raw float bits of m00, m11, m22, m30, m31 and m32
+ * followed by two zero dwords.  num_viewports must be in [1, 16].
+ */
+static inline uint32_t
+gen6_SF_VIEWPORT(struct ilo_builder *builder,
+                 const struct ilo_viewport_cso *viewports,
+                 unsigned num_viewports)
+{
+   const int state_align = 32;
+   const int state_len = 8 * num_viewports;
+   uint32_t offset, *base;
+   unsigned idx;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 6);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 262:
+    *
+    *     "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
+    *      stored as an array of up to 16 elements..."
+    */
+   assert(num_viewports && num_viewports <= 16);
+
+   offset = ilo_builder_state_pointer(builder,
+         ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &base);
+
+   for (idx = 0; idx < num_viewports; idx++) {
+      const struct ilo_viewport_cso *src = viewports + idx;
+      uint32_t *elem = base + 8 * idx;
+
+      elem[0] = fui(src->m00);
+      elem[1] = fui(src->m11);
+      elem[2] = fui(src->m22);
+      elem[3] = fui(src->m30);
+      elem[4] = fui(src->m31);
+      elem[5] = fui(src->m32);
+      /* the last two dwords of each element are always zero */
+      elem[6] = 0;
+      elem[7] = 0;
+   }
+
+   return offset;
+}
+
+/*
+ * Write an SF_CLIP_VIEWPORT array (sixteen dwords per viewport) through
+ * ilo_builder_state_pointer() and return the offset that call reports.
+ * GEN7 and GEN7.5 only.
+ *
+ * Each element holds the raw float bits of m00, m11, m22, m30, m31, m32 in
+ * dwords 0-5 and of min_gbx, max_gbx, min_gby, max_gby in dwords 8-11; all
+ * remaining dwords are zero.  num_viewports must be in [1, 16].
+ */
+static inline uint32_t
+gen7_SF_CLIP_VIEWPORT(struct ilo_builder *builder,
+                      const struct ilo_viewport_cso *viewports,
+                      unsigned num_viewports)
+{
+   const int state_align = 64;
+   const int state_len = 16 * num_viewports;
+   uint32_t offset, *base;
+   unsigned idx;
+
+   ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 270:
+    *
+    *     "The viewport-specific state used by both the SF and CL units
+    *      (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
+    *      of which contains the DWords described below. The start of each
+    *      element is spaced 16 DWords apart. The location of first element of
+    *      the array, as specified by both Pointer to SF_VIEWPORT and Pointer
+    *      to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
+    */
+   assert(num_viewports && num_viewports <= 16);
+
+   offset = ilo_builder_state_pointer(builder,
+         ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &base);
+
+   for (idx = 0; idx < num_viewports; idx++) {
+      const struct ilo_viewport_cso *src = viewports + idx;
+      uint32_t *elem = base + 16 * idx;
+
+      /* dwords 0-7: same layout gen6_SF_VIEWPORT() writes */
+      elem[0] = fui(src->m00);
+      elem[1] = fui(src->m11);
+      elem[2] = fui(src->m22);
+      elem[3] = fui(src->m30);
+      elem[4] = fui(src->m31);
+      elem[5] = fui(src->m32);
+      elem[6] = 0;
+      elem[7] = 0;
+
+      /* dwords 8-11: same layout gen6_CLIP_VIEWPORT() writes */
+      elem[8] = fui(src->min_gbx);
+      elem[9] = fui(src->max_gbx);
+      elem[10] = fui(src->min_gby);
+      elem[11] = fui(src->max_gby);
+
+      /* dwords 12-15: zero padding up to the 16-dword element size */
+      elem[12] = 0;
+      elem[13] = 0;
+      elem[14] = 0;
+      elem[15] = 0;
+   }
+
+   return offset;
+}
+
+/*
+ * Write a CC_VIEWPORT array (two dwords per viewport) through
+ * ilo_builder_state_pointer() and return the offset that call reports.
+ * Accepted on GEN6 through GEN7.5.
+ *
+ * Each element holds the raw float bits of min_z followed by max_z.
+ * num_viewports must be in [1, 16].
+ */
+static inline uint32_t
+gen6_CC_VIEWPORT(struct ilo_builder *builder,
+                 const struct ilo_viewport_cso *viewports,
+                 unsigned num_viewports)
+{
+   const int state_align = 32;
+   const int state_len = 2 * num_viewports;
+   uint32_t offset, *base;
+   unsigned idx;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 385:
+    *
+    *     "The viewport state is stored as an array of up to 16 elements..."
+    */
+   assert(num_viewports && num_viewports <= 16);
+
+   offset = ilo_builder_state_pointer(builder,
+         ILO_BUILDER_ITEM_CC_VIEWPORT, state_align, state_len, &base);
+
+   for (idx = 0; idx < num_viewports; idx++) {
+      uint32_t *elem = base + 2 * idx;
+
+      elem[0] = fui(viewports[idx].min_z);
+      elem[1] = fui(viewports[idx].max_z);
+   }
+
+   return offset;
+}
+
+/*
+ * Copy the first 2 * num_viewports dwords of scissor->payload into the
+ * builder via ilo_builder_state_write() and return the offset that call
+ * reports.  Accepted on GEN6 through GEN7.5.
+ *
+ * num_viewports must be in [1, 16], and the payload must hold at least that
+ * many dword pairs.
+ */
+static inline uint32_t
+gen6_SCISSOR_RECT(struct ilo_builder *builder,
+                  const struct ilo_scissor_state *scissor,
+                  unsigned num_viewports)
+{
+   const int state_align = 32;
+   const int state_len = 2 * num_viewports;
+   uint32_t offset;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 263:
+    *
+    *     "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
+    *      stored as an array of up to 16 elements..."
+    */
+   assert(num_viewports && num_viewports <= 16);
+   assert(Elements(scissor->payload) >= state_len);
+
+   offset = ilo_builder_state_write(builder, ILO_BUILDER_ITEM_SCISSOR_RECT,
+         state_align, state_len, scissor->payload);
+
+   return offset;
+}
+
+/*
+ * Write a six-dword COLOR_CALC_STATE through ilo_builder_state_pointer()
+ * and return the offset that call reports.  Accepted on GEN6 through
+ * GEN7.5.
+ *
+ * DW0 packs stencil_ref->ref_value[0] at bit 24 and ref_value[1] at bit 16
+ * together with the UNORM8 alpha-test flag, DW1 is alpha_ref, and DW2-DW5
+ * are the raw float bits of the four blend_color components.
+ */
+static inline uint32_t
+gen6_COLOR_CALC_STATE(struct ilo_builder *builder,
+                      const struct pipe_stencil_ref *stencil_ref,
+                      ubyte alpha_ref,
+                      const struct pipe_blend_color *blend_color)
+{
+   const int state_align = 64;
+   const int state_len = 6;
+   uint32_t state_offset, *dw;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+
+   state_offset = ilo_builder_state_pointer(builder,
+         ILO_BUILDER_ITEM_COLOR_CALC, state_align, state_len, &dw);
+
+   /*
+    * Widen the reference values to uint32_t before shifting.  A narrow
+    * unsigned value is otherwise promoted to (signed) int, and shifting
+    * anything >= 0x80 left by 24 overflows int, which is undefined
+    * behavior in C.
+    */
+   dw[0] = (uint32_t) stencil_ref->ref_value[0] << 24 |
+           (uint32_t) stencil_ref->ref_value[1] << 16 |
+           GEN6_CC_DW0_ALPHATEST_UNORM8;
+   dw[1] = alpha_ref;
+   dw[2] = fui(blend_color->color[0]);
+   dw[3] = fui(blend_color->color[1]);
+   dw[4] = fui(blend_color->color[2]);
+   dw[5] = fui(blend_color->color[3]);
+
+   return state_offset;
+}
+
+/*
+ * Copy the first three dwords of dsa->payload into the builder via
+ * ilo_builder_state_write() and return the offset that call reports.
+ * Accepted on GEN6 through GEN7.5.
+ */
+static inline uint32_t
+gen6_DEPTH_STENCIL_STATE(struct ilo_builder *builder,
+                         const struct ilo_dsa_state *dsa)
+{
+   const int state_align = 64;
+   const int state_len = 3;
+   uint32_t offset;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+
+   /* the payload must be large enough to supply state_len dwords */
+   STATIC_ASSERT(Elements(dsa->payload) >= state_len);
+
+   offset = ilo_builder_state_write(builder, ILO_BUILDER_ITEM_DEPTH_STENCIL,
+         state_align, state_len, dsa->payload);
+
+   return offset;
+}
+
+/*
+ * Build the BLEND_STATE array (two dwords per render target) through
+ * ilo_builder_state_pointer() and return the offset that call reports.
+ * Accepted on GEN6 through GEN7.5.
+ *
+ * One element is written per color buffer in fb.  When fb has no color
+ * buffers, a single element is still written if dsa->dw_alpha is non-zero;
+ * otherwise nothing is written and 0 is returned.
+ *
+ * Each element starts from the cso payload and then conditionally ORs in
+ * the blend, logicop, alpha-modifier and alpha-test bits depending on the
+ * render target format and the sample count.
+ */
+static inline uint32_t
+gen6_BLEND_STATE(struct ilo_builder *builder,
+ const struct ilo_blend_state *blend,
+ const struct ilo_fb_state *fb,
+ const struct ilo_dsa_state *dsa)
+{
+ const int state_align = 64;
+ int state_len;
+ uint32_t state_offset, *dw;
+ unsigned num_targets, i;
+
+ ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 376:
+ *
+ * "The blend state is stored as an array of up to 8 elements..."
+ */
+ num_targets = fb->state.nr_cbufs;
+ assert(num_targets <= 8);
+
+ if (!num_targets) {
+ if (!dsa->dw_alpha)
+ return 0;
+ /* to be able to reference alpha func */
+ num_targets = 1;
+ }
+
+ state_len = 2 * num_targets;
+
+ state_offset = ilo_builder_state_pointer(builder,
+ ILO_BUILDER_ITEM_BLEND, state_align, state_len, &dw);
+
+ for (i = 0; i < num_targets; i++) {
+ /*
+ * without independent blending, every element is built from cso[0] and
+ * from the format of color buffer 0
+ */
+ const unsigned idx = (blend->independent_blend_enable) ? i : 0;
+ const struct ilo_blend_cso *cso = &blend->cso[idx];
+ const int num_samples = fb->num_samples;
+ /* NULL when idx has no color buffer bound */
+ const struct util_format_description *format_desc =
+ (idx < fb->state.nr_cbufs && fb->state.cbufs[idx]) ?
+ util_format_description(fb->state.cbufs[idx]->format) : NULL;
+ bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
+
+ /* defaults, kept as-is when there is no format description */
+ rt_is_unorm = true;
+ rt_is_pure_integer = false;
+ rt_dst_alpha_forced_one = false;
+
+ if (format_desc) {
+ int ch;
+
+ switch (format_desc->format) {
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ /* force alpha to one when the HW format has alpha */
+ assert(ilo_translate_render_format(builder->dev,
+ PIPE_FORMAT_B8G8R8X8_UNORM) ==
+ GEN6_FORMAT_B8G8R8A8_UNORM);
+ rt_dst_alpha_forced_one = true;
+ break;
+ default:
+ break;
+ }
+
+ /*
+ * classify the format from its non-void channels: any pure-integer
+ * channel settles the result immediately, while any channel that is
+ * not normalized unsigned clears rt_is_unorm
+ */
+ for (ch = 0; ch < 4; ch++) {
+ if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
+ continue;
+
+ if (format_desc->channel[ch].pure_integer) {
+ rt_is_unorm = false;
+ rt_is_pure_integer = true;
+ break;
+ }
+
+ if (!format_desc->channel[ch].normalized ||
+ format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
+ rt_is_unorm = false;
+ }
+ }
+
+ dw[0] = cso->payload[0];
+ dw[1] = cso->payload[1];
+
+ /* no blend bits are added for pure-integer render targets */
+ if (!rt_is_pure_integer) {
+ if (rt_dst_alpha_forced_one)
+ dw[0] |= cso->dw_blend_dst_alpha_forced_one;
+ else
+ dw[0] |= cso->dw_blend;
+ }
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 365:
+ *
+ * "Logic Ops are only supported on *_UNORM surfaces (excluding
+ * _SRGB variants), otherwise Logic Ops must be DISABLED."
+ *
+ * Since logicop is ignored for non-UNORM color buffers, no special care
+ * is needed.
+ */
+ if (rt_is_unorm)
+ dw[1] |= cso->dw_logicop;
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 356:
+ *
+ * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
+ * Dither both must be disabled."
+ *
+ * There is no such limitation on GEN7, or for AlphaToOne. But GL
+ * requires that anyway.
+ */
+ if (num_samples > 1)
+ dw[1] |= cso->dw_alpha_mod;
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 382:
+ *
+ * "Alpha Test can only be enabled if Pixel Shader outputs a float
+ * alpha value."
+ */
+ if (!rt_is_pure_integer)
+ dw[1] |= dsa->dw_alpha;
+
+ /* advance to the next two-dword element */
+ dw += 2;
+ }
+
+ return state_offset;
+}
+
+#endif /* ILO_BUILDER_3D_BOTTOM_H */
diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.h b/src/gallium/drivers/ilo/ilo_builder_3d_top.h
index 9739665d753..e742f63d698 100644
--- a/src/gallium/drivers/ilo/ilo_gpe_gen7.h
+++ b/src/gallium/drivers/ilo/ilo_builder_3d_top.h
@@ -1,7 +1,7 @@
/*
* Mesa 3-D graphics library
*
- * Copyright (C) 2013 LunarG, Inc.
+ * Copyright (C) 2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,328 +25,632 @@
* Chia-I Wu <[email protected]>
*/
-#ifndef ILO_GPE_GEN7_H
-#define ILO_GPE_GEN7_H
+#ifndef ILO_BUILDER_3D_TOP_H
+#define ILO_BUILDER_3D_TOP_H
+#include "genhw/genhw.h"
#include "intel_winsys.h"
#include "ilo_common.h"
-#include "ilo_cp.h"
+#include "ilo_gpe.h"
#include "ilo_resource.h"
#include "ilo_shader.h"
-#include "ilo_gpe_gen6.h"
+#include "ilo_builder.h"
static inline void
-gen7_3DSTATE_CLEAR_PARAMS(struct ilo_builder *builder,
- uint32_t clear_val)
+gen6_3DSTATE_URB(struct ilo_builder *builder,
+ int vs_total_size, int gs_total_size,
+ int vs_entry_size, int gs_entry_size)
{
const uint8_t cmd_len = 3;
- const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) |
- (cmd_len - 2);
+ const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (cmd_len - 2);
+ const int row_size = 128; /* 1024 bits */
+ int vs_alloc_size, gs_alloc_size;
+ int vs_num_entries, gs_num_entries;
uint32_t *dw;
- ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+ ILO_DEV_ASSERT(builder->dev, 6, 6);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = clear_val;
- dw[2] = 1;
-}
+ /* in 1024-bit URB rows */
+ vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
+ gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
-static inline void
-gen7_3DSTATE_VF(struct ilo_builder *builder,
- bool enable_cut_index,
- uint32_t cut_index)
-{
- const uint8_t cmd_len = 2;
- uint32_t dw0 = GEN75_RENDER_CMD(3D, 3DSTATE_VF) | (cmd_len - 2);
- uint32_t *dw;
+ /* the valid range is [1, 5] */
+ if (!vs_alloc_size)
+ vs_alloc_size = 1;
+ if (!gs_alloc_size)
+ gs_alloc_size = 1;
+ assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
- ILO_DEV_ASSERT(builder->dev, 7.5, 7.5);
+ /* the valid range is [24, 256] in multiples of 4 */
+ vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
+ if (vs_num_entries > 256)
+ vs_num_entries = 256;
+ assert(vs_num_entries >= 24);
- if (enable_cut_index)
- dw0 |= GEN75_VF_DW0_CUT_INDEX_ENABLE;
+ /* the valid range is [0, 256] in multiples of 4 */
+ gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
+ if (gs_num_entries > 256)
+ gs_num_entries = 256;
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = dw0;
- dw[1] = cut_index;
+ dw[1] = (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT |
+ vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT;
+ dw[2] = gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT |
+ (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT;
}
static inline void
-gen7_3dstate_pointer(struct ilo_builder *builder,
- int subop, uint32_t pointer)
+gen7_3dstate_push_constant_alloc(struct ilo_builder *builder,
+ int subop, int offset, int size)
{
const uint8_t cmd_len = 2;
const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER |
GEN6_RENDER_SUBTYPE_3D |
subop | (cmd_len - 2);
uint32_t *dw;
+ int end;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+ /* VS, HS, DS, GS, and PS variants */
+ assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS &&
+ subop <= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 68:
+ *
+ * "(A table that says the maximum size of each constant buffer is
+ * 16KB)"
+ *
+ * From the Ivy Bridge PRM, volume 2 part 1, page 115:
+ *
+ * "The sum of the Constant Buffer Offset and the Constant Buffer Size
+ * may not exceed the maximum value of the Constant Buffer Size."
+ *
+ * Thus, the valid range of buffer end is [0KB, 16KB].
+ */
+ end = (offset + size) / 1024;
+ if (end > 16) {
+ assert(!"invalid constant buffer end");
+ end = 16;
+ }
+
+ /* the valid range of buffer offset is [0KB, 15KB] */
+ offset = (offset + 1023) / 1024;
+ if (offset > 15) {
+ assert(!"invalid constant buffer offset");
+ offset = 15;
+ }
+
+ if (offset > end) {
+ assert(!size);
+ offset = end;
+ }
+
+ /* the valid range of buffer size is [0KB, 15KB] */
+ size = end - offset;
+ if (size > 15) {
+ assert(!"invalid constant buffer size");
+ size = 15;
+ }
+
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = dw0;
- dw[1] = pointer;
+ dw[1] = offset << GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT |
+ size;
}
static inline void
-gen7_3DSTATE_CC_STATE_POINTERS(struct ilo_builder *builder,
- uint32_t color_calc_state)
+gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder,
+ int offset, int size)
{
- gen7_3dstate_pointer(builder,
- GEN6_RENDER_OPCODE_3DSTATE_CC_STATE_POINTERS, color_calc_state);
+ gen7_3dstate_push_constant_alloc(builder,
+ GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS, offset, size);
}
static inline void
-gen7_3DSTATE_GS(struct ilo_builder *builder,
- const struct ilo_shader_state *gs,
- int num_samplers)
+gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS(struct ilo_builder *builder,
+ int offset, int size)
{
- const uint8_t cmd_len = 7;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
- const struct ilo_shader_cso *cso;
- uint32_t dw2, dw4, dw5, *dw;
-
- ILO_DEV_ASSERT(builder->dev, 7, 7.5);
-
- if (!gs) {
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = 0;
- dw[2] = 0;
- dw[3] = 0;
- dw[4] = 0;
- dw[5] = GEN7_GS_DW5_STATISTICS;
- dw[6] = 0;
- return;
- }
-
- cso = ilo_shader_get_kernel_cso(gs);
- dw2 = cso->payload[0];
- dw4 = cso->payload[1];
- dw5 = cso->payload[2];
-
- dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = ilo_shader_get_kernel_offset(gs);
- dw[2] = dw2;
- dw[3] = 0; /* scratch */
- dw[4] = dw4;
- dw[5] = dw5;
- dw[6] = 0;
+ gen7_3dstate_push_constant_alloc(builder,
+ GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_HS, offset, size);
}
static inline void
-gen7_3DSTATE_SF(struct ilo_builder *builder,
- const struct ilo_rasterizer_state *rasterizer,
- enum pipe_format zs_format)
+gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS(struct ilo_builder *builder,
+ int offset, int size)
{
- const uint8_t cmd_len = 7;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2);
- const int num_samples = 1;
- uint32_t payload[6], *dw;
-
- ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+ gen7_3dstate_push_constant_alloc(builder,
+ GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_DS, offset, size);
+}
- ilo_gpe_gen6_fill_3dstate_sf_raster(builder->dev,
- rasterizer, num_samples, zs_format,
- payload, Elements(payload));
+/*
+ * GS variant of 3DSTATE_PUSH_CONSTANT_ALLOC: forwards offset and size to
+ * gen7_3dstate_push_constant_alloc() together with the GS opcode.
+ */
+static inline void
+gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(struct ilo_builder *builder,
+                                    int offset, int size)
+{
+   const int subop = GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_GS;
+
+   gen7_3dstate_push_constant_alloc(builder, subop, offset, size);
+}
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- memcpy(&dw[1], payload, sizeof(payload));
+/*
+ * PS variant of 3DSTATE_PUSH_CONSTANT_ALLOC: forwards offset and size to
+ * gen7_3dstate_push_constant_alloc() together with the PS opcode.
+ */
+static inline void
+gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder,
+                                    int offset, int size)
+{
+   const int subop = GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS;
+
+   gen7_3dstate_push_constant_alloc(builder, subop, offset, size);
 }
static inline void
-gen7_3DSTATE_WM(struct ilo_builder *builder,
- const struct ilo_shader_state *fs,
- const struct ilo_rasterizer_state *rasterizer,
- bool cc_may_kill, uint32_t hiz_op)
+gen7_3dstate_urb(struct ilo_builder *builder,
+ int subop, int offset, int size,
+ int entry_size)
{
- const uint8_t cmd_len = 3;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
- const int num_samples = 1;
- uint32_t dw1, dw2, *dw;
+ const uint8_t cmd_len = 2;
+ const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER |
+ GEN6_RENDER_SUBTYPE_3D |
+ subop | (cmd_len - 2);
+ const int row_size = 64; /* 512 bits */
+ int alloc_size, num_entries, min_entries, max_entries;
+ uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- /* see ilo_gpe_init_rasterizer_wm() */
- if (rasterizer) {
- dw1 = rasterizer->wm.payload[0];
- dw2 = rasterizer->wm.payload[1];
+ /* VS, HS, DS, and GS variants */
+ assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_URB_VS &&
+ subop <= GEN7_RENDER_OPCODE_3DSTATE_URB_GS);
- assert(!hiz_op);
- dw1 |= GEN7_WM_DW1_STATISTICS;
- }
- else {
- dw1 = hiz_op;
- dw2 = 0;
- }
+ /* in multiples of 8KB */
+ assert(offset % 8192 == 0);
+ offset /= 8192;
+
+ /* in multiples of 512-bit rows */
+ alloc_size = (entry_size + row_size - 1) / row_size;
+ if (!alloc_size)
+ alloc_size = 1;
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 34:
+ *
+ * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
+ * cause performance to decrease due to banking in the URB. Element
+ * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
+ */
+ if (subop == GEN7_RENDER_OPCODE_3DSTATE_URB_VS && alloc_size == 5)
+ alloc_size = 6;
- if (fs) {
- const struct ilo_shader_cso *fs_cso = ilo_shader_get_kernel_cso(fs);
+ /* in multiples of 8 */
+ num_entries = (size / row_size / alloc_size) & ~7;
- dw1 |= fs_cso->payload[3];
- }
+ switch (subop) {
+ case GEN7_RENDER_OPCODE_3DSTATE_URB_VS:
+ switch (ilo_dev_gen(builder->dev)) {
+ case ILO_GEN(7.5):
+ max_entries = (builder->dev->gt >= 2) ? 1664 : 640;
+ min_entries = (builder->dev->gt >= 2) ? 64 : 32;
+ break;
+ case ILO_GEN(7):
+ default:
+ max_entries = (builder->dev->gt == 2) ? 704 : 512;
+ min_entries = 32;
+ break;
+ }
- if (cc_may_kill)
- dw1 |= GEN7_WM_DW1_PS_ENABLE | GEN7_WM_DW1_PS_KILL;
+ assert(num_entries >= min_entries);
+ if (num_entries > max_entries)
+ num_entries = max_entries;
+ break;
+ case GEN7_RENDER_OPCODE_3DSTATE_URB_HS:
+ max_entries = (builder->dev->gt == 2) ? 64 : 32;
+ if (num_entries > max_entries)
+ num_entries = max_entries;
+ break;
+ case GEN7_RENDER_OPCODE_3DSTATE_URB_DS:
+ if (num_entries)
+ assert(num_entries >= 138);
+ break;
+ case GEN7_RENDER_OPCODE_3DSTATE_URB_GS:
+ switch (ilo_dev_gen(builder->dev)) {
+ case ILO_GEN(7.5):
+ max_entries = (builder->dev->gt >= 2) ? 640 : 256;
+ break;
+ case ILO_GEN(7):
+ default:
+ max_entries = (builder->dev->gt == 2) ? 320 : 192;
+ break;
+ }
- if (num_samples > 1) {
- dw1 |= rasterizer->wm.dw_msaa_rast;
- dw2 |= rasterizer->wm.dw_msaa_disp;
+ if (num_entries > max_entries)
+ num_entries = max_entries;
+ break;
+ default:
+ break;
}
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = dw0;
- dw[1] = dw1;
- dw[2] = dw2;
+ dw[1] = offset << GEN7_URB_ANY_DW1_OFFSET__SHIFT |
+ (alloc_size - 1) << GEN7_URB_ANY_DW1_ENTRY_SIZE__SHIFT |
+ num_entries;
}
static inline void
-gen7_3dstate_constant(struct ilo_builder *builder,
- int subop,
- const uint32_t *bufs, const int *sizes,
- int num_bufs)
+gen7_3DSTATE_URB_VS(struct ilo_builder *builder,
+ int offset, int size, int entry_size)
{
- const uint8_t cmd_len = 7;
- const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER |
- GEN6_RENDER_SUBTYPE_3D |
- subop | (cmd_len - 2);
- uint32_t payload[6], *dw;
- int total_read_length, i;
+ gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_VS,
+ offset, size, entry_size);
+}
- ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+/*
+ * HS variant of 3DSTATE_URB: forwards offset, size and entry_size to
+ * gen7_3dstate_urb() together with the HS opcode.
+ */
+static inline void
+gen7_3DSTATE_URB_HS(struct ilo_builder *builder,
+                    int offset, int size, int entry_size)
+{
+   const int subop = GEN7_RENDER_OPCODE_3DSTATE_URB_HS;
+
+   gen7_3dstate_urb(builder, subop, offset, size, entry_size);
+}
- /* VS, HS, DS, GS, and PS variants */
- assert(subop >= GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS &&
- subop <= GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS &&
- subop != GEN6_RENDER_OPCODE_3DSTATE_SAMPLE_MASK);
+/*
+ * DS variant of 3DSTATE_URB: forwards offset, size and entry_size to
+ * gen7_3dstate_urb() together with the DS opcode.
+ */
+static inline void
+gen7_3DSTATE_URB_DS(struct ilo_builder *builder,
+                    int offset, int size, int entry_size)
+{
+   const int subop = GEN7_RENDER_OPCODE_3DSTATE_URB_DS;
+
+   gen7_3dstate_urb(builder, subop, offset, size, entry_size);
+}
- assert(num_bufs <= 4);
+/*
+ * GS variant of 3DSTATE_URB: forwards offset, size and entry_size to
+ * gen7_3dstate_urb() together with the GS opcode.
+ */
+static inline void
+gen7_3DSTATE_URB_GS(struct ilo_builder *builder,
+                    int offset, int size, int entry_size)
+{
+   const int subop = GEN7_RENDER_OPCODE_3DSTATE_URB_GS;
+
+   gen7_3dstate_urb(builder, subop, offset, size, entry_size);
+}
- payload[0] = 0;
- payload[1] = 0;
+static inline void
+gen7_3DSTATE_VF(struct ilo_builder *builder,
+ bool enable_cut_index,
+ uint32_t cut_index)
+{
+ const uint8_t cmd_len = 2;
+ uint32_t dw0 = GEN75_RENDER_CMD(3D, 3DSTATE_VF) | (cmd_len - 2);
+ uint32_t *dw;
- total_read_length = 0;
- for (i = 0; i < 4; i++) {
- int read_len;
+ ILO_DEV_ASSERT(builder->dev, 7.5, 7.5);
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 112:
- *
- * "Constant buffers must be enabled in order from Constant Buffer 0
- * to Constant Buffer 3 within this command. For example, it is
- * not allowed to enable Constant Buffer 1 by programming a
- * non-zero value in the VS Constant Buffer 1 Read Length without a
- * non-zero value in VS Constant Buffer 0 Read Length."
- */
- if (i >= num_bufs || !sizes[i]) {
- for (; i < 4; i++) {
- assert(i >= num_bufs || !sizes[i]);
- payload[2 + i] = 0;
- }
- break;
- }
+ if (enable_cut_index)
+ dw0 |= GEN75_VF_DW0_CUT_INDEX_ENABLE;
- /* read lengths are in 256-bit units */
- read_len = (sizes[i] + 31) / 32;
- /* the lower 5 bits are used for memory object control state */
- assert(bufs[i] % 32 == 0);
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ dw[0] = dw0;
+ dw[1] = cut_index;
+}
- payload[i / 2] |= read_len << ((i % 2) ? 16 : 0);
- payload[2 + i] = bufs[i];
+static inline void
+gen6_3DSTATE_VF_STATISTICS(struct ilo_builder *builder,
+ bool enable)
+{
+ const uint8_t cmd_len = 1;
+ const uint32_t dw0 = GEN6_RENDER_CMD(SINGLE_DW, 3DSTATE_VF_STATISTICS) |
+ enable;
- total_read_length += read_len;
- }
+ ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+
+ ilo_builder_batch_write(builder, cmd_len, &dw0);
+}
+
+static inline void
+gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
+ const struct ilo_ve_state *ve,
+ const struct ilo_vb_state *vb)
+{
+ uint8_t cmd_len;
+ uint32_t dw0, *dw;
+ unsigned hw_idx, pos;
+
+ ILO_DEV_ASSERT(builder->dev, 6, 7.5);
/*
- * From the Ivy Bridge PRM, volume 2 part 1, page 113:
+ * From the Sandy Bridge PRM, volume 2 part 1, page 82:
*
- * "The sum of all four read length fields must be less than or equal
- * to the size of 64"
+ * "From 1 to 33 VBs can be specified..."
*/
- assert(total_read_length <= 64);
+ assert(ve->vb_count <= 33);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ if (!ve->vb_count)
+ return;
+
+ cmd_len = 1 + 4 * ve->vb_count;
+ dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) |
+ (cmd_len - 2);
+
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = dw0;
- memcpy(&dw[1], payload, sizeof(payload));
-}
-static inline void
-gen7_3DSTATE_CONSTANT_VS(struct ilo_builder *builder,
- const uint32_t *bufs, const int *sizes,
- int num_bufs)
-{
- gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS,
- bufs, sizes, num_bufs);
+ dw++;
+ pos++;
+ for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
+ const unsigned instance_divisor = ve->instance_divisors[hw_idx];
+ const unsigned pipe_idx = ve->vb_mapping[hw_idx];
+ const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx];
+
+ dw[0] = hw_idx << GEN6_VB_STATE_DW0_INDEX__SHIFT;
+
+ if (instance_divisor)
+ dw[0] |= GEN6_VB_STATE_DW0_ACCESS_INSTANCEDATA;
+ else
+ dw[0] |= GEN6_VB_STATE_DW0_ACCESS_VERTEXDATA;
+
+ if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
+ dw[0] |= GEN7_VB_STATE_DW0_ADDR_MODIFIED;
+
+ /* use null vb if there is no buffer or the stride is out of range */
+ if (cso->buffer && cso->stride <= 2048) {
+ const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
+ const uint32_t start_offset = cso->buffer_offset;
+ const uint32_t end_offset = buf->bo_size - 1;
+
+ dw[0] |= cso->stride << GEN6_VB_STATE_DW0_PITCH__SHIFT;
+ ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0);
+ ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0);
+ }
+ else {
+ dw[0] |= 1 << 13;
+ dw[1] = 0;
+ dw[2] = 0;
+ }
+
+ dw[3] = instance_divisor;
+
+ dw += 4;
+ pos += 4;
+ }
}
static inline void
-gen7_3DSTATE_CONSTANT_GS(struct ilo_builder *builder,
- const uint32_t *bufs, const int *sizes,
- int num_bufs)
+ve_init_cso_with_components(const struct ilo_dev_info *dev,
+ int comp0, int comp1, int comp2, int comp3,
+ struct ilo_ve_cso *cso)
{
- gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_GS,
- bufs, sizes, num_bufs);
+ ILO_DEV_ASSERT(dev, 6, 7.5);
+
+ STATIC_ASSERT(Elements(cso->payload) >= 2);
+ cso->payload[0] = GEN6_VE_STATE_DW0_VALID;
+ cso->payload[1] =
+ comp0 << GEN6_VE_STATE_DW1_COMP0__SHIFT |
+ comp1 << GEN6_VE_STATE_DW1_COMP1__SHIFT |
+ comp2 << GEN6_VE_STATE_DW1_COMP2__SHIFT |
+ comp3 << GEN6_VE_STATE_DW1_COMP3__SHIFT;
}
static inline void
-gen7_3DSTATE_CONSTANT_PS(struct ilo_builder *builder,
- const uint32_t *bufs, const int *sizes,
- int num_bufs)
+ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
+ struct ilo_ve_cso *cso)
{
- gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_PS,
- bufs, sizes, num_bufs);
+ int format;
+
+ ILO_DEV_ASSERT(dev, 6, 7.5);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 94:
+ *
+ * "- This bit (Edge Flag Enable) must only be ENABLED on the last
+ * valid VERTEX_ELEMENT structure.
+ *
+ * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
+ * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
+ *
+ * - The Source Element Format must be set to the UINT format.
+ *
+ * - [DevSNB]: Edge Flags are not supported for QUADLIST
+ * primitives. Software may elect to convert QUADLIST primitives
+ * to some set of corresponding edge-flag-supported primitive
+ * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
+ */
+
+ cso->payload[0] |= GEN6_VE_STATE_DW0_EDGE_FLAG_ENABLE;
+ cso->payload[1] =
+ GEN6_VFCOMP_STORE_SRC << GEN6_VE_STATE_DW1_COMP0__SHIFT |
+ GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP1__SHIFT |
+ GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP2__SHIFT |
+ GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP3__SHIFT;
+
+ /*
+ * Edge flags have format GEN6_FORMAT_R8_UINT when defined via
+ * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined
+ * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
+ *
+ * Since all the hardware cares about is whether the flags are zero or not,
+ * we can treat them as GEN6_FORMAT_R32_UINT in the latter case.
+ */
+ format = (cso->payload[0] >> GEN6_VE_STATE_DW0_FORMAT__SHIFT) & 0x1ff;
+ if (format == GEN6_FORMAT_R32_FLOAT) {
+ STATIC_ASSERT(GEN6_FORMAT_R32_UINT == GEN6_FORMAT_R32_FLOAT - 1);
+ cso->payload[0] -= (1 << GEN6_VE_STATE_DW0_FORMAT__SHIFT);
+ }
+ else {
+ assert(format == GEN6_FORMAT_R8_UINT);
+ }
}
static inline void
-gen7_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder,
- unsigned sample_mask,
- int num_samples)
+gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder,
+ const struct ilo_ve_state *ve,
+ bool last_velement_edgeflag,
+ bool prepend_generated_ids)
{
- const uint8_t cmd_len = 2;
- const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) |
- (cmd_len - 2);
- uint32_t *dw;
+ uint8_t cmd_len;
+ uint32_t dw0, *dw;
+ unsigned i;
- ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+ ILO_DEV_ASSERT(builder->dev, 6, 7.5);
/*
- * From the Ivy Bridge PRM, volume 2 part 1, page 294:
- *
- * "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
- * (Sample Mask) must be zero.
+ * From the Sandy Bridge PRM, volume 2 part 1, page 93:
*
- * If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
- * must be zero."
+ * "Up to 34 (DevSNB+) vertex elements are supported."
*/
- sample_mask &= valid_mask;
+ assert(ve->count + prepend_generated_ids <= 34);
+
+ STATIC_ASSERT(Elements(ve->cso[0].payload) == 2);
+
+ if (!ve->count && !prepend_generated_ids) {
+ struct ilo_ve_cso dummy;
+
+ ve_init_cso_with_components(builder->dev,
+ GEN6_VFCOMP_STORE_0,
+ GEN6_VFCOMP_STORE_0,
+ GEN6_VFCOMP_STORE_0,
+ GEN6_VFCOMP_STORE_1_FP,
+ &dummy);
+
+ cmd_len = 3;
+ dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) |
+ (cmd_len - 2);
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ dw[0] = dw0;
+ memcpy(&dw[1], dummy.payload, sizeof(dummy.payload));
+
+ return;
+ }
+
+ cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
+ dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) |
+ (cmd_len - 2);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = dw0;
- dw[1] = sample_mask;
+ dw++;
+
+ if (prepend_generated_ids) {
+ struct ilo_ve_cso gen_ids;
+
+ ve_init_cso_with_components(builder->dev,
+ GEN6_VFCOMP_STORE_VID,
+ GEN6_VFCOMP_STORE_IID,
+ GEN6_VFCOMP_NOSTORE,
+ GEN6_VFCOMP_NOSTORE,
+ &gen_ids);
+
+ memcpy(dw, gen_ids.payload, sizeof(gen_ids.payload));
+ dw += 2;
+ }
+
+ if (last_velement_edgeflag) {
+ struct ilo_ve_cso edgeflag;
+
+ for (i = 0; i < ve->count - 1; i++)
+ memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload));
+
+ edgeflag = ve->cso[i];
+ ve_set_cso_edgeflag(builder->dev, &edgeflag);
+ memcpy(&dw[2 * i], edgeflag.payload, sizeof(edgeflag.payload));
+ }
+ else {
+ for (i = 0; i < ve->count; i++)
+ memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload));
+ }
}
static inline void
-gen7_3DSTATE_CONSTANT_HS(struct ilo_builder *builder,
- const uint32_t *bufs, const int *sizes,
- int num_bufs)
+gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
+ const struct ilo_ib_state *ib,
+ bool enable_cut_index)
{
- gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_HS,
- bufs, sizes, num_bufs);
+ const uint8_t cmd_len = 3;
+ struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
+ uint32_t start_offset, end_offset;
+ int format;
+ unsigned pos;
+ uint32_t dw0, *dw;
+
+ ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+
+ if (!buf)
+ return;
+
+ /* this is moved to the new 3DSTATE_VF */
+ if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5))
+ assert(!enable_cut_index);
+
+ switch (ib->hw_index_size) {
+ case 4:
+ format = GEN6_IB_DW0_FORMAT_DWORD;
+ break;
+ case 2:
+ format = GEN6_IB_DW0_FORMAT_WORD;
+ break;
+ case 1:
+ format = GEN6_IB_DW0_FORMAT_BYTE;
+ break;
+ default:
+ assert(!"unknown index size");
+ format = GEN6_IB_DW0_FORMAT_BYTE;
+ break;
+ }
+
+ /*
+ * set start_offset to 0 here and adjust pipe_draw_info::start with
+ * ib->draw_start_offset in 3DPRIMITIVE
+ */
+ start_offset = 0;
+ end_offset = buf->bo_size;
+
+ /* end_offset must also be aligned and is inclusive */
+ end_offset -= (end_offset % ib->hw_index_size);
+ end_offset--;
+
+ dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) |
+ format |
+ (cmd_len - 2);
+ if (enable_cut_index)
+ dw0 |= GEN6_IB_DW0_CUT_INDEX_ENABLE;
+
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ dw[0] = dw0;
+ ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0);
+ ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0);
}
static inline void
-gen7_3DSTATE_CONSTANT_DS(struct ilo_builder *builder,
- const uint32_t *bufs, const int *sizes,
- int num_bufs)
+gen6_3DSTATE_VS(struct ilo_builder *builder,
+ const struct ilo_shader_state *vs,
+ int num_samplers)
{
- gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS,
- bufs, sizes, num_bufs);
+ const uint8_t cmd_len = 6;
+ const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
+ const struct ilo_shader_cso *cso;
+ uint32_t dw2, dw4, dw5, *dw;
+
+ ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+
+ if (!vs) {
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ dw[0] = dw0;
+ dw[1] = 0;
+ dw[2] = 0;
+ dw[3] = 0;
+ dw[4] = 0;
+ dw[5] = 0;
+
+ return;
+ }
+
+ cso = ilo_shader_get_kernel_cso(vs);
+ dw2 = cso->payload[0];
+ dw4 = cso->payload[1];
+ dw5 = cso->payload[2];
+
+ dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ dw[0] = dw0;
+ dw[1] = ilo_shader_get_kernel_offset(vs);
+ dw[2] = dw2;
+ dw[3] = 0; /* scratch */
+ dw[4] = dw4;
+ dw[5] = dw5;
}
static inline void
@@ -411,6 +715,139 @@ gen7_3DSTATE_DS(struct ilo_builder *builder,
}
static inline void
+gen6_3DSTATE_GS(struct ilo_builder *builder,
+ const struct ilo_shader_state *gs,
+ const struct ilo_shader_state *vs,
+ int verts_per_prim)
+{
+ const uint8_t cmd_len = 7;
+ const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
+ uint32_t dw1, dw2, dw4, dw5, dw6, *dw;
+
+ ILO_DEV_ASSERT(builder->dev, 6, 6);
+
+ if (gs) {
+ const struct ilo_shader_cso *cso;
+
+ dw1 = ilo_shader_get_kernel_offset(gs);
+
+ cso = ilo_shader_get_kernel_cso(gs);
+ dw2 = cso->payload[0];
+ dw4 = cso->payload[1];
+ dw5 = cso->payload[2];
+ dw6 = cso->payload[3];
+ }
+ else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
+ struct ilo_shader_cso cso;
+ enum ilo_kernel_param param;
+
+ switch (verts_per_prim) {
+ case 1:
+ param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
+ break;
+ case 2:
+ param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
+ break;
+ default:
+ param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
+ break;
+ }
+
+ dw1 = ilo_shader_get_kernel_offset(vs) +
+ ilo_shader_get_kernel_param(vs, param);
+
+ /* cannot use VS's CSO */
+ ilo_gpe_init_gs_cso_gen6(builder->dev, vs, &cso);
+ dw2 = cso.payload[0];
+ dw4 = cso.payload[1];
+ dw5 = cso.payload[2];
+ dw6 = cso.payload[3];
+ }
+ else {
+ dw1 = 0;
+ dw2 = 0;
+ dw4 = 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT;
+ dw5 = GEN6_GS_DW5_STATISTICS;
+ dw6 = 0;
+ }
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ dw[0] = dw0;
+ dw[1] = dw1;
+ dw[2] = dw2;
+ dw[3] = 0;
+ dw[4] = dw4;
+ dw[5] = dw5;
+ dw[6] = dw6;
+}
+
+static inline void
+gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder,
+ int index, unsigned svbi,
+ unsigned max_svbi,
+ bool load_vertex_count)
+{
+ const uint8_t cmd_len = 4;
+ const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS_SVB_INDEX) |
+ (cmd_len - 2);
+ uint32_t dw1, *dw;
+
+ ILO_DEV_ASSERT(builder->dev, 6, 6);
+ assert(index >= 0 && index < 4);
+
+ dw1 = index << GEN6_SVBI_DW1_INDEX__SHIFT;
+ if (load_vertex_count)
+ dw1 |= GEN6_SVBI_DW1_LOAD_INTERNAL_VERTEX_COUNT;
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ dw[0] = dw0;
+ dw[1] = dw1;
+ dw[2] = svbi;
+ dw[3] = max_svbi;
+}
+
+static inline void
+gen7_3DSTATE_GS(struct ilo_builder *builder,
+ const struct ilo_shader_state *gs,
+ int num_samplers)
+{
+ const uint8_t cmd_len = 7;
+ const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
+ const struct ilo_shader_cso *cso;
+ uint32_t dw2, dw4, dw5, *dw;
+
+ ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+
+ if (!gs) {
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ dw[0] = dw0;
+ dw[1] = 0;
+ dw[2] = 0;
+ dw[3] = 0;
+ dw[4] = 0;
+ dw[5] = GEN7_GS_DW5_STATISTICS;
+ dw[6] = 0;
+ return;
+ }
+
+ cso = ilo_shader_get_kernel_cso(gs);
+ dw2 = cso->payload[0];
+ dw4 = cso->payload[1];
+ dw5 = cso->payload[2];
+
+ dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ dw[0] = dw0;
+ dw[1] = ilo_shader_get_kernel_offset(gs);
+ dw[2] = dw2;
+ dw[3] = 0; /* scratch */
+ dw[4] = dw4;
+ dw[5] = dw5;
+ dw[6] = 0;
+}
+
+static inline void
gen7_3DSTATE_STREAMOUT(struct ilo_builder *builder,
unsigned buffer_mask,
int vertex_attrib_count,
@@ -471,124 +908,204 @@ gen7_3DSTATE_STREAMOUT(struct ilo_builder *builder,
}
static inline void
-gen7_3DSTATE_SBE(struct ilo_builder *builder,
- const struct ilo_rasterizer_state *rasterizer,
- const struct ilo_shader_state *fs)
+gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder,
+ const struct pipe_stream_output_info *so_info)
{
- const uint8_t cmd_len = 14;
- const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2);
- uint32_t payload[13], *dw;
+ uint16_t cmd_len;
+ uint32_t dw0, *dw;
+ int buffer_selects, num_entries, i;
+ uint16_t so_decls[128];
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- ilo_gpe_gen6_fill_3dstate_sf_sbe(builder->dev,
- rasterizer, fs, payload, Elements(payload));
+ buffer_selects = 0;
+ num_entries = 0;
+
+ if (so_info) {
+ int buffer_offsets[PIPE_MAX_SO_BUFFERS];
+
+ memset(buffer_offsets, 0, sizeof(buffer_offsets));
+
+ for (i = 0; i < so_info->num_outputs; i++) {
+ unsigned decl, buf, reg, mask;
+
+ buf = so_info->output[i].output_buffer;
+
+ /* pad with holes */
+ assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
+ while (buffer_offsets[buf] < so_info->output[i].dst_offset) {
+ int num_dwords;
+
+ num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
+ if (num_dwords > 4)
+ num_dwords = 4;
+
+ decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
+ GEN7_SO_DECL_HOLE_FLAG |
+ ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
+
+ so_decls[num_entries++] = decl;
+ buffer_offsets[buf] += num_dwords;
+ }
+
+ reg = so_info->output[i].register_index;
+ mask = ((1 << so_info->output[i].num_components) - 1) <<
+ so_info->output[i].start_component;
+
+ decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
+ reg << GEN7_SO_DECL_REG_INDEX__SHIFT |
+ mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
+
+ so_decls[num_entries++] = decl;
+ buffer_selects |= 1 << buf;
+ buffer_offsets[buf] += so_info->output[i].num_components;
+ }
+ }
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 201:
+ *
+ * "Errata: All 128 decls for all four streams must be included
+ * whenever this command is issued. The "Num Entries [n]" fields still
+ * contain the actual numbers of valid decls."
+ *
+ * Also note that "DWord Length" has 9 bits for this command, and the type
+ * of cmd_len is thus uint16_t.
+ */
+ cmd_len = 2 * 128 + 3;
+ dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_SO_DECL_LIST) | (cmd_len - 2);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = dw0;
- memcpy(&dw[1], payload, sizeof(payload));
+ dw[1] = 0 << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
+ 0 << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
+ 0 << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
+ buffer_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT;
+ dw[2] = 0 << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
+ 0 << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
+ 0 << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
+ num_entries << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT;
+ dw += 3;
+
+ for (i = 0; i < num_entries; i++) {
+ dw[0] = so_decls[i];
+ dw[1] = 0;
+ dw += 2;
+ }
+ for (; i < 128; i++) {
+ dw[0] = 0;
+ dw[1] = 0;
+ dw += 2;
+ }
}
static inline void
-gen7_3DSTATE_PS(struct ilo_builder *builder,
- const struct ilo_shader_state *fs,
- int num_samplers, bool dual_blend)
+gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder,
+ int index, int base, int stride,
+ const struct pipe_stream_output_target *so_target)
{
- const uint8_t cmd_len = 8;
- const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
- const struct ilo_shader_cso *cso;
- uint32_t dw2, dw4, dw5, *dw;
+ const uint8_t cmd_len = 4;
+ const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_SO_BUFFER) |
+ (cmd_len - 2);
+ struct ilo_buffer *buf;
+ int end;
+ unsigned pos;
+ uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- if (!fs) {
- int max_threads;
-
- /* GPU hangs if none of the dispatch enable bits is set */
- dw4 = GEN7_PS_DW4_8_PIXEL_DISPATCH;
-
- /* see brwCreateContext() */
- switch (ilo_dev_gen(builder->dev)) {
- case ILO_GEN(7.5):
- max_threads = (builder->dev->gt == 3) ? 408 :
- (builder->dev->gt == 2) ? 204 : 102;
- dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
- break;
- case ILO_GEN(7):
- default:
- max_threads = (builder->dev->gt == 2) ? 172 : 48;
- dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
- break;
- }
-
+ if (!so_target || !so_target->buffer) {
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = dw0;
- dw[1] = 0;
+ dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT;
dw[2] = 0;
dw[3] = 0;
- dw[4] = dw4;
- dw[5] = 0;
- dw[6] = 0;
- dw[7] = 0;
return;
}
- cso = ilo_shader_get_kernel_cso(fs);
- dw2 = cso->payload[0];
- dw4 = cso->payload[1];
- dw5 = cso->payload[2];
+ buf = ilo_buffer(so_target->buffer);
- dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
+ /* DWord-aligned */
+ assert(stride % 4 == 0 && base % 4 == 0);
+ assert(so_target->buffer_offset % 4 == 0);
- if (dual_blend)
- dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
+ stride &= ~3;
+ base = (base + so_target->buffer_offset) & ~3;
+ end = (base + so_target->buffer_size) & ~3;
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = dw0;
- dw[1] = ilo_shader_get_kernel_offset(fs);
- dw[2] = dw2;
- dw[3] = 0; /* scratch */
- dw[4] = dw4;
- dw[5] = dw5;
- dw[6] = 0; /* kernel 1 */
- dw[7] = 0; /* kernel 2 */
-}
+ dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT |
+ stride;
-static inline void
-gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(struct ilo_builder *builder,
- uint32_t sf_clip_viewport)
-{
- gen7_3dstate_pointer(builder,
- GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
- sf_clip_viewport);
+ ilo_builder_batch_reloc(builder, pos + 2,
+ buf->bo, base, INTEL_RELOC_WRITE);
+ ilo_builder_batch_reloc(builder, pos + 3,
+ buf->bo, end, INTEL_RELOC_WRITE);
}
static inline void
-gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(struct ilo_builder *builder,
- uint32_t cc_viewport)
+gen6_3DSTATE_BINDING_TABLE_POINTERS(struct ilo_builder *builder,
+ uint32_t vs_binding_table,
+ uint32_t gs_binding_table,
+ uint32_t ps_binding_table)
{
- gen7_3dstate_pointer(builder,
- GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
- cc_viewport);
+ const uint8_t cmd_len = 4;
+ const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_BINDING_TABLE_POINTERS) |
+ GEN6_PTR_BINDING_TABLE_DW0_VS_CHANGED |
+ GEN6_PTR_BINDING_TABLE_DW0_GS_CHANGED |
+ GEN6_PTR_BINDING_TABLE_DW0_PS_CHANGED |
+ (cmd_len - 2);
+ uint32_t *dw;
+
+ ILO_DEV_ASSERT(builder->dev, 6, 6);
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ dw[0] = dw0;
+ dw[1] = vs_binding_table;
+ dw[2] = gs_binding_table;
+ dw[3] = ps_binding_table;
}
static inline void
-gen7_3DSTATE_BLEND_STATE_POINTERS(struct ilo_builder *builder,
- uint32_t blend_state)
+gen6_3DSTATE_SAMPLER_STATE_POINTERS(struct ilo_builder *builder,
+ uint32_t vs_sampler_state,
+ uint32_t gs_sampler_state,
+ uint32_t ps_sampler_state)
{
- gen7_3dstate_pointer(builder,
- GEN7_RENDER_OPCODE_3DSTATE_BLEND_STATE_POINTERS,
- blend_state);
+ const uint8_t cmd_len = 4;
+ const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLER_STATE_POINTERS) |
+ GEN6_PTR_SAMPLER_DW0_VS_CHANGED |
+ GEN6_PTR_SAMPLER_DW0_GS_CHANGED |
+ GEN6_PTR_SAMPLER_DW0_PS_CHANGED |
+ (cmd_len - 2);
+ uint32_t *dw;
+
+ ILO_DEV_ASSERT(builder->dev, 6, 6);
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ dw[0] = dw0;
+ dw[1] = vs_sampler_state;
+ dw[2] = gs_sampler_state;
+ dw[3] = ps_sampler_state;
}
static inline void
-gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(struct ilo_builder *builder,
- uint32_t depth_stencil_state)
+gen7_3dstate_pointer(struct ilo_builder *builder,
+ int subop, uint32_t pointer)
{
- gen7_3dstate_pointer(builder,
- GEN7_RENDER_OPCODE_3DSTATE_DEPTH_STENCIL_STATE_POINTERS,
- depth_stencil_state);
+ const uint8_t cmd_len = 2;
+ const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER |
+ GEN6_RENDER_SUBTYPE_3D |
+ subop | (cmd_len - 2);
+ uint32_t *dw;
+
+ ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ dw[0] = dw0;
+ dw[1] = pointer;
}
static inline void
@@ -628,15 +1145,6 @@ gen7_3DSTATE_BINDING_TABLE_POINTERS_GS(struct ilo_builder *builder,
}
static inline void
-gen7_3DSTATE_BINDING_TABLE_POINTERS_PS(struct ilo_builder *builder,
- uint32_t binding_table)
-{
- gen7_3dstate_pointer(builder,
- GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_PS,
- binding_table);
-}
-
-static inline void
gen7_3DSTATE_SAMPLER_STATE_POINTERS_VS(struct ilo_builder *builder,
uint32_t sampler_state)
{
@@ -672,459 +1180,445 @@ gen7_3DSTATE_SAMPLER_STATE_POINTERS_GS(struct ilo_builder *builder,
sampler_state);
}
-static inline void
-gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS(struct ilo_builder *builder,
- uint32_t sampler_state)
+static inline unsigned
+gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs, int max_read_length,
+ uint32_t *dw, int num_dwords)
{
- gen7_3dstate_pointer(builder,
- GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_PS,
- sampler_state);
-}
+ unsigned enabled = 0x0;
+ int total_read_length, i;
-static inline void
-gen7_3dstate_urb(struct ilo_builder *builder,
- int subop, int offset, int size,
- int entry_size)
-{
- const uint8_t cmd_len = 2;
- const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER |
- GEN6_RENDER_SUBTYPE_3D |
- subop | (cmd_len - 2);
- const int row_size = 64; /* 512 bits */
- int alloc_size, num_entries, min_entries, max_entries;
- uint32_t *dw;
+ assert(num_dwords == 4);
- ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+ total_read_length = 0;
+ for (i = 0; i < 4; i++) {
+ if (i < num_bufs && sizes[i]) {
+ /* in 256-bit units minus one */
+ const int read_len = (sizes[i] + 31) / 32 - 1;
- /* VS, HS, DS, and GS variants */
- assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_URB_VS &&
- subop <= GEN7_RENDER_OPCODE_3DSTATE_URB_GS);
+ assert(bufs[i] % 32 == 0);
+ assert(read_len < 32);
- /* in multiples of 8KB */
- assert(offset % 8192 == 0);
- offset /= 8192;
+ enabled |= 1 << i;
+ dw[i] = bufs[i] | read_len;
- /* in multiple of 512-bit rows */
- alloc_size = (entry_size + row_size - 1) / row_size;
- if (!alloc_size)
- alloc_size = 1;
+ total_read_length += read_len + 1;
+ }
+ else {
+ dw[i] = 0;
+ }
+ }
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 34:
- *
- * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
- * cause performance to decrease due to banking in the URB. Element
- * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
- */
- if (subop == GEN7_RENDER_OPCODE_3DSTATE_URB_VS && alloc_size == 5)
- alloc_size = 6;
+ assert(total_read_length <= max_read_length);
- /* in multiples of 8 */
- num_entries = (size / row_size / alloc_size) & ~7;
+ return enabled;
+}
- switch (subop) {
- case GEN7_RENDER_OPCODE_3DSTATE_URB_VS:
- switch (ilo_dev_gen(builder->dev)) {
- case ILO_GEN(7.5):
- max_entries = (builder->dev->gt >= 2) ? 1664 : 640;
- min_entries = (builder->dev->gt >= 2) ? 64 : 32;
- break;
- case ILO_GEN(7):
- default:
- max_entries = (builder->dev->gt == 2) ? 704 : 512;
- min_entries = 32;
- break;
- }
+static inline void
+gen6_3DSTATE_CONSTANT_VS(struct ilo_builder *builder,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs)
+{
+ const uint8_t cmd_len = 5;
+ uint32_t buf_dw[4], buf_enabled;
+ uint32_t dw0, *dw;
- assert(num_entries >= min_entries);
- if (num_entries > max_entries)
- num_entries = max_entries;
- break;
- case GEN7_RENDER_OPCODE_3DSTATE_URB_HS:
- max_entries = (builder->dev->gt == 2) ? 64 : 32;
- if (num_entries > max_entries)
- num_entries = max_entries;
- break;
- case GEN7_RENDER_OPCODE_3DSTATE_URB_DS:
- if (num_entries)
- assert(num_entries >= 138);
- break;
- case GEN7_RENDER_OPCODE_3DSTATE_URB_GS:
- switch (ilo_dev_gen(builder->dev)) {
- case ILO_GEN(7.5):
- max_entries = (builder->dev->gt >= 2) ? 640 : 256;
- break;
- case ILO_GEN(7):
- default:
- max_entries = (builder->dev->gt == 2) ? 320 : 192;
- break;
- }
+ ILO_DEV_ASSERT(builder->dev, 6, 6);
+ assert(num_bufs <= 4);
- if (num_entries > max_entries)
- num_entries = max_entries;
- break;
- default:
- break;
- }
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 138:
+ *
+ * "The sum of all four read length fields (each incremented to
+ * represent the actual read length) must be less than or equal to 32"
+ */
+ buf_enabled = gen6_fill_3dstate_constant(builder->dev,
+ bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw));
+
+ dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_VS) |
+ buf_enabled << 12 |
+ (cmd_len - 2);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = dw0;
- dw[1] = offset << GEN7_URB_ANY_DW1_OFFSET__SHIFT |
- (alloc_size - 1) << GEN7_URB_ANY_DW1_ENTRY_SIZE__SHIFT |
- num_entries;
+ memcpy(&dw[1], buf_dw, sizeof(buf_dw));
}
static inline void
-gen7_3DSTATE_URB_VS(struct ilo_builder *builder,
- int offset, int size, int entry_size)
+gen6_3DSTATE_CONSTANT_GS(struct ilo_builder *builder,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs)
{
- gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_VS,
- offset, size, entry_size);
-}
+ const uint8_t cmd_len = 5;
+ uint32_t buf_dw[4], buf_enabled;
+ uint32_t dw0, *dw;
-static inline void
-gen7_3DSTATE_URB_HS(struct ilo_builder *builder,
- int offset, int size, int entry_size)
-{
- gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_HS,
- offset, size, entry_size);
-}
+ ILO_DEV_ASSERT(builder->dev, 6, 6);
+ assert(num_bufs <= 4);
-static inline void
-gen7_3DSTATE_URB_DS(struct ilo_builder *builder,
- int offset, int size, int entry_size)
-{
- gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_DS,
- offset, size, entry_size);
-}
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 161:
+ *
+ * "The sum of all four read length fields (each incremented to
+ * represent the actual read length) must be less than or equal to 64"
+ */
+ buf_enabled = gen6_fill_3dstate_constant(builder->dev,
+ bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
-static inline void
-gen7_3DSTATE_URB_GS(struct ilo_builder *builder,
- int offset, int size, int entry_size)
-{
- gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_GS,
- offset, size, entry_size);
+ dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_GS) |
+ buf_enabled << 12 |
+ (cmd_len - 2);
+
+ ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ dw[0] = dw0;
+ memcpy(&dw[1], buf_dw, sizeof(buf_dw));
}
static inline void
-gen7_3dstate_push_constant_alloc(struct ilo_builder *builder,
- int subop, int offset, int size)
+gen7_3dstate_constant(struct ilo_builder *builder,
+ int subop,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs)
{
- const uint8_t cmd_len = 2;
+ const uint8_t cmd_len = 7;
const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER |
GEN6_RENDER_SUBTYPE_3D |
subop | (cmd_len - 2);
- uint32_t *dw;
- int end;
+ uint32_t payload[6], *dw;
+ int total_read_length, i;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
/* VS, HS, DS, GS, and PS variants */
- assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS &&
- subop <= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS);
+ assert(subop >= GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS &&
+ subop <= GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS &&
+ subop != GEN6_RENDER_OPCODE_3DSTATE_SAMPLE_MASK);
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 68:
- *
- * "(A table that says the maximum size of each constant buffer is
- * 16KB")
- *
- * From the Ivy Bridge PRM, volume 2 part 1, page 115:
- *
- * "The sum of the Constant Buffer Offset and the Constant Buffer Size
- * may not exceed the maximum value of the Constant Buffer Size."
- *
- * Thus, the valid range of buffer end is [0KB, 16KB].
- */
- end = (offset + size) / 1024;
- if (end > 16) {
- assert(!"invalid constant buffer end");
- end = 16;
- }
+ assert(num_bufs <= 4);
- /* the valid range of buffer offset is [0KB, 15KB] */
- offset = (offset + 1023) / 1024;
- if (offset > 15) {
- assert(!"invalid constant buffer offset");
- offset = 15;
- }
+ payload[0] = 0;
+ payload[1] = 0;
- if (offset > end) {
- assert(!size);
- offset = end;
- }
+ total_read_length = 0;
+ for (i = 0; i < 4; i++) {
+ int read_len;
- /* the valid range of buffer size is [0KB, 15KB] */
- size = end - offset;
- if (size > 15) {
- assert(!"invalid constant buffer size");
- size = 15;
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 112:
+ *
+ * "Constant buffers must be enabled in order from Constant Buffer 0
+ * to Constant Buffer 3 within this command. For example, it is
+ * not allowed to enable Constant Buffer 1 by programming a
+ * non-zero value in the VS Constant Buffer 1 Read Length without a
+ * non-zero value in VS Constant Buffer 0 Read Length."
+ */
+ if (i >= num_bufs || !sizes[i]) {
+ for (; i < 4; i++) {
+ assert(i >= num_bufs || !sizes[i]);
+ payload[2 + i] = 0;
+ }
+ break;
+ }
+
+ /* read lengths are in 256-bit units */
+ read_len = (sizes[i] + 31) / 32;
+ /* the lower 5 bits are used for memory object control state */
+ assert(bufs[i] % 32 == 0);
+
+ payload[i / 2] |= read_len << ((i % 2) ? 16 : 0);
+ payload[2 + i] = bufs[i];
+
+ total_read_length += read_len;
}
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 113:
+ *
+ * "The sum of all four read length fields must be less than or equal
+ * to the size of 64"
+ */
+ assert(total_read_length <= 64);
+
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = dw0;
- dw[1] = offset << GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT |
- size;
-}
-
-static inline void
-gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder,
- int offset, int size)
-{
- gen7_3dstate_push_constant_alloc(builder,
- GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS, offset, size);
+ memcpy(&dw[1], payload, sizeof(payload));
}
static inline void
-gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS(struct ilo_builder *builder,
- int offset, int size)
+gen7_3DSTATE_CONSTANT_VS(struct ilo_builder *builder,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs)
{
- gen7_3dstate_push_constant_alloc(builder,
- GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_HS, offset, size);
+ gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS,
+ bufs, sizes, num_bufs);
}
static inline void
-gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS(struct ilo_builder *builder,
- int offset, int size)
+gen7_3DSTATE_CONSTANT_HS(struct ilo_builder *builder,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs)
{
- gen7_3dstate_push_constant_alloc(builder,
- GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_DS, offset, size);
+ gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_HS,
+ bufs, sizes, num_bufs);
}
static inline void
-gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(struct ilo_builder *builder,
- int offset, int size)
+gen7_3DSTATE_CONSTANT_DS(struct ilo_builder *builder,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs)
{
- gen7_3dstate_push_constant_alloc(builder,
- GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_GS, offset, size);
+ gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS,
+ bufs, sizes, num_bufs);
}
static inline void
-gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder,
- int offset, int size)
+gen7_3DSTATE_CONSTANT_GS(struct ilo_builder *builder,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs)
{
- gen7_3dstate_push_constant_alloc(builder,
- GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS, offset, size);
+ gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_GS,
+ bufs, sizes, num_bufs);
}
-static inline void
-gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder,
- const struct pipe_stream_output_info *so_info)
+static inline uint32_t
+gen6_BINDING_TABLE_STATE(struct ilo_builder *builder,
+ uint32_t *surface_states,
+ int num_surface_states)
{
- uint16_t cmd_len;
- uint32_t dw0, *dw;
- int buffer_selects, num_entries, i;
- uint16_t so_decls[128];
+ const int state_align = 32;
+ const int state_len = num_surface_states;
- ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+ ILO_DEV_ASSERT(builder->dev, 6, 7.5);
- buffer_selects = 0;
- num_entries = 0;
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 69:
+ *
+ * "It is stored as an array of up to 256 elements..."
+ */
+ assert(num_surface_states <= 256);
- if (so_info) {
- int buffer_offsets[PIPE_MAX_SO_BUFFERS];
+ if (!num_surface_states)
+ return 0;
- memset(buffer_offsets, 0, sizeof(buffer_offsets));
+ return ilo_builder_surface_write(builder, ILO_BUILDER_ITEM_BINDING_TABLE,
+ state_align, state_len, surface_states);
+}
- for (i = 0; i < so_info->num_outputs; i++) {
- unsigned decl, buf, reg, mask;
+static inline uint32_t
+gen6_SURFACE_STATE(struct ilo_builder *builder,
+ const struct ilo_view_surface *surf,
+ bool for_render)
+{
+ const int state_align = 32;
+ const int state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 8 : 6;
+ uint32_t state_offset;
- buf = so_info->output[i].output_buffer;
+ ILO_DEV_ASSERT(builder->dev, 6, 7.5);
- /* pad with holes */
- assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
- while (buffer_offsets[buf] < so_info->output[i].dst_offset) {
- int num_dwords;
+ state_offset = ilo_builder_surface_write(builder, ILO_BUILDER_ITEM_SURFACE,
+ state_align, state_len, surf->payload);
- num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
- if (num_dwords > 4)
- num_dwords = 4;
+ if (surf->bo) {
+ ilo_builder_surface_reloc(builder, state_offset, 1, surf->bo,
+ surf->payload[1], (for_render) ? INTEL_RELOC_WRITE : 0);
+ }
- decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
- GEN7_SO_DECL_HOLE_FLAG |
- ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
+ return state_offset;
+}
- so_decls[num_entries++] = decl;
- buffer_offsets[buf] += num_dwords;
- }
+static inline uint32_t
+gen6_so_SURFACE_STATE(struct ilo_builder *builder,
+ const struct pipe_stream_output_target *so,
+ const struct pipe_stream_output_info *so_info,
+ int so_index)
+{
+ struct ilo_buffer *buf = ilo_buffer(so->buffer);
+ unsigned bo_offset, struct_size;
+ enum pipe_format elem_format;
+ struct ilo_view_surface surf;
- reg = so_info->output[i].register_index;
- mask = ((1 << so_info->output[i].num_components) - 1) <<
- so_info->output[i].start_component;
+ ILO_DEV_ASSERT(builder->dev, 6, 6);
- decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
- reg << GEN7_SO_DECL_REG_INDEX__SHIFT |
- mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
+ bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
+ struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
- so_decls[num_entries++] = decl;
- buffer_selects |= 1 << buf;
- buffer_offsets[buf] += so_info->output[i].num_components;
- }
+ switch (so_info->output[so_index].num_components) {
+ case 1:
+ elem_format = PIPE_FORMAT_R32_FLOAT;
+ break;
+ case 2:
+ elem_format = PIPE_FORMAT_R32G32_FLOAT;
+ break;
+ case 3:
+ elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
+ break;
+ case 4:
+ elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ break;
+ default:
+ assert(!"unexpected SO components length");
+ elem_format = PIPE_FORMAT_R32_FLOAT;
+ break;
}
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 201:
- *
- * "Errata: All 128 decls for all four streams must be included
- * whenever this command is issued. The "Num Entries [n]" fields still
- * contain the actual numbers of valid decls."
- *
- * Also note that "DWord Length" has 9 bits for this command, and the type
- * of cmd_len is thus uint16_t.
- */
- cmd_len = 2 * 128 + 3;
- dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_SO_DECL_LIST) | (cmd_len - 2);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = 0 << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
- 0 << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
- 0 << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
- buffer_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT;
- dw[2] = 0 << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
- 0 << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
- 0 << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
- num_entries << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT;
- dw += 3;
+ ilo_gpe_init_view_surface_for_buffer_gen6(builder->dev, buf, bo_offset,
+ so->buffer_size, struct_size, elem_format, false, true, &surf);
- for (i = 0; i < num_entries; i++) {
- dw[0] = so_decls[i];
- dw[1] = 0;
- dw += 2;
- }
- for (; i < 128; i++) {
- dw[0] = 0;
- dw[1] = 0;
- dw += 2;
- }
+ return gen6_SURFACE_STATE(builder, &surf, false);
}
-static inline void
-gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder,
- int index, int base, int stride,
- const struct pipe_stream_output_target *so_target)
+static inline uint32_t
+gen6_SAMPLER_STATE(struct ilo_builder *builder,
+ const struct ilo_sampler_cso * const *samplers,
+ const struct pipe_sampler_view * const *views,
+ const uint32_t *sampler_border_colors,
+ int num_samplers)
{
- const uint8_t cmd_len = 4;
- const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_SO_BUFFER) |
- (cmd_len - 2);
- struct ilo_buffer *buf;
- int end;
- unsigned pos;
- uint32_t *dw;
+ const int state_align = 32;
+ const int state_len = 4 * num_samplers;
+ uint32_t state_offset, *dw;
+ int i;
- ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+ ILO_DEV_ASSERT(builder->dev, 6, 7.5);
- if (!so_target || !so_target->buffer) {
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT;
- dw[2] = 0;
- dw[3] = 0;
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 101:
+ *
+ * "The sampler state is stored as an array of up to 16 elements..."
+ */
+ assert(num_samplers <= 16);
- return;
- }
+ if (!num_samplers)
+ return 0;
- buf = ilo_buffer(so_target->buffer);
+ state_offset = ilo_builder_state_pointer(builder,
+ ILO_BUILDER_ITEM_SAMPLER, state_align, state_len, &dw);
+
+ for (i = 0; i < num_samplers; i++) {
+ const struct ilo_sampler_cso *sampler = samplers[i];
+ const struct pipe_sampler_view *view = views[i];
+ const uint32_t border_color = sampler_border_colors[i];
+ uint32_t dw_filter, dw_wrap;
+
+ /* there may be holes */
+ if (!sampler || !view) {
+ /* disabled sampler */
+ dw[0] = 1 << 31;
+ dw[1] = 0;
+ dw[2] = 0;
+ dw[3] = 0;
+ dw += 4;
+
+ continue;
+ }
- /* DWord-aligned */
- assert(stride % 4 == 0 && base % 4 == 0);
- assert(so_target->buffer_offset % 4 == 0);
+ /* determine filter and wrap modes */
+ switch (view->texture->target) {
+ case PIPE_TEXTURE_1D:
+ dw_filter = (sampler->anisotropic) ?
+ sampler->dw_filter_aniso : sampler->dw_filter;
+ dw_wrap = sampler->dw_wrap_1d;
+ break;
+ case PIPE_TEXTURE_3D:
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 103:
+ *
+ * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
+ * surfaces of type SURFTYPE_3D."
+ */
+ dw_filter = sampler->dw_filter;
+ dw_wrap = sampler->dw_wrap;
+ break;
+ case PIPE_TEXTURE_CUBE:
+ dw_filter = (sampler->anisotropic) ?
+ sampler->dw_filter_aniso : sampler->dw_filter;
+ dw_wrap = sampler->dw_wrap_cube;
+ break;
+ default:
+ dw_filter = (sampler->anisotropic) ?
+ sampler->dw_filter_aniso : sampler->dw_filter;
+ dw_wrap = sampler->dw_wrap;
+ break;
+ }
- stride &= ~3;
- base = (base + so_target->buffer_offset) & ~3;
- end = (base + so_target->buffer_size) & ~3;
+ dw[0] = sampler->payload[0];
+ dw[1] = sampler->payload[1];
+ assert(!(border_color & 0x1f));
+ dw[2] = border_color;
+ dw[3] = sampler->payload[2];
- pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT |
- stride;
+ dw[0] |= dw_filter;
- ilo_builder_batch_reloc(builder, pos + 2,
- buf->bo, base, INTEL_RELOC_WRITE);
- ilo_builder_batch_reloc(builder, pos + 3,
- buf->bo, end, INTEL_RELOC_WRITE);
+ if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
+ dw[3] |= dw_wrap;
+ }
+ else {
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 21:
+ *
+ * "[DevSNB] Errata: Incorrect behavior is observed in cases
+ * where the min and mag mode filters are different and
+ * SurfMinLOD is nonzero. The determination of MagMode uses the
+ * following equation instead of the one in the above
+ * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
+ *
+ * As a way to work around that, we set Base to
+ * view->u.tex.first_level.
+ */
+ dw[0] |= view->u.tex.first_level << 22;
+
+ dw[1] |= dw_wrap;
+ }
+
+ dw += 4;
+ }
+
+ return state_offset;
}
-static inline void
-gen7_3DPRIMITIVE(struct ilo_builder *builder,
- const struct pipe_draw_info *info,
- const struct ilo_ib_state *ib,
- bool rectlist)
+static inline uint32_t
+gen6_SAMPLER_BORDER_COLOR_STATE(struct ilo_builder *builder,
+ const struct ilo_sampler_cso *sampler)
{
- const uint8_t cmd_len = 7;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | (cmd_len - 2);
- const int prim = (rectlist) ?
- GEN6_3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
- const int vb_access = (info->indexed) ?
- GEN7_3DPRIM_DW1_ACCESS_RANDOM :
- GEN7_3DPRIM_DW1_ACCESS_SEQUENTIAL;
- const uint32_t vb_start = info->start +
- ((info->indexed) ? ib->draw_start_offset : 0);
- uint32_t *dw;
+ const int state_align = 32;
+ const int state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 12;
- ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+ ILO_DEV_ASSERT(builder->dev, 6, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = vb_access | prim;
- dw[2] = info->count;
- dw[3] = vb_start;
- dw[4] = info->instance_count;
- dw[5] = info->start_instance;
- dw[6] = info->index_bias;
+ assert(Elements(sampler->payload) >= 3 + state_len);
+
+ /* see ilo_gpe_init_sampler_cso() */
+ return ilo_builder_state_write(builder, ILO_BUILDER_ITEM_BLOB,
+ state_align, state_len, &sampler->payload[3]);
}
static inline uint32_t
-gen7_SF_CLIP_VIEWPORT(struct ilo_builder *builder,
- const struct ilo_viewport_cso *viewports,
- unsigned num_viewports)
+gen6_push_constant_buffer(struct ilo_builder *builder,
+ int size, void **pcb)
{
- const int state_align = 64;
- const int state_len = 16 * num_viewports;
- uint32_t state_offset, *dw;
- unsigned i;
-
- ILO_DEV_ASSERT(builder->dev, 7, 7.5);
-
/*
- * From the Ivy Bridge PRM, volume 2 part 1, page 270:
- *
- * "The viewport-specific state used by both the SF and CL units
- * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
- * of which contains the DWords described below. The start of each
- * element is spaced 16 DWords apart. The location of first element of
- * the array, as specified by both Pointer to SF_VIEWPORT and Pointer
- * to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
+ * For all VS, GS, FS, and CS push constant buffers, they must be aligned
+ * to 32 bytes, and their sizes are specified in 256-bit units.
*/
- assert(num_viewports && num_viewports <= 16);
+ const int state_align = 32;
+ const int state_len = align(size, 32) / 4;
+ uint32_t state_offset;
+ char *buf;
+
+ ILO_DEV_ASSERT(builder->dev, 6, 7.5);
state_offset = ilo_builder_state_pointer(builder,
- ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &dw);
+ ILO_BUILDER_ITEM_BLOB, state_align, state_len, (uint32_t **) &buf);
- for (i = 0; i < num_viewports; i++) {
- const struct ilo_viewport_cso *vp = &viewports[i];
+ /* zero out the unused range */
+ if (size < state_len * 4)
+ memset(&buf[size], 0, state_len * 4 - size);
- dw[0] = fui(vp->m00);
- dw[1] = fui(vp->m11);
- dw[2] = fui(vp->m22);
- dw[3] = fui(vp->m30);
- dw[4] = fui(vp->m31);
- dw[5] = fui(vp->m32);
- dw[6] = 0;
- dw[7] = 0;
- dw[8] = fui(vp->min_gbx);
- dw[9] = fui(vp->max_gbx);
- dw[10] = fui(vp->min_gby);
- dw[11] = fui(vp->max_gby);
- dw[12] = 0;
- dw[13] = 0;
- dw[14] = 0;
- dw[15] = 0;
-
- dw += 16;
- }
+ if (pcb)
+ *pcb = buf;
return state_offset;
}
-#endif /* ILO_GPE_GEN7_H */
+#endif /* ILO_BUILDER_3D_TOP_H */
diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.h b/src/gallium/drivers/ilo/ilo_gpe_gen6.h
index eb537f801b8..f417710237a 100644
--- a/src/gallium/drivers/ilo/ilo_gpe_gen6.h
+++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.h
@@ -32,10 +32,6 @@
#include "intel_winsys.h"
#include "ilo_common.h"
-#include "ilo_cp.h"
-#include "ilo_format.h"
-#include "ilo_resource.h"
-#include "ilo_shader.h"
#include "ilo_gpe.h"
/**
@@ -58,34 +54,6 @@ ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
}
/**
- * Translate a pipe primitive type to the matching hardware primitive type.
- */
-static inline int
-ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
-{
- static const int prim_mapping[PIPE_PRIM_MAX] = {
- [PIPE_PRIM_POINTS] = GEN6_3DPRIM_POINTLIST,
- [PIPE_PRIM_LINES] = GEN6_3DPRIM_LINELIST,
- [PIPE_PRIM_LINE_LOOP] = GEN6_3DPRIM_LINELOOP,
- [PIPE_PRIM_LINE_STRIP] = GEN6_3DPRIM_LINESTRIP,
- [PIPE_PRIM_TRIANGLES] = GEN6_3DPRIM_TRILIST,
- [PIPE_PRIM_TRIANGLE_STRIP] = GEN6_3DPRIM_TRISTRIP,
- [PIPE_PRIM_TRIANGLE_FAN] = GEN6_3DPRIM_TRIFAN,
- [PIPE_PRIM_QUADS] = GEN6_3DPRIM_QUADLIST,
- [PIPE_PRIM_QUAD_STRIP] = GEN6_3DPRIM_QUADSTRIP,
- [PIPE_PRIM_POLYGON] = GEN6_3DPRIM_POLYGON,
- [PIPE_PRIM_LINES_ADJACENCY] = GEN6_3DPRIM_LINELIST_ADJ,
- [PIPE_PRIM_LINE_STRIP_ADJACENCY] = GEN6_3DPRIM_LINESTRIP_ADJ,
- [PIPE_PRIM_TRIANGLES_ADJACENCY] = GEN6_3DPRIM_TRILIST_ADJ,
- [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = GEN6_3DPRIM_TRISTRIP_ADJ,
- };
-
- assert(prim_mapping[prim]);
-
- return prim_mapping[prim];
-}
-
-/**
* Translate a pipe texture target to the matching hardware surface type.
*/
static inline int
@@ -112,1014 +80,6 @@ ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
}
}
-/**
- * Fill in DW2 to DW7 of 3DSTATE_SF.
- */
-static inline void
-ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
- const struct ilo_rasterizer_state *rasterizer,
- int num_samples,
- enum pipe_format depth_format,
- uint32_t *payload, unsigned payload_len)
-{
- assert(payload_len == Elements(rasterizer->sf.payload));
-
- if (rasterizer) {
- const struct ilo_rasterizer_sf *sf = &rasterizer->sf;
-
- memcpy(payload, sf->payload, sizeof(sf->payload));
- if (num_samples > 1)
- payload[1] |= sf->dw_msaa;
- }
- else {
- payload[0] = 0;
- payload[1] = (num_samples > 1) ? GEN7_SF_DW2_MSRASTMODE_ON_PATTERN : 0;
- payload[2] = 0;
- payload[3] = 0;
- payload[4] = 0;
- payload[5] = 0;
- }
-
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- int format;
-
- /* separate stencil */
- switch (depth_format) {
- case PIPE_FORMAT_Z16_UNORM:
- format = GEN6_ZFORMAT_D16_UNORM;
- break;
- case PIPE_FORMAT_Z32_FLOAT:
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- format = GEN6_ZFORMAT_D32_FLOAT;
- break;
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- format = GEN6_ZFORMAT_D24_UNORM_X8_UINT;
- break;
- default:
- /* FLOAT surface is assumed when there is no depth buffer */
- format = GEN6_ZFORMAT_D32_FLOAT;
- break;
- }
-
- payload[0] |= format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT;
- }
-}
-
-/**
- * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
- */
-static inline void
-ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
- const struct ilo_rasterizer_state *rasterizer,
- const struct ilo_shader_state *fs,
- uint32_t *dw, int num_dwords)
-{
- int output_count, vue_offset, vue_len;
- const struct ilo_kernel_routing *routing;
-
- ILO_DEV_ASSERT(dev, 6, 7.5);
- assert(num_dwords == 13);
-
- if (!fs) {
- memset(dw, 0, sizeof(dw[0]) * num_dwords);
- dw[0] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;
- return;
- }
-
- output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
- assert(output_count <= 32);
-
- routing = ilo_shader_get_kernel_routing(fs);
-
- vue_offset = routing->source_skip;
- assert(vue_offset % 2 == 0);
- vue_offset /= 2;
-
- vue_len = (routing->source_len + 1) / 2;
- if (!vue_len)
- vue_len = 1;
-
- dw[0] = output_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT |
- vue_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT |
- vue_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT;
- if (routing->swizzle_enable)
- dw[0] |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE;
-
- switch (rasterizer->state.sprite_coord_mode) {
- case PIPE_SPRITE_COORD_UPPER_LEFT:
- dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT;
- break;
- case PIPE_SPRITE_COORD_LOWER_LEFT:
- dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT;
- break;
- }
-
- STATIC_ASSERT(Elements(routing->swizzles) >= 16);
- memcpy(&dw[1], routing->swizzles, 2 * 16);
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 268:
- *
- * "This field (Point Sprite Texture Coordinate Enable) must be
- * programmed to 0 when non-point primitives are rendered."
- *
- * TODO We do not check that yet.
- */
- dw[9] = routing->point_sprite_enable;
-
- dw[10] = routing->const_interp_enable;
-
- /* WrapShortest enables */
- dw[11] = 0;
- dw[12] = 0;
-}
-
-static inline void
-gen6_3DSTATE_VF_STATISTICS(struct ilo_builder *builder,
- bool enable)
-{
- const uint8_t cmd_len = 1;
- const uint32_t dw0 = GEN6_RENDER_CMD(SINGLE_DW, 3DSTATE_VF_STATISTICS) |
- enable;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- ilo_builder_batch_write(builder, cmd_len, &dw0);
-}
-
-static inline void
-gen6_3DSTATE_BINDING_TABLE_POINTERS(struct ilo_builder *builder,
- uint32_t vs_binding_table,
- uint32_t gs_binding_table,
- uint32_t ps_binding_table)
-{
- const uint8_t cmd_len = 4;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_BINDING_TABLE_POINTERS) |
- GEN6_PTR_BINDING_TABLE_DW0_VS_CHANGED |
- GEN6_PTR_BINDING_TABLE_DW0_GS_CHANGED |
- GEN6_PTR_BINDING_TABLE_DW0_PS_CHANGED |
- (cmd_len - 2);
- uint32_t *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = vs_binding_table;
- dw[2] = gs_binding_table;
- dw[3] = ps_binding_table;
-}
-
-static inline void
-gen6_3DSTATE_SAMPLER_STATE_POINTERS(struct ilo_builder *builder,
- uint32_t vs_sampler_state,
- uint32_t gs_sampler_state,
- uint32_t ps_sampler_state)
-{
- const uint8_t cmd_len = 4;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLER_STATE_POINTERS) |
- GEN6_PTR_SAMPLER_DW0_VS_CHANGED |
- GEN6_PTR_SAMPLER_DW0_GS_CHANGED |
- GEN6_PTR_SAMPLER_DW0_PS_CHANGED |
- (cmd_len - 2);
- uint32_t *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = vs_sampler_state;
- dw[2] = gs_sampler_state;
- dw[3] = ps_sampler_state;
-}
-
-static inline void
-gen6_3DSTATE_URB(struct ilo_builder *builder,
- int vs_total_size, int gs_total_size,
- int vs_entry_size, int gs_entry_size)
-{
- const uint8_t cmd_len = 3;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (cmd_len - 2);
- const int row_size = 128; /* 1024 bits */
- int vs_alloc_size, gs_alloc_size;
- int vs_num_entries, gs_num_entries;
- uint32_t *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- /* in 1024-bit URB rows */
- vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
- gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
-
- /* the valid range is [1, 5] */
- if (!vs_alloc_size)
- vs_alloc_size = 1;
- if (!gs_alloc_size)
- gs_alloc_size = 1;
- assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
-
- /* the valid range is [24, 256] in multiples of 4 */
- vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
- if (vs_num_entries > 256)
- vs_num_entries = 256;
- assert(vs_num_entries >= 24);
-
- /* the valid range is [0, 256] in multiples of 4 */
- gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
- if (gs_num_entries > 256)
- gs_num_entries = 256;
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT |
- vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT;
- dw[2] = gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT |
- (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT;
-}
-
-static inline void
-gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
- const struct ilo_ve_state *ve,
- const struct ilo_vb_state *vb)
-{
- uint8_t cmd_len;
- uint32_t dw0, *dw;
- unsigned hw_idx, pos;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 82:
- *
- * "From 1 to 33 VBs can be specified..."
- */
- assert(ve->vb_count <= 33);
-
- if (!ve->vb_count)
- return;
-
- cmd_len = 1 + 4 * ve->vb_count;
- dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) |
- (cmd_len - 2);
-
- pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
-
- dw++;
- pos++;
- for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
- const unsigned instance_divisor = ve->instance_divisors[hw_idx];
- const unsigned pipe_idx = ve->vb_mapping[hw_idx];
- const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx];
-
- dw[0] = hw_idx << GEN6_VB_STATE_DW0_INDEX__SHIFT;
-
- if (instance_divisor)
- dw[0] |= GEN6_VB_STATE_DW0_ACCESS_INSTANCEDATA;
- else
- dw[0] |= GEN6_VB_STATE_DW0_ACCESS_VERTEXDATA;
-
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
- dw[0] |= GEN7_VB_STATE_DW0_ADDR_MODIFIED;
-
- /* use null vb if there is no buffer or the stride is out of range */
- if (cso->buffer && cso->stride <= 2048) {
- const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
- const uint32_t start_offset = cso->buffer_offset;
- const uint32_t end_offset = buf->bo_size - 1;
-
- dw[0] |= cso->stride << GEN6_VB_STATE_DW0_PITCH__SHIFT;
- ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0);
- ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0);
- }
- else {
- dw[0] |= 1 << 13;
- dw[1] = 0;
- dw[2] = 0;
- }
-
- dw[3] = instance_divisor;
-
- dw += 4;
- pos += 4;
- }
-}
-
-static inline void
-ve_init_cso_with_components(const struct ilo_dev_info *dev,
- int comp0, int comp1, int comp2, int comp3,
- struct ilo_ve_cso *cso)
-{
- ILO_DEV_ASSERT(dev, 6, 7.5);
-
- STATIC_ASSERT(Elements(cso->payload) >= 2);
- cso->payload[0] = GEN6_VE_STATE_DW0_VALID;
- cso->payload[1] =
- comp0 << GEN6_VE_STATE_DW1_COMP0__SHIFT |
- comp1 << GEN6_VE_STATE_DW1_COMP1__SHIFT |
- comp2 << GEN6_VE_STATE_DW1_COMP2__SHIFT |
- comp3 << GEN6_VE_STATE_DW1_COMP3__SHIFT;
-}
-
-static inline void
-ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
- struct ilo_ve_cso *cso)
-{
- int format;
-
- ILO_DEV_ASSERT(dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 94:
- *
- * "- This bit (Edge Flag Enable) must only be ENABLED on the last
- * valid VERTEX_ELEMENT structure.
- *
- * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
- * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
- *
- * - The Source Element Format must be set to the UINT format.
- *
- * - [DevSNB]: Edge Flags are not supported for QUADLIST
- * primitives. Software may elect to convert QUADLIST primitives
- * to some set of corresponding edge-flag-supported primitive
- * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
- */
-
- cso->payload[0] |= GEN6_VE_STATE_DW0_EDGE_FLAG_ENABLE;
- cso->payload[1] =
- GEN6_VFCOMP_STORE_SRC << GEN6_VE_STATE_DW1_COMP0__SHIFT |
- GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP1__SHIFT |
- GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP2__SHIFT |
- GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP3__SHIFT;
-
- /*
- * Edge flags have format GEN6_FORMAT_R8_UINT when defined via
- * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined
- * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
- *
- * Since all the hardware cares about is whether the flags are zero or not,
- * we can treat them as GEN6_FORMAT_R32_UINT in the latter case.
- */
- format = (cso->payload[0] >> GEN6_VE_STATE_DW0_FORMAT__SHIFT) & 0x1ff;
- if (format == GEN6_FORMAT_R32_FLOAT) {
- STATIC_ASSERT(GEN6_FORMAT_R32_UINT == GEN6_FORMAT_R32_FLOAT - 1);
- cso->payload[0] -= (1 << GEN6_VE_STATE_DW0_FORMAT__SHIFT);
- }
- else {
- assert(format == GEN6_FORMAT_R8_UINT);
- }
-}
-
-static inline void
-gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder,
- const struct ilo_ve_state *ve,
- bool last_velement_edgeflag,
- bool prepend_generated_ids)
-{
- uint8_t cmd_len;
- uint32_t dw0, *dw;
- unsigned i;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 93:
- *
- * "Up to 34 (DevSNB+) vertex elements are supported."
- */
- assert(ve->count + prepend_generated_ids <= 34);
-
- STATIC_ASSERT(Elements(ve->cso[0].payload) == 2);
-
- if (!ve->count && !prepend_generated_ids) {
- struct ilo_ve_cso dummy;
-
- ve_init_cso_with_components(builder->dev,
- GEN6_VFCOMP_STORE_0,
- GEN6_VFCOMP_STORE_0,
- GEN6_VFCOMP_STORE_0,
- GEN6_VFCOMP_STORE_1_FP,
- &dummy);
-
- cmd_len = 3;
- dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) |
- (cmd_len - 2);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- memcpy(&dw[1], dummy.payload, sizeof(dummy.payload));
-
- return;
- }
-
- cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
- dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) |
- (cmd_len - 2);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw++;
-
- if (prepend_generated_ids) {
- struct ilo_ve_cso gen_ids;
-
- ve_init_cso_with_components(builder->dev,
- GEN6_VFCOMP_STORE_VID,
- GEN6_VFCOMP_STORE_IID,
- GEN6_VFCOMP_NOSTORE,
- GEN6_VFCOMP_NOSTORE,
- &gen_ids);
-
- memcpy(dw, gen_ids.payload, sizeof(gen_ids.payload));
- dw += 2;
- }
-
- if (last_velement_edgeflag) {
- struct ilo_ve_cso edgeflag;
-
- for (i = 0; i < ve->count - 1; i++)
- memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload));
-
- edgeflag = ve->cso[i];
- ve_set_cso_edgeflag(builder->dev, &edgeflag);
- memcpy(&dw[2 * i], edgeflag.payload, sizeof(edgeflag.payload));
- }
- else {
- for (i = 0; i < ve->count; i++)
- memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload));
- }
-}
-
-static inline void
-gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
- const struct ilo_ib_state *ib,
- bool enable_cut_index)
-{
- const uint8_t cmd_len = 3;
- struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
- uint32_t start_offset, end_offset;
- int format;
- unsigned pos;
- uint32_t dw0, *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- if (!buf)
- return;
-
- /* this is moved to the new 3DSTATE_VF */
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5))
- assert(!enable_cut_index);
-
- switch (ib->hw_index_size) {
- case 4:
- format = GEN6_IB_DW0_FORMAT_DWORD;
- break;
- case 2:
- format = GEN6_IB_DW0_FORMAT_WORD;
- break;
- case 1:
- format = GEN6_IB_DW0_FORMAT_BYTE;
- break;
- default:
- assert(!"unknown index size");
- format = GEN6_IB_DW0_FORMAT_BYTE;
- break;
- }
-
- /*
- * set start_offset to 0 here and adjust pipe_draw_info::start with
- * ib->draw_start_offset in 3DPRIMITIVE
- */
- start_offset = 0;
- end_offset = buf->bo_size;
-
- /* end_offset must also be aligned and is inclusive */
- end_offset -= (end_offset % ib->hw_index_size);
- end_offset--;
-
- dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) |
- format |
- (cmd_len - 2);
- if (enable_cut_index)
- dw0 |= GEN6_IB_DW0_CUT_INDEX_ENABLE;
-
- pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0);
- ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0);
-}
-
-static inline void
-gen6_3DSTATE_VIEWPORT_STATE_POINTERS(struct ilo_builder *builder,
- uint32_t clip_viewport,
- uint32_t sf_viewport,
- uint32_t cc_viewport)
-{
- const uint8_t cmd_len = 4;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VIEWPORT_STATE_POINTERS) |
- GEN6_PTR_VP_DW0_CLIP_CHANGED |
- GEN6_PTR_VP_DW0_SF_CHANGED |
- GEN6_PTR_VP_DW0_CC_CHANGED |
- (cmd_len - 2);
- uint32_t *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = clip_viewport;
- dw[2] = sf_viewport;
- dw[3] = cc_viewport;
-}
-
-static inline void
-gen6_3DSTATE_CC_STATE_POINTERS(struct ilo_builder *builder,
- uint32_t blend_state,
- uint32_t depth_stencil_state,
- uint32_t color_calc_state)
-{
- const uint8_t cmd_len = 4;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CC_STATE_POINTERS) |
- (cmd_len - 2);
- uint32_t *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = blend_state | 1;
- dw[2] = depth_stencil_state | 1;
- dw[3] = color_calc_state | 1;
-}
-
-static inline void
-gen6_3DSTATE_SCISSOR_STATE_POINTERS(struct ilo_builder *builder,
- uint32_t scissor_rect)
-{
- const uint8_t cmd_len = 2;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SCISSOR_STATE_POINTERS) |
- (cmd_len - 2);
- uint32_t *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = scissor_rect;
-}
-
-static inline void
-gen6_3DSTATE_VS(struct ilo_builder *builder,
- const struct ilo_shader_state *vs,
- int num_samplers)
-{
- const uint8_t cmd_len = 6;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
- const struct ilo_shader_cso *cso;
- uint32_t dw2, dw4, dw5, *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- if (!vs) {
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = 0;
- dw[2] = 0;
- dw[3] = 0;
- dw[4] = 0;
- dw[5] = 0;
-
- return;
- }
-
- cso = ilo_shader_get_kernel_cso(vs);
- dw2 = cso->payload[0];
- dw4 = cso->payload[1];
- dw5 = cso->payload[2];
-
- dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = ilo_shader_get_kernel_offset(vs);
- dw[2] = dw2;
- dw[3] = 0; /* scratch */
- dw[4] = dw4;
- dw[5] = dw5;
-}
-
-static inline void
-gen6_3DSTATE_GS(struct ilo_builder *builder,
- const struct ilo_shader_state *gs,
- const struct ilo_shader_state *vs,
- int verts_per_prim)
-{
- const uint8_t cmd_len = 7;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
- uint32_t dw1, dw2, dw4, dw5, dw6, *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- if (gs) {
- const struct ilo_shader_cso *cso;
-
- dw1 = ilo_shader_get_kernel_offset(gs);
-
- cso = ilo_shader_get_kernel_cso(gs);
- dw2 = cso->payload[0];
- dw4 = cso->payload[1];
- dw5 = cso->payload[2];
- dw6 = cso->payload[3];
- }
- else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
- struct ilo_shader_cso cso;
- enum ilo_kernel_param param;
-
- switch (verts_per_prim) {
- case 1:
- param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
- break;
- case 2:
- param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
- break;
- default:
- param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
- break;
- }
-
- dw1 = ilo_shader_get_kernel_offset(vs) +
- ilo_shader_get_kernel_param(vs, param);
-
- /* cannot use VS's CSO */
- ilo_gpe_init_gs_cso_gen6(builder->dev, vs, &cso);
- dw2 = cso.payload[0];
- dw4 = cso.payload[1];
- dw5 = cso.payload[2];
- dw6 = cso.payload[3];
- }
- else {
- dw1 = 0;
- dw2 = 0;
- dw4 = 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT;
- dw5 = GEN6_GS_DW5_STATISTICS;
- dw6 = 0;
- }
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = dw1;
- dw[2] = dw2;
- dw[3] = 0;
- dw[4] = dw4;
- dw[5] = dw5;
- dw[6] = dw6;
-}
-
-static inline void
-gen6_3DSTATE_CLIP(struct ilo_builder *builder,
- const struct ilo_rasterizer_state *rasterizer,
- const struct ilo_shader_state *fs,
- bool enable_guardband,
- int num_viewports)
-{
- const uint8_t cmd_len = 4;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (cmd_len - 2);
- uint32_t dw1, dw2, dw3, *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- if (rasterizer) {
- int interps;
-
- dw1 = rasterizer->clip.payload[0];
- dw2 = rasterizer->clip.payload[1];
- dw3 = rasterizer->clip.payload[2];
-
- if (enable_guardband && rasterizer->clip.can_enable_guardband)
- dw2 |= GEN6_CLIP_DW2_GB_TEST_ENABLE;
-
- interps = (fs) ? ilo_shader_get_kernel_param(fs,
- ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0;
-
- if (interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL |
- GEN6_INTERP_NONPERSPECTIVE_CENTROID |
- GEN6_INTERP_NONPERSPECTIVE_SAMPLE))
- dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE;
-
- dw3 |= GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO |
- (num_viewports - 1);
- }
- else {
- dw1 = 0;
- dw2 = 0;
- dw3 = 0;
- }
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = dw1;
- dw[2] = dw2;
- dw[3] = dw3;
-}
-
-static inline void
-gen6_3DSTATE_SF(struct ilo_builder *builder,
- const struct ilo_rasterizer_state *rasterizer,
- const struct ilo_shader_state *fs)
-{
- const uint8_t cmd_len = 20;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2);
- uint32_t payload_raster[6], payload_sbe[13], *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- ilo_gpe_gen6_fill_3dstate_sf_raster(builder->dev, rasterizer,
- 1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster));
- ilo_gpe_gen6_fill_3dstate_sf_sbe(builder->dev, rasterizer,
- fs, payload_sbe, Elements(payload_sbe));
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = payload_sbe[0];
- memcpy(&dw[2], payload_raster, sizeof(payload_raster));
- memcpy(&dw[8], &payload_sbe[1], sizeof(payload_sbe) - 4);
-}
-
-static inline void
-gen6_3DSTATE_WM(struct ilo_builder *builder,
- const struct ilo_shader_state *fs,
- int num_samplers,
- const struct ilo_rasterizer_state *rasterizer,
- bool dual_blend, bool cc_may_kill,
- uint32_t hiz_op)
-{
- const uint8_t cmd_len = 9;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
- const int num_samples = 1;
- const struct ilo_shader_cso *fs_cso;
- uint32_t dw2, dw4, dw5, dw6, *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- if (!fs) {
- /* see brwCreateContext() */
- const int max_threads = (builder->dev->gt == 2) ? 80 : 40;
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = 0;
- dw[2] = 0;
- dw[3] = 0;
- dw[4] = hiz_op;
- /* honor the valid range even if dispatching is disabled */
- dw[5] = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
- dw[6] = 0;
- dw[7] = 0;
- dw[8] = 0;
-
- return;
- }
-
- fs_cso = ilo_shader_get_kernel_cso(fs);
- dw2 = fs_cso->payload[0];
- dw4 = fs_cso->payload[1];
- dw5 = fs_cso->payload[2];
- dw6 = fs_cso->payload[3];
-
- dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 248:
- *
- * "This bit (Statistics Enable) must be disabled if either of these
- * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve
- * Enable or Depth Buffer Resolve Enable."
- */
- assert(!hiz_op);
- dw4 |= GEN6_WM_DW4_STATISTICS;
-
- if (cc_may_kill)
- dw5 |= GEN6_WM_DW5_PS_KILL | GEN6_WM_DW5_PS_ENABLE;
-
- if (dual_blend)
- dw5 |= GEN6_WM_DW5_DUAL_SOURCE_BLEND;
-
- dw5 |= rasterizer->wm.payload[0];
-
- dw6 |= rasterizer->wm.payload[1];
-
- if (num_samples > 1) {
- dw6 |= rasterizer->wm.dw_msaa_rast |
- rasterizer->wm.dw_msaa_disp;
- }
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = ilo_shader_get_kernel_offset(fs);
- dw[2] = dw2;
- dw[3] = 0; /* scratch */
- dw[4] = dw4;
- dw[5] = dw5;
- dw[6] = dw6;
- dw[7] = 0; /* kernel 1 */
- dw[8] = 0; /* kernel 2 */
-}
-
-static inline unsigned
-gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
- const uint32_t *bufs, const int *sizes,
- int num_bufs, int max_read_length,
- uint32_t *dw, int num_dwords)
-{
- unsigned enabled = 0x0;
- int total_read_length, i;
-
- assert(num_dwords == 4);
-
- total_read_length = 0;
- for (i = 0; i < 4; i++) {
- if (i < num_bufs && sizes[i]) {
- /* in 256-bit units minus one */
- const int read_len = (sizes[i] + 31) / 32 - 1;
-
- assert(bufs[i] % 32 == 0);
- assert(read_len < 32);
-
- enabled |= 1 << i;
- dw[i] = bufs[i] | read_len;
-
- total_read_length += read_len + 1;
- }
- else {
- dw[i] = 0;
- }
- }
-
- assert(total_read_length <= max_read_length);
-
- return enabled;
-}
-
-static inline void
-gen6_3DSTATE_CONSTANT_VS(struct ilo_builder *builder,
- const uint32_t *bufs, const int *sizes,
- int num_bufs)
-{
- const uint8_t cmd_len = 5;
- uint32_t buf_dw[4], buf_enabled;
- uint32_t dw0, *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
- assert(num_bufs <= 4);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 138:
- *
- * "The sum of all four read length fields (each incremented to
- * represent the actual read length) must be less than or equal to 32"
- */
- buf_enabled = gen6_fill_3dstate_constant(builder->dev,
- bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw));
-
- dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_VS) |
- buf_enabled << 12 |
- (cmd_len - 2);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- memcpy(&dw[1], buf_dw, sizeof(buf_dw));
-}
-
-static inline void
-gen6_3DSTATE_CONSTANT_GS(struct ilo_builder *builder,
- const uint32_t *bufs, const int *sizes,
- int num_bufs)
-{
- const uint8_t cmd_len = 5;
- uint32_t buf_dw[4], buf_enabled;
- uint32_t dw0, *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
- assert(num_bufs <= 4);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 161:
- *
- * "The sum of all four read length fields (each incremented to
- * represent the actual read length) must be less than or equal to 64"
- */
- buf_enabled = gen6_fill_3dstate_constant(builder->dev,
- bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
-
- dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_GS) |
- buf_enabled << 12 |
- (cmd_len - 2);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- memcpy(&dw[1], buf_dw, sizeof(buf_dw));
-}
-
-static inline void
-gen6_3DSTATE_CONSTANT_PS(struct ilo_builder *builder,
- const uint32_t *bufs, const int *sizes,
- int num_bufs)
-{
- const uint8_t cmd_len = 5;
- uint32_t buf_dw[4], buf_enabled;
- uint32_t dw0, *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
- assert(num_bufs <= 4);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 287:
- *
- * "The sum of all four read length fields (each incremented to
- * represent the actual read length) must be less than or equal to 64"
- */
- buf_enabled = gen6_fill_3dstate_constant(builder->dev,
- bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
-
- dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_PS) |
- buf_enabled << 12 |
- (cmd_len - 2);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- memcpy(&dw[1], buf_dw, sizeof(buf_dw));
-}
-
-static inline void
-gen6_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder,
- unsigned sample_mask)
-{
- const uint8_t cmd_len = 2;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) |
- (cmd_len - 2);
- const unsigned valid_mask = 0xf;
- uint32_t *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- sample_mask &= valid_mask;
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = sample_mask;
-}
-
-static inline void
-gen6_3DSTATE_DRAWING_RECTANGLE(struct ilo_builder *builder,
- unsigned x, unsigned y,
- unsigned width, unsigned height)
-{
- const uint8_t cmd_len = 4;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_DRAWING_RECTANGLE) |
- (cmd_len - 2);
- unsigned xmax = x + width - 1;
- unsigned ymax = y + height - 1;
- int rect_limit;
- uint32_t *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
- rect_limit = 16383;
- }
- else {
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 230:
- *
- * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
- * must be an even number"
- */
- assert(y % 2 == 0);
-
- rect_limit = 8191;
- }
-
- if (x > rect_limit) x = rect_limit;
- if (y > rect_limit) y = rect_limit;
- if (xmax > rect_limit) xmax = rect_limit;
- if (ymax > rect_limit) ymax = rect_limit;
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
-
- dw[0] = dw0;
- dw[1] = y << 16 | x;
- dw[2] = ymax << 16 | xmax;
-
- /*
- * There is no need to set the origin. It is intended to support front
- * buffer rendering.
- */
- dw[3] = 0;
-}
-
static inline void
zs_align_surface(const struct ilo_dev_info *dev,
unsigned align_w, unsigned align_h,
@@ -1155,843 +115,4 @@ zs_align_surface(const struct ilo_dev_info *dev,
zs->payload[2] = dw3;
}
-static inline void
-gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder,
- const struct ilo_zs_surface *zs)
-{
- const uint8_t cmd_len = 7;
- unsigned pos;
- uint32_t dw0, *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- dw0 = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ?
- GEN7_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER) :
- GEN6_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER);
- dw0 |= (cmd_len - 2);
-
- pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = zs->payload[0];
-
- if (zs->bo) {
- ilo_builder_batch_reloc(builder, pos + 2,
- zs->bo, zs->payload[1], INTEL_RELOC_WRITE);
- } else {
- dw[2] = 0;
- }
-
- dw[3] = zs->payload[2];
- dw[4] = zs->payload[3];
- dw[5] = zs->payload[4];
- dw[6] = zs->payload[5];
-}
-
-static inline void
-gen6_3DSTATE_POLY_STIPPLE_OFFSET(struct ilo_builder *builder,
- int x_offset, int y_offset)
-{
- const uint8_t cmd_len = 2;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_OFFSET) |
- (cmd_len - 2);
- uint32_t *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
- assert(x_offset >= 0 && x_offset <= 31);
- assert(y_offset >= 0 && y_offset <= 31);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = x_offset << 8 | y_offset;
-}
-
-static inline void
-gen6_3DSTATE_POLY_STIPPLE_PATTERN(struct ilo_builder *builder,
- const struct pipe_poly_stipple *pattern)
-{
- const uint8_t cmd_len = 33;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_PATTERN) |
- (cmd_len - 2);
- uint32_t *dw;
- int i;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
- STATIC_ASSERT(Elements(pattern->stipple) == 32);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw++;
-
- for (i = 0; i < 32; i++)
- dw[i] = pattern->stipple[i];
-}
-
-static inline void
-gen6_3DSTATE_LINE_STIPPLE(struct ilo_builder *builder,
- unsigned pattern, unsigned factor)
-{
- const uint8_t cmd_len = 3;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_LINE_STIPPLE) |
- (cmd_len - 2);
- uint32_t *dw;
- unsigned inverse;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
- assert((pattern & 0xffff) == pattern);
- assert(factor >= 1 && factor <= 256);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = pattern;
-
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
- /* in U1.16 */
- inverse = (unsigned) (65536.0f / factor);
- dw[2] = inverse << 15 | factor;
- }
- else {
- /* in U1.13 */
- inverse = (unsigned) (8192.0f / factor);
- dw[2] = inverse << 16 | factor;
- }
-}
-
-static inline void
-gen6_3DSTATE_AA_LINE_PARAMETERS(struct ilo_builder *builder)
-{
- const uint8_t cmd_len = 3;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_AA_LINE_PARAMETERS) |
- (cmd_len - 2);
- uint32_t *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = 0 << 16 | 0;
- dw[2] = 0 << 16 | 0;
-}
-
-static inline void
-gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder,
- int index, unsigned svbi,
- unsigned max_svbi,
- bool load_vertex_count)
-{
- const uint8_t cmd_len = 4;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS_SVB_INDEX) |
- (cmd_len - 2);
- uint32_t dw1, *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
- assert(index >= 0 && index < 4);
-
- dw1 = index << GEN6_SVBI_DW1_INDEX__SHIFT;
- if (load_vertex_count)
- dw1 |= GEN6_SVBI_DW1_LOAD_INTERNAL_VERTEX_COUNT;
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = dw1;
- dw[2] = svbi;
- dw[3] = max_svbi;
-}
-
-/*
- * Emit 3DSTATE_MULTISAMPLE.
- *
- * The command is 4 dwords long on GEN7+ and 3 dwords long otherwise.  DW1
- * holds the pixel location and the sample count, DW2 holds
- * packed_sample_pos[0], and, on GEN7+ only, DW3 holds packed_sample_pos[1].
- *
- * num_samples may be 0 or 1 (both programmed as one sample), 4, or 8.  Eight
- * samples are asserted to be GEN7+ only.  packed_sample_pos is read only for
- * 4 samples (element 0) and 8 samples (elements 0 and 1).
- */
-static inline void
-gen6_3DSTATE_MULTISAMPLE(struct ilo_builder *builder,
-                         int num_samples,
-                         const uint32_t *packed_sample_pos,
-                         bool pixel_location_center)
-{
-   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 3;
-   const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) |
-                        (cmd_len - 2);
-   uint32_t dw1, dw2, dw3, *dw;
-
-   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
-   dw1 = (pixel_location_center) ?
-      GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER : GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER;
-
-   switch (num_samples) {
-   case 0:
-   case 1:
-      dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
-      dw2 = 0;
-      dw3 = 0;
-      break;
-   case 4:
-      dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4;
-      dw2 = packed_sample_pos[0];
-      dw3 = 0;
-      break;
-   case 8:
-      assert(ilo_dev_gen(builder->dev) >= ILO_GEN(7));
-      dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8;
-      dw2 = packed_sample_pos[0];
-      dw3 = packed_sample_pos[1];
-      break;
-   default:
-      /* fall back to single-sampled when asserts are compiled out */
-      assert(!"unsupported sample count");
-      dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
-      dw2 = 0;
-      dw3 = 0;
-      break;
-   }
-
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
-   dw[0] = dw0;
-   dw[1] = dw1;
-   dw[2] = dw2;
-
-   /*
-    * DW3 exists only in the 4-dword GEN7+ form of the command.  It must be
-    * written to its own slot: storing it in dw[2] would clobber the first
-    * sample position word and leave the last reserved dword unwritten.
-    */
-   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
-      dw[3] = dw3;
-}
-
-static inline void
-gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder,
- const struct ilo_zs_surface *zs)
-{
- const uint8_t cmd_len = 3;
- uint32_t dw0, *dw;
- unsigned pos;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- dw0 = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ?
- GEN7_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER) :
- GEN6_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER);
- dw0 |= (cmd_len - 2);
-
- pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- /* see ilo_gpe_init_zs_surface() */
- dw[1] = zs->payload[6];
-
- if (zs->separate_s8_bo) {
- ilo_builder_batch_reloc(builder, pos + 2,
- zs->separate_s8_bo, zs->payload[7], INTEL_RELOC_WRITE);
- } else {
- dw[2] = 0;
- }
-}
-
-static inline void
-gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder,
- const struct ilo_zs_surface *zs)
-{
- const uint8_t cmd_len = 3;
- uint32_t dw0, *dw;
- unsigned pos;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- dw0 = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ?
- GEN7_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER) :
- GEN6_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER);
- dw0 |= (cmd_len - 2);
-
- pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- /* see ilo_gpe_init_zs_surface() */
- dw[1] = zs->payload[8];
-
- if (zs->hiz_bo) {
- ilo_builder_batch_reloc(builder, pos + 2,
- zs->hiz_bo, zs->payload[9], INTEL_RELOC_WRITE);
- } else {
- dw[2] = 0;
- }
-}
-
-static inline void
-gen6_3DSTATE_CLEAR_PARAMS(struct ilo_builder *builder,
- uint32_t clear_val)
-{
- const uint8_t cmd_len = 2;
- const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) |
- GEN6_CLEAR_PARAMS_DW0_VALID |
- (cmd_len - 2);
- uint32_t *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = clear_val;
-}
-
-static inline void
-gen6_3DPRIMITIVE(struct ilo_builder *builder,
- const struct pipe_draw_info *info,
- const struct ilo_ib_state *ib,
- bool rectlist)
-{
- const uint8_t cmd_len = 6;
- const int prim = (rectlist) ?
- GEN6_3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
- const int vb_access = (info->indexed) ?
- GEN6_3DPRIM_DW0_ACCESS_RANDOM : GEN6_3DPRIM_DW0_ACCESS_SEQUENTIAL;
- const uint32_t vb_start = info->start +
- ((info->indexed) ? ib->draw_start_offset : 0);
- uint32_t dw0, *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- dw0 = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) |
- vb_access |
- prim << GEN6_3DPRIM_DW0_TYPE__SHIFT |
- (cmd_len - 2);
-
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
- dw[0] = dw0;
- dw[1] = info->count;
- dw[2] = vb_start;
- dw[3] = info->instance_count;
- dw[4] = info->start_instance;
- dw[5] = info->index_bias;
-}
-
-static inline uint32_t
-gen6_SF_VIEWPORT(struct ilo_builder *builder,
- const struct ilo_viewport_cso *viewports,
- unsigned num_viewports)
-{
- const int state_align = 32;
- const int state_len = 8 * num_viewports;
- uint32_t state_offset, *dw;
- unsigned i;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 262:
- *
- * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
- * stored as an array of up to 16 elements..."
- */
- assert(num_viewports && num_viewports <= 16);
-
- state_offset = ilo_builder_state_pointer(builder,
- ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &dw);
-
- for (i = 0; i < num_viewports; i++) {
- const struct ilo_viewport_cso *vp = &viewports[i];
-
- dw[0] = fui(vp->m00);
- dw[1] = fui(vp->m11);
- dw[2] = fui(vp->m22);
- dw[3] = fui(vp->m30);
- dw[4] = fui(vp->m31);
- dw[5] = fui(vp->m32);
- dw[6] = 0;
- dw[7] = 0;
-
- dw += 8;
- }
-
- return state_offset;
-}
-
-static inline uint32_t
-gen6_CLIP_VIEWPORT(struct ilo_builder *builder,
- const struct ilo_viewport_cso *viewports,
- unsigned num_viewports)
-{
- const int state_align = 32;
- const int state_len = 4 * num_viewports;
- uint32_t state_offset, *dw;
- unsigned i;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 193:
- *
- * "The viewport-related state is stored as an array of up to 16
- * elements..."
- */
- assert(num_viewports && num_viewports <= 16);
-
- state_offset = ilo_builder_state_pointer(builder,
- ILO_BUILDER_ITEM_CLIP_VIEWPORT, state_align, state_len, &dw);
-
- for (i = 0; i < num_viewports; i++) {
- const struct ilo_viewport_cso *vp = &viewports[i];
-
- dw[0] = fui(vp->min_gbx);
- dw[1] = fui(vp->max_gbx);
- dw[2] = fui(vp->min_gby);
- dw[3] = fui(vp->max_gby);
-
- dw += 4;
- }
-
- return state_offset;
-}
-
-static inline uint32_t
-gen6_CC_VIEWPORT(struct ilo_builder *builder,
- const struct ilo_viewport_cso *viewports,
- unsigned num_viewports)
-{
- const int state_align = 32;
- const int state_len = 2 * num_viewports;
- uint32_t state_offset, *dw;
- unsigned i;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 385:
- *
- * "The viewport state is stored as an array of up to 16 elements..."
- */
- assert(num_viewports && num_viewports <= 16);
-
- state_offset = ilo_builder_state_pointer(builder,
- ILO_BUILDER_ITEM_CC_VIEWPORT, state_align, state_len, &dw);
-
- for (i = 0; i < num_viewports; i++) {
- const struct ilo_viewport_cso *vp = &viewports[i];
-
- dw[0] = fui(vp->min_z);
- dw[1] = fui(vp->max_z);
-
- dw += 2;
- }
-
- return state_offset;
-}
-
-static inline uint32_t
-gen6_COLOR_CALC_STATE(struct ilo_builder *builder,
- const struct pipe_stencil_ref *stencil_ref,
- ubyte alpha_ref,
- const struct pipe_blend_color *blend_color)
-{
- const int state_align = 64;
- const int state_len = 6;
- uint32_t state_offset, *dw;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- state_offset = ilo_builder_state_pointer(builder,
- ILO_BUILDER_ITEM_COLOR_CALC, state_align, state_len, &dw);
-
- dw[0] = stencil_ref->ref_value[0] << 24 |
- stencil_ref->ref_value[1] << 16 |
- GEN6_CC_DW0_ALPHATEST_UNORM8;
- dw[1] = alpha_ref;
- dw[2] = fui(blend_color->color[0]);
- dw[3] = fui(blend_color->color[1]);
- dw[4] = fui(blend_color->color[2]);
- dw[5] = fui(blend_color->color[3]);
-
- return state_offset;
-}
-
-static inline uint32_t
-gen6_BLEND_STATE(struct ilo_builder *builder,
- const struct ilo_blend_state *blend,
- const struct ilo_fb_state *fb,
- const struct ilo_dsa_state *dsa)
-{
- const int state_align = 64;
- int state_len;
- uint32_t state_offset, *dw;
- unsigned num_targets, i;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 376:
- *
- * "The blend state is stored as an array of up to 8 elements..."
- */
- num_targets = fb->state.nr_cbufs;
- assert(num_targets <= 8);
-
- if (!num_targets) {
- if (!dsa->dw_alpha)
- return 0;
- /* to be able to reference alpha func */
- num_targets = 1;
- }
-
- state_len = 2 * num_targets;
-
- state_offset = ilo_builder_state_pointer(builder,
- ILO_BUILDER_ITEM_BLEND, state_align, state_len, &dw);
-
- for (i = 0; i < num_targets; i++) {
- const unsigned idx = (blend->independent_blend_enable) ? i : 0;
- const struct ilo_blend_cso *cso = &blend->cso[idx];
- const int num_samples = fb->num_samples;
- const struct util_format_description *format_desc =
- (idx < fb->state.nr_cbufs && fb->state.cbufs[idx]) ?
- util_format_description(fb->state.cbufs[idx]->format) : NULL;
- bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
-
- rt_is_unorm = true;
- rt_is_pure_integer = false;
- rt_dst_alpha_forced_one = false;
-
- if (format_desc) {
- int ch;
-
- switch (format_desc->format) {
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- /* force alpha to one when the HW format has alpha */
- assert(ilo_translate_render_format(builder->dev,
- PIPE_FORMAT_B8G8R8X8_UNORM) ==
- GEN6_FORMAT_B8G8R8A8_UNORM);
- rt_dst_alpha_forced_one = true;
- break;
- default:
- break;
- }
-
- for (ch = 0; ch < 4; ch++) {
- if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
- continue;
-
- if (format_desc->channel[ch].pure_integer) {
- rt_is_unorm = false;
- rt_is_pure_integer = true;
- break;
- }
-
- if (!format_desc->channel[ch].normalized ||
- format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
- rt_is_unorm = false;
- }
- }
-
- dw[0] = cso->payload[0];
- dw[1] = cso->payload[1];
-
- if (!rt_is_pure_integer) {
- if (rt_dst_alpha_forced_one)
- dw[0] |= cso->dw_blend_dst_alpha_forced_one;
- else
- dw[0] |= cso->dw_blend;
- }
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 365:
- *
- * "Logic Ops are only supported on *_UNORM surfaces (excluding
- * _SRGB variants), otherwise Logic Ops must be DISABLED."
- *
- * Since logicop is ignored for non-UNORM color buffers, no special care
- * is needed.
- */
- if (rt_is_unorm)
- dw[1] |= cso->dw_logicop;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 356:
- *
- * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
- * Dither both must be disabled."
- *
- * There is no such limitation on GEN7, or for AlphaToOne. But GL
- * requires that anyway.
- */
- if (num_samples > 1)
- dw[1] |= cso->dw_alpha_mod;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 382:
- *
- * "Alpha Test can only be enabled if Pixel Shader outputs a float
- * alpha value."
- */
- if (!rt_is_pure_integer)
- dw[1] |= dsa->dw_alpha;
-
- dw += 2;
- }
-
- return state_offset;
-}
-
-static inline uint32_t
-gen6_DEPTH_STENCIL_STATE(struct ilo_builder *builder,
- const struct ilo_dsa_state *dsa)
-{
- const int state_align = 64;
- const int state_len = 3;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- STATIC_ASSERT(Elements(dsa->payload) >= state_len);
-
- return ilo_builder_state_write(builder, ILO_BUILDER_ITEM_DEPTH_STENCIL,
- state_align, state_len, dsa->payload);
-}
-
-static inline uint32_t
-gen6_SCISSOR_RECT(struct ilo_builder *builder,
- const struct ilo_scissor_state *scissor,
- unsigned num_viewports)
-{
- const int state_align = 32;
- const int state_len = 2 * num_viewports;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 263:
- *
- * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
- * stored as an array of up to 16 elements..."
- */
- assert(num_viewports && num_viewports <= 16);
- assert(Elements(scissor->payload) >= state_len);
-
- return ilo_builder_state_write(builder, ILO_BUILDER_ITEM_SCISSOR_RECT,
- state_align, state_len, scissor->payload);
-}
-
-static inline uint32_t
-gen6_BINDING_TABLE_STATE(struct ilo_builder *builder,
- uint32_t *surface_states,
- int num_surface_states)
-{
- const int state_align = 32;
- const int state_len = num_surface_states;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 69:
- *
- * "It is stored as an array of up to 256 elements..."
- */
- assert(num_surface_states <= 256);
-
- if (!num_surface_states)
- return 0;
-
- return ilo_builder_surface_write(builder, ILO_BUILDER_ITEM_BINDING_TABLE,
- state_align, state_len, surface_states);
-}
-
-static inline uint32_t
-gen6_SURFACE_STATE(struct ilo_builder *builder,
- const struct ilo_view_surface *surf,
- bool for_render)
-{
- const int state_align = 32;
- const int state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 8 : 6;
- uint32_t state_offset;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- state_offset = ilo_builder_surface_write(builder, ILO_BUILDER_ITEM_SURFACE,
- state_align, state_len, surf->payload);
-
- if (surf->bo) {
- ilo_builder_surface_reloc(builder, state_offset, 1, surf->bo,
- surf->payload[1], (for_render) ? INTEL_RELOC_WRITE : 0);
- }
-
- return state_offset;
-}
-
-static inline uint32_t
-gen6_so_SURFACE_STATE(struct ilo_builder *builder,
- const struct pipe_stream_output_target *so,
- const struct pipe_stream_output_info *so_info,
- int so_index)
-{
- struct ilo_buffer *buf = ilo_buffer(so->buffer);
- unsigned bo_offset, struct_size;
- enum pipe_format elem_format;
- struct ilo_view_surface surf;
-
- ILO_DEV_ASSERT(builder->dev, 6, 6);
-
- bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
- struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
-
- switch (so_info->output[so_index].num_components) {
- case 1:
- elem_format = PIPE_FORMAT_R32_FLOAT;
- break;
- case 2:
- elem_format = PIPE_FORMAT_R32G32_FLOAT;
- break;
- case 3:
- elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
- break;
- case 4:
- elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- break;
- default:
- assert(!"unexpected SO components length");
- elem_format = PIPE_FORMAT_R32_FLOAT;
- break;
- }
-
- ilo_gpe_init_view_surface_for_buffer_gen6(builder->dev, buf, bo_offset,
- so->buffer_size, struct_size, elem_format, false, true, &surf);
-
- return gen6_SURFACE_STATE(builder, &surf, false);
-}
-
-static inline uint32_t
-gen6_SAMPLER_STATE(struct ilo_builder *builder,
- const struct ilo_sampler_cso * const *samplers,
- const struct pipe_sampler_view * const *views,
- const uint32_t *sampler_border_colors,
- int num_samplers)
-{
- const int state_align = 32;
- const int state_len = 4 * num_samplers;
- uint32_t state_offset, *dw;
- int i;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 101:
- *
- * "The sampler state is stored as an array of up to 16 elements..."
- */
- assert(num_samplers <= 16);
-
- if (!num_samplers)
- return 0;
-
- state_offset = ilo_builder_state_pointer(builder,
- ILO_BUILDER_ITEM_SAMPLER, state_align, state_len, &dw);
-
- for (i = 0; i < num_samplers; i++) {
- const struct ilo_sampler_cso *sampler = samplers[i];
- const struct pipe_sampler_view *view = views[i];
- const uint32_t border_color = sampler_border_colors[i];
- uint32_t dw_filter, dw_wrap;
-
- /* there may be holes */
- if (!sampler || !view) {
- /* disabled sampler */
- dw[0] = 1 << 31;
- dw[1] = 0;
- dw[2] = 0;
- dw[3] = 0;
- dw += 4;
-
- continue;
- }
-
- /* determine filter and wrap modes */
- switch (view->texture->target) {
- case PIPE_TEXTURE_1D:
- dw_filter = (sampler->anisotropic) ?
- sampler->dw_filter_aniso : sampler->dw_filter;
- dw_wrap = sampler->dw_wrap_1d;
- break;
- case PIPE_TEXTURE_3D:
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 103:
- *
- * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
- * surfaces of type SURFTYPE_3D."
- */
- dw_filter = sampler->dw_filter;
- dw_wrap = sampler->dw_wrap;
- break;
- case PIPE_TEXTURE_CUBE:
- dw_filter = (sampler->anisotropic) ?
- sampler->dw_filter_aniso : sampler->dw_filter;
- dw_wrap = sampler->dw_wrap_cube;
- break;
- default:
- dw_filter = (sampler->anisotropic) ?
- sampler->dw_filter_aniso : sampler->dw_filter;
- dw_wrap = sampler->dw_wrap;
- break;
- }
-
- dw[0] = sampler->payload[0];
- dw[1] = sampler->payload[1];
- assert(!(border_color & 0x1f));
- dw[2] = border_color;
- dw[3] = sampler->payload[2];
-
- dw[0] |= dw_filter;
-
- if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
- dw[3] |= dw_wrap;
- }
- else {
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 21:
- *
- * "[DevSNB] Errata: Incorrect behavior is observed in cases
- * where the min and mag mode filters are different and
- * SurfMinLOD is nonzero. The determination of MagMode uses the
- * following equation instead of the one in the above
- * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
- *
- * As a way to work around that, we set Base to
- * view->u.tex.first_level.
- */
- dw[0] |= view->u.tex.first_level << 22;
-
- dw[1] |= dw_wrap;
- }
-
- dw += 4;
- }
-
- return state_offset;
-}
-
-static inline uint32_t
-gen6_SAMPLER_BORDER_COLOR_STATE(struct ilo_builder *builder,
- const struct ilo_sampler_cso *sampler)
-{
- const int state_align = 32;
- const int state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 12;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- assert(Elements(sampler->payload) >= 3 + state_len);
-
- /* see ilo_gpe_init_sampler_cso() */
- return ilo_builder_state_write(builder, ILO_BUILDER_ITEM_BLOB,
- state_align, state_len, &sampler->payload[3]);
-}
-
-static inline uint32_t
-gen6_push_constant_buffer(struct ilo_builder *builder,
- int size, void **pcb)
-{
- /*
- * For all VS, GS, FS, and CS push constant buffers, they must be aligned
- * to 32 bytes, and their sizes are specified in 256-bit units.
- */
- const int state_align = 32;
- const int state_len = align(size, 32) / 4;
- uint32_t state_offset;
- char *buf;
-
- ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
- state_offset = ilo_builder_state_pointer(builder,
- ILO_BUILDER_ITEM_BLOB, state_align, state_len, (uint32_t **) &buf);
-
- /* zero out the unused range */
- if (size < state_len * 4)
- memset(&buf[size], 0, state_len * 4 - size);
-
- if (pcb)
- *pcb = buf;
-
- return state_offset;
-}
-
#endif /* ILO_GPE_GEN6_H */
diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.c b/src/gallium/drivers/ilo/ilo_gpe_gen7.c
index 0e5602c65a6..21b4bfd6101 100644
--- a/src/gallium/drivers/ilo/ilo_gpe_gen7.c
+++ b/src/gallium/drivers/ilo/ilo_gpe_gen7.c
@@ -31,7 +31,8 @@
#include "ilo_format.h"
#include "ilo_resource.h"
#include "ilo_shader.h"
-#include "ilo_gpe_gen7.h"
+#include "ilo_gpe_gen6.h"
+#include "ilo_gpe.h"
void
ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,