diff options
author | Chia-I Wu <[email protected]> | 2014-09-12 23:44:19 +0800 |
---|---|---|
committer | Chia-I Wu <[email protected]> | 2014-09-13 09:31:08 +0800 |
commit | ea8e7a8d4a32ff8d3eea2dce871cfbd6b833cc87 (patch) | |
tree | 1b65ca57942ed18f57bace7ae07476bb385f765e /src/gallium | |
parent | aec8521166d8acc9211db864a24ec087d7d2e7f2 (diff) |
ilo: move 3D functions to ilo_builder_3d*.h
Move functions for the 3D pipeline to the new headers. We artificially split
the functions into top (vertex processing) and bottom (pixel processing), to
keep the headers at reasonable sizes.
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/ilo/Makefile.sources | 4 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/ilo_blitter_rectlist.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/ilo_builder_3d.h | 125 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/ilo_builder_3d_bottom.h | 1334 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/ilo_builder_3d_top.h (renamed from src/gallium/drivers/ilo/ilo_gpe_gen7.h) | 1796 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/ilo_gpe_gen6.h | 1879 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/ilo_gpe_gen7.c | 3 |
9 files changed, 2614 insertions, 2538 deletions
diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index d086025e248..0a631e809af 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -16,6 +16,9 @@ C_SOURCES := \ ilo_blitter_rectlist.c \ ilo_builder.c \ ilo_builder.h \ + ilo_builder_3d.h \ + ilo_builder_3d_bottom.h \ + ilo_builder_3d_top.h \ ilo_blitter_blt.h \ ilo_builder_decode.c \ ilo_builder_media.h \ @@ -31,7 +34,6 @@ C_SOURCES := \ ilo_gpe_gen6.c \ ilo_gpe_gen6.h \ ilo_gpe_gen7.c \ - ilo_gpe_gen7.h \ ilo_gpe.h \ ilo_gpgpu.c \ ilo_gpgpu.h \ diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c index 88dd3ae67fd..3e37c68f3f9 100644 --- a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c +++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c @@ -31,12 +31,11 @@ #include "ilo_3d.h" #include "ilo_blitter.h" +#include "ilo_builder_3d.h" #include "ilo_builder_mi.h" #include "ilo_builder_render.h" #include "ilo_context.h" #include "ilo_cp.h" -#include "ilo_gpe_gen6.h" -#include "ilo_gpe_gen7.h" #include "ilo_shader.h" #include "ilo_state.h" #include "ilo_3d_pipeline.h" diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c index fd1daf50d7d..14c0823df63 100644 --- a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c +++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c @@ -29,10 +29,10 @@ #include "util/u_dual_blend.h" #include "ilo_blitter.h" +#include "ilo_builder_3d.h" #include "ilo_builder_render.h" #include "ilo_context.h" #include "ilo_cp.h" -#include "ilo_gpe_gen7.h" #include "ilo_shader.h" #include "ilo_state.h" #include "ilo_3d_pipeline.h" diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index 009780a1cd9..5dd0b1ec5a9 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -28,13 +28,13 @@ 
#include "util/u_draw.h" #include "util/u_pack_color.h" -#include "ilo_blitter.h" #include "ilo_3d.h" #include "ilo_3d_pipeline.h" +#include "ilo_builder_3d_top.h" /* for ve_init_cso_with_components() */ +#include "ilo_gpe_gen6.h" /* for zs_align_surface() */ #include "ilo_blit.h" #include "ilo_gpe.h" -#include "ilo_gpe_gen6.h" /* for ve_init_cso_with_components and - zs_align_surface */ +#include "ilo_blitter.h" /** * Set the states that are invariant between all ops. diff --git a/src/gallium/drivers/ilo/ilo_builder_3d.h b/src/gallium/drivers/ilo/ilo_builder_3d.h new file mode 100644 index 00000000000..c94fd718ee3 --- /dev/null +++ b/src/gallium/drivers/ilo/ilo_builder_3d.h @@ -0,0 +1,125 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2014 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu <[email protected]> + */ + +#ifndef ILO_BUILDER_3D_H +#define ILO_BUILDER_3D_H + +#include "genhw/genhw.h" + +#include "ilo_common.h" +#include "ilo_builder_3d_top.h" +#include "ilo_builder_3d_bottom.h" + +/** + * Translate a pipe primitive type to the matching hardware primitive type. + */ +static inline int +ilo_gpe_gen6_translate_pipe_prim(unsigned prim) +{ + static const int prim_mapping[PIPE_PRIM_MAX] = { + [PIPE_PRIM_POINTS] = GEN6_3DPRIM_POINTLIST, + [PIPE_PRIM_LINES] = GEN6_3DPRIM_LINELIST, + [PIPE_PRIM_LINE_LOOP] = GEN6_3DPRIM_LINELOOP, + [PIPE_PRIM_LINE_STRIP] = GEN6_3DPRIM_LINESTRIP, + [PIPE_PRIM_TRIANGLES] = GEN6_3DPRIM_TRILIST, + [PIPE_PRIM_TRIANGLE_STRIP] = GEN6_3DPRIM_TRISTRIP, + [PIPE_PRIM_TRIANGLE_FAN] = GEN6_3DPRIM_TRIFAN, + [PIPE_PRIM_QUADS] = GEN6_3DPRIM_QUADLIST, + [PIPE_PRIM_QUAD_STRIP] = GEN6_3DPRIM_QUADSTRIP, + [PIPE_PRIM_POLYGON] = GEN6_3DPRIM_POLYGON, + [PIPE_PRIM_LINES_ADJACENCY] = GEN6_3DPRIM_LINELIST_ADJ, + [PIPE_PRIM_LINE_STRIP_ADJACENCY] = GEN6_3DPRIM_LINESTRIP_ADJ, + [PIPE_PRIM_TRIANGLES_ADJACENCY] = GEN6_3DPRIM_TRILIST_ADJ, + [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = GEN6_3DPRIM_TRISTRIP_ADJ, + }; + + assert(prim_mapping[prim]); + + return prim_mapping[prim]; +} + +static inline void +gen6_3DPRIMITIVE(struct ilo_builder *builder, + const struct pipe_draw_info *info, + const struct ilo_ib_state *ib, + bool rectlist) +{ + const uint8_t cmd_len = 6; + const int prim = (rectlist) ? + GEN6_3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode); + const int vb_access = (info->indexed) ? + GEN6_3DPRIM_DW0_ACCESS_RANDOM : GEN6_3DPRIM_DW0_ACCESS_SEQUENTIAL; + const uint32_t vb_start = info->start + + ((info->indexed) ? 
ib->draw_start_offset : 0); + uint32_t dw0, *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + dw0 = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | + vb_access | + prim << GEN6_3DPRIM_DW0_TYPE__SHIFT | + (cmd_len - 2); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = info->count; + dw[2] = vb_start; + dw[3] = info->instance_count; + dw[4] = info->start_instance; + dw[5] = info->index_bias; +} + +static inline void +gen7_3DPRIMITIVE(struct ilo_builder *builder, + const struct pipe_draw_info *info, + const struct ilo_ib_state *ib, + bool rectlist) +{ + const uint8_t cmd_len = 7; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | (cmd_len - 2); + const int prim = (rectlist) ? + GEN6_3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode); + const int vb_access = (info->indexed) ? + GEN7_3DPRIM_DW1_ACCESS_RANDOM : + GEN7_3DPRIM_DW1_ACCESS_SEQUENTIAL; + const uint32_t vb_start = info->start + + ((info->indexed) ? ib->draw_start_offset : 0); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = vb_access | prim; + dw[2] = info->count; + dw[3] = vb_start; + dw[4] = info->instance_count; + dw[5] = info->start_instance; + dw[6] = info->index_bias; +} + +#endif /* ILO_BUILDER_3D_H */ diff --git a/src/gallium/drivers/ilo/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/ilo_builder_3d_bottom.h new file mode 100644 index 00000000000..6427228a64c --- /dev/null +++ b/src/gallium/drivers/ilo/ilo_builder_3d_bottom.h @@ -0,0 +1,1334 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2014 LunarG, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu <[email protected]> + */ + +#ifndef ILO_BUILDER_3D_BOTTOM_H +#define ILO_BUILDER_3D_BOTTOM_H + +#include "genhw/genhw.h" +#include "intel_winsys.h" + +#include "ilo_common.h" +#include "ilo_format.h" +#include "ilo_shader.h" +#include "ilo_builder.h" +#include "ilo_builder_3d_top.h" + +static inline void +gen6_3DSTATE_CLIP(struct ilo_builder *builder, + const struct ilo_rasterizer_state *rasterizer, + const struct ilo_shader_state *fs, + bool enable_guardband, + int num_viewports) +{ + const uint8_t cmd_len = 4; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (cmd_len - 2); + uint32_t dw1, dw2, dw3, *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + if (rasterizer) { + int interps; + + dw1 = rasterizer->clip.payload[0]; + dw2 = rasterizer->clip.payload[1]; + dw3 = rasterizer->clip.payload[2]; + + if (enable_guardband && rasterizer->clip.can_enable_guardband) + dw2 |= GEN6_CLIP_DW2_GB_TEST_ENABLE; + + interps = (fs) ? ilo_shader_get_kernel_param(fs, + ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0; + + if (interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL | + GEN6_INTERP_NONPERSPECTIVE_CENTROID | + GEN6_INTERP_NONPERSPECTIVE_SAMPLE)) + dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE; + + dw3 |= GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO | + (num_viewports - 1); + } + else { + dw1 = 0; + dw2 = 0; + dw3 = 0; + } + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = dw1; + dw[2] = dw2; + dw[3] = dw3; +} + +/** + * Fill in DW2 to DW7 of 3DSTATE_SF. 
+ */ +static inline void +ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev, + const struct ilo_rasterizer_state *rasterizer, + int num_samples, + enum pipe_format depth_format, + uint32_t *payload, unsigned payload_len) +{ + assert(payload_len == Elements(rasterizer->sf.payload)); + + if (rasterizer) { + const struct ilo_rasterizer_sf *sf = &rasterizer->sf; + + memcpy(payload, sf->payload, sizeof(sf->payload)); + if (num_samples > 1) + payload[1] |= sf->dw_msaa; + } + else { + payload[0] = 0; + payload[1] = (num_samples > 1) ? GEN7_SF_DW2_MSRASTMODE_ON_PATTERN : 0; + payload[2] = 0; + payload[3] = 0; + payload[4] = 0; + payload[5] = 0; + } + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + int format; + + /* separate stencil */ + switch (depth_format) { + case PIPE_FORMAT_Z16_UNORM: + format = GEN6_ZFORMAT_D16_UNORM; + break; + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + format = GEN6_ZFORMAT_D32_FLOAT; + break; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + format = GEN6_ZFORMAT_D24_UNORM_X8_UINT; + break; + default: + /* FLOAT surface is assumed when there is no depth buffer */ + format = GEN6_ZFORMAT_D32_FLOAT; + break; + } + + payload[0] |= format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT; + } +} + +/** + * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF. 
+ */ +static inline void +ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev, + const struct ilo_rasterizer_state *rasterizer, + const struct ilo_shader_state *fs, + uint32_t *dw, int num_dwords) +{ + int output_count, vue_offset, vue_len; + const struct ilo_kernel_routing *routing; + + ILO_DEV_ASSERT(dev, 6, 7.5); + assert(num_dwords == 13); + + if (!fs) { + memset(dw, 0, sizeof(dw[0]) * num_dwords); + dw[0] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT; + return; + } + + output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); + assert(output_count <= 32); + + routing = ilo_shader_get_kernel_routing(fs); + + vue_offset = routing->source_skip; + assert(vue_offset % 2 == 0); + vue_offset /= 2; + + vue_len = (routing->source_len + 1) / 2; + if (!vue_len) + vue_len = 1; + + dw[0] = output_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT | + vue_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT | + vue_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT; + if (routing->swizzle_enable) + dw[0] |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE; + + switch (rasterizer->state.sprite_coord_mode) { + case PIPE_SPRITE_COORD_UPPER_LEFT: + dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT; + break; + case PIPE_SPRITE_COORD_LOWER_LEFT: + dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT; + break; + } + + STATIC_ASSERT(Elements(routing->swizzles) >= 16); + memcpy(&dw[1], routing->swizzles, 2 * 16); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 268: + * + * "This field (Point Sprite Texture Coordinate Enable) must be + * programmed to 0 when non-point primitives are rendered." + * + * TODO We do not check that yet. 
+ */ + dw[9] = routing->point_sprite_enable; + + dw[10] = routing->const_interp_enable; + + /* WrapShortest enables */ + dw[11] = 0; + dw[12] = 0; +} + +static inline void +gen6_3DSTATE_SF(struct ilo_builder *builder, + const struct ilo_rasterizer_state *rasterizer, + const struct ilo_shader_state *fs) +{ + const uint8_t cmd_len = 20; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2); + uint32_t payload_raster[6], payload_sbe[13], *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + ilo_gpe_gen6_fill_3dstate_sf_raster(builder->dev, rasterizer, + 1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster)); + ilo_gpe_gen6_fill_3dstate_sf_sbe(builder->dev, rasterizer, + fs, payload_sbe, Elements(payload_sbe)); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = payload_sbe[0]; + memcpy(&dw[2], payload_raster, sizeof(payload_raster)); + memcpy(&dw[8], &payload_sbe[1], sizeof(payload_sbe) - 4); +} + +static inline void +gen7_3DSTATE_SF(struct ilo_builder *builder, + const struct ilo_rasterizer_state *rasterizer, + enum pipe_format zs_format) +{ + const uint8_t cmd_len = 7; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2); + const int num_samples = 1; + uint32_t payload[6], *dw; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + ilo_gpe_gen6_fill_3dstate_sf_raster(builder->dev, + rasterizer, num_samples, zs_format, + payload, Elements(payload)); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + memcpy(&dw[1], payload, sizeof(payload)); +} + +static inline void +gen7_3DSTATE_SBE(struct ilo_builder *builder, + const struct ilo_rasterizer_state *rasterizer, + const struct ilo_shader_state *fs) +{ + const uint8_t cmd_len = 14; + const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2); + uint32_t payload[13], *dw; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + ilo_gpe_gen6_fill_3dstate_sf_sbe(builder->dev, + rasterizer, fs, payload, Elements(payload)); + + 
ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + memcpy(&dw[1], payload, sizeof(payload)); +} + +static inline void +gen6_3DSTATE_WM(struct ilo_builder *builder, + const struct ilo_shader_state *fs, + int num_samplers, + const struct ilo_rasterizer_state *rasterizer, + bool dual_blend, bool cc_may_kill, + uint32_t hiz_op) +{ + const uint8_t cmd_len = 9; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); + const int num_samples = 1; + const struct ilo_shader_cso *fs_cso; + uint32_t dw2, dw4, dw5, dw6, *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + if (!fs) { + /* see brwCreateContext() */ + const int max_threads = (builder->dev->gt == 2) ? 80 : 40; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = 0; + dw[2] = 0; + dw[3] = 0; + dw[4] = hiz_op; + /* honor the valid range even if dispatching is disabled */ + dw[5] = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT; + dw[6] = 0; + dw[7] = 0; + dw[8] = 0; + + return; + } + + fs_cso = ilo_shader_get_kernel_cso(fs); + dw2 = fs_cso->payload[0]; + dw4 = fs_cso->payload[1]; + dw5 = fs_cso->payload[2]; + dw6 = fs_cso->payload[3]; + + dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 248: + * + * "This bit (Statistics Enable) must be disabled if either of these + * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve + * Enable or Depth Buffer Resolve Enable." 
+ */ + assert(!hiz_op); + dw4 |= GEN6_WM_DW4_STATISTICS; + + if (cc_may_kill) + dw5 |= GEN6_WM_DW5_PS_KILL | GEN6_WM_DW5_PS_ENABLE; + + if (dual_blend) + dw5 |= GEN6_WM_DW5_DUAL_SOURCE_BLEND; + + dw5 |= rasterizer->wm.payload[0]; + + dw6 |= rasterizer->wm.payload[1]; + + if (num_samples > 1) { + dw6 |= rasterizer->wm.dw_msaa_rast | + rasterizer->wm.dw_msaa_disp; + } + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = ilo_shader_get_kernel_offset(fs); + dw[2] = dw2; + dw[3] = 0; /* scratch */ + dw[4] = dw4; + dw[5] = dw5; + dw[6] = dw6; + dw[7] = 0; /* kernel 1 */ + dw[8] = 0; /* kernel 2 */ +} + +static inline void +gen7_3DSTATE_WM(struct ilo_builder *builder, + const struct ilo_shader_state *fs, + const struct ilo_rasterizer_state *rasterizer, + bool cc_may_kill, uint32_t hiz_op) +{ + const uint8_t cmd_len = 3; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); + const int num_samples = 1; + uint32_t dw1, dw2, *dw; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + /* see ilo_gpe_init_rasterizer_wm() */ + if (rasterizer) { + dw1 = rasterizer->wm.payload[0]; + dw2 = rasterizer->wm.payload[1]; + + assert(!hiz_op); + dw1 |= GEN7_WM_DW1_STATISTICS; + } + else { + dw1 = hiz_op; + dw2 = 0; + } + + if (fs) { + const struct ilo_shader_cso *fs_cso = ilo_shader_get_kernel_cso(fs); + + dw1 |= fs_cso->payload[3]; + } + + if (cc_may_kill) + dw1 |= GEN7_WM_DW1_PS_ENABLE | GEN7_WM_DW1_PS_KILL; + + if (num_samples > 1) { + dw1 |= rasterizer->wm.dw_msaa_rast; + dw2 |= rasterizer->wm.dw_msaa_disp; + } + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = dw1; + dw[2] = dw2; +} + +static inline void +gen7_3DSTATE_PS(struct ilo_builder *builder, + const struct ilo_shader_state *fs, + int num_samplers, bool dual_blend) +{ + const uint8_t cmd_len = 8; + const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); + const struct ilo_shader_cso *cso; + uint32_t dw2, dw4, dw5, *dw; + + 
ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + if (!fs) { + int max_threads; + + /* GPU hangs if none of the dispatch enable bits is set */ + dw4 = GEN7_PS_DW4_8_PIXEL_DISPATCH; + + /* see brwCreateContext() */ + switch (ilo_dev_gen(builder->dev)) { + case ILO_GEN(7.5): + max_threads = (builder->dev->gt == 3) ? 408 : + (builder->dev->gt == 2) ? 204 : 102; + dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT; + break; + case ILO_GEN(7): + default: + max_threads = (builder->dev->gt == 2) ? 172 : 48; + dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT; + break; + } + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = 0; + dw[2] = 0; + dw[3] = 0; + dw[4] = dw4; + dw[5] = 0; + dw[6] = 0; + dw[7] = 0; + + return; + } + + cso = ilo_shader_get_kernel_cso(fs); + dw2 = cso->payload[0]; + dw4 = cso->payload[1]; + dw5 = cso->payload[2]; + + dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; + + if (dual_blend) + dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = ilo_shader_get_kernel_offset(fs); + dw[2] = dw2; + dw[3] = 0; /* scratch */ + dw[4] = dw4; + dw[5] = dw5; + dw[6] = 0; /* kernel 1 */ + dw[7] = 0; /* kernel 2 */ +} + +static inline void +gen6_3DSTATE_CONSTANT_PS(struct ilo_builder *builder, + const uint32_t *bufs, const int *sizes, + int num_bufs) +{ + const uint8_t cmd_len = 5; + uint32_t buf_dw[4], buf_enabled; + uint32_t dw0, *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + assert(num_bufs <= 4); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 287: + * + * "The sum of all four read length fields (each incremented to + * represent the actual read length) must be less than or equal to 64" + */ + buf_enabled = gen6_fill_3dstate_constant(builder->dev, + bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw)); + + dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_PS) | + buf_enabled << 12 | + (cmd_len - 2); + + ilo_builder_batch_pointer(builder, 
cmd_len, &dw); + dw[0] = dw0; + memcpy(&dw[1], buf_dw, sizeof(buf_dw)); +} + +static inline void +gen7_3DSTATE_CONSTANT_PS(struct ilo_builder *builder, + const uint32_t *bufs, const int *sizes, + int num_bufs) +{ + gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_PS, + bufs, sizes, num_bufs); +} + +static inline void +gen7_3DSTATE_BINDING_TABLE_POINTERS_PS(struct ilo_builder *builder, + uint32_t binding_table) +{ + gen7_3dstate_pointer(builder, + GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_PS, + binding_table); +} + +static inline void +gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS(struct ilo_builder *builder, + uint32_t sampler_state) +{ + gen7_3dstate_pointer(builder, + GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_PS, + sampler_state); +} + +static inline void +gen6_3DSTATE_MULTISAMPLE(struct ilo_builder *builder, + int num_samples, + const uint32_t *packed_sample_pos, + bool pixel_location_center) +{ + const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 3; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) | + (cmd_len - 2); + uint32_t dw1, dw2, dw3, *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + dw1 = (pixel_location_center) ? 
+ GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER : GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER; + + switch (num_samples) { + case 0: + case 1: + dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1; + dw2 = 0; + dw3 = 0; + break; + case 4: + dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4; + dw2 = packed_sample_pos[0]; + dw3 = 0; + break; + case 8: + assert(ilo_dev_gen(builder->dev) >= ILO_GEN(7)); + dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8; + dw2 = packed_sample_pos[0]; + dw3 = packed_sample_pos[1]; + break; + default: + assert(!"unsupported sample count"); + dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1; + dw2 = 0; + dw3 = 0; + break; + } + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = dw1; + dw[2] = dw2; + if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) + dw[3] = dw3; +} + +static inline void +gen6_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder, + unsigned sample_mask) +{ + const uint8_t cmd_len = 2; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | + (cmd_len - 2); + const unsigned valid_mask = 0xf; + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + sample_mask &= valid_mask; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = sample_mask; +} + +static inline void +gen7_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder, + unsigned sample_mask, + int num_samples) +{ + const uint8_t cmd_len = 2; + const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | + (cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 294: + * + * "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field + * (Sample Mask) must be zero. + * + * If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field + * must be zero." 
+ */ + sample_mask &= valid_mask; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = sample_mask; +} + +static inline void +gen6_3DSTATE_DRAWING_RECTANGLE(struct ilo_builder *builder, + unsigned x, unsigned y, + unsigned width, unsigned height) +{ + const uint8_t cmd_len = 4; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_DRAWING_RECTANGLE) | + (cmd_len - 2); + unsigned xmax = x + width - 1; + unsigned ymax = y + height - 1; + int rect_limit; + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { + rect_limit = 16383; + } + else { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 230: + * + * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min) + * must be an even number" + */ + assert(y % 2 == 0); + + rect_limit = 8191; + } + + if (x > rect_limit) x = rect_limit; + if (y > rect_limit) y = rect_limit; + if (xmax > rect_limit) xmax = rect_limit; + if (ymax > rect_limit) ymax = rect_limit; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = dw0; + dw[1] = y << 16 | x; + dw[2] = ymax << 16 | xmax; + + /* + * There is no need to set the origin. It is intended to support front + * buffer rendering. 
+ */ + dw[3] = 0; +} + +static inline void +gen6_3DSTATE_POLY_STIPPLE_OFFSET(struct ilo_builder *builder, + int x_offset, int y_offset) +{ + const uint8_t cmd_len = 2; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_OFFSET) | + (cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + assert(x_offset >= 0 && x_offset <= 31); + assert(y_offset >= 0 && y_offset <= 31); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = x_offset << 8 | y_offset; +} + +static inline void +gen6_3DSTATE_POLY_STIPPLE_PATTERN(struct ilo_builder *builder, + const struct pipe_poly_stipple *pattern) +{ + const uint8_t cmd_len = 33; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_PATTERN) | + (cmd_len - 2); + uint32_t *dw; + int i; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + STATIC_ASSERT(Elements(pattern->stipple) == 32); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw++; + + for (i = 0; i < 32; i++) + dw[i] = pattern->stipple[i]; +} + +static inline void +gen6_3DSTATE_LINE_STIPPLE(struct ilo_builder *builder, + unsigned pattern, unsigned factor) +{ + const uint8_t cmd_len = 3; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_LINE_STIPPLE) | + (cmd_len - 2); + uint32_t *dw; + unsigned inverse; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + assert((pattern & 0xffff) == pattern); + assert(factor >= 1 && factor <= 256); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = pattern; + + if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { + /* in U1.16 */ + inverse = (unsigned) (65536.0f / factor); + dw[2] = inverse << 15 | factor; + } + else { + /* in U1.13 */ + inverse = (unsigned) (8192.0f / factor); + dw[2] = inverse << 16 | factor; + } +} + +static inline void +gen6_3DSTATE_AA_LINE_PARAMETERS(struct ilo_builder *builder) +{ + const uint8_t cmd_len = 3; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_AA_LINE_PARAMETERS) | + (cmd_len - 2); + uint32_t *dw; + + 
ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = 0 << 16 | 0; + dw[2] = 0 << 16 | 0; +} + +static inline void +gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder, + const struct ilo_zs_surface *zs) +{ + const uint8_t cmd_len = 7; + unsigned pos; + uint32_t dw0, *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + dw0 = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? + GEN7_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER) : + GEN6_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER); + dw0 |= (cmd_len - 2); + + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = zs->payload[0]; + + if (zs->bo) { + ilo_builder_batch_reloc(builder, pos + 2, + zs->bo, zs->payload[1], INTEL_RELOC_WRITE); + } else { + dw[2] = 0; + } + + dw[3] = zs->payload[2]; + dw[4] = zs->payload[3]; + dw[5] = zs->payload[4]; + dw[6] = zs->payload[5]; +} + +static inline void +gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder, + const struct ilo_zs_surface *zs) +{ + const uint8_t cmd_len = 3; + uint32_t dw0, *dw; + unsigned pos; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + dw0 = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? + GEN7_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER) : + GEN6_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER); + dw0 |= (cmd_len - 2); + + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + /* see ilo_gpe_init_zs_surface() */ + dw[1] = zs->payload[6]; + + if (zs->separate_s8_bo) { + ilo_builder_batch_reloc(builder, pos + 2, + zs->separate_s8_bo, zs->payload[7], INTEL_RELOC_WRITE); + } else { + dw[2] = 0; + } +} + +static inline void +gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder, + const struct ilo_zs_surface *zs) +{ + const uint8_t cmd_len = 3; + uint32_t dw0, *dw; + unsigned pos; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + dw0 = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 
+ GEN7_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER) : + GEN6_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER); + dw0 |= (cmd_len - 2); + + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + /* see ilo_gpe_init_zs_surface() */ + dw[1] = zs->payload[8]; + + if (zs->hiz_bo) { + ilo_builder_batch_reloc(builder, pos + 2, + zs->hiz_bo, zs->payload[9], INTEL_RELOC_WRITE); + } else { + dw[2] = 0; + } +} + +static inline void +gen6_3DSTATE_CLEAR_PARAMS(struct ilo_builder *builder, + uint32_t clear_val) +{ + const uint8_t cmd_len = 2; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) | + GEN6_CLEAR_PARAMS_DW0_VALID | + (cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = clear_val; +} + +static inline void +gen7_3DSTATE_CLEAR_PARAMS(struct ilo_builder *builder, + uint32_t clear_val) +{ + const uint8_t cmd_len = 3; + const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) | + (cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = clear_val; + dw[2] = 1; +} + +static inline void +gen6_3DSTATE_VIEWPORT_STATE_POINTERS(struct ilo_builder *builder, + uint32_t clip_viewport, + uint32_t sf_viewport, + uint32_t cc_viewport) +{ + const uint8_t cmd_len = 4; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VIEWPORT_STATE_POINTERS) | + GEN6_PTR_VP_DW0_CLIP_CHANGED | + GEN6_PTR_VP_DW0_SF_CHANGED | + GEN6_PTR_VP_DW0_CC_CHANGED | + (cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = clip_viewport; + dw[2] = sf_viewport; + dw[3] = cc_viewport; +} + +static inline void +gen6_3DSTATE_SCISSOR_STATE_POINTERS(struct ilo_builder *builder, + uint32_t scissor_rect) +{ + const uint8_t cmd_len = 2; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SCISSOR_STATE_POINTERS) | + 
(cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = scissor_rect; +} + +static inline void +gen6_3DSTATE_CC_STATE_POINTERS(struct ilo_builder *builder, + uint32_t blend_state, + uint32_t depth_stencil_state, + uint32_t color_calc_state) +{ + const uint8_t cmd_len = 4; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CC_STATE_POINTERS) | + (cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = blend_state | 1; + dw[2] = depth_stencil_state | 1; + dw[3] = color_calc_state | 1; +} + +static inline void +gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(struct ilo_builder *builder, + uint32_t sf_clip_viewport) +{ + gen7_3dstate_pointer(builder, + GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, + sf_clip_viewport); +} + +static inline void +gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(struct ilo_builder *builder, + uint32_t cc_viewport) +{ + gen7_3dstate_pointer(builder, + GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_CC, + cc_viewport); +} + +static inline void +gen7_3DSTATE_CC_STATE_POINTERS(struct ilo_builder *builder, + uint32_t color_calc_state) +{ + gen7_3dstate_pointer(builder, + GEN6_RENDER_OPCODE_3DSTATE_CC_STATE_POINTERS, color_calc_state); +} + +static inline void +gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(struct ilo_builder *builder, + uint32_t depth_stencil_state) +{ + gen7_3dstate_pointer(builder, + GEN7_RENDER_OPCODE_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, + depth_stencil_state); +} + +static inline void +gen7_3DSTATE_BLEND_STATE_POINTERS(struct ilo_builder *builder, + uint32_t blend_state) +{ + gen7_3dstate_pointer(builder, + GEN7_RENDER_OPCODE_3DSTATE_BLEND_STATE_POINTERS, + blend_state); +} + +static inline uint32_t +gen6_CLIP_VIEWPORT(struct ilo_builder *builder, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports) +{ + const int 
state_align = 32; + const int state_len = 4 * num_viewports; + uint32_t state_offset, *dw; + unsigned i; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 193: + * + * "The viewport-related state is stored as an array of up to 16 + * elements..." + */ + assert(num_viewports && num_viewports <= 16); + + state_offset = ilo_builder_state_pointer(builder, + ILO_BUILDER_ITEM_CLIP_VIEWPORT, state_align, state_len, &dw); + + for (i = 0; i < num_viewports; i++) { + const struct ilo_viewport_cso *vp = &viewports[i]; + + dw[0] = fui(vp->min_gbx); + dw[1] = fui(vp->max_gbx); + dw[2] = fui(vp->min_gby); + dw[3] = fui(vp->max_gby); + + dw += 4; + } + + return state_offset; +} + +static inline uint32_t +gen6_SF_VIEWPORT(struct ilo_builder *builder, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports) +{ + const int state_align = 32; + const int state_len = 8 * num_viewports; + uint32_t state_offset, *dw; + unsigned i; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 262: + * + * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is + * stored as an array of up to 16 elements..." 
+ */ + assert(num_viewports && num_viewports <= 16); + + state_offset = ilo_builder_state_pointer(builder, + ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &dw); + + for (i = 0; i < num_viewports; i++) { + const struct ilo_viewport_cso *vp = &viewports[i]; + + dw[0] = fui(vp->m00); + dw[1] = fui(vp->m11); + dw[2] = fui(vp->m22); + dw[3] = fui(vp->m30); + dw[4] = fui(vp->m31); + dw[5] = fui(vp->m32); + dw[6] = 0; + dw[7] = 0; + + dw += 8; + } + + return state_offset; +} + +static inline uint32_t +gen7_SF_CLIP_VIEWPORT(struct ilo_builder *builder, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports) +{ + const int state_align = 64; + const int state_len = 16 * num_viewports; + uint32_t state_offset, *dw; + unsigned i; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 270: + * + * "The viewport-specific state used by both the SF and CL units + * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each + * of which contains the DWords described below. The start of each + * element is spaced 16 DWords apart. The location of first element of + * the array, as specified by both Pointer to SF_VIEWPORT and Pointer + * to CLIP_VIEWPORT, is aligned to a 64-byte boundary." 
+ */ + assert(num_viewports && num_viewports <= 16); + + state_offset = ilo_builder_state_pointer(builder, + ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &dw); + + for (i = 0; i < num_viewports; i++) { + const struct ilo_viewport_cso *vp = &viewports[i]; + + dw[0] = fui(vp->m00); + dw[1] = fui(vp->m11); + dw[2] = fui(vp->m22); + dw[3] = fui(vp->m30); + dw[4] = fui(vp->m31); + dw[5] = fui(vp->m32); + dw[6] = 0; + dw[7] = 0; + dw[8] = fui(vp->min_gbx); + dw[9] = fui(vp->max_gbx); + dw[10] = fui(vp->min_gby); + dw[11] = fui(vp->max_gby); + dw[12] = 0; + dw[13] = 0; + dw[14] = 0; + dw[15] = 0; + + dw += 16; + } + + return state_offset; +} + +static inline uint32_t +gen6_CC_VIEWPORT(struct ilo_builder *builder, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports) +{ + const int state_align = 32; + const int state_len = 2 * num_viewports; + uint32_t state_offset, *dw; + unsigned i; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 385: + * + * "The viewport state is stored as an array of up to 16 elements..." + */ + assert(num_viewports && num_viewports <= 16); + + state_offset = ilo_builder_state_pointer(builder, + ILO_BUILDER_ITEM_CC_VIEWPORT, state_align, state_len, &dw); + + for (i = 0; i < num_viewports; i++) { + const struct ilo_viewport_cso *vp = &viewports[i]; + + dw[0] = fui(vp->min_z); + dw[1] = fui(vp->max_z); + + dw += 2; + } + + return state_offset; +} + +static inline uint32_t +gen6_SCISSOR_RECT(struct ilo_builder *builder, + const struct ilo_scissor_state *scissor, + unsigned num_viewports) +{ + const int state_align = 32; + const int state_len = 2 * num_viewports; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 263: + * + * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is + * stored as an array of up to 16 elements..." 
+ */ + assert(num_viewports && num_viewports <= 16); + assert(Elements(scissor->payload) >= state_len); + + return ilo_builder_state_write(builder, ILO_BUILDER_ITEM_SCISSOR_RECT, + state_align, state_len, scissor->payload); +} + +static inline uint32_t +gen6_COLOR_CALC_STATE(struct ilo_builder *builder, + const struct pipe_stencil_ref *stencil_ref, + ubyte alpha_ref, + const struct pipe_blend_color *blend_color) +{ + const int state_align = 64; + const int state_len = 6; + uint32_t state_offset, *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + state_offset = ilo_builder_state_pointer(builder, + ILO_BUILDER_ITEM_COLOR_CALC, state_align, state_len, &dw); + + dw[0] = stencil_ref->ref_value[0] << 24 | + stencil_ref->ref_value[1] << 16 | + GEN6_CC_DW0_ALPHATEST_UNORM8; + dw[1] = alpha_ref; + dw[2] = fui(blend_color->color[0]); + dw[3] = fui(blend_color->color[1]); + dw[4] = fui(blend_color->color[2]); + dw[5] = fui(blend_color->color[3]); + + return state_offset; +} + +static inline uint32_t +gen6_DEPTH_STENCIL_STATE(struct ilo_builder *builder, + const struct ilo_dsa_state *dsa) +{ + const int state_align = 64; + const int state_len = 3; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + STATIC_ASSERT(Elements(dsa->payload) >= state_len); + + return ilo_builder_state_write(builder, ILO_BUILDER_ITEM_DEPTH_STENCIL, + state_align, state_len, dsa->payload); +} + +static inline uint32_t +gen6_BLEND_STATE(struct ilo_builder *builder, + const struct ilo_blend_state *blend, + const struct ilo_fb_state *fb, + const struct ilo_dsa_state *dsa) +{ + const int state_align = 64; + int state_len; + uint32_t state_offset, *dw; + unsigned num_targets, i; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 376: + * + * "The blend state is stored as an array of up to 8 elements..." 
+ */ + num_targets = fb->state.nr_cbufs; + assert(num_targets <= 8); + + if (!num_targets) { + if (!dsa->dw_alpha) + return 0; + /* to be able to reference alpha func */ + num_targets = 1; + } + + state_len = 2 * num_targets; + + state_offset = ilo_builder_state_pointer(builder, + ILO_BUILDER_ITEM_BLEND, state_align, state_len, &dw); + + for (i = 0; i < num_targets; i++) { + const unsigned idx = (blend->independent_blend_enable) ? i : 0; + const struct ilo_blend_cso *cso = &blend->cso[idx]; + const int num_samples = fb->num_samples; + const struct util_format_description *format_desc = + (idx < fb->state.nr_cbufs && fb->state.cbufs[idx]) ? + util_format_description(fb->state.cbufs[idx]->format) : NULL; + bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one; + + rt_is_unorm = true; + rt_is_pure_integer = false; + rt_dst_alpha_forced_one = false; + + if (format_desc) { + int ch; + + switch (format_desc->format) { + case PIPE_FORMAT_B8G8R8X8_UNORM: + /* force alpha to one when the HW format has alpha */ + assert(ilo_translate_render_format(builder->dev, + PIPE_FORMAT_B8G8R8X8_UNORM) == + GEN6_FORMAT_B8G8R8A8_UNORM); + rt_dst_alpha_forced_one = true; + break; + default: + break; + } + + for (ch = 0; ch < 4; ch++) { + if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID) + continue; + + if (format_desc->channel[ch].pure_integer) { + rt_is_unorm = false; + rt_is_pure_integer = true; + break; + } + + if (!format_desc->channel[ch].normalized || + format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED) + rt_is_unorm = false; + } + } + + dw[0] = cso->payload[0]; + dw[1] = cso->payload[1]; + + if (!rt_is_pure_integer) { + if (rt_dst_alpha_forced_one) + dw[0] |= cso->dw_blend_dst_alpha_forced_one; + else + dw[0] |= cso->dw_blend; + } + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 365: + * + * "Logic Ops are only supported on *_UNORM surfaces (excluding + * _SRGB variants), otherwise Logic Ops must be DISABLED." 
+ * + * Since logicop is ignored for non-UNORM color buffers, no special care + * is needed. + */ + if (rt_is_unorm) + dw[1] |= cso->dw_logicop; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 356: + * + * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage + * Dither both must be disabled." + * + * There is no such limitation on GEN7, or for AlphaToOne. But GL + * requires that anyway. + */ + if (num_samples > 1) + dw[1] |= cso->dw_alpha_mod; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 382: + * + * "Alpha Test can only be enabled if Pixel Shader outputs a float + * alpha value." + */ + if (!rt_is_pure_integer) + dw[1] |= dsa->dw_alpha; + + dw += 2; + } + + return state_offset; +} + +#endif /* ILO_BUILDER_3D_BOTTOM_H */ diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.h b/src/gallium/drivers/ilo/ilo_builder_3d_top.h index 9739665d753..e742f63d698 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen7.h +++ b/src/gallium/drivers/ilo/ilo_builder_3d_top.h @@ -1,7 +1,7 @@ /* * Mesa 3-D graphics library * - * Copyright (C) 2013 LunarG, Inc. + * Copyright (C) 2014 LunarG, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,328 +25,632 @@ * Chia-I Wu <[email protected]> */ -#ifndef ILO_GPE_GEN7_H -#define ILO_GPE_GEN7_H +#ifndef ILO_BUILDER_3D_TOP_H +#define ILO_BUILDER_3D_TOP_H +#include "genhw/genhw.h" #include "intel_winsys.h" #include "ilo_common.h" -#include "ilo_cp.h" +#include "ilo_gpe.h" #include "ilo_resource.h" #include "ilo_shader.h" -#include "ilo_gpe_gen6.h" +#include "ilo_builder.h" static inline void -gen7_3DSTATE_CLEAR_PARAMS(struct ilo_builder *builder, - uint32_t clear_val) +gen6_3DSTATE_URB(struct ilo_builder *builder, + int vs_total_size, int gs_total_size, + int vs_entry_size, int gs_entry_size) { const uint8_t cmd_len = 3; - const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) | - (cmd_len - 2); + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (cmd_len - 2); + const int row_size = 128; /* 1024 bits */ + int vs_alloc_size, gs_alloc_size; + int vs_num_entries, gs_num_entries; uint32_t *dw; - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + ILO_DEV_ASSERT(builder->dev, 6, 6); - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = clear_val; - dw[2] = 1; -} + /* in 1024-bit URB rows */ + vs_alloc_size = (vs_entry_size + row_size - 1) / row_size; + gs_alloc_size = (gs_entry_size + row_size - 1) / row_size; -static inline void -gen7_3DSTATE_VF(struct ilo_builder *builder, - bool enable_cut_index, - uint32_t cut_index) -{ - const uint8_t cmd_len = 2; - uint32_t dw0 = GEN75_RENDER_CMD(3D, 3DSTATE_VF) | (cmd_len - 2); - uint32_t *dw; + /* the valid range is [1, 5] */ + if (!vs_alloc_size) + vs_alloc_size = 1; + if (!gs_alloc_size) + gs_alloc_size = 1; + assert(vs_alloc_size <= 5 && gs_alloc_size <= 5); - ILO_DEV_ASSERT(builder->dev, 7.5, 7.5); + /* the valid range is [24, 256] in multiples of 4 */ + vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3; + if 
(vs_num_entries > 256) + vs_num_entries = 256; + assert(vs_num_entries >= 24); - if (enable_cut_index) - dw0 |= GEN75_VF_DW0_CUT_INDEX_ENABLE; + /* the valid range is [0, 256] in multiples of 4 */ + gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3; + if (gs_num_entries > 256) + gs_num_entries = 256; ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - dw[1] = cut_index; + dw[1] = (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT | + vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT; + dw[2] = gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT | + (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT; } static inline void -gen7_3dstate_pointer(struct ilo_builder *builder, - int subop, uint32_t pointer) +gen7_3dstate_push_constant_alloc(struct ilo_builder *builder, + int subop, int offset, int size) { const uint8_t cmd_len = 2; const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER | GEN6_RENDER_SUBTYPE_3D | subop | (cmd_len - 2); uint32_t *dw; + int end; ILO_DEV_ASSERT(builder->dev, 7, 7.5); + /* VS, HS, DS, GS, and PS variants */ + assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS && + subop <= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 68: + * + * "(A table that says the maximum size of each constant buffer is + * 16KB") + * + * From the Ivy Bridge PRM, volume 2 part 1, page 115: + * + * "The sum of the Constant Buffer Offset and the Constant Buffer Size + * may not exceed the maximum value of the Constant Buffer Size." + * + * Thus, the valid range of buffer end is [0KB, 16KB]. 
+ */ + end = (offset + size) / 1024; + if (end > 16) { + assert(!"invalid constant buffer end"); + end = 16; + } + + /* the valid range of buffer offset is [0KB, 15KB] */ + offset = (offset + 1023) / 1024; + if (offset > 15) { + assert(!"invalid constant buffer offset"); + offset = 15; + } + + if (offset > end) { + assert(!size); + offset = end; + } + + /* the valid range of buffer size is [0KB, 15KB] */ + size = end - offset; + if (size > 15) { + assert(!"invalid constant buffer size"); + size = 15; + } + ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - dw[1] = pointer; + dw[1] = offset << GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT | + size; } static inline void -gen7_3DSTATE_CC_STATE_POINTERS(struct ilo_builder *builder, - uint32_t color_calc_state) +gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder, + int offset, int size) { - gen7_3dstate_pointer(builder, - GEN6_RENDER_OPCODE_3DSTATE_CC_STATE_POINTERS, color_calc_state); + gen7_3dstate_push_constant_alloc(builder, + GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS, offset, size); } static inline void -gen7_3DSTATE_GS(struct ilo_builder *builder, - const struct ilo_shader_state *gs, - int num_samplers) +gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS(struct ilo_builder *builder, + int offset, int size) { - const uint8_t cmd_len = 7; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); - const struct ilo_shader_cso *cso; - uint32_t dw2, dw4, dw5, *dw; - - ILO_DEV_ASSERT(builder->dev, 7, 7.5); - - if (!gs) { - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = 0; - dw[2] = 0; - dw[3] = 0; - dw[4] = 0; - dw[5] = GEN7_GS_DW5_STATISTICS; - dw[6] = 0; - return; - } - - cso = ilo_shader_get_kernel_cso(gs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; - - dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = 
ilo_shader_get_kernel_offset(gs); - dw[2] = dw2; - dw[3] = 0; /* scratch */ - dw[4] = dw4; - dw[5] = dw5; - dw[6] = 0; + gen7_3dstate_push_constant_alloc(builder, + GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_HS, offset, size); } static inline void -gen7_3DSTATE_SF(struct ilo_builder *builder, - const struct ilo_rasterizer_state *rasterizer, - enum pipe_format zs_format) +gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS(struct ilo_builder *builder, + int offset, int size) { - const uint8_t cmd_len = 7; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2); - const int num_samples = 1; - uint32_t payload[6], *dw; - - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + gen7_3dstate_push_constant_alloc(builder, + GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_DS, offset, size); +} - ilo_gpe_gen6_fill_3dstate_sf_raster(builder->dev, - rasterizer, num_samples, zs_format, - payload, Elements(payload)); +static inline void +gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(struct ilo_builder *builder, + int offset, int size) +{ + gen7_3dstate_push_constant_alloc(builder, + GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_GS, offset, size); +} - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - memcpy(&dw[1], payload, sizeof(payload)); +static inline void +gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder, + int offset, int size) +{ + gen7_3dstate_push_constant_alloc(builder, + GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS, offset, size); } static inline void -gen7_3DSTATE_WM(struct ilo_builder *builder, - const struct ilo_shader_state *fs, - const struct ilo_rasterizer_state *rasterizer, - bool cc_may_kill, uint32_t hiz_op) +gen7_3dstate_urb(struct ilo_builder *builder, + int subop, int offset, int size, + int entry_size) { - const uint8_t cmd_len = 3; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); - const int num_samples = 1; - uint32_t dw1, dw2, *dw; + const uint8_t cmd_len = 2; + const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER | + 
GEN6_RENDER_SUBTYPE_3D | + subop | (cmd_len - 2); + const int row_size = 64; /* 512 bits */ + int alloc_size, num_entries, min_entries, max_entries; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - /* see ilo_gpe_init_rasterizer_wm() */ - if (rasterizer) { - dw1 = rasterizer->wm.payload[0]; - dw2 = rasterizer->wm.payload[1]; + /* VS, HS, DS, and GS variants */ + assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_URB_VS && + subop <= GEN7_RENDER_OPCODE_3DSTATE_URB_GS); - assert(!hiz_op); - dw1 |= GEN7_WM_DW1_STATISTICS; - } - else { - dw1 = hiz_op; - dw2 = 0; - } + /* in multiples of 8KB */ + assert(offset % 8192 == 0); + offset /= 8192; + + /* in multiple of 512-bit rows */ + alloc_size = (entry_size + row_size - 1) / row_size; + if (!alloc_size) + alloc_size = 1; + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 34: + * + * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may + * cause performance to decrease due to banking in the URB. Element + * sizes of 16 to 20 should be programmed with six 512-bit URB rows." + */ + if (subop == GEN7_RENDER_OPCODE_3DSTATE_URB_VS && alloc_size == 5) + alloc_size = 6; - if (fs) { - const struct ilo_shader_cso *fs_cso = ilo_shader_get_kernel_cso(fs); + /* in multiples of 8 */ + num_entries = (size / row_size / alloc_size) & ~7; - dw1 |= fs_cso->payload[3]; - } + switch (subop) { + case GEN7_RENDER_OPCODE_3DSTATE_URB_VS: + switch (ilo_dev_gen(builder->dev)) { + case ILO_GEN(7.5): + max_entries = (builder->dev->gt >= 2) ? 1664 : 640; + min_entries = (builder->dev->gt >= 2) ? 64 : 32; + break; + case ILO_GEN(7): + default: + max_entries = (builder->dev->gt == 2) ? 704 : 512; + min_entries = 32; + break; + } - if (cc_may_kill) - dw1 |= GEN7_WM_DW1_PS_ENABLE | GEN7_WM_DW1_PS_KILL; + assert(num_entries >= min_entries); + if (num_entries > max_entries) + num_entries = max_entries; + break; + case GEN7_RENDER_OPCODE_3DSTATE_URB_HS: + max_entries = (builder->dev->gt == 2) ? 
64 : 32; + if (num_entries > max_entries) + num_entries = max_entries; + break; + case GEN7_RENDER_OPCODE_3DSTATE_URB_DS: + if (num_entries) + assert(num_entries >= 138); + break; + case GEN7_RENDER_OPCODE_3DSTATE_URB_GS: + switch (ilo_dev_gen(builder->dev)) { + case ILO_GEN(7.5): + max_entries = (builder->dev->gt >= 2) ? 640 : 256; + break; + case ILO_GEN(7): + default: + max_entries = (builder->dev->gt == 2) ? 320 : 192; + break; + } - if (num_samples > 1) { - dw1 |= rasterizer->wm.dw_msaa_rast; - dw2 |= rasterizer->wm.dw_msaa_disp; + if (num_entries > max_entries) + num_entries = max_entries; + break; + default: + break; } ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - dw[1] = dw1; - dw[2] = dw2; + dw[1] = offset << GEN7_URB_ANY_DW1_OFFSET__SHIFT | + (alloc_size - 1) << GEN7_URB_ANY_DW1_ENTRY_SIZE__SHIFT | + num_entries; } static inline void -gen7_3dstate_constant(struct ilo_builder *builder, - int subop, - const uint32_t *bufs, const int *sizes, - int num_bufs) +gen7_3DSTATE_URB_VS(struct ilo_builder *builder, + int offset, int size, int entry_size) { - const uint8_t cmd_len = 7; - const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER | - GEN6_RENDER_SUBTYPE_3D | - subop | (cmd_len - 2); - uint32_t payload[6], *dw; - int total_read_length, i; + gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_VS, + offset, size, entry_size); +} - ILO_DEV_ASSERT(builder->dev, 7, 7.5); +static inline void +gen7_3DSTATE_URB_HS(struct ilo_builder *builder, + int offset, int size, int entry_size) +{ + gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_HS, + offset, size, entry_size); +} - /* VS, HS, DS, GS, and PS variants */ - assert(subop >= GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS && - subop <= GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS && - subop != GEN6_RENDER_OPCODE_3DSTATE_SAMPLE_MASK); +static inline void +gen7_3DSTATE_URB_DS(struct ilo_builder *builder, + int offset, int size, int entry_size) +{ + gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_DS, 
+ offset, size, entry_size); +} - assert(num_bufs <= 4); +static inline void +gen7_3DSTATE_URB_GS(struct ilo_builder *builder, + int offset, int size, int entry_size) +{ + gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_GS, + offset, size, entry_size); +} - payload[0] = 0; - payload[1] = 0; +static inline void +gen7_3DSTATE_VF(struct ilo_builder *builder, + bool enable_cut_index, + uint32_t cut_index) +{ + const uint8_t cmd_len = 2; + uint32_t dw0 = GEN75_RENDER_CMD(3D, 3DSTATE_VF) | (cmd_len - 2); + uint32_t *dw; - total_read_length = 0; - for (i = 0; i < 4; i++) { - int read_len; + ILO_DEV_ASSERT(builder->dev, 7.5, 7.5); - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 112: - * - * "Constant buffers must be enabled in order from Constant Buffer 0 - * to Constant Buffer 3 within this command. For example, it is - * not allowed to enable Constant Buffer 1 by programming a - * non-zero value in the VS Constant Buffer 1 Read Length without a - * non-zero value in VS Constant Buffer 0 Read Length." - */ - if (i >= num_bufs || !sizes[i]) { - for (; i < 4; i++) { - assert(i >= num_bufs || !sizes[i]); - payload[2 + i] = 0; - } - break; - } + if (enable_cut_index) + dw0 |= GEN75_VF_DW0_CUT_INDEX_ENABLE; - /* read lengths are in 256-bit units */ - read_len = (sizes[i] + 31) / 32; - /* the lower 5 bits are used for memory object control state */ - assert(bufs[i] % 32 == 0); + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = cut_index; +} - payload[i / 2] |= read_len << ((i % 2) ? 
16 : 0); - payload[2 + i] = bufs[i]; +static inline void +gen6_3DSTATE_VF_STATISTICS(struct ilo_builder *builder, + bool enable) +{ + const uint8_t cmd_len = 1; + const uint32_t dw0 = GEN6_RENDER_CMD(SINGLE_DW, 3DSTATE_VF_STATISTICS) | + enable; - total_read_length += read_len; - } + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + ilo_builder_batch_write(builder, cmd_len, &dw0); +} + +static inline void +gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder, + const struct ilo_ve_state *ve, + const struct ilo_vb_state *vb) +{ + uint8_t cmd_len; + uint32_t dw0, *dw; + unsigned hw_idx, pos; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); /* - * From the Ivy Bridge PRM, volume 2 part 1, page 113: + * From the Sandy Bridge PRM, volume 2 part 1, page 82: * - * "The sum of all four read length fields must be less than or equal - * to the size of 64" + * "From 1 to 33 VBs can be specified..." */ - assert(total_read_length <= 64); + assert(ve->vb_count <= 33); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + if (!ve->vb_count) + return; + + cmd_len = 1 + 4 * ve->vb_count; + dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) | + (cmd_len - 2); + + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - memcpy(&dw[1], payload, sizeof(payload)); -} -static inline void -gen7_3DSTATE_CONSTANT_VS(struct ilo_builder *builder, - const uint32_t *bufs, const int *sizes, - int num_bufs) -{ - gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS, - bufs, sizes, num_bufs); + dw++; + pos++; + for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { + const unsigned instance_divisor = ve->instance_divisors[hw_idx]; + const unsigned pipe_idx = ve->vb_mapping[hw_idx]; + const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx]; + + dw[0] = hw_idx << GEN6_VB_STATE_DW0_INDEX__SHIFT; + + if (instance_divisor) + dw[0] |= GEN6_VB_STATE_DW0_ACCESS_INSTANCEDATA; + else + dw[0] |= GEN6_VB_STATE_DW0_ACCESS_VERTEXDATA; + + if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) + dw[0] 
|= GEN7_VB_STATE_DW0_ADDR_MODIFIED; + + /* use null vb if there is no buffer or the stride is out of range */ + if (cso->buffer && cso->stride <= 2048) { + const struct ilo_buffer *buf = ilo_buffer(cso->buffer); + const uint32_t start_offset = cso->buffer_offset; + const uint32_t end_offset = buf->bo_size - 1; + + dw[0] |= cso->stride << GEN6_VB_STATE_DW0_PITCH__SHIFT; + ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0); + ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0); + } + else { + dw[0] |= 1 << 13; + dw[1] = 0; + dw[2] = 0; + } + + dw[3] = instance_divisor; + + dw += 4; + pos += 4; + } } static inline void -gen7_3DSTATE_CONSTANT_GS(struct ilo_builder *builder, - const uint32_t *bufs, const int *sizes, - int num_bufs) +ve_init_cso_with_components(const struct ilo_dev_info *dev, + int comp0, int comp1, int comp2, int comp3, + struct ilo_ve_cso *cso) { - gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_GS, - bufs, sizes, num_bufs); + ILO_DEV_ASSERT(dev, 6, 7.5); + + STATIC_ASSERT(Elements(cso->payload) >= 2); + cso->payload[0] = GEN6_VE_STATE_DW0_VALID; + cso->payload[1] = + comp0 << GEN6_VE_STATE_DW1_COMP0__SHIFT | + comp1 << GEN6_VE_STATE_DW1_COMP1__SHIFT | + comp2 << GEN6_VE_STATE_DW1_COMP2__SHIFT | + comp3 << GEN6_VE_STATE_DW1_COMP3__SHIFT; } static inline void -gen7_3DSTATE_CONSTANT_PS(struct ilo_builder *builder, - const uint32_t *bufs, const int *sizes, - int num_bufs) +ve_set_cso_edgeflag(const struct ilo_dev_info *dev, + struct ilo_ve_cso *cso) { - gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_PS, - bufs, sizes, num_bufs); + int format; + + ILO_DEV_ASSERT(dev, 6, 7.5); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 94: + * + * "- This bit (Edge Flag Enable) must only be ENABLED on the last + * valid VERTEX_ELEMENT structure. + * + * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC, + * and Component 1-3 Control must be set to VFCOMP_NOSTORE. 
+ * + * - The Source Element Format must be set to the UINT format. + * + * - [DevSNB]: Edge Flags are not supported for QUADLIST + * primitives. Software may elect to convert QUADLIST primitives + * to some set of corresponding edge-flag-supported primitive + * types (e.g., POLYGONs) prior to submission to the 3D pipeline." + */ + + cso->payload[0] |= GEN6_VE_STATE_DW0_EDGE_FLAG_ENABLE; + cso->payload[1] = + GEN6_VFCOMP_STORE_SRC << GEN6_VE_STATE_DW1_COMP0__SHIFT | + GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP1__SHIFT | + GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP2__SHIFT | + GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP3__SHIFT; + + /* + * Edge flags have format GEN6_FORMAT_R8_UINT when defined via + * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined + * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h. + * + * Since all the hardware cares about is whether the flags are zero or not, + * we can treat them as GEN6_FORMAT_R32_UINT in the latter case. + */ + format = (cso->payload[0] >> GEN6_VE_STATE_DW0_FORMAT__SHIFT) & 0x1ff; + if (format == GEN6_FORMAT_R32_FLOAT) { + STATIC_ASSERT(GEN6_FORMAT_R32_UINT == GEN6_FORMAT_R32_FLOAT - 1); + cso->payload[0] -= (1 << GEN6_VE_STATE_DW0_FORMAT__SHIFT); + } + else { + assert(format == GEN6_FORMAT_R8_UINT); + } } static inline void -gen7_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder, - unsigned sample_mask, - int num_samples) +gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder, + const struct ilo_ve_state *ve, + bool last_velement_edgeflag, + bool prepend_generated_ids) { - const uint8_t cmd_len = 2; - const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | - (cmd_len - 2); - uint32_t *dw; + uint8_t cmd_len; + uint32_t dw0, *dw; + unsigned i; - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + ILO_DEV_ASSERT(builder->dev, 6, 7.5); /* - * From the Ivy Bridge PRM, volume 2 part 1, page 294: - * - * "If Number of Multisamples is 
NUMSAMPLES_1, bits 7:1 of this field - * (Sample Mask) must be zero. + * From the Sandy Bridge PRM, volume 2 part 1, page 93: * - * If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field - * must be zero." + * "Up to 34 (DevSNB+) vertex elements are supported." */ - sample_mask &= valid_mask; + assert(ve->count + prepend_generated_ids <= 34); + + STATIC_ASSERT(Elements(ve->cso[0].payload) == 2); + + if (!ve->count && !prepend_generated_ids) { + struct ilo_ve_cso dummy; + + ve_init_cso_with_components(builder->dev, + GEN6_VFCOMP_STORE_0, + GEN6_VFCOMP_STORE_0, + GEN6_VFCOMP_STORE_0, + GEN6_VFCOMP_STORE_1_FP, + &dummy); + + cmd_len = 3; + dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | + (cmd_len - 2); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + memcpy(&dw[1], dummy.payload, sizeof(dummy.payload)); + + return; + } + + cmd_len = 2 * (ve->count + prepend_generated_ids) + 1; + dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | + (cmd_len - 2); ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - dw[1] = sample_mask; + dw++; + + if (prepend_generated_ids) { + struct ilo_ve_cso gen_ids; + + ve_init_cso_with_components(builder->dev, + GEN6_VFCOMP_STORE_VID, + GEN6_VFCOMP_STORE_IID, + GEN6_VFCOMP_NOSTORE, + GEN6_VFCOMP_NOSTORE, + &gen_ids); + + memcpy(dw, gen_ids.payload, sizeof(gen_ids.payload)); + dw += 2; + } + + if (last_velement_edgeflag) { + struct ilo_ve_cso edgeflag; + + for (i = 0; i < ve->count - 1; i++) + memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload)); + + edgeflag = ve->cso[i]; + ve_set_cso_edgeflag(builder->dev, &edgeflag); + memcpy(&dw[2 * i], edgeflag.payload, sizeof(edgeflag.payload)); + } + else { + for (i = 0; i < ve->count; i++) + memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload)); + } } static inline void -gen7_3DSTATE_CONSTANT_HS(struct ilo_builder *builder, - const uint32_t *bufs, const int *sizes, - int num_bufs) +gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder 
*builder, + const struct ilo_ib_state *ib, + bool enable_cut_index) { - gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_HS, - bufs, sizes, num_bufs); + const uint8_t cmd_len = 3; + struct ilo_buffer *buf = ilo_buffer(ib->hw_resource); + uint32_t start_offset, end_offset; + int format; + unsigned pos; + uint32_t dw0, *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + if (!buf) + return; + + /* this is moved to the new 3DSTATE_VF */ + if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) + assert(!enable_cut_index); + + switch (ib->hw_index_size) { + case 4: + format = GEN6_IB_DW0_FORMAT_DWORD; + break; + case 2: + format = GEN6_IB_DW0_FORMAT_WORD; + break; + case 1: + format = GEN6_IB_DW0_FORMAT_BYTE; + break; + default: + assert(!"unknown index size"); + format = GEN6_IB_DW0_FORMAT_BYTE; + break; + } + + /* + * set start_offset to 0 here and adjust pipe_draw_info::start with + * ib->draw_start_offset in 3DPRIMITIVE + */ + start_offset = 0; + end_offset = buf->bo_size; + + /* end_offset must also be aligned and is inclusive */ + end_offset -= (end_offset % ib->hw_index_size); + end_offset--; + + dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | + format | + (cmd_len - 2); + if (enable_cut_index) + dw0 |= GEN6_IB_DW0_CUT_INDEX_ENABLE; + + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0); + ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0); } static inline void -gen7_3DSTATE_CONSTANT_DS(struct ilo_builder *builder, - const uint32_t *bufs, const int *sizes, - int num_bufs) +gen6_3DSTATE_VS(struct ilo_builder *builder, + const struct ilo_shader_state *vs, + int num_samplers) { - gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS, - bufs, sizes, num_bufs); + const uint8_t cmd_len = 6; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2); + const struct ilo_shader_cso *cso; + uint32_t dw2, dw4, dw5, *dw; + + 
ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + if (!vs) { + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = 0; + dw[2] = 0; + dw[3] = 0; + dw[4] = 0; + dw[5] = 0; + + return; + } + + cso = ilo_shader_get_kernel_cso(vs); + dw2 = cso->payload[0]; + dw4 = cso->payload[1]; + dw5 = cso->payload[2]; + + dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = ilo_shader_get_kernel_offset(vs); + dw[2] = dw2; + dw[3] = 0; /* scratch */ + dw[4] = dw4; + dw[5] = dw5; } static inline void @@ -411,6 +715,139 @@ gen7_3DSTATE_DS(struct ilo_builder *builder, } static inline void +gen6_3DSTATE_GS(struct ilo_builder *builder, + const struct ilo_shader_state *gs, + const struct ilo_shader_state *vs, + int verts_per_prim) +{ + const uint8_t cmd_len = 7; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); + uint32_t dw1, dw2, dw4, dw5, dw6, *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + if (gs) { + const struct ilo_shader_cso *cso; + + dw1 = ilo_shader_get_kernel_offset(gs); + + cso = ilo_shader_get_kernel_cso(gs); + dw2 = cso->payload[0]; + dw4 = cso->payload[1]; + dw5 = cso->payload[2]; + dw6 = cso->payload[3]; + } + else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) { + struct ilo_shader_cso cso; + enum ilo_kernel_param param; + + switch (verts_per_prim) { + case 1: + param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET; + break; + case 2: + param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET; + break; + default: + param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET; + break; + } + + dw1 = ilo_shader_get_kernel_offset(vs) + + ilo_shader_get_kernel_param(vs, param); + + /* cannot use VS's CSO */ + ilo_gpe_init_gs_cso_gen6(builder->dev, vs, &cso); + dw2 = cso.payload[0]; + dw4 = cso.payload[1]; + dw5 = cso.payload[2]; + dw6 = cso.payload[3]; + } + else { + dw1 = 0; + dw2 = 0; + dw4 = 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT; + dw5 = GEN6_GS_DW5_STATISTICS; + dw6 
= 0; + } + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = dw1; + dw[2] = dw2; + dw[3] = 0; + dw[4] = dw4; + dw[5] = dw5; + dw[6] = dw6; +} + +static inline void +gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder, + int index, unsigned svbi, + unsigned max_svbi, + bool load_vertex_count) +{ + const uint8_t cmd_len = 4; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS_SVB_INDEX) | + (cmd_len - 2); + uint32_t dw1, *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + assert(index >= 0 && index < 4); + + dw1 = index << GEN6_SVBI_DW1_INDEX__SHIFT; + if (load_vertex_count) + dw1 |= GEN6_SVBI_DW1_LOAD_INTERNAL_VERTEX_COUNT; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = dw1; + dw[2] = svbi; + dw[3] = max_svbi; +} + +static inline void +gen7_3DSTATE_GS(struct ilo_builder *builder, + const struct ilo_shader_state *gs, + int num_samplers) +{ + const uint8_t cmd_len = 7; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); + const struct ilo_shader_cso *cso; + uint32_t dw2, dw4, dw5, *dw; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + if (!gs) { + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = 0; + dw[2] = 0; + dw[3] = 0; + dw[4] = 0; + dw[5] = GEN7_GS_DW5_STATISTICS; + dw[6] = 0; + return; + } + + cso = ilo_shader_get_kernel_cso(gs); + dw2 = cso->payload[0]; + dw4 = cso->payload[1]; + dw5 = cso->payload[2]; + + dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = ilo_shader_get_kernel_offset(gs); + dw[2] = dw2; + dw[3] = 0; /* scratch */ + dw[4] = dw4; + dw[5] = dw5; + dw[6] = 0; +} + +static inline void gen7_3DSTATE_STREAMOUT(struct ilo_builder *builder, unsigned buffer_mask, int vertex_attrib_count, @@ -471,124 +908,204 @@ gen7_3DSTATE_STREAMOUT(struct ilo_builder *builder, } static inline void -gen7_3DSTATE_SBE(struct ilo_builder *builder, - const struct 
ilo_rasterizer_state *rasterizer, - const struct ilo_shader_state *fs) +gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder, + const struct pipe_stream_output_info *so_info) { - const uint8_t cmd_len = 14; - const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2); - uint32_t payload[13], *dw; + uint16_t cmd_len; + uint32_t dw0, *dw; + int buffer_selects, num_entries, i; + uint16_t so_decls[128]; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - ilo_gpe_gen6_fill_3dstate_sf_sbe(builder->dev, - rasterizer, fs, payload, Elements(payload)); + buffer_selects = 0; + num_entries = 0; + + if (so_info) { + int buffer_offsets[PIPE_MAX_SO_BUFFERS]; + + memset(buffer_offsets, 0, sizeof(buffer_offsets)); + + for (i = 0; i < so_info->num_outputs; i++) { + unsigned decl, buf, reg, mask; + + buf = so_info->output[i].output_buffer; + + /* pad with holes */ + assert(buffer_offsets[buf] <= so_info->output[i].dst_offset); + while (buffer_offsets[buf] < so_info->output[i].dst_offset) { + int num_dwords; + + num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf]; + if (num_dwords > 4) + num_dwords = 4; + + decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT | + GEN7_SO_DECL_HOLE_FLAG | + ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT; + + so_decls[num_entries++] = decl; + buffer_offsets[buf] += num_dwords; + } + + reg = so_info->output[i].register_index; + mask = ((1 << so_info->output[i].num_components) - 1) << + so_info->output[i].start_component; + + decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT | + reg << GEN7_SO_DECL_REG_INDEX__SHIFT | + mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT; + + so_decls[num_entries++] = decl; + buffer_selects |= 1 << buf; + buffer_offsets[buf] += so_info->output[i].num_components; + } + } + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 201: + * + * "Errata: All 128 decls for all four streams must be included + * whenever this command is issued. 
The "Num Entries [n]" fields still + * contain the actual numbers of valid decls." + * + * Also note that "DWord Length" has 9 bits for this command, and the type + * of cmd_len is thus uint16_t. + */ + cmd_len = 2 * 128 + 3; + dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_SO_DECL_LIST) | (cmd_len - 2); ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - memcpy(&dw[1], payload, sizeof(payload)); + dw[1] = 0 << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT | + 0 << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT | + 0 << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT | + buffer_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT; + dw[2] = 0 << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT | + 0 << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT | + 0 << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT | + num_entries << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT; + dw += 3; + + for (i = 0; i < num_entries; i++) { + dw[0] = so_decls[i]; + dw[1] = 0; + dw += 2; + } + for (; i < 128; i++) { + dw[0] = 0; + dw[1] = 0; + dw += 2; + } } static inline void -gen7_3DSTATE_PS(struct ilo_builder *builder, - const struct ilo_shader_state *fs, - int num_samplers, bool dual_blend) +gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder, + int index, int base, int stride, + const struct pipe_stream_output_target *so_target) { - const uint8_t cmd_len = 8; - const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); - const struct ilo_shader_cso *cso; - uint32_t dw2, dw4, dw5, *dw; + const uint8_t cmd_len = 4; + const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_SO_BUFFER) | + (cmd_len - 2); + struct ilo_buffer *buf; + int end; + unsigned pos; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - if (!fs) { - int max_threads; - - /* GPU hangs if none of the dispatch enable bits is set */ - dw4 = GEN7_PS_DW4_8_PIXEL_DISPATCH; - - /* see brwCreateContext() */ - switch (ilo_dev_gen(builder->dev)) { - case ILO_GEN(7.5): - max_threads = (builder->dev->gt == 3) ? 
408 : - (builder->dev->gt == 2) ? 204 : 102; - dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT; - break; - case ILO_GEN(7): - default: - max_threads = (builder->dev->gt == 2) ? 172 : 48; - dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT; - break; - } - + if (!so_target || !so_target->buffer) { ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - dw[1] = 0; + dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT; dw[2] = 0; dw[3] = 0; - dw[4] = dw4; - dw[5] = 0; - dw[6] = 0; - dw[7] = 0; return; } - cso = ilo_shader_get_kernel_cso(fs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; + buf = ilo_buffer(so_target->buffer); - dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; + /* DWord-aligned */ + assert(stride % 4 == 0 && base % 4 == 0); + assert(so_target->buffer_offset % 4 == 0); - if (dual_blend) - dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; + stride &= ~3; + base = (base + so_target->buffer_offset) & ~3; + end = (base + so_target->buffer_size) & ~3; - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - dw[1] = ilo_shader_get_kernel_offset(fs); - dw[2] = dw2; - dw[3] = 0; /* scratch */ - dw[4] = dw4; - dw[5] = dw5; - dw[6] = 0; /* kernel 1 */ - dw[7] = 0; /* kernel 2 */ -} + dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT | + stride; -static inline void -gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(struct ilo_builder *builder, - uint32_t sf_clip_viewport) -{ - gen7_3dstate_pointer(builder, - GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, - sf_clip_viewport); + ilo_builder_batch_reloc(builder, pos + 2, + buf->bo, base, INTEL_RELOC_WRITE); + ilo_builder_batch_reloc(builder, pos + 3, + buf->bo, end, INTEL_RELOC_WRITE); } static inline void -gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(struct ilo_builder *builder, - uint32_t cc_viewport) +gen6_3DSTATE_BINDING_TABLE_POINTERS(struct ilo_builder *builder, + uint32_t 
vs_binding_table, + uint32_t gs_binding_table, + uint32_t ps_binding_table) { - gen7_3dstate_pointer(builder, - GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_CC, - cc_viewport); + const uint8_t cmd_len = 4; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_BINDING_TABLE_POINTERS) | + GEN6_PTR_BINDING_TABLE_DW0_VS_CHANGED | + GEN6_PTR_BINDING_TABLE_DW0_GS_CHANGED | + GEN6_PTR_BINDING_TABLE_DW0_PS_CHANGED | + (cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = vs_binding_table; + dw[2] = gs_binding_table; + dw[3] = ps_binding_table; } static inline void -gen7_3DSTATE_BLEND_STATE_POINTERS(struct ilo_builder *builder, - uint32_t blend_state) +gen6_3DSTATE_SAMPLER_STATE_POINTERS(struct ilo_builder *builder, + uint32_t vs_sampler_state, + uint32_t gs_sampler_state, + uint32_t ps_sampler_state) { - gen7_3dstate_pointer(builder, - GEN7_RENDER_OPCODE_3DSTATE_BLEND_STATE_POINTERS, - blend_state); + const uint8_t cmd_len = 4; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLER_STATE_POINTERS) | + GEN6_PTR_SAMPLER_DW0_VS_CHANGED | + GEN6_PTR_SAMPLER_DW0_GS_CHANGED | + GEN6_PTR_SAMPLER_DW0_PS_CHANGED | + (cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = vs_sampler_state; + dw[2] = gs_sampler_state; + dw[3] = ps_sampler_state; } static inline void -gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(struct ilo_builder *builder, - uint32_t depth_stencil_state) +gen7_3dstate_pointer(struct ilo_builder *builder, + int subop, uint32_t pointer) { - gen7_3dstate_pointer(builder, - GEN7_RENDER_OPCODE_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, - depth_stencil_state); + const uint8_t cmd_len = 2; + const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER | + GEN6_RENDER_SUBTYPE_3D | + subop | (cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + ilo_builder_batch_pointer(builder, 
cmd_len, &dw); + dw[0] = dw0; + dw[1] = pointer; } static inline void @@ -628,15 +1145,6 @@ gen7_3DSTATE_BINDING_TABLE_POINTERS_GS(struct ilo_builder *builder, } static inline void -gen7_3DSTATE_BINDING_TABLE_POINTERS_PS(struct ilo_builder *builder, - uint32_t binding_table) -{ - gen7_3dstate_pointer(builder, - GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_PS, - binding_table); -} - -static inline void gen7_3DSTATE_SAMPLER_STATE_POINTERS_VS(struct ilo_builder *builder, uint32_t sampler_state) { @@ -672,459 +1180,445 @@ gen7_3DSTATE_SAMPLER_STATE_POINTERS_GS(struct ilo_builder *builder, sampler_state); } -static inline void -gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS(struct ilo_builder *builder, - uint32_t sampler_state) +static inline unsigned +gen6_fill_3dstate_constant(const struct ilo_dev_info *dev, + const uint32_t *bufs, const int *sizes, + int num_bufs, int max_read_length, + uint32_t *dw, int num_dwords) { - gen7_3dstate_pointer(builder, - GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_PS, - sampler_state); -} + unsigned enabled = 0x0; + int total_read_length, i; -static inline void -gen7_3dstate_urb(struct ilo_builder *builder, - int subop, int offset, int size, - int entry_size) -{ - const uint8_t cmd_len = 2; - const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER | - GEN6_RENDER_SUBTYPE_3D | - subop | (cmd_len - 2); - const int row_size = 64; /* 512 bits */ - int alloc_size, num_entries, min_entries, max_entries; - uint32_t *dw; + assert(num_dwords == 4); - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + total_read_length = 0; + for (i = 0; i < 4; i++) { + if (i < num_bufs && sizes[i]) { + /* in 256-bit units minus one */ + const int read_len = (sizes[i] + 31) / 32 - 1; - /* VS, HS, DS, and GS variants */ - assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_URB_VS && - subop <= GEN7_RENDER_OPCODE_3DSTATE_URB_GS); + assert(bufs[i] % 32 == 0); + assert(read_len < 32); - /* in multiples of 8KB */ - assert(offset % 8192 == 0); - offset /= 8192; + enabled |= 1 << i; + dw[i] = 
bufs[i] | read_len; - /* in multiple of 512-bit rows */ - alloc_size = (entry_size + row_size - 1) / row_size; - if (!alloc_size) - alloc_size = 1; + total_read_length += read_len + 1; + } + else { + dw[i] = 0; + } + } - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 34: - * - * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may - * cause performance to decrease due to banking in the URB. Element - * sizes of 16 to 20 should be programmed with six 512-bit URB rows." - */ - if (subop == GEN7_RENDER_OPCODE_3DSTATE_URB_VS && alloc_size == 5) - alloc_size = 6; + assert(total_read_length <= max_read_length); - /* in multiples of 8 */ - num_entries = (size / row_size / alloc_size) & ~7; + return enabled; +} - switch (subop) { - case GEN7_RENDER_OPCODE_3DSTATE_URB_VS: - switch (ilo_dev_gen(builder->dev)) { - case ILO_GEN(7.5): - max_entries = (builder->dev->gt >= 2) ? 1664 : 640; - min_entries = (builder->dev->gt >= 2) ? 64 : 32; - break; - case ILO_GEN(7): - default: - max_entries = (builder->dev->gt == 2) ? 704 : 512; - min_entries = 32; - break; - } +static inline void +gen6_3DSTATE_CONSTANT_VS(struct ilo_builder *builder, + const uint32_t *bufs, const int *sizes, + int num_bufs) +{ + const uint8_t cmd_len = 5; + uint32_t buf_dw[4], buf_enabled; + uint32_t dw0, *dw; - assert(num_entries >= min_entries); - if (num_entries > max_entries) - num_entries = max_entries; - break; - case GEN7_RENDER_OPCODE_3DSTATE_URB_HS: - max_entries = (builder->dev->gt == 2) ? 64 : 32; - if (num_entries > max_entries) - num_entries = max_entries; - break; - case GEN7_RENDER_OPCODE_3DSTATE_URB_DS: - if (num_entries) - assert(num_entries >= 138); - break; - case GEN7_RENDER_OPCODE_3DSTATE_URB_GS: - switch (ilo_dev_gen(builder->dev)) { - case ILO_GEN(7.5): - max_entries = (builder->dev->gt >= 2) ? 640 : 256; - break; - case ILO_GEN(7): - default: - max_entries = (builder->dev->gt == 2) ? 
320 : 192; - break; - } + ILO_DEV_ASSERT(builder->dev, 6, 6); + assert(num_bufs <= 4); - if (num_entries > max_entries) - num_entries = max_entries; - break; - default: - break; - } + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 138: + * + * "The sum of all four read length fields (each incremented to + * represent the actual read length) must be less than or equal to 32" + */ + buf_enabled = gen6_fill_3dstate_constant(builder->dev, + bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw)); + + dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_VS) | + buf_enabled << 12 | + (cmd_len - 2); ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - dw[1] = offset << GEN7_URB_ANY_DW1_OFFSET__SHIFT | - (alloc_size - 1) << GEN7_URB_ANY_DW1_ENTRY_SIZE__SHIFT | - num_entries; + memcpy(&dw[1], buf_dw, sizeof(buf_dw)); } static inline void -gen7_3DSTATE_URB_VS(struct ilo_builder *builder, - int offset, int size, int entry_size) +gen6_3DSTATE_CONSTANT_GS(struct ilo_builder *builder, + const uint32_t *bufs, const int *sizes, + int num_bufs) { - gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_VS, - offset, size, entry_size); -} + const uint8_t cmd_len = 5; + uint32_t buf_dw[4], buf_enabled; + uint32_t dw0, *dw; -static inline void -gen7_3DSTATE_URB_HS(struct ilo_builder *builder, - int offset, int size, int entry_size) -{ - gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_HS, - offset, size, entry_size); -} + ILO_DEV_ASSERT(builder->dev, 6, 6); + assert(num_bufs <= 4); -static inline void -gen7_3DSTATE_URB_DS(struct ilo_builder *builder, - int offset, int size, int entry_size) -{ - gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_DS, - offset, size, entry_size); -} + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 161: + * + * "The sum of all four read length fields (each incremented to + * represent the actual read length) must be less than or equal to 64" + */ + buf_enabled = gen6_fill_3dstate_constant(builder->dev, + bufs, sizes, 
num_bufs, 64, buf_dw, Elements(buf_dw)); -static inline void -gen7_3DSTATE_URB_GS(struct ilo_builder *builder, - int offset, int size, int entry_size) -{ - gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_GS, - offset, size, entry_size); + dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_GS) | + buf_enabled << 12 | + (cmd_len - 2); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + memcpy(&dw[1], buf_dw, sizeof(buf_dw)); } static inline void -gen7_3dstate_push_constant_alloc(struct ilo_builder *builder, - int subop, int offset, int size) +gen7_3dstate_constant(struct ilo_builder *builder, + int subop, + const uint32_t *bufs, const int *sizes, + int num_bufs) { - const uint8_t cmd_len = 2; + const uint8_t cmd_len = 7; const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER | GEN6_RENDER_SUBTYPE_3D | subop | (cmd_len - 2); - uint32_t *dw; - int end; + uint32_t payload[6], *dw; + int total_read_length, i; ILO_DEV_ASSERT(builder->dev, 7, 7.5); /* VS, HS, DS, GS, and PS variants */ - assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS && - subop <= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS); + assert(subop >= GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS && + subop <= GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS && + subop != GEN6_RENDER_OPCODE_3DSTATE_SAMPLE_MASK); - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 68: - * - * "(A table that says the maximum size of each constant buffer is - * 16KB") - * - * From the Ivy Bridge PRM, volume 2 part 1, page 115: - * - * "The sum of the Constant Buffer Offset and the Constant Buffer Size - * may not exceed the maximum value of the Constant Buffer Size." - * - * Thus, the valid range of buffer end is [0KB, 16KB]. 
- */ - end = (offset + size) / 1024; - if (end > 16) { - assert(!"invalid constant buffer end"); - end = 16; - } + assert(num_bufs <= 4); - /* the valid range of buffer offset is [0KB, 15KB] */ - offset = (offset + 1023) / 1024; - if (offset > 15) { - assert(!"invalid constant buffer offset"); - offset = 15; - } + payload[0] = 0; + payload[1] = 0; - if (offset > end) { - assert(!size); - offset = end; - } + total_read_length = 0; + for (i = 0; i < 4; i++) { + int read_len; - /* the valid range of buffer size is [0KB, 15KB] */ - size = end - offset; - if (size > 15) { - assert(!"invalid constant buffer size"); - size = 15; + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 112: + * + * "Constant buffers must be enabled in order from Constant Buffer 0 + * to Constant Buffer 3 within this command. For example, it is + * not allowed to enable Constant Buffer 1 by programming a + * non-zero value in the VS Constant Buffer 1 Read Length without a + * non-zero value in VS Constant Buffer 0 Read Length." + */ + if (i >= num_bufs || !sizes[i]) { + for (; i < 4; i++) { + assert(i >= num_bufs || !sizes[i]); + payload[2 + i] = 0; + } + break; + } + + /* read lengths are in 256-bit units */ + read_len = (sizes[i] + 31) / 32; + /* the lower 5 bits are used for memory object control state */ + assert(bufs[i] % 32 == 0); + + payload[i / 2] |= read_len << ((i % 2) ? 
16 : 0); + payload[2 + i] = bufs[i]; + + total_read_length += read_len; } + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 113: + * + * "The sum of all four read length fields must be less than or equal + * to the size of 64" + */ + assert(total_read_length <= 64); + ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - dw[1] = offset << GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT | - size; -} - -static inline void -gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder, - int offset, int size) -{ - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS, offset, size); + memcpy(&dw[1], payload, sizeof(payload)); } static inline void -gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS(struct ilo_builder *builder, - int offset, int size) +gen7_3DSTATE_CONSTANT_VS(struct ilo_builder *builder, + const uint32_t *bufs, const int *sizes, + int num_bufs) { - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_HS, offset, size); + gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS, + bufs, sizes, num_bufs); } static inline void -gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS(struct ilo_builder *builder, - int offset, int size) +gen7_3DSTATE_CONSTANT_HS(struct ilo_builder *builder, + const uint32_t *bufs, const int *sizes, + int num_bufs) { - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_DS, offset, size); + gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_HS, + bufs, sizes, num_bufs); } static inline void -gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(struct ilo_builder *builder, - int offset, int size) +gen7_3DSTATE_CONSTANT_DS(struct ilo_builder *builder, + const uint32_t *bufs, const int *sizes, + int num_bufs) { - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_GS, offset, size); + gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS, + bufs, sizes, num_bufs); } static 
inline void -gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder, - int offset, int size) +gen7_3DSTATE_CONSTANT_GS(struct ilo_builder *builder, + const uint32_t *bufs, const int *sizes, + int num_bufs) { - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS, offset, size); + gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_GS, + bufs, sizes, num_bufs); } -static inline void -gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder, - const struct pipe_stream_output_info *so_info) +static inline uint32_t +gen6_BINDING_TABLE_STATE(struct ilo_builder *builder, + uint32_t *surface_states, + int num_surface_states) { - uint16_t cmd_len; - uint32_t dw0, *dw; - int buffer_selects, num_entries, i; - uint16_t so_decls[128]; + const int state_align = 32; + const int state_len = num_surface_states; - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + ILO_DEV_ASSERT(builder->dev, 6, 7.5); - buffer_selects = 0; - num_entries = 0; + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 69: + * + * "It is stored as an array of up to 256 elements..." + */ + assert(num_surface_states <= 256); - if (so_info) { - int buffer_offsets[PIPE_MAX_SO_BUFFERS]; + if (!num_surface_states) + return 0; - memset(buffer_offsets, 0, sizeof(buffer_offsets)); + return ilo_builder_surface_write(builder, ILO_BUILDER_ITEM_BINDING_TABLE, + state_align, state_len, surface_states); +} - for (i = 0; i < so_info->num_outputs; i++) { - unsigned decl, buf, reg, mask; +static inline uint32_t +gen6_SURFACE_STATE(struct ilo_builder *builder, + const struct ilo_view_surface *surf, + bool for_render) +{ + const int state_align = 32; + const int state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 
8 : 6; + uint32_t state_offset; - buf = so_info->output[i].output_buffer; + ILO_DEV_ASSERT(builder->dev, 6, 7.5); - /* pad with holes */ - assert(buffer_offsets[buf] <= so_info->output[i].dst_offset); - while (buffer_offsets[buf] < so_info->output[i].dst_offset) { - int num_dwords; + state_offset = ilo_builder_surface_write(builder, ILO_BUILDER_ITEM_SURFACE, + state_align, state_len, surf->payload); - num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf]; - if (num_dwords > 4) - num_dwords = 4; + if (surf->bo) { + ilo_builder_surface_reloc(builder, state_offset, 1, surf->bo, + surf->payload[1], (for_render) ? INTEL_RELOC_WRITE : 0); + } - decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT | - GEN7_SO_DECL_HOLE_FLAG | - ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT; + return state_offset; +} - so_decls[num_entries++] = decl; - buffer_offsets[buf] += num_dwords; - } +static inline uint32_t +gen6_so_SURFACE_STATE(struct ilo_builder *builder, + const struct pipe_stream_output_target *so, + const struct pipe_stream_output_info *so_info, + int so_index) +{ + struct ilo_buffer *buf = ilo_buffer(so->buffer); + unsigned bo_offset, struct_size; + enum pipe_format elem_format; + struct ilo_view_surface surf; - reg = so_info->output[i].register_index; - mask = ((1 << so_info->output[i].num_components) - 1) << - so_info->output[i].start_component; + ILO_DEV_ASSERT(builder->dev, 6, 6); - decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT | - reg << GEN7_SO_DECL_REG_INDEX__SHIFT | - mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT; + bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4; + struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4; - so_decls[num_entries++] = decl; - buffer_selects |= 1 << buf; - buffer_offsets[buf] += so_info->output[i].num_components; - } + switch (so_info->output[so_index].num_components) { + case 1: + elem_format = PIPE_FORMAT_R32_FLOAT; + break; + case 2: + elem_format = PIPE_FORMAT_R32G32_FLOAT; 
+ break; + case 3: + elem_format = PIPE_FORMAT_R32G32B32_FLOAT; + break; + case 4: + elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + break; + default: + assert(!"unexpected SO components length"); + elem_format = PIPE_FORMAT_R32_FLOAT; + break; } - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 201: - * - * "Errata: All 128 decls for all four streams must be included - * whenever this command is issued. The "Num Entries [n]" fields still - * contain the actual numbers of valid decls." - * - * Also note that "DWord Length" has 9 bits for this command, and the type - * of cmd_len is thus uint16_t. - */ - cmd_len = 2 * 128 + 3; - dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_SO_DECL_LIST) | (cmd_len - 2); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = 0 << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT | - 0 << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT | - 0 << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT | - buffer_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT; - dw[2] = 0 << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT | - 0 << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT | - 0 << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT | - num_entries << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT; - dw += 3; + ilo_gpe_init_view_surface_for_buffer_gen6(builder->dev, buf, bo_offset, + so->buffer_size, struct_size, elem_format, false, true, &surf); - for (i = 0; i < num_entries; i++) { - dw[0] = so_decls[i]; - dw[1] = 0; - dw += 2; - } - for (; i < 128; i++) { - dw[0] = 0; - dw[1] = 0; - dw += 2; - } + return gen6_SURFACE_STATE(builder, &surf, false); } -static inline void -gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder, - int index, int base, int stride, - const struct pipe_stream_output_target *so_target) +static inline uint32_t +gen6_SAMPLER_STATE(struct ilo_builder *builder, + const struct ilo_sampler_cso * const *samplers, + const struct pipe_sampler_view * const *views, + const uint32_t *sampler_border_colors, + int 
num_samplers) { - const uint8_t cmd_len = 4; - const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_SO_BUFFER) | - (cmd_len - 2); - struct ilo_buffer *buf; - int end; - unsigned pos; - uint32_t *dw; + const int state_align = 32; + const int state_len = 4 * num_samplers; + uint32_t state_offset, *dw; + int i; - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + ILO_DEV_ASSERT(builder->dev, 6, 7.5); - if (!so_target || !so_target->buffer) { - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT; - dw[2] = 0; - dw[3] = 0; + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 101: + * + * "The sampler state is stored as an array of up to 16 elements..." + */ + assert(num_samplers <= 16); - return; - } + if (!num_samplers) + return 0; - buf = ilo_buffer(so_target->buffer); + state_offset = ilo_builder_state_pointer(builder, + ILO_BUILDER_ITEM_SAMPLER, state_align, state_len, &dw); + + for (i = 0; i < num_samplers; i++) { + const struct ilo_sampler_cso *sampler = samplers[i]; + const struct pipe_sampler_view *view = views[i]; + const uint32_t border_color = sampler_border_colors[i]; + uint32_t dw_filter, dw_wrap; + + /* there may be holes */ + if (!sampler || !view) { + /* disabled sampler */ + dw[0] = 1 << 31; + dw[1] = 0; + dw[2] = 0; + dw[3] = 0; + dw += 4; + + continue; + } - /* DWord-aligned */ - assert(stride % 4 == 0 && base % 4 == 0); - assert(so_target->buffer_offset % 4 == 0); + /* determine filter and wrap modes */ + switch (view->texture->target) { + case PIPE_TEXTURE_1D: + dw_filter = (sampler->anisotropic) ? + sampler->dw_filter_aniso : sampler->dw_filter; + dw_wrap = sampler->dw_wrap_1d; + break; + case PIPE_TEXTURE_3D: + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 103: + * + * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for + * surfaces of type SURFTYPE_3D." 
+ */ + dw_filter = sampler->dw_filter; + dw_wrap = sampler->dw_wrap; + break; + case PIPE_TEXTURE_CUBE: + dw_filter = (sampler->anisotropic) ? + sampler->dw_filter_aniso : sampler->dw_filter; + dw_wrap = sampler->dw_wrap_cube; + break; + default: + dw_filter = (sampler->anisotropic) ? + sampler->dw_filter_aniso : sampler->dw_filter; + dw_wrap = sampler->dw_wrap; + break; + } - stride &= ~3; - base = (base + so_target->buffer_offset) & ~3; - end = (base + so_target->buffer_size) & ~3; + dw[0] = sampler->payload[0]; + dw[1] = sampler->payload[1]; + assert(!(border_color & 0x1f)); + dw[2] = border_color; + dw[3] = sampler->payload[2]; - pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT | - stride; + dw[0] |= dw_filter; - ilo_builder_batch_reloc(builder, pos + 2, - buf->bo, base, INTEL_RELOC_WRITE); - ilo_builder_batch_reloc(builder, pos + 3, - buf->bo, end, INTEL_RELOC_WRITE); + if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { + dw[3] |= dw_wrap; + } + else { + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 21: + * + * "[DevSNB] Errata: Incorrect behavior is observed in cases + * where the min and mag mode filters are different and + * SurfMinLOD is nonzero. The determination of MagMode uses the + * following equation instead of the one in the above + * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)" + * + * As a way to work around that, we set Base to + * view->u.tex.first_level. 
+ */ + dw[0] |= view->u.tex.first_level << 22; + + dw[1] |= dw_wrap; + } + + dw += 4; + } + + return state_offset; } -static inline void -gen7_3DPRIMITIVE(struct ilo_builder *builder, - const struct pipe_draw_info *info, - const struct ilo_ib_state *ib, - bool rectlist) +static inline uint32_t +gen6_SAMPLER_BORDER_COLOR_STATE(struct ilo_builder *builder, + const struct ilo_sampler_cso *sampler) { - const uint8_t cmd_len = 7; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | (cmd_len - 2); - const int prim = (rectlist) ? - GEN6_3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode); - const int vb_access = (info->indexed) ? - GEN7_3DPRIM_DW1_ACCESS_RANDOM : - GEN7_3DPRIM_DW1_ACCESS_SEQUENTIAL; - const uint32_t vb_start = info->start + - ((info->indexed) ? ib->draw_start_offset : 0); - uint32_t *dw; + const int state_align = 32; + const int state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 12; - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + ILO_DEV_ASSERT(builder->dev, 6, 7.5); - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = vb_access | prim; - dw[2] = info->count; - dw[3] = vb_start; - dw[4] = info->instance_count; - dw[5] = info->start_instance; - dw[6] = info->index_bias; + assert(Elements(sampler->payload) >= 3 + state_len); + + /* see ilo_gpe_init_sampler_cso() */ + return ilo_builder_state_write(builder, ILO_BUILDER_ITEM_BLOB, + state_align, state_len, &sampler->payload[3]); } static inline uint32_t -gen7_SF_CLIP_VIEWPORT(struct ilo_builder *builder, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports) +gen6_push_constant_buffer(struct ilo_builder *builder, + int size, void **pcb) { - const int state_align = 64; - const int state_len = 16 * num_viewports; - uint32_t state_offset, *dw; - unsigned i; - - ILO_DEV_ASSERT(builder->dev, 7, 7.5); - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 270: - * - * "The viewport-specific state used by both the SF and CL units - * (SF_CLIP_VIEWPORT) is 
stored as an array of up to 16 elements, each - * of which contains the DWords described below. The start of each - * element is spaced 16 DWords apart. The location of first element of - * the array, as specified by both Pointer to SF_VIEWPORT and Pointer - * to CLIP_VIEWPORT, is aligned to a 64-byte boundary." + * For all VS, GS, FS, and CS push constant buffers, they must be aligned + * to 32 bytes, and their sizes are specified in 256-bit units. */ - assert(num_viewports && num_viewports <= 16); + const int state_align = 32; + const int state_len = align(size, 32) / 4; + uint32_t state_offset; + char *buf; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); state_offset = ilo_builder_state_pointer(builder, - ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &dw); + ILO_BUILDER_ITEM_BLOB, state_align, state_len, (uint32_t **) &buf); - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; + /* zero out the unused range */ + if (size < state_len * 4) + memset(&buf[size], 0, state_len * 4 - size); - dw[0] = fui(vp->m00); - dw[1] = fui(vp->m11); - dw[2] = fui(vp->m22); - dw[3] = fui(vp->m30); - dw[4] = fui(vp->m31); - dw[5] = fui(vp->m32); - dw[6] = 0; - dw[7] = 0; - dw[8] = fui(vp->min_gbx); - dw[9] = fui(vp->max_gbx); - dw[10] = fui(vp->min_gby); - dw[11] = fui(vp->max_gby); - dw[12] = 0; - dw[13] = 0; - dw[14] = 0; - dw[15] = 0; - - dw += 16; - } + if (pcb) + *pcb = buf; return state_offset; } -#endif /* ILO_GPE_GEN7_H */ +#endif /* ILO_BUILDER_3D_TOP_H */ diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.h b/src/gallium/drivers/ilo/ilo_gpe_gen6.h index eb537f801b8..f417710237a 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen6.h +++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.h @@ -32,10 +32,6 @@ #include "intel_winsys.h" #include "ilo_common.h" -#include "ilo_cp.h" -#include "ilo_format.h" -#include "ilo_resource.h" -#include "ilo_shader.h" #include "ilo_gpe.h" /** @@ -58,34 +54,6 @@ ilo_gpe_gen6_translate_winsys_tiling(enum 
intel_tiling_mode tiling) } /** - * Translate a pipe primitive type to the matching hardware primitive type. - */ -static inline int -ilo_gpe_gen6_translate_pipe_prim(unsigned prim) -{ - static const int prim_mapping[PIPE_PRIM_MAX] = { - [PIPE_PRIM_POINTS] = GEN6_3DPRIM_POINTLIST, - [PIPE_PRIM_LINES] = GEN6_3DPRIM_LINELIST, - [PIPE_PRIM_LINE_LOOP] = GEN6_3DPRIM_LINELOOP, - [PIPE_PRIM_LINE_STRIP] = GEN6_3DPRIM_LINESTRIP, - [PIPE_PRIM_TRIANGLES] = GEN6_3DPRIM_TRILIST, - [PIPE_PRIM_TRIANGLE_STRIP] = GEN6_3DPRIM_TRISTRIP, - [PIPE_PRIM_TRIANGLE_FAN] = GEN6_3DPRIM_TRIFAN, - [PIPE_PRIM_QUADS] = GEN6_3DPRIM_QUADLIST, - [PIPE_PRIM_QUAD_STRIP] = GEN6_3DPRIM_QUADSTRIP, - [PIPE_PRIM_POLYGON] = GEN6_3DPRIM_POLYGON, - [PIPE_PRIM_LINES_ADJACENCY] = GEN6_3DPRIM_LINELIST_ADJ, - [PIPE_PRIM_LINE_STRIP_ADJACENCY] = GEN6_3DPRIM_LINESTRIP_ADJ, - [PIPE_PRIM_TRIANGLES_ADJACENCY] = GEN6_3DPRIM_TRILIST_ADJ, - [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = GEN6_3DPRIM_TRISTRIP_ADJ, - }; - - assert(prim_mapping[prim]); - - return prim_mapping[prim]; -} - -/** * Translate a pipe texture target to the matching hardware surface type. */ static inline int @@ -112,1014 +80,6 @@ ilo_gpe_gen6_translate_texture(enum pipe_texture_target target) } } -/** - * Fill in DW2 to DW7 of 3DSTATE_SF. - */ -static inline void -ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev, - const struct ilo_rasterizer_state *rasterizer, - int num_samples, - enum pipe_format depth_format, - uint32_t *payload, unsigned payload_len) -{ - assert(payload_len == Elements(rasterizer->sf.payload)); - - if (rasterizer) { - const struct ilo_rasterizer_sf *sf = &rasterizer->sf; - - memcpy(payload, sf->payload, sizeof(sf->payload)); - if (num_samples > 1) - payload[1] |= sf->dw_msaa; - } - else { - payload[0] = 0; - payload[1] = (num_samples > 1) ? 
GEN7_SF_DW2_MSRASTMODE_ON_PATTERN : 0; - payload[2] = 0; - payload[3] = 0; - payload[4] = 0; - payload[5] = 0; - } - - if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - int format; - - /* separate stencil */ - switch (depth_format) { - case PIPE_FORMAT_Z16_UNORM: - format = GEN6_ZFORMAT_D16_UNORM; - break; - case PIPE_FORMAT_Z32_FLOAT: - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - format = GEN6_ZFORMAT_D32_FLOAT; - break; - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - format = GEN6_ZFORMAT_D24_UNORM_X8_UINT; - break; - default: - /* FLOAT surface is assumed when there is no depth buffer */ - format = GEN6_ZFORMAT_D32_FLOAT; - break; - } - - payload[0] |= format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT; - } -} - -/** - * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF. - */ -static inline void -ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev, - const struct ilo_rasterizer_state *rasterizer, - const struct ilo_shader_state *fs, - uint32_t *dw, int num_dwords) -{ - int output_count, vue_offset, vue_len; - const struct ilo_kernel_routing *routing; - - ILO_DEV_ASSERT(dev, 6, 7.5); - assert(num_dwords == 13); - - if (!fs) { - memset(dw, 0, sizeof(dw[0]) * num_dwords); - dw[0] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT; - return; - } - - output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); - assert(output_count <= 32); - - routing = ilo_shader_get_kernel_routing(fs); - - vue_offset = routing->source_skip; - assert(vue_offset % 2 == 0); - vue_offset /= 2; - - vue_len = (routing->source_len + 1) / 2; - if (!vue_len) - vue_len = 1; - - dw[0] = output_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT | - vue_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT | - vue_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT; - if (routing->swizzle_enable) - dw[0] |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE; - - switch (rasterizer->state.sprite_coord_mode) { - case PIPE_SPRITE_COORD_UPPER_LEFT: - dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT; - break; - case 
PIPE_SPRITE_COORD_LOWER_LEFT: - dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT; - break; - } - - STATIC_ASSERT(Elements(routing->swizzles) >= 16); - memcpy(&dw[1], routing->swizzles, 2 * 16); - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 268: - * - * "This field (Point Sprite Texture Coordinate Enable) must be - * programmed to 0 when non-point primitives are rendered." - * - * TODO We do not check that yet. - */ - dw[9] = routing->point_sprite_enable; - - dw[10] = routing->const_interp_enable; - - /* WrapShortest enables */ - dw[11] = 0; - dw[12] = 0; -} - -static inline void -gen6_3DSTATE_VF_STATISTICS(struct ilo_builder *builder, - bool enable) -{ - const uint8_t cmd_len = 1; - const uint32_t dw0 = GEN6_RENDER_CMD(SINGLE_DW, 3DSTATE_VF_STATISTICS) | - enable; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - ilo_builder_batch_write(builder, cmd_len, &dw0); -} - -static inline void -gen6_3DSTATE_BINDING_TABLE_POINTERS(struct ilo_builder *builder, - uint32_t vs_binding_table, - uint32_t gs_binding_table, - uint32_t ps_binding_table) -{ - const uint8_t cmd_len = 4; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_BINDING_TABLE_POINTERS) | - GEN6_PTR_BINDING_TABLE_DW0_VS_CHANGED | - GEN6_PTR_BINDING_TABLE_DW0_GS_CHANGED | - GEN6_PTR_BINDING_TABLE_DW0_PS_CHANGED | - (cmd_len - 2); - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = vs_binding_table; - dw[2] = gs_binding_table; - dw[3] = ps_binding_table; -} - -static inline void -gen6_3DSTATE_SAMPLER_STATE_POINTERS(struct ilo_builder *builder, - uint32_t vs_sampler_state, - uint32_t gs_sampler_state, - uint32_t ps_sampler_state) -{ - const uint8_t cmd_len = 4; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLER_STATE_POINTERS) | - GEN6_PTR_SAMPLER_DW0_VS_CHANGED | - GEN6_PTR_SAMPLER_DW0_GS_CHANGED | - GEN6_PTR_SAMPLER_DW0_PS_CHANGED | - (cmd_len - 2); - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - 
ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = vs_sampler_state; - dw[2] = gs_sampler_state; - dw[3] = ps_sampler_state; -} - -static inline void -gen6_3DSTATE_URB(struct ilo_builder *builder, - int vs_total_size, int gs_total_size, - int vs_entry_size, int gs_entry_size) -{ - const uint8_t cmd_len = 3; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (cmd_len - 2); - const int row_size = 128; /* 1024 bits */ - int vs_alloc_size, gs_alloc_size; - int vs_num_entries, gs_num_entries; - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - /* in 1024-bit URB rows */ - vs_alloc_size = (vs_entry_size + row_size - 1) / row_size; - gs_alloc_size = (gs_entry_size + row_size - 1) / row_size; - - /* the valid range is [1, 5] */ - if (!vs_alloc_size) - vs_alloc_size = 1; - if (!gs_alloc_size) - gs_alloc_size = 1; - assert(vs_alloc_size <= 5 && gs_alloc_size <= 5); - - /* the valid range is [24, 256] in multiples of 4 */ - vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3; - if (vs_num_entries > 256) - vs_num_entries = 256; - assert(vs_num_entries >= 24); - - /* the valid range is [0, 256] in multiples of 4 */ - gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3; - if (gs_num_entries > 256) - gs_num_entries = 256; - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT | - vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT; - dw[2] = gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT | - (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT; -} - -static inline void -gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder, - const struct ilo_ve_state *ve, - const struct ilo_vb_state *vb) -{ - uint8_t cmd_len; - uint32_t dw0, *dw; - unsigned hw_idx, pos; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 82: - * - * "From 1 to 33 VBs can be specified..." 
- */ - assert(ve->vb_count <= 33); - - if (!ve->vb_count) - return; - - cmd_len = 1 + 4 * ve->vb_count; - dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) | - (cmd_len - 2); - - pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - - dw++; - pos++; - for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { - const unsigned instance_divisor = ve->instance_divisors[hw_idx]; - const unsigned pipe_idx = ve->vb_mapping[hw_idx]; - const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx]; - - dw[0] = hw_idx << GEN6_VB_STATE_DW0_INDEX__SHIFT; - - if (instance_divisor) - dw[0] |= GEN6_VB_STATE_DW0_ACCESS_INSTANCEDATA; - else - dw[0] |= GEN6_VB_STATE_DW0_ACCESS_VERTEXDATA; - - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) - dw[0] |= GEN7_VB_STATE_DW0_ADDR_MODIFIED; - - /* use null vb if there is no buffer or the stride is out of range */ - if (cso->buffer && cso->stride <= 2048) { - const struct ilo_buffer *buf = ilo_buffer(cso->buffer); - const uint32_t start_offset = cso->buffer_offset; - const uint32_t end_offset = buf->bo_size - 1; - - dw[0] |= cso->stride << GEN6_VB_STATE_DW0_PITCH__SHIFT; - ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0); - ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0); - } - else { - dw[0] |= 1 << 13; - dw[1] = 0; - dw[2] = 0; - } - - dw[3] = instance_divisor; - - dw += 4; - pos += 4; - } -} - -static inline void -ve_init_cso_with_components(const struct ilo_dev_info *dev, - int comp0, int comp1, int comp2, int comp3, - struct ilo_ve_cso *cso) -{ - ILO_DEV_ASSERT(dev, 6, 7.5); - - STATIC_ASSERT(Elements(cso->payload) >= 2); - cso->payload[0] = GEN6_VE_STATE_DW0_VALID; - cso->payload[1] = - comp0 << GEN6_VE_STATE_DW1_COMP0__SHIFT | - comp1 << GEN6_VE_STATE_DW1_COMP1__SHIFT | - comp2 << GEN6_VE_STATE_DW1_COMP2__SHIFT | - comp3 << GEN6_VE_STATE_DW1_COMP3__SHIFT; -} - -static inline void -ve_set_cso_edgeflag(const struct ilo_dev_info *dev, - struct ilo_ve_cso *cso) -{ - int format; - - 
ILO_DEV_ASSERT(dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 94: - * - * "- This bit (Edge Flag Enable) must only be ENABLED on the last - * valid VERTEX_ELEMENT structure. - * - * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC, - * and Component 1-3 Control must be set to VFCOMP_NOSTORE. - * - * - The Source Element Format must be set to the UINT format. - * - * - [DevSNB]: Edge Flags are not supported for QUADLIST - * primitives. Software may elect to convert QUADLIST primitives - * to some set of corresponding edge-flag-supported primitive - * types (e.g., POLYGONs) prior to submission to the 3D pipeline." - */ - - cso->payload[0] |= GEN6_VE_STATE_DW0_EDGE_FLAG_ENABLE; - cso->payload[1] = - GEN6_VFCOMP_STORE_SRC << GEN6_VE_STATE_DW1_COMP0__SHIFT | - GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP1__SHIFT | - GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP2__SHIFT | - GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP3__SHIFT; - - /* - * Edge flags have format GEN6_FORMAT_R8_UINT when defined via - * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined - * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h. - * - * Since all the hardware cares about is whether the flags are zero or not, - * we can treat them as GEN6_FORMAT_R32_UINT in the latter case. 
- */ - format = (cso->payload[0] >> GEN6_VE_STATE_DW0_FORMAT__SHIFT) & 0x1ff; - if (format == GEN6_FORMAT_R32_FLOAT) { - STATIC_ASSERT(GEN6_FORMAT_R32_UINT == GEN6_FORMAT_R32_FLOAT - 1); - cso->payload[0] -= (1 << GEN6_VE_STATE_DW0_FORMAT__SHIFT); - } - else { - assert(format == GEN6_FORMAT_R8_UINT); - } -} - -static inline void -gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder, - const struct ilo_ve_state *ve, - bool last_velement_edgeflag, - bool prepend_generated_ids) -{ - uint8_t cmd_len; - uint32_t dw0, *dw; - unsigned i; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 93: - * - * "Up to 34 (DevSNB+) vertex elements are supported." - */ - assert(ve->count + prepend_generated_ids <= 34); - - STATIC_ASSERT(Elements(ve->cso[0].payload) == 2); - - if (!ve->count && !prepend_generated_ids) { - struct ilo_ve_cso dummy; - - ve_init_cso_with_components(builder->dev, - GEN6_VFCOMP_STORE_0, - GEN6_VFCOMP_STORE_0, - GEN6_VFCOMP_STORE_0, - GEN6_VFCOMP_STORE_1_FP, - &dummy); - - cmd_len = 3; - dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | - (cmd_len - 2); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - memcpy(&dw[1], dummy.payload, sizeof(dummy.payload)); - - return; - } - - cmd_len = 2 * (ve->count + prepend_generated_ids) + 1; - dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | - (cmd_len - 2); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw++; - - if (prepend_generated_ids) { - struct ilo_ve_cso gen_ids; - - ve_init_cso_with_components(builder->dev, - GEN6_VFCOMP_STORE_VID, - GEN6_VFCOMP_STORE_IID, - GEN6_VFCOMP_NOSTORE, - GEN6_VFCOMP_NOSTORE, - &gen_ids); - - memcpy(dw, gen_ids.payload, sizeof(gen_ids.payload)); - dw += 2; - } - - if (last_velement_edgeflag) { - struct ilo_ve_cso edgeflag; - - for (i = 0; i < ve->count - 1; i++) - memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload)); - - edgeflag = ve->cso[i]; - 
ve_set_cso_edgeflag(builder->dev, &edgeflag); - memcpy(&dw[2 * i], edgeflag.payload, sizeof(edgeflag.payload)); - } - else { - for (i = 0; i < ve->count; i++) - memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload)); - } -} - -static inline void -gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder, - const struct ilo_ib_state *ib, - bool enable_cut_index) -{ - const uint8_t cmd_len = 3; - struct ilo_buffer *buf = ilo_buffer(ib->hw_resource); - uint32_t start_offset, end_offset; - int format; - unsigned pos; - uint32_t dw0, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - if (!buf) - return; - - /* this is moved to the new 3DSTATE_VF */ - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) - assert(!enable_cut_index); - - switch (ib->hw_index_size) { - case 4: - format = GEN6_IB_DW0_FORMAT_DWORD; - break; - case 2: - format = GEN6_IB_DW0_FORMAT_WORD; - break; - case 1: - format = GEN6_IB_DW0_FORMAT_BYTE; - break; - default: - assert(!"unknown index size"); - format = GEN6_IB_DW0_FORMAT_BYTE; - break; - } - - /* - * set start_offset to 0 here and adjust pipe_draw_info::start with - * ib->draw_start_offset in 3DPRIMITIVE - */ - start_offset = 0; - end_offset = buf->bo_size; - - /* end_offset must also be aligned and is inclusive */ - end_offset -= (end_offset % ib->hw_index_size); - end_offset--; - - dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | - format | - (cmd_len - 2); - if (enable_cut_index) - dw0 |= GEN6_IB_DW0_CUT_INDEX_ENABLE; - - pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0); - ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0); -} - -static inline void -gen6_3DSTATE_VIEWPORT_STATE_POINTERS(struct ilo_builder *builder, - uint32_t clip_viewport, - uint32_t sf_viewport, - uint32_t cc_viewport) -{ - const uint8_t cmd_len = 4; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VIEWPORT_STATE_POINTERS) | - GEN6_PTR_VP_DW0_CLIP_CHANGED | - 
GEN6_PTR_VP_DW0_SF_CHANGED | - GEN6_PTR_VP_DW0_CC_CHANGED | - (cmd_len - 2); - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = clip_viewport; - dw[2] = sf_viewport; - dw[3] = cc_viewport; -} - -static inline void -gen6_3DSTATE_CC_STATE_POINTERS(struct ilo_builder *builder, - uint32_t blend_state, - uint32_t depth_stencil_state, - uint32_t color_calc_state) -{ - const uint8_t cmd_len = 4; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CC_STATE_POINTERS) | - (cmd_len - 2); - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = blend_state | 1; - dw[2] = depth_stencil_state | 1; - dw[3] = color_calc_state | 1; -} - -static inline void -gen6_3DSTATE_SCISSOR_STATE_POINTERS(struct ilo_builder *builder, - uint32_t scissor_rect) -{ - const uint8_t cmd_len = 2; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SCISSOR_STATE_POINTERS) | - (cmd_len - 2); - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = scissor_rect; -} - -static inline void -gen6_3DSTATE_VS(struct ilo_builder *builder, - const struct ilo_shader_state *vs, - int num_samplers) -{ - const uint8_t cmd_len = 6; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2); - const struct ilo_shader_cso *cso; - uint32_t dw2, dw4, dw5, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - if (!vs) { - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = 0; - dw[2] = 0; - dw[3] = 0; - dw[4] = 0; - dw[5] = 0; - - return; - } - - cso = ilo_shader_get_kernel_cso(vs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; - - dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = ilo_shader_get_kernel_offset(vs); - dw[2] = 
dw2; - dw[3] = 0; /* scratch */ - dw[4] = dw4; - dw[5] = dw5; -} - -static inline void -gen6_3DSTATE_GS(struct ilo_builder *builder, - const struct ilo_shader_state *gs, - const struct ilo_shader_state *vs, - int verts_per_prim) -{ - const uint8_t cmd_len = 7; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); - uint32_t dw1, dw2, dw4, dw5, dw6, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - if (gs) { - const struct ilo_shader_cso *cso; - - dw1 = ilo_shader_get_kernel_offset(gs); - - cso = ilo_shader_get_kernel_cso(gs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; - dw6 = cso->payload[3]; - } - else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) { - struct ilo_shader_cso cso; - enum ilo_kernel_param param; - - switch (verts_per_prim) { - case 1: - param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET; - break; - case 2: - param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET; - break; - default: - param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET; - break; - } - - dw1 = ilo_shader_get_kernel_offset(vs) + - ilo_shader_get_kernel_param(vs, param); - - /* cannot use VS's CSO */ - ilo_gpe_init_gs_cso_gen6(builder->dev, vs, &cso); - dw2 = cso.payload[0]; - dw4 = cso.payload[1]; - dw5 = cso.payload[2]; - dw6 = cso.payload[3]; - } - else { - dw1 = 0; - dw2 = 0; - dw4 = 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT; - dw5 = GEN6_GS_DW5_STATISTICS; - dw6 = 0; - } - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = dw1; - dw[2] = dw2; - dw[3] = 0; - dw[4] = dw4; - dw[5] = dw5; - dw[6] = dw6; -} - -static inline void -gen6_3DSTATE_CLIP(struct ilo_builder *builder, - const struct ilo_rasterizer_state *rasterizer, - const struct ilo_shader_state *fs, - bool enable_guardband, - int num_viewports) -{ - const uint8_t cmd_len = 4; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (cmd_len - 2); - uint32_t dw1, dw2, dw3, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - if (rasterizer) { - int interps; - - dw1 = 
rasterizer->clip.payload[0]; - dw2 = rasterizer->clip.payload[1]; - dw3 = rasterizer->clip.payload[2]; - - if (enable_guardband && rasterizer->clip.can_enable_guardband) - dw2 |= GEN6_CLIP_DW2_GB_TEST_ENABLE; - - interps = (fs) ? ilo_shader_get_kernel_param(fs, - ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0; - - if (interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL | - GEN6_INTERP_NONPERSPECTIVE_CENTROID | - GEN6_INTERP_NONPERSPECTIVE_SAMPLE)) - dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE; - - dw3 |= GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO | - (num_viewports - 1); - } - else { - dw1 = 0; - dw2 = 0; - dw3 = 0; - } - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = dw1; - dw[2] = dw2; - dw[3] = dw3; -} - -static inline void -gen6_3DSTATE_SF(struct ilo_builder *builder, - const struct ilo_rasterizer_state *rasterizer, - const struct ilo_shader_state *fs) -{ - const uint8_t cmd_len = 20; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2); - uint32_t payload_raster[6], payload_sbe[13], *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - ilo_gpe_gen6_fill_3dstate_sf_raster(builder->dev, rasterizer, - 1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster)); - ilo_gpe_gen6_fill_3dstate_sf_sbe(builder->dev, rasterizer, - fs, payload_sbe, Elements(payload_sbe)); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = payload_sbe[0]; - memcpy(&dw[2], payload_raster, sizeof(payload_raster)); - memcpy(&dw[8], &payload_sbe[1], sizeof(payload_sbe) - 4); -} - -static inline void -gen6_3DSTATE_WM(struct ilo_builder *builder, - const struct ilo_shader_state *fs, - int num_samplers, - const struct ilo_rasterizer_state *rasterizer, - bool dual_blend, bool cc_may_kill, - uint32_t hiz_op) -{ - const uint8_t cmd_len = 9; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); - const int num_samples = 1; - const struct ilo_shader_cso *fs_cso; - uint32_t dw2, dw4, dw5, dw6, *dw; - - 
ILO_DEV_ASSERT(builder->dev, 6, 6); - - if (!fs) { - /* see brwCreateContext() */ - const int max_threads = (builder->dev->gt == 2) ? 80 : 40; - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = 0; - dw[2] = 0; - dw[3] = 0; - dw[4] = hiz_op; - /* honor the valid range even if dispatching is disabled */ - dw[5] = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT; - dw[6] = 0; - dw[7] = 0; - dw[8] = 0; - - return; - } - - fs_cso = ilo_shader_get_kernel_cso(fs); - dw2 = fs_cso->payload[0]; - dw4 = fs_cso->payload[1]; - dw5 = fs_cso->payload[2]; - dw6 = fs_cso->payload[3]; - - dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 248: - * - * "This bit (Statistics Enable) must be disabled if either of these - * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve - * Enable or Depth Buffer Resolve Enable." - */ - assert(!hiz_op); - dw4 |= GEN6_WM_DW4_STATISTICS; - - if (cc_may_kill) - dw5 |= GEN6_WM_DW5_PS_KILL | GEN6_WM_DW5_PS_ENABLE; - - if (dual_blend) - dw5 |= GEN6_WM_DW5_DUAL_SOURCE_BLEND; - - dw5 |= rasterizer->wm.payload[0]; - - dw6 |= rasterizer->wm.payload[1]; - - if (num_samples > 1) { - dw6 |= rasterizer->wm.dw_msaa_rast | - rasterizer->wm.dw_msaa_disp; - } - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = ilo_shader_get_kernel_offset(fs); - dw[2] = dw2; - dw[3] = 0; /* scratch */ - dw[4] = dw4; - dw[5] = dw5; - dw[6] = dw6; - dw[7] = 0; /* kernel 1 */ - dw[8] = 0; /* kernel 2 */ -} - -static inline unsigned -gen6_fill_3dstate_constant(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, int max_read_length, - uint32_t *dw, int num_dwords) -{ - unsigned enabled = 0x0; - int total_read_length, i; - - assert(num_dwords == 4); - - total_read_length = 0; - for (i = 0; i < 4; i++) { - if (i < num_bufs && sizes[i]) { - /* in 256-bit units minus one */ - const int read_len = 
(sizes[i] + 31) / 32 - 1; - - assert(bufs[i] % 32 == 0); - assert(read_len < 32); - - enabled |= 1 << i; - dw[i] = bufs[i] | read_len; - - total_read_length += read_len + 1; - } - else { - dw[i] = 0; - } - } - - assert(total_read_length <= max_read_length); - - return enabled; -} - -static inline void -gen6_3DSTATE_CONSTANT_VS(struct ilo_builder *builder, - const uint32_t *bufs, const int *sizes, - int num_bufs) -{ - const uint8_t cmd_len = 5; - uint32_t buf_dw[4], buf_enabled; - uint32_t dw0, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - assert(num_bufs <= 4); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 138: - * - * "The sum of all four read length fields (each incremented to - * represent the actual read length) must be less than or equal to 32" - */ - buf_enabled = gen6_fill_3dstate_constant(builder->dev, - bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw)); - - dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_VS) | - buf_enabled << 12 | - (cmd_len - 2); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - memcpy(&dw[1], buf_dw, sizeof(buf_dw)); -} - -static inline void -gen6_3DSTATE_CONSTANT_GS(struct ilo_builder *builder, - const uint32_t *bufs, const int *sizes, - int num_bufs) -{ - const uint8_t cmd_len = 5; - uint32_t buf_dw[4], buf_enabled; - uint32_t dw0, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - assert(num_bufs <= 4); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 161: - * - * "The sum of all four read length fields (each incremented to - * represent the actual read length) must be less than or equal to 64" - */ - buf_enabled = gen6_fill_3dstate_constant(builder->dev, - bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw)); - - dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_GS) | - buf_enabled << 12 | - (cmd_len - 2); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - memcpy(&dw[1], buf_dw, sizeof(buf_dw)); -} - -static inline void -gen6_3DSTATE_CONSTANT_PS(struct ilo_builder *builder, - 
const uint32_t *bufs, const int *sizes, - int num_bufs) -{ - const uint8_t cmd_len = 5; - uint32_t buf_dw[4], buf_enabled; - uint32_t dw0, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - assert(num_bufs <= 4); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 287: - * - * "The sum of all four read length fields (each incremented to - * represent the actual read length) must be less than or equal to 64" - */ - buf_enabled = gen6_fill_3dstate_constant(builder->dev, - bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw)); - - dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_PS) | - buf_enabled << 12 | - (cmd_len - 2); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - memcpy(&dw[1], buf_dw, sizeof(buf_dw)); -} - -static inline void -gen6_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder, - unsigned sample_mask) -{ - const uint8_t cmd_len = 2; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | - (cmd_len - 2); - const unsigned valid_mask = 0xf; - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - sample_mask &= valid_mask; - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = sample_mask; -} - -static inline void -gen6_3DSTATE_DRAWING_RECTANGLE(struct ilo_builder *builder, - unsigned x, unsigned y, - unsigned width, unsigned height) -{ - const uint8_t cmd_len = 4; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_DRAWING_RECTANGLE) | - (cmd_len - 2); - unsigned xmax = x + width - 1; - unsigned ymax = y + height - 1; - int rect_limit; - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { - rect_limit = 16383; - } - else { - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 230: - * - * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min) - * must be an even number" - */ - assert(y % 2 == 0); - - rect_limit = 8191; - } - - if (x > rect_limit) x = rect_limit; - if (y > rect_limit) y = rect_limit; - if (xmax > rect_limit) xmax = 
rect_limit; - if (ymax > rect_limit) ymax = rect_limit; - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - - dw[0] = dw0; - dw[1] = y << 16 | x; - dw[2] = ymax << 16 | xmax; - - /* - * There is no need to set the origin. It is intended to support front - * buffer rendering. - */ - dw[3] = 0; -} - static inline void zs_align_surface(const struct ilo_dev_info *dev, unsigned align_w, unsigned align_h, @@ -1155,843 +115,4 @@ zs_align_surface(const struct ilo_dev_info *dev, zs->payload[2] = dw3; } -static inline void -gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder, - const struct ilo_zs_surface *zs) -{ - const uint8_t cmd_len = 7; - unsigned pos; - uint32_t dw0, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - dw0 = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? - GEN7_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER) : - GEN6_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER); - dw0 |= (cmd_len - 2); - - pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = zs->payload[0]; - - if (zs->bo) { - ilo_builder_batch_reloc(builder, pos + 2, - zs->bo, zs->payload[1], INTEL_RELOC_WRITE); - } else { - dw[2] = 0; - } - - dw[3] = zs->payload[2]; - dw[4] = zs->payload[3]; - dw[5] = zs->payload[4]; - dw[6] = zs->payload[5]; -} - -static inline void -gen6_3DSTATE_POLY_STIPPLE_OFFSET(struct ilo_builder *builder, - int x_offset, int y_offset) -{ - const uint8_t cmd_len = 2; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_OFFSET) | - (cmd_len - 2); - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - assert(x_offset >= 0 && x_offset <= 31); - assert(y_offset >= 0 && y_offset <= 31); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = x_offset << 8 | y_offset; -} - -static inline void -gen6_3DSTATE_POLY_STIPPLE_PATTERN(struct ilo_builder *builder, - const struct pipe_poly_stipple *pattern) -{ - const uint8_t cmd_len = 33; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_PATTERN) | - (cmd_len - 2); - uint32_t *dw; 
- int i; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - STATIC_ASSERT(Elements(pattern->stipple) == 32); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw++; - - for (i = 0; i < 32; i++) - dw[i] = pattern->stipple[i]; -} - -static inline void -gen6_3DSTATE_LINE_STIPPLE(struct ilo_builder *builder, - unsigned pattern, unsigned factor) -{ - const uint8_t cmd_len = 3; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_LINE_STIPPLE) | - (cmd_len - 2); - uint32_t *dw; - unsigned inverse; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - assert((pattern & 0xffff) == pattern); - assert(factor >= 1 && factor <= 256); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = pattern; - - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { - /* in U1.16 */ - inverse = (unsigned) (65536.0f / factor); - dw[2] = inverse << 15 | factor; - } - else { - /* in U1.13 */ - inverse = (unsigned) (8192.0f / factor); - dw[2] = inverse << 16 | factor; - } -} - -static inline void -gen6_3DSTATE_AA_LINE_PARAMETERS(struct ilo_builder *builder) -{ - const uint8_t cmd_len = 3; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_AA_LINE_PARAMETERS) | - (cmd_len - 2); - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = 0 << 16 | 0; - dw[2] = 0 << 16 | 0; -} - -static inline void -gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder, - int index, unsigned svbi, - unsigned max_svbi, - bool load_vertex_count) -{ - const uint8_t cmd_len = 4; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS_SVB_INDEX) | - (cmd_len - 2); - uint32_t dw1, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - assert(index >= 0 && index < 4); - - dw1 = index << GEN6_SVBI_DW1_INDEX__SHIFT; - if (load_vertex_count) - dw1 |= GEN6_SVBI_DW1_LOAD_INTERNAL_VERTEX_COUNT; - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = dw1; - dw[2] = svbi; - dw[3] = max_svbi; -} - -static inline void 
-gen6_3DSTATE_MULTISAMPLE(struct ilo_builder *builder, - int num_samples, - const uint32_t *packed_sample_pos, - bool pixel_location_center) -{ - const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 3; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) | - (cmd_len - 2); - uint32_t dw1, dw2, dw3, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - dw1 = (pixel_location_center) ? - GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER : GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER; - - switch (num_samples) { - case 0: - case 1: - dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1; - dw2 = 0; - dw3 = 0; - break; - case 4: - dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4; - dw2 = packed_sample_pos[0]; - dw3 = 0; - break; - case 8: - assert(ilo_dev_gen(builder->dev) >= ILO_GEN(7)); - dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8; - dw2 = packed_sample_pos[0]; - dw3 = packed_sample_pos[1]; - break; - default: - assert(!"unsupported sample count"); - dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1; - dw2 = 0; - dw3 = 0; - break; - } - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = dw1; - dw[2] = dw2; - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) - dw[2] = dw3; -} - -static inline void -gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder, - const struct ilo_zs_surface *zs) -{ - const uint8_t cmd_len = 3; - uint32_t dw0, *dw; - unsigned pos; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - dw0 = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 
- GEN7_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER) : - GEN6_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER); - dw0 |= (cmd_len - 2); - - pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - /* see ilo_gpe_init_zs_surface() */ - dw[1] = zs->payload[6]; - - if (zs->separate_s8_bo) { - ilo_builder_batch_reloc(builder, pos + 2, - zs->separate_s8_bo, zs->payload[7], INTEL_RELOC_WRITE); - } else { - dw[2] = 0; - } -} - -static inline void -gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder, - const struct ilo_zs_surface *zs) -{ - const uint8_t cmd_len = 3; - uint32_t dw0, *dw; - unsigned pos; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - dw0 = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? - GEN7_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER) : - GEN6_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER); - dw0 |= (cmd_len - 2); - - pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - /* see ilo_gpe_init_zs_surface() */ - dw[1] = zs->payload[8]; - - if (zs->hiz_bo) { - ilo_builder_batch_reloc(builder, pos + 2, - zs->hiz_bo, zs->payload[9], INTEL_RELOC_WRITE); - } else { - dw[2] = 0; - } -} - -static inline void -gen6_3DSTATE_CLEAR_PARAMS(struct ilo_builder *builder, - uint32_t clear_val) -{ - const uint8_t cmd_len = 2; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) | - GEN6_CLEAR_PARAMS_DW0_VALID | - (cmd_len - 2); - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = clear_val; -} - -static inline void -gen6_3DPRIMITIVE(struct ilo_builder *builder, - const struct pipe_draw_info *info, - const struct ilo_ib_state *ib, - bool rectlist) -{ - const uint8_t cmd_len = 6; - const int prim = (rectlist) ? - GEN6_3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode); - const int vb_access = (info->indexed) ? - GEN6_3DPRIM_DW0_ACCESS_RANDOM : GEN6_3DPRIM_DW0_ACCESS_SEQUENTIAL; - const uint32_t vb_start = info->start + - ((info->indexed) ? 
ib->draw_start_offset : 0); - uint32_t dw0, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - dw0 = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | - vb_access | - prim << GEN6_3DPRIM_DW0_TYPE__SHIFT | - (cmd_len - 2); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = info->count; - dw[2] = vb_start; - dw[3] = info->instance_count; - dw[4] = info->start_instance; - dw[5] = info->index_bias; -} - -static inline uint32_t -gen6_SF_VIEWPORT(struct ilo_builder *builder, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports) -{ - const int state_align = 32; - const int state_len = 8 * num_viewports; - uint32_t state_offset, *dw; - unsigned i; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 262: - * - * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is - * stored as an array of up to 16 elements..." - */ - assert(num_viewports && num_viewports <= 16); - - state_offset = ilo_builder_state_pointer(builder, - ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &dw); - - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; - - dw[0] = fui(vp->m00); - dw[1] = fui(vp->m11); - dw[2] = fui(vp->m22); - dw[3] = fui(vp->m30); - dw[4] = fui(vp->m31); - dw[5] = fui(vp->m32); - dw[6] = 0; - dw[7] = 0; - - dw += 8; - } - - return state_offset; -} - -static inline uint32_t -gen6_CLIP_VIEWPORT(struct ilo_builder *builder, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports) -{ - const int state_align = 32; - const int state_len = 4 * num_viewports; - uint32_t state_offset, *dw; - unsigned i; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 193: - * - * "The viewport-related state is stored as an array of up to 16 - * elements..." 
- */ - assert(num_viewports && num_viewports <= 16); - - state_offset = ilo_builder_state_pointer(builder, - ILO_BUILDER_ITEM_CLIP_VIEWPORT, state_align, state_len, &dw); - - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; - - dw[0] = fui(vp->min_gbx); - dw[1] = fui(vp->max_gbx); - dw[2] = fui(vp->min_gby); - dw[3] = fui(vp->max_gby); - - dw += 4; - } - - return state_offset; -} - -static inline uint32_t -gen6_CC_VIEWPORT(struct ilo_builder *builder, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports) -{ - const int state_align = 32; - const int state_len = 2 * num_viewports; - uint32_t state_offset, *dw; - unsigned i; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 385: - * - * "The viewport state is stored as an array of up to 16 elements..." - */ - assert(num_viewports && num_viewports <= 16); - - state_offset = ilo_builder_state_pointer(builder, - ILO_BUILDER_ITEM_CC_VIEWPORT, state_align, state_len, &dw); - - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; - - dw[0] = fui(vp->min_z); - dw[1] = fui(vp->max_z); - - dw += 2; - } - - return state_offset; -} - -static inline uint32_t -gen6_COLOR_CALC_STATE(struct ilo_builder *builder, - const struct pipe_stencil_ref *stencil_ref, - ubyte alpha_ref, - const struct pipe_blend_color *blend_color) -{ - const int state_align = 64; - const int state_len = 6; - uint32_t state_offset, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - state_offset = ilo_builder_state_pointer(builder, - ILO_BUILDER_ITEM_COLOR_CALC, state_align, state_len, &dw); - - dw[0] = stencil_ref->ref_value[0] << 24 | - stencil_ref->ref_value[1] << 16 | - GEN6_CC_DW0_ALPHATEST_UNORM8; - dw[1] = alpha_ref; - dw[2] = fui(blend_color->color[0]); - dw[3] = fui(blend_color->color[1]); - dw[4] = fui(blend_color->color[2]); - dw[5] = fui(blend_color->color[3]); - - return state_offset; -} - -static 
inline uint32_t -gen6_BLEND_STATE(struct ilo_builder *builder, - const struct ilo_blend_state *blend, - const struct ilo_fb_state *fb, - const struct ilo_dsa_state *dsa) -{ - const int state_align = 64; - int state_len; - uint32_t state_offset, *dw; - unsigned num_targets, i; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 376: - * - * "The blend state is stored as an array of up to 8 elements..." - */ - num_targets = fb->state.nr_cbufs; - assert(num_targets <= 8); - - if (!num_targets) { - if (!dsa->dw_alpha) - return 0; - /* to be able to reference alpha func */ - num_targets = 1; - } - - state_len = 2 * num_targets; - - state_offset = ilo_builder_state_pointer(builder, - ILO_BUILDER_ITEM_BLEND, state_align, state_len, &dw); - - for (i = 0; i < num_targets; i++) { - const unsigned idx = (blend->independent_blend_enable) ? i : 0; - const struct ilo_blend_cso *cso = &blend->cso[idx]; - const int num_samples = fb->num_samples; - const struct util_format_description *format_desc = - (idx < fb->state.nr_cbufs && fb->state.cbufs[idx]) ? 
- util_format_description(fb->state.cbufs[idx]->format) : NULL; - bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one; - - rt_is_unorm = true; - rt_is_pure_integer = false; - rt_dst_alpha_forced_one = false; - - if (format_desc) { - int ch; - - switch (format_desc->format) { - case PIPE_FORMAT_B8G8R8X8_UNORM: - /* force alpha to one when the HW format has alpha */ - assert(ilo_translate_render_format(builder->dev, - PIPE_FORMAT_B8G8R8X8_UNORM) == - GEN6_FORMAT_B8G8R8A8_UNORM); - rt_dst_alpha_forced_one = true; - break; - default: - break; - } - - for (ch = 0; ch < 4; ch++) { - if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID) - continue; - - if (format_desc->channel[ch].pure_integer) { - rt_is_unorm = false; - rt_is_pure_integer = true; - break; - } - - if (!format_desc->channel[ch].normalized || - format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED) - rt_is_unorm = false; - } - } - - dw[0] = cso->payload[0]; - dw[1] = cso->payload[1]; - - if (!rt_is_pure_integer) { - if (rt_dst_alpha_forced_one) - dw[0] |= cso->dw_blend_dst_alpha_forced_one; - else - dw[0] |= cso->dw_blend; - } - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 365: - * - * "Logic Ops are only supported on *_UNORM surfaces (excluding - * _SRGB variants), otherwise Logic Ops must be DISABLED." - * - * Since logicop is ignored for non-UNORM color buffers, no special care - * is needed. - */ - if (rt_is_unorm) - dw[1] |= cso->dw_logicop; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 356: - * - * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage - * Dither both must be disabled." - * - * There is no such limitation on GEN7, or for AlphaToOne. But GL - * requires that anyway. - */ - if (num_samples > 1) - dw[1] |= cso->dw_alpha_mod; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 382: - * - * "Alpha Test can only be enabled if Pixel Shader outputs a float - * alpha value." 
- */ - if (!rt_is_pure_integer) - dw[1] |= dsa->dw_alpha; - - dw += 2; - } - - return state_offset; -} - -static inline uint32_t -gen6_DEPTH_STENCIL_STATE(struct ilo_builder *builder, - const struct ilo_dsa_state *dsa) -{ - const int state_align = 64; - const int state_len = 3; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - STATIC_ASSERT(Elements(dsa->payload) >= state_len); - - return ilo_builder_state_write(builder, ILO_BUILDER_ITEM_DEPTH_STENCIL, - state_align, state_len, dsa->payload); -} - -static inline uint32_t -gen6_SCISSOR_RECT(struct ilo_builder *builder, - const struct ilo_scissor_state *scissor, - unsigned num_viewports) -{ - const int state_align = 32; - const int state_len = 2 * num_viewports; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 263: - * - * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is - * stored as an array of up to 16 elements..." - */ - assert(num_viewports && num_viewports <= 16); - assert(Elements(scissor->payload) >= state_len); - - return ilo_builder_state_write(builder, ILO_BUILDER_ITEM_SCISSOR_RECT, - state_align, state_len, scissor->payload); -} - -static inline uint32_t -gen6_BINDING_TABLE_STATE(struct ilo_builder *builder, - uint32_t *surface_states, - int num_surface_states) -{ - const int state_align = 32; - const int state_len = num_surface_states; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 69: - * - * "It is stored as an array of up to 256 elements..." - */ - assert(num_surface_states <= 256); - - if (!num_surface_states) - return 0; - - return ilo_builder_surface_write(builder, ILO_BUILDER_ITEM_BINDING_TABLE, - state_align, state_len, surface_states); -} - -static inline uint32_t -gen6_SURFACE_STATE(struct ilo_builder *builder, - const struct ilo_view_surface *surf, - bool for_render) -{ - const int state_align = 32; - const int state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 
8 : 6; - uint32_t state_offset; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - state_offset = ilo_builder_surface_write(builder, ILO_BUILDER_ITEM_SURFACE, - state_align, state_len, surf->payload); - - if (surf->bo) { - ilo_builder_surface_reloc(builder, state_offset, 1, surf->bo, - surf->payload[1], (for_render) ? INTEL_RELOC_WRITE : 0); - } - - return state_offset; -} - -static inline uint32_t -gen6_so_SURFACE_STATE(struct ilo_builder *builder, - const struct pipe_stream_output_target *so, - const struct pipe_stream_output_info *so_info, - int so_index) -{ - struct ilo_buffer *buf = ilo_buffer(so->buffer); - unsigned bo_offset, struct_size; - enum pipe_format elem_format; - struct ilo_view_surface surf; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4; - struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4; - - switch (so_info->output[so_index].num_components) { - case 1: - elem_format = PIPE_FORMAT_R32_FLOAT; - break; - case 2: - elem_format = PIPE_FORMAT_R32G32_FLOAT; - break; - case 3: - elem_format = PIPE_FORMAT_R32G32B32_FLOAT; - break; - case 4: - elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - break; - default: - assert(!"unexpected SO components length"); - elem_format = PIPE_FORMAT_R32_FLOAT; - break; - } - - ilo_gpe_init_view_surface_for_buffer_gen6(builder->dev, buf, bo_offset, - so->buffer_size, struct_size, elem_format, false, true, &surf); - - return gen6_SURFACE_STATE(builder, &surf, false); -} - -static inline uint32_t -gen6_SAMPLER_STATE(struct ilo_builder *builder, - const struct ilo_sampler_cso * const *samplers, - const struct pipe_sampler_view * const *views, - const uint32_t *sampler_border_colors, - int num_samplers) -{ - const int state_align = 32; - const int state_len = 4 * num_samplers; - uint32_t state_offset, *dw; - int i; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 101: - * - * "The sampler 
state is stored as an array of up to 16 elements..." - */ - assert(num_samplers <= 16); - - if (!num_samplers) - return 0; - - state_offset = ilo_builder_state_pointer(builder, - ILO_BUILDER_ITEM_SAMPLER, state_align, state_len, &dw); - - for (i = 0; i < num_samplers; i++) { - const struct ilo_sampler_cso *sampler = samplers[i]; - const struct pipe_sampler_view *view = views[i]; - const uint32_t border_color = sampler_border_colors[i]; - uint32_t dw_filter, dw_wrap; - - /* there may be holes */ - if (!sampler || !view) { - /* disabled sampler */ - dw[0] = 1 << 31; - dw[1] = 0; - dw[2] = 0; - dw[3] = 0; - dw += 4; - - continue; - } - - /* determine filter and wrap modes */ - switch (view->texture->target) { - case PIPE_TEXTURE_1D: - dw_filter = (sampler->anisotropic) ? - sampler->dw_filter_aniso : sampler->dw_filter; - dw_wrap = sampler->dw_wrap_1d; - break; - case PIPE_TEXTURE_3D: - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 103: - * - * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for - * surfaces of type SURFTYPE_3D." - */ - dw_filter = sampler->dw_filter; - dw_wrap = sampler->dw_wrap; - break; - case PIPE_TEXTURE_CUBE: - dw_filter = (sampler->anisotropic) ? - sampler->dw_filter_aniso : sampler->dw_filter; - dw_wrap = sampler->dw_wrap_cube; - break; - default: - dw_filter = (sampler->anisotropic) ? - sampler->dw_filter_aniso : sampler->dw_filter; - dw_wrap = sampler->dw_wrap; - break; - } - - dw[0] = sampler->payload[0]; - dw[1] = sampler->payload[1]; - assert(!(border_color & 0x1f)); - dw[2] = border_color; - dw[3] = sampler->payload[2]; - - dw[0] |= dw_filter; - - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { - dw[3] |= dw_wrap; - } - else { - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 21: - * - * "[DevSNB] Errata: Incorrect behavior is observed in cases - * where the min and mag mode filters are different and - * SurfMinLOD is nonzero. 
The determination of MagMode uses the - * following equation instead of the one in the above - * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)" - * - * As a way to work around that, we set Base to - * view->u.tex.first_level. - */ - dw[0] |= view->u.tex.first_level << 22; - - dw[1] |= dw_wrap; - } - - dw += 4; - } - - return state_offset; -} - -static inline uint32_t -gen6_SAMPLER_BORDER_COLOR_STATE(struct ilo_builder *builder, - const struct ilo_sampler_cso *sampler) -{ - const int state_align = 32; - const int state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 12; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - assert(Elements(sampler->payload) >= 3 + state_len); - - /* see ilo_gpe_init_sampler_cso() */ - return ilo_builder_state_write(builder, ILO_BUILDER_ITEM_BLOB, - state_align, state_len, &sampler->payload[3]); -} - -static inline uint32_t -gen6_push_constant_buffer(struct ilo_builder *builder, - int size, void **pcb) -{ - /* - * For all VS, GS, FS, and CS push constant buffers, they must be aligned - * to 32 bytes, and their sizes are specified in 256-bit units. 
- */ - const int state_align = 32; - const int state_len = align(size, 32) / 4; - uint32_t state_offset; - char *buf; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - state_offset = ilo_builder_state_pointer(builder, - ILO_BUILDER_ITEM_BLOB, state_align, state_len, (uint32_t **) &buf); - - /* zero out the unused range */ - if (size < state_len * 4) - memset(&buf[size], 0, state_len * 4 - size); - - if (pcb) - *pcb = buf; - - return state_offset; -} - #endif /* ILO_GPE_GEN6_H */ diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.c b/src/gallium/drivers/ilo/ilo_gpe_gen7.c index 0e5602c65a6..21b4bfd6101 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen7.c +++ b/src/gallium/drivers/ilo/ilo_gpe_gen7.c @@ -31,7 +31,8 @@ #include "ilo_format.h" #include "ilo_resource.h" #include "ilo_shader.h" -#include "ilo_gpe_gen7.h" +#include "ilo_gpe_gen6.h" +#include "ilo_gpe.h" void ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev, |