From 9e24c49e6443c076ad892e6004e04956560e446a Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 1 May 2015 11:47:13 +0800 Subject: ilo: move ilo_state_3d* to core ilo state structs (struct ilo_xxx_state) are moved as well. --- src/gallium/drivers/ilo/Makefile.sources | 6 +- src/gallium/drivers/ilo/core/ilo_state_3d.h | 424 ++++ src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c | 2223 +++++++++++++++++++ src/gallium/drivers/ilo/core/ilo_state_3d_top.c | 1711 +++++++++++++++ src/gallium/drivers/ilo/ilo_blitter_rectlist.c | 2 +- src/gallium/drivers/ilo/ilo_builder_3d_top.h | 2 +- src/gallium/drivers/ilo/ilo_shader.c | 2 +- src/gallium/drivers/ilo/ilo_state.c | 2 +- src/gallium/drivers/ilo/ilo_state.h | 257 +-- src/gallium/drivers/ilo/ilo_state_3d.h | 167 -- src/gallium/drivers/ilo/ilo_state_3d_bottom.c | 2225 -------------------- src/gallium/drivers/ilo/ilo_state_3d_top.c | 1713 --------------- 12 files changed, 4366 insertions(+), 4368 deletions(-) create mode 100644 src/gallium/drivers/ilo/core/ilo_state_3d.h create mode 100644 src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c create mode 100644 src/gallium/drivers/ilo/core/ilo_state_3d_top.c delete mode 100644 src/gallium/drivers/ilo/ilo_state_3d.h delete mode 100644 src/gallium/drivers/ilo/ilo_state_3d_bottom.c delete mode 100644 src/gallium/drivers/ilo/ilo_state_3d_top.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index fbb33b4e10a..1743f7c7659 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -10,6 +10,9 @@ C_SOURCES := \ core/ilo_fence.h \ core/ilo_image.c \ core/ilo_image.h \ + core/ilo_state_3d.h \ + core/ilo_state_3d_bottom.c \ + core/ilo_state_3d_top.c \ core/intel_winsys.h \ ilo_blit.c \ ilo_blit.h \ @@ -57,9 +60,6 @@ C_SOURCES := \ ilo_shader.h \ ilo_state.c \ ilo_state.h \ - ilo_state_3d.h \ - ilo_state_3d_bottom.c \ - ilo_state_3d_top.c \ ilo_transfer.c \ 
ilo_transfer.h \ ilo_video.c \ diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d.h b/src/gallium/drivers/ilo/core/ilo_state_3d.h new file mode 100644 index 00000000000..e772f21605c --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_3d.h @@ -0,0 +1,424 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2014 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu + */ + +#ifndef ILO_STATE_3D_H +#define ILO_STATE_3D_H + +#include "genhw/genhw.h" +#include "pipe/p_state.h" + +#include "ilo_core.h" +#include "ilo_dev.h" + +/** + * \see brw_context.h + */ +#define ILO_MAX_DRAW_BUFFERS 8 +#define ILO_MAX_CONST_BUFFERS (1 + 12) +#define ILO_MAX_SAMPLER_VIEWS 16 +#define ILO_MAX_SAMPLERS 16 +#define ILO_MAX_SO_BINDINGS 64 +#define ILO_MAX_SO_BUFFERS 4 +#define ILO_MAX_VIEWPORTS 1 + +#define ILO_MAX_SURFACES 256 + +struct intel_bo; +struct ilo_buffer; +struct ilo_shader_state; +struct ilo_texture; + +struct ilo_vb_state { + struct pipe_vertex_buffer states[PIPE_MAX_ATTRIBS]; + uint32_t enabled_mask; +}; + +struct ilo_ib_state { + struct pipe_resource *buffer; + const void *user_buffer; + unsigned offset; + unsigned index_size; + + /* these are not valid until the state is finalized */ + struct pipe_resource *hw_resource; + unsigned hw_index_size; + /* an offset to be added to pipe_draw_info::start */ + int64_t draw_start_offset; +}; + +struct ilo_ve_cso { + /* VERTEX_ELEMENT_STATE */ + uint32_t payload[2]; +}; + +struct ilo_ve_state { + struct ilo_ve_cso cso[PIPE_MAX_ATTRIBS]; + unsigned count; + + unsigned instance_divisors[PIPE_MAX_ATTRIBS]; + unsigned vb_mapping[PIPE_MAX_ATTRIBS]; + unsigned vb_count; + + /* these are not valid until the state is finalized */ + struct ilo_ve_cso edgeflag_cso; + bool last_cso_edgeflag; + + struct ilo_ve_cso nosrc_cso; + bool prepend_nosrc_cso; +}; + +struct ilo_so_state { + struct pipe_stream_output_target *states[ILO_MAX_SO_BUFFERS]; + unsigned count; + unsigned append_bitmask; + + bool enabled; +}; + +struct ilo_viewport_cso { + /* matrix form */ + float m00, m11, m22, m30, m31, m32; + + /* guardband in NDC space */ + float min_gbx, min_gby, max_gbx, max_gby; + + /* viewport in screen space */ + float min_x, min_y, min_z; + float max_x, max_y, max_z; +}; + +struct ilo_viewport_state { + struct ilo_viewport_cso cso[ILO_MAX_VIEWPORTS]; + unsigned count; + + struct 
pipe_viewport_state viewport0; +}; + +struct ilo_scissor_state { + /* SCISSOR_RECT */ + uint32_t payload[ILO_MAX_VIEWPORTS * 2]; + + struct pipe_scissor_state scissor0; +}; + +struct ilo_rasterizer_clip { + /* 3DSTATE_CLIP */ + uint32_t payload[3]; + + uint32_t can_enable_guardband; +}; + +struct ilo_rasterizer_sf { + /* 3DSTATE_SF */ + uint32_t payload[3]; + uint32_t dw_msaa; + + /* Global Depth Offset Constant/Scale/Clamp */ + uint32_t dw_depth_offset_const; + uint32_t dw_depth_offset_scale; + uint32_t dw_depth_offset_clamp; + + /* Gen8+ 3DSTATE_RASTER */ + uint32_t dw_raster; +}; + +struct ilo_rasterizer_wm { + /* 3DSTATE_WM */ + uint32_t payload[2]; + uint32_t dw_msaa_rast; + uint32_t dw_msaa_disp; +}; + +struct ilo_rasterizer_state { + struct pipe_rasterizer_state state; + + struct ilo_rasterizer_clip clip; + struct ilo_rasterizer_sf sf; + struct ilo_rasterizer_wm wm; +}; + +struct ilo_dsa_state { + /* DEPTH_STENCIL_STATE or Gen8+ 3DSTATE_WM_DEPTH_STENCIL */ + uint32_t payload[3]; + + uint32_t dw_blend_alpha; + uint32_t dw_ps_blend_alpha; + ubyte alpha_ref; +}; + +struct ilo_blend_cso { + /* BLEND_STATE */ + uint32_t payload[2]; + + uint32_t dw_blend; + uint32_t dw_blend_dst_alpha_forced_one; +}; + +struct ilo_blend_state { + struct ilo_blend_cso cso[ILO_MAX_DRAW_BUFFERS]; + + bool dual_blend; + bool alpha_to_coverage; + + uint32_t dw_shared; + uint32_t dw_alpha_mod; + uint32_t dw_logicop; + + /* a part of 3DSTATE_PS_BLEND */ + uint32_t dw_ps_blend; + uint32_t dw_ps_blend_dst_alpha_forced_one; +}; + +struct ilo_sampler_cso { + /* SAMPLER_STATE and SAMPLER_BORDER_COLOR_STATE */ + uint32_t payload[15]; + + uint32_t dw_filter; + uint32_t dw_filter_aniso; + uint32_t dw_wrap; + uint32_t dw_wrap_1d; + uint32_t dw_wrap_cube; + + bool anisotropic; + bool saturate_r; + bool saturate_s; + bool saturate_t; +}; + +struct ilo_sampler_state { + const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS]; +}; + +struct ilo_view_surface { + /* SURFACE_STATE */ + uint32_t payload[13]; 
+ struct intel_bo *bo; + + uint32_t scanout; +}; + +struct ilo_view_cso { + struct pipe_sampler_view base; + + struct ilo_view_surface surface; +}; + +struct ilo_view_state { + struct pipe_sampler_view *states[ILO_MAX_SAMPLER_VIEWS]; + unsigned count; +}; + +struct ilo_cbuf_cso { + struct pipe_resource *resource; + struct ilo_view_surface surface; + + /* + * this CSO is not so constant because user buffer needs to be uploaded in + * finalize_constant_buffers() + */ + const void *user_buffer; + unsigned user_buffer_size; +}; + +struct ilo_cbuf_state { + struct ilo_cbuf_cso cso[ILO_MAX_CONST_BUFFERS]; + uint32_t enabled_mask; +}; + +struct ilo_resource_state { + struct pipe_surface *states[PIPE_MAX_SHADER_RESOURCES]; + unsigned count; +}; + +struct ilo_surface_cso { + struct pipe_surface base; + + bool is_rt; + union { + struct ilo_view_surface rt; + struct ilo_zs_surface { + uint32_t payload[12]; + uint32_t dw_aligned_8x4; + + struct intel_bo *bo; + struct intel_bo *hiz_bo; + struct intel_bo *separate_s8_bo; + } zs; + } u; +}; + +struct ilo_fb_state { + struct pipe_framebuffer_state state; + + struct ilo_view_surface null_rt; + struct ilo_zs_surface null_zs; + + struct ilo_fb_blend_caps { + bool can_logicop; + bool can_blend; + bool can_alpha_test; + bool dst_alpha_forced_one; + } blend_caps[PIPE_MAX_COLOR_BUFS]; + + unsigned num_samples; +}; + +struct ilo_shader_cso { + uint32_t payload[5]; +}; + +/** + * Translate a pipe texture target to the matching hardware surface type. 
+ */ +static inline int +ilo_gpe_gen6_translate_texture(enum pipe_texture_target target) +{ + switch (target) { + case PIPE_BUFFER: + return GEN6_SURFTYPE_BUFFER; + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + return GEN6_SURFTYPE_1D; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D_ARRAY: + return GEN6_SURFTYPE_2D; + case PIPE_TEXTURE_3D: + return GEN6_SURFTYPE_3D; + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + return GEN6_SURFTYPE_CUBE; + default: + assert(!"unknown texture target"); + return GEN6_SURFTYPE_BUFFER; + } +} + +void +ilo_gpe_init_ve(const struct ilo_dev *dev, + unsigned num_states, + const struct pipe_vertex_element *states, + struct ilo_ve_state *ve); + +void +ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev, + struct ilo_ve_cso *cso); + +void +ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev, + int comp0, int comp1, int comp2, int comp3, + struct ilo_ve_cso *cso); + +void +ilo_gpe_set_viewport_cso(const struct ilo_dev *dev, + const struct pipe_viewport_state *state, + struct ilo_viewport_cso *vp); + +void +ilo_gpe_set_scissor(const struct ilo_dev *dev, + unsigned start_slot, + unsigned num_states, + const struct pipe_scissor_state *states, + struct ilo_scissor_state *scissor); + +void +ilo_gpe_set_scissor_null(const struct ilo_dev *dev, + struct ilo_scissor_state *scissor); + +void +ilo_gpe_init_rasterizer(const struct ilo_dev *dev, + const struct pipe_rasterizer_state *state, + struct ilo_rasterizer_state *rasterizer); +void +ilo_gpe_init_dsa(const struct ilo_dev *dev, + const struct pipe_depth_stencil_alpha_state *state, + struct ilo_dsa_state *dsa); + +void +ilo_gpe_init_blend(const struct ilo_dev *dev, + const struct pipe_blend_state *state, + struct ilo_blend_state *blend); + +void +ilo_gpe_init_sampler_cso(const struct ilo_dev *dev, + const struct pipe_sampler_state *state, + struct ilo_sampler_cso *sampler); + +void +ilo_gpe_init_view_surface_null(const struct ilo_dev *dev, + unsigned width, 
unsigned height, + unsigned depth, unsigned level, + struct ilo_view_surface *surf); + +void +ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev, + const struct ilo_buffer *buf, + unsigned offset, unsigned size, + unsigned struct_size, + enum pipe_format elem_format, + bool is_rt, bool render_cache_rw, + struct ilo_view_surface *surf); + +void +ilo_gpe_init_view_surface_for_texture(const struct ilo_dev *dev, + const struct ilo_texture *tex, + enum pipe_format format, + unsigned first_level, + unsigned num_levels, + unsigned first_layer, + unsigned num_layers, + bool is_rt, + struct ilo_view_surface *surf); + +void +ilo_gpe_init_zs_surface(const struct ilo_dev *dev, + const struct ilo_texture *tex, + enum pipe_format format, unsigned level, + unsigned first_layer, unsigned num_layers, + struct ilo_zs_surface *zs); + +void +ilo_gpe_init_vs_cso(const struct ilo_dev *dev, + const struct ilo_shader_state *vs, + struct ilo_shader_cso *cso); + +void +ilo_gpe_init_gs_cso(const struct ilo_dev *dev, + const struct ilo_shader_state *gs, + struct ilo_shader_cso *cso); + +void +ilo_gpe_init_fs_cso(const struct ilo_dev *dev, + const struct ilo_shader_state *fs, + struct ilo_shader_cso *cso); + +void +ilo_gpe_set_fb(const struct ilo_dev *dev, + const struct pipe_framebuffer_state *state, + struct ilo_fb_state *fb); + +#endif /* ILO_STATE_3D_H */ diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c new file mode 100644 index 00000000000..291c86b3406 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c @@ -0,0 +1,2223 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2014 LunarG, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#include "genhw/genhw.h" +#include "util/u_dual_blend.h" +#include "util/u_framebuffer.h" +#include "util/u_half.h" + +#include "ilo_format.h" +#include "ilo_state_3d.h" +#include "../ilo_resource.h" +#include "../ilo_shader.h" + +static void +rasterizer_init_clip(const struct ilo_dev *dev, + const struct pipe_rasterizer_state *state, + struct ilo_rasterizer_clip *clip) +{ + uint32_t dw1, dw2, dw3; + + ILO_DEV_ASSERT(dev, 6, 8); + + dw1 = GEN6_CLIP_DW1_STATISTICS; + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 219: + * + * "Workaround : Due to Hardware issue "EarlyCull" needs to be + * enabled only for the cases where the incoming primitive topology + * into the clipper guaranteed to be Trilist." + * + * What does this mean? 
+ */ + dw1 |= 0 << 19 | + GEN7_CLIP_DW1_EARLY_CULL_ENABLE; + + if (ilo_dev_gen(dev) < ILO_GEN(8)) { + if (state->front_ccw) + dw1 |= GEN7_CLIP_DW1_FRONTWINDING_CCW; + + switch (state->cull_face) { + case PIPE_FACE_NONE: + dw1 |= GEN7_CLIP_DW1_CULLMODE_NONE; + break; + case PIPE_FACE_FRONT: + dw1 |= GEN7_CLIP_DW1_CULLMODE_FRONT; + break; + case PIPE_FACE_BACK: + dw1 |= GEN7_CLIP_DW1_CULLMODE_BACK; + break; + case PIPE_FACE_FRONT_AND_BACK: + dw1 |= GEN7_CLIP_DW1_CULLMODE_BOTH; + break; + } + } + } + + dw2 = GEN6_CLIP_DW2_CLIP_ENABLE | + GEN6_CLIP_DW2_XY_TEST_ENABLE | + state->clip_plane_enable << GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT | + GEN6_CLIP_DW2_CLIPMODE_NORMAL; + + if (state->clip_halfz) + dw2 |= GEN6_CLIP_DW2_APIMODE_D3D; + else + dw2 |= GEN6_CLIP_DW2_APIMODE_OGL; + + if (ilo_dev_gen(dev) < ILO_GEN(8) && state->depth_clip) + dw2 |= GEN6_CLIP_DW2_Z_TEST_ENABLE; + + if (state->flatshade_first) { + dw2 |= 0 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT | + 0 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT | + 1 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT; + } + else { + dw2 |= 2 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT | + 1 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT | + 2 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT; + } + + dw3 = 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT | + 0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT; + + clip->payload[0] = dw1; + clip->payload[1] = dw2; + clip->payload[2] = dw3; + + clip->can_enable_guardband = true; + + /* + * There are several reasons that guard band test should be disabled + * + * - GL wide points (to avoid partially visible object) + * - GL wide or AA lines (to avoid partially visible object) + */ + if (state->point_size_per_vertex || state->point_size > 1.0f) + clip->can_enable_guardband = false; + if (state->line_smooth || state->line_width > 1.0f) + clip->can_enable_guardband = false; +} + +static void +rasterizer_init_sf_depth_offset_gen6(const struct ilo_dev *dev, + const struct pipe_rasterizer_state *state, + struct ilo_rasterizer_sf *sf) +{ + 
ILO_DEV_ASSERT(dev, 6, 8); + + /* + * Scale the constant term. The minimum representable value used by the HW + * is not large enough to be the minimum resolvable difference. + */ + sf->dw_depth_offset_const = fui(state->offset_units * 2.0f); + sf->dw_depth_offset_scale = fui(state->offset_scale); + sf->dw_depth_offset_clamp = fui(state->offset_clamp); +} + +static void +rasterizer_init_sf_gen6(const struct ilo_dev *dev, + const struct pipe_rasterizer_state *state, + struct ilo_rasterizer_sf *sf) +{ + int line_width, point_width; + uint32_t dw1, dw2, dw3; + + ILO_DEV_ASSERT(dev, 6, 7.5); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 248: + * + * "This bit (Statistics Enable) should be set whenever clipping is + * enabled and the Statistics Enable bit is set in CLIP_STATE. It + * should be cleared if clipping is disabled or Statistics Enable in + * CLIP_STATE is clear." + */ + dw1 = GEN7_SF_DW1_STATISTICS | + GEN7_SF_DW1_VIEWPORT_ENABLE; + + /* XXX GEN6 path seems to work fine for GEN7 */ + if (false && ilo_dev_gen(dev) >= ILO_GEN(7)) { + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 258: + * + * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset + * Enable Solid , Global Depth Offset Enable Wireframe, and Global + * Depth Offset Enable Point) should be set whenever non zero depth + * bias (Slope, Bias) values are used. Setting this bit may have + * some degradation of performance for some workloads." 
+ */ + if (state->offset_tri || state->offset_line || state->offset_point) { + /* XXX need to scale offset_const according to the depth format */ + dw1 |= GEN7_SF_DW1_LEGACY_DEPTH_OFFSET; + + dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID | + GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME | + GEN7_SF_DW1_DEPTH_OFFSET_POINT; + } + } else { + if (state->offset_tri) + dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID; + if (state->offset_line) + dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME; + if (state->offset_point) + dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_POINT; + } + + switch (state->fill_front) { + case PIPE_POLYGON_MODE_FILL: + dw1 |= GEN7_SF_DW1_FRONTFACE_SOLID; + break; + case PIPE_POLYGON_MODE_LINE: + dw1 |= GEN7_SF_DW1_FRONTFACE_WIREFRAME; + break; + case PIPE_POLYGON_MODE_POINT: + dw1 |= GEN7_SF_DW1_FRONTFACE_POINT; + break; + } + + switch (state->fill_back) { + case PIPE_POLYGON_MODE_FILL: + dw1 |= GEN7_SF_DW1_BACKFACE_SOLID; + break; + case PIPE_POLYGON_MODE_LINE: + dw1 |= GEN7_SF_DW1_BACKFACE_WIREFRAME; + break; + case PIPE_POLYGON_MODE_POINT: + dw1 |= GEN7_SF_DW1_BACKFACE_POINT; + break; + } + + if (state->front_ccw) + dw1 |= GEN7_SF_DW1_FRONTWINDING_CCW; + + dw2 = 0; + + if (state->line_smooth) { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 251: + * + * "This field (Anti-aliasing Enable) must be disabled if any of the + * render targets have integer (UINT or SINT) surface format." + * + * From the Sandy Bridge PRM, volume 2 part 1, page 317: + * + * "This field (Hierarchical Depth Buffer Enable) must be disabled + * if Anti-aliasing Enable in 3DSTATE_SF is enabled." + * + * TODO We do not check those yet. 
+ */ + dw2 |= GEN7_SF_DW2_AA_LINE_ENABLE | + GEN7_SF_DW2_AA_LINE_CAP_1_0; + } + + switch (state->cull_face) { + case PIPE_FACE_NONE: + dw2 |= GEN7_SF_DW2_CULLMODE_NONE; + break; + case PIPE_FACE_FRONT: + dw2 |= GEN7_SF_DW2_CULLMODE_FRONT; + break; + case PIPE_FACE_BACK: + dw2 |= GEN7_SF_DW2_CULLMODE_BACK; + break; + case PIPE_FACE_FRONT_AND_BACK: + dw2 |= GEN7_SF_DW2_CULLMODE_BOTH; + break; + } + + /* + * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1) + * pixels in the minor direction. We have to make the lines slightly + * thicker, 0.5 pixel on both sides, so that they intersect that many + * pixels are considered into the lines. + * + * Line width is in U3.7. + */ + line_width = (int) + ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f); + line_width = CLAMP(line_width, 0, 1023); + + /* use GIQ rules */ + if (line_width == 128 && !state->line_smooth) + line_width = 0; + + dw2 |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT; + + if (ilo_dev_gen(dev) == ILO_GEN(7.5) && state->line_stipple_enable) + dw2 |= GEN75_SF_DW2_LINE_STIPPLE_ENABLE; + + if (state->scissor) + dw2 |= GEN7_SF_DW2_SCISSOR_ENABLE; + + dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE | + GEN7_SF_DW3_SUBPIXEL_8BITS; + + if (state->line_last_pixel) + dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE; + + if (state->flatshade_first) { + dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | + 0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | + 1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; + } else { + dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | + 1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | + 2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; + } + + if (!state->point_size_per_vertex) + dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH; + + /* in U8.3 */ + point_width = (int) (state->point_size * 8.0f + 0.5f); + point_width = CLAMP(point_width, 1, 2047); + + dw3 |= point_width; + + STATIC_ASSERT(Elements(sf->payload) >= 3); + sf->payload[0] = dw1; + sf->payload[1] = dw2; + sf->payload[2] = dw3; + + if (state->multisample) { + 
sf->dw_msaa = GEN7_SF_DW2_MSRASTMODE_ON_PATTERN; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 251: + * + * "Software must not program a value of 0.0 when running in + * MSRASTMODE_ON_xxx modes - zero-width lines are not available + * when multisampling rasterization is enabled." + */ + if (!line_width) { + line_width = 128; /* 1.0f */ + + sf->dw_msaa |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT; + } + } else { + sf->dw_msaa = 0; + } + + rasterizer_init_sf_depth_offset_gen6(dev, state, sf); + /* 3DSTATE_RASTER is Gen8+ only */ + sf->dw_raster = 0; +} + +static uint32_t +rasterizer_get_sf_raster_gen8(const struct ilo_dev *dev, + const struct pipe_rasterizer_state *state) +{ + uint32_t dw = 0; + + ILO_DEV_ASSERT(dev, 8, 8); + + if (state->front_ccw) + dw |= GEN8_RASTER_DW1_FRONTWINDING_CCW; + + switch (state->cull_face) { + case PIPE_FACE_NONE: + dw |= GEN8_RASTER_DW1_CULLMODE_NONE; + break; + case PIPE_FACE_FRONT: + dw |= GEN8_RASTER_DW1_CULLMODE_FRONT; + break; + case PIPE_FACE_BACK: + dw |= GEN8_RASTER_DW1_CULLMODE_BACK; + break; + case PIPE_FACE_FRONT_AND_BACK: + dw |= GEN8_RASTER_DW1_CULLMODE_BOTH; + break; + } + + if (state->point_smooth) + dw |= GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE; + + if (state->multisample) + dw |= GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE; + + if (state->offset_tri) + dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID; + if (state->offset_line) + dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME; + if (state->offset_point) + dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_POINT; + + switch (state->fill_front) { + case PIPE_POLYGON_MODE_FILL: + dw |= GEN8_RASTER_DW1_FRONTFACE_SOLID; + break; + case PIPE_POLYGON_MODE_LINE: + dw |= GEN8_RASTER_DW1_FRONTFACE_WIREFRAME; + break; + case PIPE_POLYGON_MODE_POINT: + dw |= GEN8_RASTER_DW1_FRONTFACE_POINT; + break; + } + + switch (state->fill_back) { + case PIPE_POLYGON_MODE_FILL: + dw |= GEN8_RASTER_DW1_BACKFACE_SOLID; + break; + case PIPE_POLYGON_MODE_LINE: + dw |= GEN8_RASTER_DW1_BACKFACE_WIREFRAME; + break; + case 
PIPE_POLYGON_MODE_POINT: + dw |= GEN8_RASTER_DW1_BACKFACE_POINT; + break; + } + + if (state->line_smooth) + dw |= GEN8_RASTER_DW1_AA_LINE_ENABLE; + + if (state->scissor) + dw |= GEN8_RASTER_DW1_SCISSOR_ENABLE; + + if (state->depth_clip) + dw |= GEN8_RASTER_DW1_Z_TEST_ENABLE; + + return dw; +} + +static void +rasterizer_init_sf_gen8(const struct ilo_dev *dev, + const struct pipe_rasterizer_state *state, + struct ilo_rasterizer_sf *sf) +{ + int line_width, point_width; + uint32_t dw1, dw2, dw3; + + ILO_DEV_ASSERT(dev, 8, 8); + + /* in U3.7 */ + line_width = (int) + ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f); + line_width = CLAMP(line_width, 0, 1023); + + /* use GIQ rules */ + if (line_width == 128 && !state->line_smooth) + line_width = 0; + + /* in U8.3 */ + point_width = (int) (state->point_size * 8.0f + 0.5f); + point_width = CLAMP(point_width, 1, 2047); + + dw1 = GEN7_SF_DW1_STATISTICS | + GEN7_SF_DW1_VIEWPORT_ENABLE; + + dw2 = line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT; + if (state->line_smooth) + dw2 |= GEN7_SF_DW2_AA_LINE_CAP_1_0; + + dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE | + GEN7_SF_DW3_SUBPIXEL_8BITS | + point_width; + + if (state->line_last_pixel) + dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE; + + if (state->flatshade_first) { + dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | + 0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | + 1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; + } else { + dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | + 1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | + 2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; + } + + if (!state->point_size_per_vertex) + dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH; + + dw3 |= point_width; + + STATIC_ASSERT(Elements(sf->payload) >= 3); + sf->payload[0] = dw1; + sf->payload[1] = dw2; + sf->payload[2] = dw3; + + rasterizer_init_sf_depth_offset_gen6(dev, state, sf); + + sf->dw_msaa = 0; + sf->dw_raster = rasterizer_get_sf_raster_gen8(dev, state); +} + +static void +rasterizer_init_wm_gen6(const struct ilo_dev *dev, + const struct 
pipe_rasterizer_state *state, + struct ilo_rasterizer_wm *wm) +{ + uint32_t dw5, dw6; + + ILO_DEV_ASSERT(dev, 6, 6); + + /* only the FF unit states are set, as in GEN7 */ + + dw5 = GEN6_WM_DW5_AA_LINE_WIDTH_2_0; + + /* same value as in 3DSTATE_SF */ + if (state->line_smooth) + dw5 |= GEN6_WM_DW5_AA_LINE_CAP_1_0; + + if (state->poly_stipple_enable) + dw5 |= GEN6_WM_DW5_POLY_STIPPLE_ENABLE; + if (state->line_stipple_enable) + dw5 |= GEN6_WM_DW5_LINE_STIPPLE_ENABLE; + + /* + * assertion that makes sure + * + * dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp; + * + * is valid + */ + STATIC_ASSERT(GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL == 0 && + GEN6_WM_DW6_MSDISPMODE_PERSAMPLE == 0); + dw6 = GEN6_WM_DW6_ZW_INTERP_PIXEL; + + if (state->bottom_edge_rule) + dw6 |= GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT; + + wm->dw_msaa_rast = + (state->multisample) ? GEN6_WM_DW6_MSRASTMODE_ON_PATTERN : 0; + wm->dw_msaa_disp = GEN6_WM_DW6_MSDISPMODE_PERPIXEL; + + STATIC_ASSERT(Elements(wm->payload) >= 2); + wm->payload[0] = dw5; + wm->payload[1] = dw6; +} + +static void +rasterizer_init_wm_gen7(const struct ilo_dev *dev, + const struct pipe_rasterizer_state *state, + struct ilo_rasterizer_wm *wm) +{ + uint32_t dw1, dw2; + + ILO_DEV_ASSERT(dev, 7, 7.5); + + /* + * assertion that makes sure + * + * dw1 |= wm->dw_msaa_rast; + * dw2 |= wm->dw_msaa_disp; + * + * is valid + */ + STATIC_ASSERT(GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL == 0 && + GEN7_WM_DW2_MSDISPMODE_PERSAMPLE == 0); + dw1 = GEN7_WM_DW1_ZW_INTERP_PIXEL | + GEN7_WM_DW1_AA_LINE_WIDTH_2_0; + dw2 = 0; + + /* same value as in 3DSTATE_SF */ + if (state->line_smooth) + dw1 |= GEN7_WM_DW1_AA_LINE_CAP_1_0; + + if (state->poly_stipple_enable) + dw1 |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE; + if (state->line_stipple_enable) + dw1 |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE; + + if (state->bottom_edge_rule) + dw1 |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT; + + wm->dw_msaa_rast = + (state->multisample) ? 
GEN7_WM_DW1_MSRASTMODE_ON_PATTERN : 0; + wm->dw_msaa_disp = GEN7_WM_DW2_MSDISPMODE_PERPIXEL; + + STATIC_ASSERT(Elements(wm->payload) >= 2); + wm->payload[0] = dw1; + wm->payload[1] = dw2; +} + +static uint32_t +rasterizer_get_wm_gen8(const struct ilo_dev *dev, + const struct pipe_rasterizer_state *state) +{ + uint32_t dw; + + ILO_DEV_ASSERT(dev, 8, 8); + + dw = GEN7_WM_DW1_ZW_INTERP_PIXEL | + GEN7_WM_DW1_AA_LINE_WIDTH_2_0; + + /* same value as in 3DSTATE_SF */ + if (state->line_smooth) + dw |= GEN7_WM_DW1_AA_LINE_CAP_1_0; + + if (state->poly_stipple_enable) + dw |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE; + if (state->line_stipple_enable) + dw |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE; + + if (state->bottom_edge_rule) + dw |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT; + + return dw; +} + +void +ilo_gpe_init_rasterizer(const struct ilo_dev *dev, + const struct pipe_rasterizer_state *state, + struct ilo_rasterizer_state *rasterizer) +{ + rasterizer_init_clip(dev, state, &rasterizer->clip); + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + memset(&rasterizer->wm, 0, sizeof(rasterizer->wm)); + rasterizer->wm.payload[0] = rasterizer_get_wm_gen8(dev, state); + + rasterizer_init_sf_gen8(dev, state, &rasterizer->sf); + } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + rasterizer_init_wm_gen7(dev, state, &rasterizer->wm); + rasterizer_init_sf_gen6(dev, state, &rasterizer->sf); + } else { + rasterizer_init_wm_gen6(dev, state, &rasterizer->wm); + rasterizer_init_sf_gen6(dev, state, &rasterizer->sf); + } +} + +static void +fs_init_cso_gen6(const struct ilo_dev *dev, + const struct ilo_shader_state *fs, + struct ilo_shader_cso *cso) +{ + int start_grf, input_count, sampler_count, interps, max_threads; + uint32_t dw2, dw4, dw5, dw6; + + ILO_DEV_ASSERT(dev, 6, 6); + + start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); + input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); + sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT); + interps = 
ilo_shader_get_kernel_param(fs, + ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS); + + /* see brwCreateContext() */ + max_threads = (dev->gt == 2) ? 80 : 40; + + dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; + dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; + + dw4 = start_grf << GEN6_WM_DW4_URB_GRF_START0__SHIFT | + 0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT | + 0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT; + + dw5 = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 275: + * + * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the + * PS kernel or color calculator has the ability to kill (discard) + * pixels or samples, other than due to depth or stencil testing. + * This bit is required to be ENABLED in the following situations: + * + * The API pixel shader program contains "killpix" or "discard" + * instructions, or other code in the pixel shader kernel that can + * cause the final pixel mask to differ from the pixel mask received + * on dispatch. + * + * A sampler with chroma key enabled with kill pixel mode is used by + * the pixel shader. + * + * Any render target has Alpha Test Enable or AlphaToCoverage Enable + * enabled. + * + * The pixel shader kernel generates and outputs oMask. + * + * Note: As ClipDistance clipping is fully supported in hardware and + * therefore not via PS instructions, there should be no need to + * ENABLE this bit due to ClipDistance clipping." + */ + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) + dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 275: + * + * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth + * field must be set to disabled." + * + * TODO This is not checked yet. 
+ */ + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) + dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH; + + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) + dw5 |= GEN6_WM_DW5_PS_USE_DEPTH; + + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) + dw5 |= GEN6_WM_DW5_PS_USE_W; + + /* + * TODO set this bit only when + * + * a) fs writes colors and color is not masked, or + * b) fs writes depth, or + * c) fs or cc kills + */ + if (true) + dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE; + + assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET)); + dw5 |= GEN6_PS_DISPATCH_8 << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT; + + dw6 = input_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT | + GEN6_WM_DW6_PS_POSOFFSET_NONE | + interps << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT; + + STATIC_ASSERT(Elements(cso->payload) >= 4); + cso->payload[0] = dw2; + cso->payload[1] = dw4; + cso->payload[2] = dw5; + cso->payload[3] = dw6; +} + +static uint32_t +fs_get_wm_gen7(const struct ilo_dev *dev, + const struct ilo_shader_state *fs) +{ + uint32_t dw; + + ILO_DEV_ASSERT(dev, 7, 7.5); + + dw = ilo_shader_get_kernel_param(fs, + ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) << + GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT; + + /* + * TODO set this bit only when + * + * a) fs writes colors and color is not masked, or + * b) fs writes depth, or + * c) fs or cc kills + */ + dw |= GEN7_WM_DW1_PS_DISPATCH_ENABLE; + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 278: + * + * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that + * the PS kernel or color calculator has the ability to kill + * (discard) pixels or samples, other than due to depth or stencil + * testing. This bit is required to be ENABLED in the following + * situations: + * + * - The API pixel shader program contains "killpix" or "discard" + * instructions, or other code in the pixel shader kernel that + * can cause the final pixel mask to differ from the pixel mask + * received on dispatch. 
+ * + * - A sampler with chroma key enabled with kill pixel mode is used + * by the pixel shader. + * + * - Any render target has Alpha Test Enable or AlphaToCoverage + * Enable enabled. + * + * - The pixel shader kernel generates and outputs oMask. + * + * Note: As ClipDistance clipping is fully supported in hardware + * and therefore not via PS instructions, there should be no need + * to ENABLE this bit due to ClipDistance clipping." + */ + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) + dw |= GEN7_WM_DW1_PS_KILL_PIXEL; + + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) + dw |= GEN7_WM_DW1_PSCDEPTH_ON; + + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) + dw |= GEN7_WM_DW1_PS_USE_DEPTH; + + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) + dw |= GEN7_WM_DW1_PS_USE_W; + + return dw; +} + +static void +fs_init_cso_gen7(const struct ilo_dev *dev, + const struct ilo_shader_state *fs, + struct ilo_shader_cso *cso) +{ + int start_grf, sampler_count, max_threads; + uint32_t dw2, dw4, dw5; + + ILO_DEV_ASSERT(dev, 7, 7.5); + + start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); + sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT); + + dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; + dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; + + dw4 = GEN7_PS_DW4_POSOFFSET_NONE; + + /* see brwCreateContext() */ + switch (ilo_dev_gen(dev)) { + case ILO_GEN(7.5): + max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102; + dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT; + dw4 |= 1 << GEN75_PS_DW4_SAMPLE_MASK__SHIFT; + break; + case ILO_GEN(7): + default: + max_threads = (dev->gt == 2) ? 
172 : 48; + dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT; + break; + } + + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE)) + dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE; + + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT)) + dw4 |= GEN7_PS_DW4_ATTR_ENABLE; + + assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET)); + dw4 |= GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT; + + dw5 = start_grf << GEN7_PS_DW5_URB_GRF_START0__SHIFT | + 0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT | + 0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT; + + STATIC_ASSERT(Elements(cso->payload) >= 4); + cso->payload[0] = dw2; + cso->payload[1] = dw4; + cso->payload[2] = dw5; + cso->payload[3] = fs_get_wm_gen7(dev, fs); +} + +static uint32_t +fs_get_psx_gen8(const struct ilo_dev *dev, + const struct ilo_shader_state *fs) +{ + uint32_t dw; + + ILO_DEV_ASSERT(dev, 8, 8); + + dw = GEN8_PSX_DW1_DISPATCH_ENABLE; + + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) + dw |= GEN8_PSX_DW1_KILL_PIXEL; + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) + dw |= GEN8_PSX_DW1_PSCDEPTH_ON; + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) + dw |= GEN8_PSX_DW1_USE_DEPTH; + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) + dw |= GEN8_PSX_DW1_USE_W; + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT)) + dw |= GEN8_PSX_DW1_ATTR_ENABLE; + + return dw; +} + +static uint32_t +fs_get_wm_gen8(const struct ilo_dev *dev, + const struct ilo_shader_state *fs) +{ + ILO_DEV_ASSERT(dev, 8, 8); + + return ilo_shader_get_kernel_param(fs, + ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) << + GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT; +} + +static void +fs_init_cso_gen8(const struct ilo_dev *dev, + const struct ilo_shader_state *fs, + struct ilo_shader_cso *cso) +{ + int start_grf, sampler_count; + uint32_t dw3, dw6, dw7; + + ILO_DEV_ASSERT(dev, 8, 8); + + start_grf = ilo_shader_get_kernel_param(fs, 
ILO_KERNEL_URB_DATA_START_REG); + sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT); + + dw3 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; + dw3 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; + + /* always 64? */ + dw6 = (64 - 2) << GEN8_PS_DW6_MAX_THREADS__SHIFT | + GEN8_PS_DW6_POSOFFSET_NONE; + if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE)) + dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE; + + assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET)); + dw6 |= GEN6_PS_DISPATCH_8 << GEN8_PS_DW6_DISPATCH_MODE__SHIFT; + + dw7 = start_grf << GEN8_PS_DW7_URB_GRF_START0__SHIFT | + 0 << GEN8_PS_DW7_URB_GRF_START1__SHIFT | + 0 << GEN8_PS_DW7_URB_GRF_START2__SHIFT; + + STATIC_ASSERT(Elements(cso->payload) >= 5); + cso->payload[0] = dw3; + cso->payload[1] = dw6; + cso->payload[2] = dw7; + cso->payload[3] = fs_get_psx_gen8(dev, fs); + cso->payload[4] = fs_get_wm_gen8(dev, fs); +} + +void +ilo_gpe_init_fs_cso(const struct ilo_dev *dev, + const struct ilo_shader_state *fs, + struct ilo_shader_cso *cso) +{ + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + fs_init_cso_gen8(dev, fs, cso); + else if (ilo_dev_gen(dev) >= ILO_GEN(7)) + fs_init_cso_gen7(dev, fs, cso); + else + fs_init_cso_gen6(dev, fs, cso); +} + +struct ilo_zs_surface_info { + int surface_type; + int format; + + struct { + struct intel_bo *bo; + unsigned stride; + unsigned qpitch; + enum gen_surface_tiling tiling; + uint32_t offset; + } zs, stencil, hiz; + + unsigned width, height, depth; + unsigned lod, first_layer, num_layers; +}; + +static void +zs_init_info_null(const struct ilo_dev *dev, + struct ilo_zs_surface_info *info) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + memset(info, 0, sizeof(*info)); + + info->surface_type = GEN6_SURFTYPE_NULL; + info->format = GEN6_ZFORMAT_D32_FLOAT; + info->width = 1; + info->height = 1; + info->depth = 1; + info->num_layers = 1; +} + +static void +zs_init_info(const struct ilo_dev *dev, + const struct ilo_texture *tex, + 
enum pipe_format format, unsigned level, + unsigned first_layer, unsigned num_layers, + struct ilo_zs_surface_info *info) +{ + bool separate_stencil; + + ILO_DEV_ASSERT(dev, 6, 8); + + memset(info, 0, sizeof(*info)); + + info->surface_type = ilo_gpe_gen6_translate_texture(tex->base.target); + + if (info->surface_type == GEN6_SURFTYPE_CUBE) { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 325-326: + * + * "For Other Surfaces (Cube Surfaces): + * This field (Minimum Array Element) is ignored." + * + * "For Other Surfaces (Cube Surfaces): + * This field (Render Target View Extent) is ignored." + * + * As such, we cannot set first_layer and num_layers on cube surfaces. + * To work around that, treat it as a 2D surface. + */ + info->surface_type = GEN6_SURFTYPE_2D; + } + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + separate_stencil = true; + } + else { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 317: + * + * "This field (Separate Stencil Buffer Enable) must be set to the + * same value (enabled or disabled) as Hierarchical Depth Buffer + * Enable." + */ + separate_stencil = + ilo_texture_can_enable_hiz(tex, level, first_layer, num_layers); + } + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 317: + * + * "If this field (Hierarchical Depth Buffer Enable) is enabled, the + * Surface Format of the depth buffer cannot be + * D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil + * requires the separate stencil buffer." + * + * From the Ironlake PRM, volume 2 part 1, page 330: + * + * "If this field (Separate Stencil Buffer Enable) is disabled, the + * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT." + * + * There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT + * is indeed used, the depth values output by the fragment shaders will + * be different when read back. + * + * As for GEN7+, separate_stencil is always true. 
+ */ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + info->format = GEN6_ZFORMAT_D16_UNORM; + break; + case PIPE_FORMAT_Z32_FLOAT: + info->format = GEN6_ZFORMAT_D32_FLOAT; + break; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + info->format = (separate_stencil) ? + GEN6_ZFORMAT_D24_UNORM_X8_UINT : + GEN6_ZFORMAT_D24_UNORM_S8_UINT; + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + info->format = (separate_stencil) ? + GEN6_ZFORMAT_D32_FLOAT : + GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT; + break; + case PIPE_FORMAT_S8_UINT: + if (separate_stencil) { + info->format = GEN6_ZFORMAT_D32_FLOAT; + break; + } + /* fall through */ + default: + assert(!"unsupported depth/stencil format"); + zs_init_info_null(dev, info); + return; + break; + } + + if (format != PIPE_FORMAT_S8_UINT) { + info->zs.bo = tex->image.bo; + info->zs.stride = tex->image.bo_stride; + + assert(tex->image.layer_height % 4 == 0); + info->zs.qpitch = tex->image.layer_height / 4; + + info->zs.tiling = tex->image.tiling; + info->zs.offset = 0; + } + + if (tex->separate_s8 || format == PIPE_FORMAT_S8_UINT) { + const struct ilo_texture *s8_tex = + (tex->separate_s8) ? tex->separate_s8 : tex; + + info->stencil.bo = s8_tex->image.bo; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 329: + * + * "The pitch must be set to 2x the value computed based on width, + * as the stencil buffer is stored with two rows interleaved." + * + * For GEN7, we still double the stride because we did not double the + * slice widths when initializing the layout.
+ */ + info->stencil.stride = s8_tex->image.bo_stride * 2; + + assert(s8_tex->image.layer_height % 4 == 0); + info->stencil.qpitch = s8_tex->image.layer_height / 4; + + info->stencil.tiling = s8_tex->image.tiling; + + if (ilo_dev_gen(dev) == ILO_GEN(6)) { + unsigned x, y; + + assert(s8_tex->image.walk == ILO_IMAGE_WALK_LOD); + + /* offset to the level */ + ilo_image_get_slice_pos(&s8_tex->image, level, 0, &x, &y); + ilo_image_pos_to_mem(&s8_tex->image, x, y, &x, &y); + info->stencil.offset = ilo_image_mem_to_raw(&s8_tex->image, x, y); + } + } + + if (ilo_texture_can_enable_hiz(tex, level, first_layer, num_layers)) { + info->hiz.bo = tex->image.aux_bo; + info->hiz.stride = tex->image.aux_stride; + + assert(tex->image.aux_layer_height % 4 == 0); + info->hiz.qpitch = tex->image.aux_layer_height / 4; + + info->hiz.tiling = GEN6_TILING_Y; + + /* offset to the level */ + if (ilo_dev_gen(dev) == ILO_GEN(6)) + info->hiz.offset = tex->image.aux_offsets[level]; + } + + info->width = tex->image.width0; + info->height = tex->image.height0; + info->depth = (tex->base.target == PIPE_TEXTURE_3D) ? + tex->base.depth0 : num_layers; + + info->lod = level; + info->first_layer = first_layer; + info->num_layers = num_layers; +} + +void +ilo_gpe_init_zs_surface(const struct ilo_dev *dev, + const struct ilo_texture *tex, + enum pipe_format format, unsigned level, + unsigned first_layer, unsigned num_layers, + struct ilo_zs_surface *zs) +{ + const int max_2d_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192; + const int max_array_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 
2048 : 512; + struct ilo_zs_surface_info info; + uint32_t dw1, dw2, dw3, dw4, dw5, dw6; + int align_w = 8, align_h = 4; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (tex) { + zs_init_info(dev, tex, format, level, first_layer, num_layers, &info); + + switch (tex->base.nr_samples) { + case 2: + align_w /= 2; + break; + case 4: + align_w /= 2; + align_h /= 2; + break; + case 8: + align_w /= 4; + align_h /= 2; + break; + case 16: + align_w /= 4; + align_h /= 4; + break; + default: + break; + } + } else { + zs_init_info_null(dev, &info); + } + + switch (info.surface_type) { + case GEN6_SURFTYPE_NULL: + break; + case GEN6_SURFTYPE_1D: + assert(info.width <= max_2d_size && info.height == 1 && + info.depth <= max_array_size); + assert(info.first_layer < max_array_size - 1 && + info.num_layers <= max_array_size); + break; + case GEN6_SURFTYPE_2D: + assert(info.width <= max_2d_size && info.height <= max_2d_size && + info.depth <= max_array_size); + assert(info.first_layer < max_array_size - 1 && + info.num_layers <= max_array_size); + break; + case GEN6_SURFTYPE_3D: + assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048); + assert(info.first_layer < 2048 && info.num_layers <= max_array_size); + break; + case GEN6_SURFTYPE_CUBE: + assert(info.width <= max_2d_size && info.height <= max_2d_size && + info.depth == 1); + assert(info.first_layer == 0 && info.num_layers == 1); + assert(info.width == info.height); + break; + default: + assert(!"unexpected depth surface type"); + break; + } + + dw1 = info.surface_type << GEN6_DEPTH_DW1_TYPE__SHIFT | + info.format << GEN6_DEPTH_DW1_FORMAT__SHIFT; + + if (info.zs.bo) { + /* required for GEN6+ */ + assert(info.zs.tiling == GEN6_TILING_Y); + assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 && + info.zs.stride % 128 == 0); + assert(info.width <= info.zs.stride); + + dw1 |= (info.zs.stride - 1); + dw2 = info.zs.offset; + } else { + dw2 = 0; + } + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + if (info.zs.bo) + dw1 |= 
GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE; + + if (info.stencil.bo) + dw1 |= GEN7_DEPTH_DW1_STENCIL_WRITE_ENABLE; + + if (info.hiz.bo) + dw1 |= GEN7_DEPTH_DW1_HIZ_ENABLE; + + dw3 = (info.height - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT | + (info.width - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT | + info.lod << GEN7_DEPTH_DW3_LOD__SHIFT; + + zs->dw_aligned_8x4 = + (align(info.height, align_h) - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT | + (align(info.width, align_w) - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT | + info.lod << GEN7_DEPTH_DW3_LOD__SHIFT; + + dw4 = (info.depth - 1) << GEN7_DEPTH_DW4_DEPTH__SHIFT | + info.first_layer << GEN7_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT; + + dw5 = 0; + + dw6 = (info.num_layers - 1) << GEN7_DEPTH_DW6_RT_VIEW_EXTENT__SHIFT; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + dw6 |= info.zs.qpitch; + } else { + /* always Y-tiled */ + dw1 |= GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT; + + if (info.hiz.bo) { + dw1 |= GEN6_DEPTH_DW1_HIZ_ENABLE | + GEN6_DEPTH_DW1_SEPARATE_STENCIL; + } + + dw3 = (info.height - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT | + (info.width - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT | + info.lod << GEN6_DEPTH_DW3_LOD__SHIFT | + GEN6_DEPTH_DW3_MIPLAYOUT_BELOW; + + zs->dw_aligned_8x4 = + (align(info.height, align_h) - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT | + (align(info.width, align_w) - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT | + info.lod << GEN6_DEPTH_DW3_LOD__SHIFT | + GEN6_DEPTH_DW3_MIPLAYOUT_BELOW; + + dw4 = (info.depth - 1) << GEN6_DEPTH_DW4_DEPTH__SHIFT | + info.first_layer << GEN6_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT | + (info.num_layers - 1) << GEN6_DEPTH_DW4_RT_VIEW_EXTENT__SHIFT; + + dw5 = 0; + + dw6 = 0; + } + + STATIC_ASSERT(Elements(zs->payload) >= 12); + + zs->payload[0] = dw1; + zs->payload[1] = dw2; + zs->payload[2] = dw3; + zs->payload[3] = dw4; + zs->payload[4] = dw5; + zs->payload[5] = dw6; + + /* do not increment reference count */ + zs->bo = info.zs.bo; + + /* separate stencil */ + if (info.stencil.bo) { + assert(info.stencil.stride > 0 && 
info.stencil.stride < 128 * 1024 && + info.stencil.stride % 128 == 0); + + dw1 = (info.stencil.stride - 1) << GEN6_STENCIL_DW1_PITCH__SHIFT; + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) + dw1 |= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE; + + dw2 = info.stencil.offset; + dw4 = info.stencil.qpitch; + } else { + dw1 = 0; + dw2 = 0; + dw4 = 0; + } + + zs->payload[6] = dw1; + zs->payload[7] = dw2; + zs->payload[8] = dw4; + /* do not increment reference count */ + zs->separate_s8_bo = info.stencil.bo; + + /* hiz */ + if (info.hiz.bo) { + dw1 = (info.hiz.stride - 1) << GEN6_HIZ_DW1_PITCH__SHIFT; + dw2 = info.hiz.offset; + dw4 = info.hiz.qpitch; + } else { + dw1 = 0; + dw2 = 0; + dw4 = 0; + } + + zs->payload[9] = dw1; + zs->payload[10] = dw2; + zs->payload[11] = dw4; + /* do not increment reference count */ + zs->hiz_bo = info.hiz.bo; +} + +static void +viewport_get_guardband(const struct ilo_dev *dev, + int center_x, int center_y, + int *min_gbx, int *max_gbx, + int *min_gby, int *max_gby) +{ + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 234: + * + * "Per-Device Guardband Extents + * + * - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1] + * - Maximum Post-Clamp Delta (X or Y): 16K" + * + * "In addition, in order to be correctly rendered, objects must have a + * screenspace bounding box not exceeding 8K in the X or Y direction. + * This additional restriction must also be comprehended by software, + * i.e., enforced by use of clipping." + * + * From the Ivy Bridge PRM, volume 2 part 1, page 248: + * + * "Per-Device Guardband Extents + * + * - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1] + * - Maximum Post-Clamp Delta (X or Y): N/A" + * + * "In addition, in order to be correctly rendered, objects must have a + * screenspace bounding box not exceeding 8K in the X or Y direction. + * This additional restriction must also be comprehended by software, + * i.e., enforced by use of clipping." 
+ * + * Combined, the bounding box of any object can not exceed 8K in both + * width and height. + * + * Below we set the guardband as a square of length 8K, centered at where + * the viewport is. This makes sure all objects passing the GB test are + * valid to the renderer, and those failing the XY clipping have a + * better chance of passing the GB test. + */ + const int max_extent = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 32768 : 16384; + const int half_len = 8192 / 2; + + /* make sure the guardband is within the valid range */ + if (center_x - half_len < -max_extent) + center_x = -max_extent + half_len; + else if (center_x + half_len > max_extent - 1) + center_x = max_extent - half_len; + + if (center_y - half_len < -max_extent) + center_y = -max_extent + half_len; + else if (center_y + half_len > max_extent - 1) + center_y = max_extent - half_len; + + *min_gbx = (float) (center_x - half_len); + *max_gbx = (float) (center_x + half_len); + *min_gby = (float) (center_y - half_len); + *max_gby = (float) (center_y + half_len); +} + +void +ilo_gpe_set_viewport_cso(const struct ilo_dev *dev, + const struct pipe_viewport_state *state, + struct ilo_viewport_cso *vp) +{ + const float scale_x = fabs(state->scale[0]); + const float scale_y = fabs(state->scale[1]); + const float scale_z = fabs(state->scale[2]); + int min_gbx, max_gbx, min_gby, max_gby; + + ILO_DEV_ASSERT(dev, 6, 8); + + viewport_get_guardband(dev, + (int) state->translate[0], + (int) state->translate[1], + &min_gbx, &max_gbx, &min_gby, &max_gby); + + /* matrix form */ + vp->m00 = state->scale[0]; + vp->m11 = state->scale[1]; + vp->m22 = state->scale[2]; + vp->m30 = state->translate[0]; + vp->m31 = state->translate[1]; + vp->m32 = state->translate[2]; + + /* guardband in NDC space */ + vp->min_gbx = ((float) min_gbx - state->translate[0]) / scale_x; + vp->max_gbx = ((float) max_gbx - state->translate[0]) / scale_x; + vp->min_gby = ((float) min_gby - state->translate[1]) / scale_y; + vp->max_gby = ((float) max_gby
- state->translate[1]) / scale_y; + + /* viewport in screen space */ + vp->min_x = scale_x * -1.0f + state->translate[0]; + vp->max_x = scale_x * 1.0f + state->translate[0]; + vp->min_y = scale_y * -1.0f + state->translate[1]; + vp->max_y = scale_y * 1.0f + state->translate[1]; + vp->min_z = scale_z * -1.0f + state->translate[2]; + vp->max_z = scale_z * 1.0f + state->translate[2]; +} + +/** + * Translate a pipe logicop to the matching hardware logicop. + */ +static int +gen6_translate_pipe_logicop(unsigned logicop) +{ + switch (logicop) { + case PIPE_LOGICOP_CLEAR: return GEN6_LOGICOP_CLEAR; + case PIPE_LOGICOP_NOR: return GEN6_LOGICOP_NOR; + case PIPE_LOGICOP_AND_INVERTED: return GEN6_LOGICOP_AND_INVERTED; + case PIPE_LOGICOP_COPY_INVERTED: return GEN6_LOGICOP_COPY_INVERTED; + case PIPE_LOGICOP_AND_REVERSE: return GEN6_LOGICOP_AND_REVERSE; + case PIPE_LOGICOP_INVERT: return GEN6_LOGICOP_INVERT; + case PIPE_LOGICOP_XOR: return GEN6_LOGICOP_XOR; + case PIPE_LOGICOP_NAND: return GEN6_LOGICOP_NAND; + case PIPE_LOGICOP_AND: return GEN6_LOGICOP_AND; + case PIPE_LOGICOP_EQUIV: return GEN6_LOGICOP_EQUIV; + case PIPE_LOGICOP_NOOP: return GEN6_LOGICOP_NOOP; + case PIPE_LOGICOP_OR_INVERTED: return GEN6_LOGICOP_OR_INVERTED; + case PIPE_LOGICOP_COPY: return GEN6_LOGICOP_COPY; + case PIPE_LOGICOP_OR_REVERSE: return GEN6_LOGICOP_OR_REVERSE; + case PIPE_LOGICOP_OR: return GEN6_LOGICOP_OR; + case PIPE_LOGICOP_SET: return GEN6_LOGICOP_SET; + default: + assert(!"unknown logicop function"); + return GEN6_LOGICOP_CLEAR; + } +} + +/** + * Translate a pipe blend function to the matching hardware blend function. 
+ */ +static int +gen6_translate_pipe_blend(unsigned blend) +{ + switch (blend) { + case PIPE_BLEND_ADD: return GEN6_BLENDFUNCTION_ADD; + case PIPE_BLEND_SUBTRACT: return GEN6_BLENDFUNCTION_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: return GEN6_BLENDFUNCTION_REVERSE_SUBTRACT; + case PIPE_BLEND_MIN: return GEN6_BLENDFUNCTION_MIN; + case PIPE_BLEND_MAX: return GEN6_BLENDFUNCTION_MAX; + default: + assert(!"unknown blend function"); + return GEN6_BLENDFUNCTION_ADD; + }; +} + +/** + * Translate a pipe blend factor to the matching hardware blend factor. + */ +static int +gen6_translate_pipe_blendfactor(unsigned blendfactor) +{ + switch (blendfactor) { + case PIPE_BLENDFACTOR_ONE: return GEN6_BLENDFACTOR_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: return GEN6_BLENDFACTOR_SRC_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: return GEN6_BLENDFACTOR_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: return GEN6_BLENDFACTOR_DST_ALPHA; + case PIPE_BLENDFACTOR_DST_COLOR: return GEN6_BLENDFACTOR_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: return GEN6_BLENDFACTOR_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: return GEN6_BLENDFACTOR_CONST_ALPHA; + case PIPE_BLENDFACTOR_SRC1_COLOR: return GEN6_BLENDFACTOR_SRC1_COLOR; + case PIPE_BLENDFACTOR_SRC1_ALPHA: return GEN6_BLENDFACTOR_SRC1_ALPHA; + case PIPE_BLENDFACTOR_ZERO: return GEN6_BLENDFACTOR_ZERO; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: return GEN6_BLENDFACTOR_INV_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return GEN6_BLENDFACTOR_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: return GEN6_BLENDFACTOR_INV_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: return GEN6_BLENDFACTOR_INV_DST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: return GEN6_BLENDFACTOR_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return GEN6_BLENDFACTOR_INV_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return GEN6_BLENDFACTOR_INV_SRC1_COLOR; + case 
PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return GEN6_BLENDFACTOR_INV_SRC1_ALPHA; + default: + assert(!"unknown blend factor"); + return GEN6_BLENDFACTOR_ONE; + }; +} + +/** + * Translate a pipe stencil op to the matching hardware stencil op. + */ +static int +gen6_translate_pipe_stencil_op(unsigned stencil_op) +{ + switch (stencil_op) { + case PIPE_STENCIL_OP_KEEP: return GEN6_STENCILOP_KEEP; + case PIPE_STENCIL_OP_ZERO: return GEN6_STENCILOP_ZERO; + case PIPE_STENCIL_OP_REPLACE: return GEN6_STENCILOP_REPLACE; + case PIPE_STENCIL_OP_INCR: return GEN6_STENCILOP_INCRSAT; + case PIPE_STENCIL_OP_DECR: return GEN6_STENCILOP_DECRSAT; + case PIPE_STENCIL_OP_INCR_WRAP: return GEN6_STENCILOP_INCR; + case PIPE_STENCIL_OP_DECR_WRAP: return GEN6_STENCILOP_DECR; + case PIPE_STENCIL_OP_INVERT: return GEN6_STENCILOP_INVERT; + default: + assert(!"unknown stencil op"); + return GEN6_STENCILOP_KEEP; + } +} + +static int +gen6_blend_factor_dst_alpha_forced_one(int factor) +{ + switch (factor) { + case GEN6_BLENDFACTOR_DST_ALPHA: + return GEN6_BLENDFACTOR_ONE; + case GEN6_BLENDFACTOR_INV_DST_ALPHA: + case GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE: + return GEN6_BLENDFACTOR_ZERO; + default: + return factor; + } +} + +static uint32_t +blend_get_rt_blend_enable_gen6(const struct ilo_dev *dev, + const struct pipe_rt_blend_state *rt, + bool dst_alpha_forced_one) +{ + int rgb_src, rgb_dst, a_src, a_dst; + uint32_t dw; + + ILO_DEV_ASSERT(dev, 6, 7.5); + + if (!rt->blend_enable) + return 0; + + rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor); + rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor); + a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor); + a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor); + + if (dst_alpha_forced_one) { + rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src); + rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst); + a_src = gen6_blend_factor_dst_alpha_forced_one(a_src); + a_dst = 
gen6_blend_factor_dst_alpha_forced_one(a_dst); + } + + dw = GEN6_RT_DW0_BLEND_ENABLE | + gen6_translate_pipe_blend(rt->alpha_func) << 26 | + a_src << 20 | + a_dst << 15 | + gen6_translate_pipe_blend(rt->rgb_func) << 11 | + rgb_src << 5 | + rgb_dst; + + if (rt->rgb_func != rt->alpha_func || + rgb_src != a_src || rgb_dst != a_dst) + dw |= GEN6_RT_DW0_INDEPENDENT_ALPHA_ENABLE; + + return dw; +} + +static uint32_t +blend_get_rt_blend_enable_gen8(const struct ilo_dev *dev, + const struct pipe_rt_blend_state *rt, + bool dst_alpha_forced_one, + bool *independent_alpha) +{ + int rgb_src, rgb_dst, a_src, a_dst; + uint32_t dw; + + ILO_DEV_ASSERT(dev, 8, 8); + + if (!rt->blend_enable) { + *independent_alpha = false; + return 0; + } + + rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor); + rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor); + a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor); + a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor); + + if (dst_alpha_forced_one) { + rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src); + rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst); + a_src = gen6_blend_factor_dst_alpha_forced_one(a_src); + a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst); + } + + dw = GEN8_RT_DW0_BLEND_ENABLE | + rgb_src << 26 | + rgb_dst << 21 | + gen6_translate_pipe_blend(rt->rgb_func) << 18 | + a_src << 13 | + a_dst << 8 | + gen6_translate_pipe_blend(rt->alpha_func) << 5; + + *independent_alpha = (rt->rgb_func != rt->alpha_func || + rgb_src != a_src || + rgb_dst != a_dst); + + return dw; +} + +static void +blend_init_cso_gen6(const struct ilo_dev *dev, + const struct pipe_blend_state *state, + struct ilo_blend_state *blend, + unsigned index) +{ + const struct pipe_rt_blend_state *rt = &state->rt[index]; + struct ilo_blend_cso *cso = &blend->cso[index]; + + ILO_DEV_ASSERT(dev, 6, 7.5); + + cso->payload[0] = 0; + cso->payload[1] = GEN6_RT_DW1_COLORCLAMP_RTFORMAT | + GEN6_RT_DW1_PRE_BLEND_CLAMP | 
+ GEN6_RT_DW1_POST_BLEND_CLAMP; + + if (!(rt->colormask & PIPE_MASK_A)) + cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_A; + if (!(rt->colormask & PIPE_MASK_R)) + cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_R; + if (!(rt->colormask & PIPE_MASK_G)) + cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_G; + if (!(rt->colormask & PIPE_MASK_B)) + cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_B; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 365: + * + * "Color Buffer Blending and Logic Ops must not be enabled + * simultaneously, or behavior is UNDEFINED." + * + * Since state->logicop_enable takes precedence over rt->blend_enable, + * no special care is needed. + */ + if (state->logicop_enable) { + cso->dw_blend = 0; + cso->dw_blend_dst_alpha_forced_one = 0; + } else { + cso->dw_blend = blend_get_rt_blend_enable_gen6(dev, rt, false); + cso->dw_blend_dst_alpha_forced_one = + blend_get_rt_blend_enable_gen6(dev, rt, true); + } +} + +static bool +blend_init_cso_gen8(const struct ilo_dev *dev, + const struct pipe_blend_state *state, + struct ilo_blend_state *blend, + unsigned index) +{ + const struct pipe_rt_blend_state *rt = &state->rt[index]; + struct ilo_blend_cso *cso = &blend->cso[index]; + bool independent_alpha = false; + + ILO_DEV_ASSERT(dev, 8, 8); + + cso->payload[0] = 0; + cso->payload[1] = GEN8_RT_DW1_COLORCLAMP_RTFORMAT | + GEN8_RT_DW1_PRE_BLEND_CLAMP | + GEN8_RT_DW1_POST_BLEND_CLAMP; + + if (!(rt->colormask & PIPE_MASK_A)) + cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_A; + if (!(rt->colormask & PIPE_MASK_R)) + cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_R; + if (!(rt->colormask & PIPE_MASK_G)) + cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_G; + if (!(rt->colormask & PIPE_MASK_B)) + cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_B; + + if (state->logicop_enable) { + cso->dw_blend = 0; + cso->dw_blend_dst_alpha_forced_one = 0; + } else { + bool tmp[2]; + + cso->dw_blend = blend_get_rt_blend_enable_gen8(dev, rt, false, &tmp[0]); + 
cso->dw_blend_dst_alpha_forced_one = + blend_get_rt_blend_enable_gen8(dev, rt, true, &tmp[1]); + + if (tmp[0] || tmp[1]) + independent_alpha = true; + } + + return independent_alpha; +} + +static uint32_t +blend_get_logicop_enable_gen6(const struct ilo_dev *dev, + const struct pipe_blend_state *state) +{ + ILO_DEV_ASSERT(dev, 6, 7.5); + + if (!state->logicop_enable) + return 0; + + return GEN6_RT_DW1_LOGICOP_ENABLE | + gen6_translate_pipe_logicop(state->logicop_func) << 18; +} + +static uint32_t +blend_get_logicop_enable_gen8(const struct ilo_dev *dev, + const struct pipe_blend_state *state) +{ + ILO_DEV_ASSERT(dev, 8, 8); + + if (!state->logicop_enable) + return 0; + + return GEN8_RT_DW1_LOGICOP_ENABLE | + gen6_translate_pipe_logicop(state->logicop_func) << 27; +} + +static uint32_t +blend_get_alpha_mod_gen6(const struct ilo_dev *dev, + const struct pipe_blend_state *state, + bool dual_blend) +{ + uint32_t dw = 0; + + ILO_DEV_ASSERT(dev, 6, 7.5); + + if (state->alpha_to_coverage) { + dw |= GEN6_RT_DW1_ALPHA_TO_COVERAGE; + if (ilo_dev_gen(dev) >= ILO_GEN(7)) + dw |= GEN6_RT_DW1_ALPHA_TO_COVERAGE_DITHER; + } + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 378: + * + * "If Dual Source Blending is enabled, this bit (AlphaToOne Enable) + * must be disabled." 
+ */ + if (state->alpha_to_one && !dual_blend) + dw |= GEN6_RT_DW1_ALPHA_TO_ONE; + + return dw; +} + +static uint32_t +blend_get_alpha_mod_gen8(const struct ilo_dev *dev, + const struct pipe_blend_state *state, + bool dual_blend) +{ + uint32_t dw = 0; + + ILO_DEV_ASSERT(dev, 8, 8); + + if (state->alpha_to_coverage) { + dw |= GEN8_BLEND_DW0_ALPHA_TO_COVERAGE | + GEN8_BLEND_DW0_ALPHA_TO_COVERAGE_DITHER; + } + + if (state->alpha_to_one && !dual_blend) + dw |= GEN8_BLEND_DW0_ALPHA_TO_ONE; + + return dw; +} + +static uint32_t +blend_get_ps_blend_gen8(const struct ilo_dev *dev, uint32_t rt_dw0) +{ + int rgb_src, rgb_dst, a_src, a_dst; + uint32_t dw; + + ILO_DEV_ASSERT(dev, 8, 8); + + if (!(rt_dw0 & GEN8_RT_DW0_BLEND_ENABLE)) + return 0; + + a_src = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_ALPHA_FACTOR); + a_dst = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_ALPHA_FACTOR); + rgb_src = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_COLOR_FACTOR); + rgb_dst = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_COLOR_FACTOR); + + dw = GEN8_PS_BLEND_DW1_BLEND_ENABLE; + dw |= GEN_SHIFT32(a_src, GEN8_PS_BLEND_DW1_SRC_ALPHA_FACTOR); + dw |= GEN_SHIFT32(a_dst, GEN8_PS_BLEND_DW1_DST_ALPHA_FACTOR); + dw |= GEN_SHIFT32(rgb_src, GEN8_PS_BLEND_DW1_SRC_COLOR_FACTOR); + dw |= GEN_SHIFT32(rgb_dst, GEN8_PS_BLEND_DW1_DST_COLOR_FACTOR); + + if (a_src != rgb_src || a_dst != rgb_dst) + dw |= GEN8_PS_BLEND_DW1_INDEPENDENT_ALPHA_ENABLE; + + return dw; +} + +void +ilo_gpe_init_blend(const struct ilo_dev *dev, + const struct pipe_blend_state *state, + struct ilo_blend_state *blend) +{ + unsigned i; + + ILO_DEV_ASSERT(dev, 6, 8); + + blend->dual_blend = (util_blend_state_is_dual(state, 0) && + state->rt[0].blend_enable && + !state->logicop_enable); + blend->alpha_to_coverage = state->alpha_to_coverage; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + bool independent_alpha; + + blend->dw_alpha_mod = + blend_get_alpha_mod_gen8(dev, state, blend->dual_blend); + blend->dw_logicop = blend_get_logicop_enable_gen8(dev, state); + blend->dw_shared = 
(state->dither) ? GEN8_BLEND_DW0_DITHER_ENABLE : 0; + + independent_alpha = blend_init_cso_gen8(dev, state, blend, 0); + if (independent_alpha) + blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE; + + blend->dw_ps_blend = blend_get_ps_blend_gen8(dev, + blend->cso[0].dw_blend); + blend->dw_ps_blend_dst_alpha_forced_one = blend_get_ps_blend_gen8(dev, + blend->cso[0].dw_blend_dst_alpha_forced_one); + + if (state->independent_blend_enable) { + for (i = 1; i < Elements(blend->cso); i++) { + independent_alpha = blend_init_cso_gen8(dev, state, blend, i); + if (independent_alpha) + blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE; + } + } else { + for (i = 1; i < Elements(blend->cso); i++) + blend->cso[i] = blend->cso[0]; + } + } else { + blend->dw_alpha_mod = + blend_get_alpha_mod_gen6(dev, state, blend->dual_blend); + blend->dw_logicop = blend_get_logicop_enable_gen6(dev, state); + blend->dw_shared = (state->dither) ? GEN6_RT_DW1_DITHER_ENABLE : 0; + + blend->dw_ps_blend = 0; + blend->dw_ps_blend_dst_alpha_forced_one = 0; + + blend_init_cso_gen6(dev, state, blend, 0); + if (state->independent_blend_enable) { + for (i = 1; i < Elements(blend->cso); i++) + blend_init_cso_gen6(dev, state, blend, i); + } else { + for (i = 1; i < Elements(blend->cso); i++) + blend->cso[i] = blend->cso[0]; + } + } +} + +/** + * Translate a pipe DSA test function to the matching hardware compare + * function. 
+ */ +static int +gen6_translate_dsa_func(unsigned func) +{ + switch (func) { + case PIPE_FUNC_NEVER: return GEN6_COMPAREFUNCTION_NEVER; + case PIPE_FUNC_LESS: return GEN6_COMPAREFUNCTION_LESS; + case PIPE_FUNC_EQUAL: return GEN6_COMPAREFUNCTION_EQUAL; + case PIPE_FUNC_LEQUAL: return GEN6_COMPAREFUNCTION_LEQUAL; + case PIPE_FUNC_GREATER: return GEN6_COMPAREFUNCTION_GREATER; + case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL; + case PIPE_FUNC_GEQUAL: return GEN6_COMPAREFUNCTION_GEQUAL; + case PIPE_FUNC_ALWAYS: return GEN6_COMPAREFUNCTION_ALWAYS; + default: + assert(!"unknown depth/stencil/alpha test function"); + return GEN6_COMPAREFUNCTION_NEVER; + } +} + +static uint32_t +dsa_get_stencil_enable_gen6(const struct ilo_dev *dev, + const struct pipe_stencil_state *stencil0, + const struct pipe_stencil_state *stencil1) +{ + uint32_t dw; + + ILO_DEV_ASSERT(dev, 6, 7.5); + + if (!stencil0->enabled) + return 0; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 359: + * + * "If the Depth Buffer is either undefined or does not have a surface + * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate + * stencil buffer is disabled, Stencil Test Enable must be DISABLED" + * + * From the Sandy Bridge PRM, volume 2 part 1, page 370: + * + * "This field (Stencil Test Enable) cannot be enabled if + * Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM." + * + * TODO We do not check these yet. 
+ */ + dw = GEN6_ZS_DW0_STENCIL_TEST_ENABLE | + gen6_translate_dsa_func(stencil0->func) << 28 | + gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 | + gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 | + gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19; + if (stencil0->writemask) + dw |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE; + + if (stencil1->enabled) { + dw |= GEN6_ZS_DW0_STENCIL1_ENABLE | + gen6_translate_dsa_func(stencil1->func) << 12 | + gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 | + gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 | + gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3; + if (stencil1->writemask) + dw |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE; + } + + return dw; +} + +static uint32_t +dsa_get_stencil_enable_gen8(const struct ilo_dev *dev, + const struct pipe_stencil_state *stencil0, + const struct pipe_stencil_state *stencil1) +{ + uint32_t dw; + + ILO_DEV_ASSERT(dev, 8, 8); + + if (!stencil0->enabled) + return 0; + + dw = gen6_translate_pipe_stencil_op(stencil0->fail_op) << 29 | + gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 26 | + gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 23 | + gen6_translate_dsa_func(stencil0->func) << 8 | + GEN8_ZS_DW1_STENCIL_TEST_ENABLE; + if (stencil0->writemask) + dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE; + + if (stencil1->enabled) { + dw |= gen6_translate_dsa_func(stencil1->func) << 20 | + gen6_translate_pipe_stencil_op(stencil1->fail_op) << 17 | + gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 14 | + gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 11 | + GEN8_ZS_DW1_STENCIL1_ENABLE; + if (stencil1->writemask) + dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE; + } + + return dw; +} + +static uint32_t +dsa_get_depth_enable_gen6(const struct ilo_dev *dev, + const struct pipe_depth_state *state) +{ + uint32_t dw; + + ILO_DEV_ASSERT(dev, 6, 7.5); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 360: + * + * "Enabling the Depth Test function without 
defining a Depth Buffer is + * UNDEFINED." + * + * From the Sandy Bridge PRM, volume 2 part 1, page 375: + * + * "A Depth Buffer must be defined before enabling writes to it, or + * operation is UNDEFINED." + * + * TODO We do not check these yet. + */ + if (state->enabled) { + dw = GEN6_ZS_DW2_DEPTH_TEST_ENABLE | + gen6_translate_dsa_func(state->func) << 27; + } else { + dw = GEN6_COMPAREFUNCTION_ALWAYS << 27; + } + + if (state->writemask) + dw |= GEN6_ZS_DW2_DEPTH_WRITE_ENABLE; + + return dw; +} + +static uint32_t +dsa_get_depth_enable_gen8(const struct ilo_dev *dev, + const struct pipe_depth_state *state) +{ + uint32_t dw; + + ILO_DEV_ASSERT(dev, 8, 8); + + if (state->enabled) { + dw = GEN8_ZS_DW1_DEPTH_TEST_ENABLE | + gen6_translate_dsa_func(state->func) << 5; + } else { + dw = GEN6_COMPAREFUNCTION_ALWAYS << 5; + } + + if (state->writemask) + dw |= GEN8_ZS_DW1_DEPTH_WRITE_ENABLE; + + return dw; +} + +static uint32_t +dsa_get_alpha_enable_gen6(const struct ilo_dev *dev, + const struct pipe_alpha_state *state) +{ + uint32_t dw; + + ILO_DEV_ASSERT(dev, 6, 7.5); + + if (!state->enabled) + return 0; + + /* this will be ORed to BLEND_STATE */ + dw = GEN6_RT_DW1_ALPHA_TEST_ENABLE | + gen6_translate_dsa_func(state->func) << 13; + + return dw; +} + +static uint32_t +dsa_get_alpha_enable_gen8(const struct ilo_dev *dev, + const struct pipe_alpha_state *state) +{ + uint32_t dw; + + ILO_DEV_ASSERT(dev, 8, 8); + + if (!state->enabled) + return 0; + + /* this will be ORed to BLEND_STATE */ + dw = GEN8_BLEND_DW0_ALPHA_TEST_ENABLE | + gen6_translate_dsa_func(state->func) << 24; + + return dw; +} + +void +ilo_gpe_init_dsa(const struct ilo_dev *dev, + const struct pipe_depth_stencil_alpha_state *state, + struct ilo_dsa_state *dsa) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + STATIC_ASSERT(Elements(dsa->payload) >= 3); + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + const uint32_t dw_stencil = dsa_get_stencil_enable_gen8(dev, + &state->stencil[0], &state->stencil[1]); + const uint32_t dw_depth 
= dsa_get_depth_enable_gen8(dev, &state->depth); + + assert(!(dw_stencil & dw_depth)); + dsa->payload[0] = dw_stencil | dw_depth; + + dsa->dw_blend_alpha = dsa_get_alpha_enable_gen8(dev, &state->alpha); + dsa->dw_ps_blend_alpha = (state->alpha.enabled) ? + GEN8_PS_BLEND_DW1_ALPHA_TEST_ENABLE : 0; + } else { + dsa->payload[0] = dsa_get_stencil_enable_gen6(dev, + &state->stencil[0], &state->stencil[1]); + dsa->payload[2] = dsa_get_depth_enable_gen6(dev, &state->depth); + + dsa->dw_blend_alpha = dsa_get_alpha_enable_gen6(dev, &state->alpha); + dsa->dw_ps_blend_alpha = 0; + } + + dsa->payload[1] = state->stencil[0].valuemask << 24 | + state->stencil[0].writemask << 16 | + state->stencil[1].valuemask << 8 | + state->stencil[1].writemask; + + dsa->alpha_ref = float_to_ubyte(state->alpha.ref_value); +} + +void +ilo_gpe_set_scissor(const struct ilo_dev *dev, + unsigned start_slot, + unsigned num_states, + const struct pipe_scissor_state *states, + struct ilo_scissor_state *scissor) +{ + unsigned i; + + ILO_DEV_ASSERT(dev, 6, 8); + + for (i = 0; i < num_states; i++) { + uint16_t min_x, min_y, max_x, max_y; + + /* both max and min are inclusive in SCISSOR_RECT */ + if (states[i].minx < states[i].maxx && + states[i].miny < states[i].maxy) { + min_x = states[i].minx; + min_y = states[i].miny; + max_x = states[i].maxx - 1; + max_y = states[i].maxy - 1; + } + else { + /* we have to make min greater than max */ + min_x = 1; + min_y = 1; + max_x = 0; + max_y = 0; + } + + scissor->payload[(start_slot + i) * 2 + 0] = min_y << 16 | min_x; + scissor->payload[(start_slot + i) * 2 + 1] = max_y << 16 | max_x; + } + + if (!start_slot && num_states) + scissor->scissor0 = states[0]; +} + +void +ilo_gpe_set_scissor_null(const struct ilo_dev *dev, + struct ilo_scissor_state *scissor) +{ + unsigned i; + + for (i = 0; i < Elements(scissor->payload); i += 2) { + scissor->payload[i + 0] = 1 << 16 | 1; + scissor->payload[i + 1] = 0; + } +} + +static void +fb_set_blend_caps(const struct ilo_dev 
*dev, + enum pipe_format format, + struct ilo_fb_blend_caps *caps) +{ + const struct util_format_description *desc = + util_format_description(format); + const int ch = util_format_get_first_non_void_channel(format); + + memset(caps, 0, sizeof(*caps)); + + if (format == PIPE_FORMAT_NONE || desc->is_mixed) + return; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 365: + * + * "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB + * variants), otherwise Logic Ops must be DISABLED." + * + * According to the classic driver, this is lifted on Gen8+. + */ + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + caps->can_logicop = true; + } else { + caps->can_logicop = (ch >= 0 && desc->channel[ch].normalized && + desc->channel[ch].type == UTIL_FORMAT_TYPE_UNSIGNED && + desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB); + } + + /* no blending for pure integer formats */ + caps->can_blend = !util_format_is_pure_integer(format); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 382: + * + * "Alpha Test can only be enabled if Pixel Shader outputs a float + * alpha value." + */ + caps->can_alpha_test = !util_format_is_pure_integer(format); + + caps->dst_alpha_forced_one = + (ilo_format_translate_render(dev, format) != + ilo_format_translate_color(dev, format)); + + /* sanity check */ + if (caps->dst_alpha_forced_one) { + enum pipe_format render_format; + + switch (format) { + case PIPE_FORMAT_B8G8R8X8_UNORM: + render_format = PIPE_FORMAT_B8G8R8A8_UNORM; + break; + default: + render_format = PIPE_FORMAT_NONE; + break; + } + + assert(ilo_format_translate_render(dev, format) == + ilo_format_translate_color(dev, render_format)); + } +} + +void +ilo_gpe_set_fb(const struct ilo_dev *dev, + const struct pipe_framebuffer_state *state, + struct ilo_fb_state *fb) +{ + const struct pipe_surface *first_surf = NULL; + int i; + + ILO_DEV_ASSERT(dev, 6, 8); + + util_copy_framebuffer_state(&fb->state, state); + + ilo_gpe_init_view_surface_null(dev, + (state->width) ? 
state->width : 1, + (state->height) ? state->height : 1, + 1, 0, &fb->null_rt); + + for (i = 0; i < state->nr_cbufs; i++) { + if (state->cbufs[i]) { + fb_set_blend_caps(dev, state->cbufs[i]->format, &fb->blend_caps[i]); + + if (!first_surf) + first_surf = state->cbufs[i]; + } else { + fb_set_blend_caps(dev, PIPE_FORMAT_NONE, &fb->blend_caps[i]); + } + } + + if (!first_surf && state->zsbuf) + first_surf = state->zsbuf; + + fb->num_samples = (first_surf) ? first_surf->texture->nr_samples : 1; + if (!fb->num_samples) + fb->num_samples = 1; + + /* + * The PRMs list several restrictions when the framebuffer has more than + * one surface. It seems they are actually lifted on GEN6+. + */ +} diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_top.c b/src/gallium/drivers/ilo/core/ilo_state_3d_top.c new file mode 100644 index 00000000000..004656f8461 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_3d_top.c @@ -0,0 +1,1711 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2014 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#include "genhw/genhw.h" +#include "util/u_dual_blend.h" +#include "util/u_framebuffer.h" +#include "util/u_half.h" +#include "util/u_resource.h" + +#include "ilo_format.h" +#include "ilo_state_3d.h" +#include "../ilo_resource.h" +#include "../ilo_shader.h" + +static void +ve_init_cso(const struct ilo_dev *dev, + const struct pipe_vertex_element *state, + unsigned vb_index, + struct ilo_ve_cso *cso) +{ + int comp[4] = { + GEN6_VFCOMP_STORE_SRC, + GEN6_VFCOMP_STORE_SRC, + GEN6_VFCOMP_STORE_SRC, + GEN6_VFCOMP_STORE_SRC, + }; + int format; + + ILO_DEV_ASSERT(dev, 6, 8); + + switch (util_format_get_nr_components(state->src_format)) { + case 1: comp[1] = GEN6_VFCOMP_STORE_0; + case 2: comp[2] = GEN6_VFCOMP_STORE_0; + case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ? 
+ GEN6_VFCOMP_STORE_1_INT : + GEN6_VFCOMP_STORE_1_FP; + } + + format = ilo_format_translate_vertex(dev, state->src_format); + + STATIC_ASSERT(Elements(cso->payload) >= 2); + cso->payload[0] = + vb_index << GEN6_VE_DW0_VB_INDEX__SHIFT | + GEN6_VE_DW0_VALID | + format << GEN6_VE_DW0_FORMAT__SHIFT | + state->src_offset << GEN6_VE_DW0_VB_OFFSET__SHIFT; + + cso->payload[1] = + comp[0] << GEN6_VE_DW1_COMP0__SHIFT | + comp[1] << GEN6_VE_DW1_COMP1__SHIFT | + comp[2] << GEN6_VE_DW1_COMP2__SHIFT | + comp[3] << GEN6_VE_DW1_COMP3__SHIFT; +} + +void +ilo_gpe_init_ve(const struct ilo_dev *dev, + unsigned num_states, + const struct pipe_vertex_element *states, + struct ilo_ve_state *ve) +{ + unsigned i; + + ILO_DEV_ASSERT(dev, 6, 8); + + ve->count = num_states; + ve->vb_count = 0; + + for (i = 0; i < num_states; i++) { + const unsigned pipe_idx = states[i].vertex_buffer_index; + const unsigned instance_divisor = states[i].instance_divisor; + unsigned hw_idx; + + /* + * map the pipe vb to the hardware vb, which has a fixed instance + * divisor + */ + for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { + if (ve->vb_mapping[hw_idx] == pipe_idx && + ve->instance_divisors[hw_idx] == instance_divisor) + break; + } + + /* create one if there is no matching hardware vb */ + if (hw_idx >= ve->vb_count) { + hw_idx = ve->vb_count++; + + ve->vb_mapping[hw_idx] = pipe_idx; + ve->instance_divisors[hw_idx] = instance_divisor; + } + + ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]); + } +} + +void +ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev, + struct ilo_ve_cso *cso) +{ + int format; + + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 94: + * + * "- This bit (Edge Flag Enable) must only be ENABLED on the last + * valid VERTEX_ELEMENT structure. + * + * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC, + * and Component 1-3 Control must be set to VFCOMP_NOSTORE. + * + * - The Source Element Format must be set to the UINT format. 
+ * + * - [DevSNB]: Edge Flags are not supported for QUADLIST + * primitives. Software may elect to convert QUADLIST primitives + * to some set of corresponding edge-flag-supported primitive + * types (e.g., POLYGONs) prior to submission to the 3D pipeline." + */ + cso->payload[0] |= GEN6_VE_DW0_EDGE_FLAG_ENABLE; + + /* + * Edge flags have format GEN6_FORMAT_R8_USCALED when defined via + * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined + * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h. + * + * Since all the hardware cares about is whether the flags are zero or not, + * we can treat them as the corresponding _UINT formats. + */ + format = GEN_EXTRACT(cso->payload[0], GEN6_VE_DW0_FORMAT); + cso->payload[0] &= ~GEN6_VE_DW0_FORMAT__MASK; + + switch (format) { + case GEN6_FORMAT_R32_FLOAT: + format = GEN6_FORMAT_R32_UINT; + break; + case GEN6_FORMAT_R8_USCALED: + format = GEN6_FORMAT_R8_UINT; + break; + default: + break; + } + + cso->payload[0] |= GEN_SHIFT32(format, GEN6_VE_DW0_FORMAT); + + cso->payload[1] = + GEN6_VFCOMP_STORE_SRC << GEN6_VE_DW1_COMP0__SHIFT | + GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP1__SHIFT | + GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP2__SHIFT | + GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP3__SHIFT; +} + +void +ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev, + int comp0, int comp1, int comp2, int comp3, + struct ilo_ve_cso *cso) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + STATIC_ASSERT(Elements(cso->payload) >= 2); + + assert(comp0 != GEN6_VFCOMP_STORE_SRC && + comp1 != GEN6_VFCOMP_STORE_SRC && + comp2 != GEN6_VFCOMP_STORE_SRC && + comp3 != GEN6_VFCOMP_STORE_SRC); + + cso->payload[0] = GEN6_VE_DW0_VALID; + cso->payload[1] = + comp0 << GEN6_VE_DW1_COMP0__SHIFT | + comp1 << GEN6_VE_DW1_COMP1__SHIFT | + comp2 << GEN6_VE_DW1_COMP2__SHIFT | + comp3 << GEN6_VE_DW1_COMP3__SHIFT; +} + +void +ilo_gpe_init_vs_cso(const struct ilo_dev *dev, + const struct ilo_shader_state *vs, + struct ilo_shader_cso *cso) +{ + int start_grf, vue_read_len, 
sampler_count, max_threads; + uint32_t dw2, dw4, dw5; + + ILO_DEV_ASSERT(dev, 6, 8); + + start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG); + vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT); + sampler_count = ilo_shader_get_kernel_param(vs, ILO_KERNEL_SAMPLER_COUNT); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 135: + * + * "(Vertex URB Entry Read Length) Specifies the number of pairs of + * 128-bit vertex elements to be passed into the payload for each + * vertex." + * + * "It is UNDEFINED to set this field to 0 indicating no Vertex URB + * data to be read and passed to the thread." + */ + vue_read_len = (vue_read_len + 1) / 2; + if (!vue_read_len) + vue_read_len = 1; + + max_threads = dev->thread_count; + if (ilo_dev_gen(dev) == ILO_GEN(7.5) && dev->gt == 2) + max_threads *= 2; + + dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; + dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; + + dw4 = start_grf << GEN6_VS_DW4_URB_GRF_START__SHIFT | + vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT | + 0 << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT; + + dw5 = GEN6_VS_DW5_STATISTICS | + GEN6_VS_DW5_VS_ENABLE; + + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) + dw5 |= (max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT; + else + dw5 |= (max_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT; + + STATIC_ASSERT(Elements(cso->payload) >= 3); + cso->payload[0] = dw2; + cso->payload[1] = dw4; + cso->payload[2] = dw5; +} + +static void +gs_init_cso_gen6(const struct ilo_dev *dev, + const struct ilo_shader_state *gs, + struct ilo_shader_cso *cso) +{ + int start_grf, vue_read_len, max_threads; + uint32_t dw2, dw4, dw5, dw6; + + ILO_DEV_ASSERT(dev, 6, 6); + + if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) { + start_grf = ilo_shader_get_kernel_param(gs, + ILO_KERNEL_URB_DATA_START_REG); + + vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT); + } + else { + start_grf = 
ilo_shader_get_kernel_param(gs, + ILO_KERNEL_VS_GEN6_SO_START_REG); + + vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT); + } + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 153: + * + * "Specifies the amount of URB data read and passed in the thread + * payload for each Vertex URB entry, in 256-bit register increments. + * + * It is UNDEFINED to set this field (Vertex URB Entry Read Length) to + * 0 indicating no Vertex URB data to be read and passed to the + * thread." + */ + vue_read_len = (vue_read_len + 1) / 2; + if (!vue_read_len) + vue_read_len = 1; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 154: + * + * "Maximum Number of Threads valid range is [0,27] when Rendering + * Enabled bit is set." + * + * From the Sandy Bridge PRM, volume 2 part 1, page 173: + * + * "Programming Note: If the GS stage is enabled, software must always + * allocate at least one GS URB Entry. This is true even if the GS + * thread never needs to output vertices to the pipeline, e.g., when + * only performing stream output. This is an artifact of the need to + * pass the GS thread an initial destination URB handle." + * + * As such, we always enable rendering, and limit the number of threads. 
+ */ + if (dev->gt == 2) { + /* maximum is 60, but limited to 28 */ + max_threads = 28; + } + else { + /* maximum is 24, but limited to 21 (see brwCreateContext()) */ + max_threads = 21; + } + + dw2 = GEN6_THREADDISP_SPF; + + dw4 = vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT | + 0 << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT | + start_grf << GEN6_GS_DW4_URB_GRF_START__SHIFT; + + dw5 = (max_threads - 1) << GEN6_GS_DW5_MAX_THREADS__SHIFT | + GEN6_GS_DW5_STATISTICS | + GEN6_GS_DW5_SO_STATISTICS | + GEN6_GS_DW5_RENDER_ENABLE; + + /* + * we cannot make use of GEN6_GS_REORDER because it will reorder + * triangle strips according to D3D rules (triangle 2N+1 uses vertices + * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices + * (2N+2, 2N+1, 2N+3)). + */ + dw6 = GEN6_GS_DW6_GS_ENABLE; + + if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY)) + dw6 |= GEN6_GS_DW6_DISCARD_ADJACENCY; + + if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) { + const uint32_t svbi_post_inc = + ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC); + + dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE; + if (svbi_post_inc) { + dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE | + svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT; + } + } + + STATIC_ASSERT(Elements(cso->payload) >= 4); + cso->payload[0] = dw2; + cso->payload[1] = dw4; + cso->payload[2] = dw5; + cso->payload[3] = dw6; +} + +static void +gs_init_cso_gen7(const struct ilo_dev *dev, + const struct ilo_shader_state *gs, + struct ilo_shader_cso *cso) +{ + int start_grf, vue_read_len, sampler_count, max_threads; + uint32_t dw2, dw4, dw5; + + ILO_DEV_ASSERT(dev, 7, 7.5); + + start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG); + vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT); + sampler_count = ilo_shader_get_kernel_param(gs, ILO_KERNEL_SAMPLER_COUNT); + + /* in pairs */ + vue_read_len = (vue_read_len + 1) / 2; + + switch (ilo_dev_gen(dev)) { + case 
/**
 * Initialize the GS CSO of \p gs, dispatching on the device generation:
 * devices older than Gen7 take the Gen6 path, all others the Gen7 path.
 */
void
ilo_gpe_init_gs_cso(const struct ilo_dev *dev,
                    const struct ilo_shader_state *gs,
                    struct ilo_shader_cso *cso)
{
   if (ilo_dev_gen(dev) < ILO_GEN(7)) {
      gs_init_cso_gen6(dev, gs, cso);
      return;
   }

   gs_init_cso_gen7(dev, gs, cso);
}
All of the remaining fields in surface state are ignored + * for null surfaces, with the following exceptions: + * + * * [DevSNB+]: Width, Height, Depth, and LOD fields must match the + * depth buffer's corresponding state for all render target + * surfaces, including null. + * * Surface Format must be R8G8B8A8_UNORM." + * + * From the Sandy Bridge PRM, volume 4 part 1, page 82: + * + * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be + * true" + */ + + STATIC_ASSERT(Elements(surf->payload) >= 6); + dw = surf->payload; + + dw[0] = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT | + GEN6_FORMAT_B8G8R8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT; + + dw[1] = 0; + + dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT | + (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT | + level << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT; + + dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT | + GEN6_TILING_X; + + dw[4] = 0; + dw[5] = 0; +} + +static void +view_init_for_buffer_gen6(const struct ilo_dev *dev, + const struct ilo_buffer *buf, + unsigned offset, unsigned size, + unsigned struct_size, + enum pipe_format elem_format, + bool is_rt, bool render_cache_rw, + struct ilo_view_surface *surf) +{ + const int elem_size = util_format_get_blocksize(elem_format); + int width, height, depth, pitch; + int surface_format, num_entries; + uint32_t *dw; + + ILO_DEV_ASSERT(dev, 6, 6); + + /* + * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a + * structure in a buffer. + */ + + surface_format = ilo_format_translate_color(dev, elem_format); + + num_entries = size / struct_size; + /* see if there is enough space to fit another element */ + if (size % struct_size >= elem_size) + num_entries++; + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 76: + * + * "For SURFTYPE_BUFFER render targets, this field (Surface Base + * Address) specifies the base address of first element of the + * surface. 
The surface is interpreted as a simple array of that + * single element type. The address must be naturally-aligned to the + * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements + * must be 16-byte aligned). + * + * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies + * the base address of the first element of the surface, computed in + * software by adding the surface base address to the byte offset of + * the element in the buffer." + */ + if (is_rt) + assert(offset % elem_size == 0); + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 77: + * + * "For buffer surfaces, the number of entries in the buffer ranges + * from 1 to 2^27." + */ + assert(num_entries >= 1 && num_entries <= 1 << 27); + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 81: + * + * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch) + * indicates the size of the structure." + */ + pitch = struct_size; + + pitch--; + num_entries--; + /* bits [6:0] */ + width = (num_entries & 0x0000007f); + /* bits [19:7] */ + height = (num_entries & 0x000fff80) >> 7; + /* bits [26:20] */ + depth = (num_entries & 0x07f00000) >> 20; + + STATIC_ASSERT(Elements(surf->payload) >= 6); + dw = surf->payload; + + dw[0] = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT | + surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT; + if (render_cache_rw) + dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW; + + dw[1] = offset; + + dw[2] = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT | + width << GEN6_SURFACE_DW2_WIDTH__SHIFT; + + dw[3] = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT | + pitch << GEN6_SURFACE_DW3_PITCH__SHIFT; + + dw[4] = 0; + dw[5] = 0; +} + +static void +view_init_for_texture_gen6(const struct ilo_dev *dev, + const struct ilo_texture *tex, + enum pipe_format format, + unsigned first_level, + unsigned num_levels, + unsigned first_layer, + unsigned num_layers, + bool is_rt, + struct ilo_view_surface *surf) +{ + int surface_type, surface_format; + int width, height, 
depth, pitch, lod; + uint32_t *dw; + + ILO_DEV_ASSERT(dev, 6, 6); + + surface_type = ilo_gpe_gen6_translate_texture(tex->base.target); + assert(surface_type != GEN6_SURFTYPE_BUFFER); + + if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8) + format = PIPE_FORMAT_Z32_FLOAT; + + if (is_rt) + surface_format = ilo_format_translate_render(dev, format); + else + surface_format = ilo_format_translate_texture(dev, format); + assert(surface_format >= 0); + + width = tex->image.width0; + height = tex->image.height0; + depth = (tex->base.target == PIPE_TEXTURE_3D) ? + tex->base.depth0 : num_layers; + pitch = tex->image.bo_stride; + + if (surface_type == GEN6_SURFTYPE_CUBE) { + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 81: + * + * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the + * range of this field (Depth) is [0,84], indicating the number of + * cube array elements (equal to the number of underlying 2D array + * elements divided by 6). For other surfaces, this field must be + * zero." + * + * When is_rt is true, we treat the texture as a 2D one to avoid the + * restriction. 
+ */ + if (is_rt) { + surface_type = GEN6_SURFTYPE_2D; + } + else { + assert(num_layers % 6 == 0); + depth = num_layers / 6; + } + } + + /* sanity check the size */ + assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1); + switch (surface_type) { + case GEN6_SURFTYPE_1D: + assert(width <= 8192 && height == 1 && depth <= 512); + assert(first_layer < 512 && num_layers <= 512); + break; + case GEN6_SURFTYPE_2D: + assert(width <= 8192 && height <= 8192 && depth <= 512); + assert(first_layer < 512 && num_layers <= 512); + break; + case GEN6_SURFTYPE_3D: + assert(width <= 2048 && height <= 2048 && depth <= 2048); + assert(first_layer < 2048 && num_layers <= 512); + if (!is_rt) + assert(first_layer == 0); + break; + case GEN6_SURFTYPE_CUBE: + assert(width <= 8192 && height <= 8192 && depth <= 85); + assert(width == height); + assert(first_layer < 512 && num_layers <= 512); + if (is_rt) + assert(first_layer == 0); + break; + default: + assert(!"unexpected surface type"); + break; + } + + /* non-full array spacing is supported only on GEN7+ */ + assert(tex->image.walk != ILO_IMAGE_WALK_LOD); + /* non-interleaved samples are supported only on GEN7+ */ + if (tex->base.nr_samples > 1) + assert(tex->image.interleaved_samples); + + if (is_rt) { + assert(num_levels == 1); + lod = first_level; + } + else { + lod = num_levels - 1; + } + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 76: + * + * "Linear render target surface base addresses must be element-size + * aligned, for non-YUV surface formats, or a multiple of 2 + * element-sizes for YUV surface formats. Other linear surfaces have + * no alignment requirements (byte alignment is sufficient.)" + * + * From the Sandy Bridge PRM, volume 4 part 1, page 81: + * + * "For linear render target surfaces, the pitch must be a multiple + * of the element size for non-YUV surface formats. Pitch must be a + * multiple of 2 * element size for YUV surface formats." 
+ * + * From the Sandy Bridge PRM, volume 4 part 1, page 86: + * + * "For linear surfaces, this field (X Offset) must be zero" + */ + if (tex->image.tiling == GEN6_TILING_NONE) { + if (is_rt) { + const int elem_size = util_format_get_blocksize(format); + assert(pitch % elem_size == 0); + } + } + + STATIC_ASSERT(Elements(surf->payload) >= 6); + dw = surf->payload; + + dw[0] = surface_type << GEN6_SURFACE_DW0_TYPE__SHIFT | + surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT | + GEN6_SURFACE_DW0_MIPLAYOUT_BELOW; + + if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) { + dw[0] |= 1 << 9 | + GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; + } + + if (is_rt) + dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW; + + dw[1] = 0; + + dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT | + (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT | + lod << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT; + + assert(tex->image.tiling != GEN8_TILING_W); + dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT | + (pitch - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT | + tex->image.tiling; + + dw[4] = first_level << GEN6_SURFACE_DW4_MIN_LOD__SHIFT | + first_layer << 17 | + (num_layers - 1) << 8 | + ((tex->base.nr_samples > 1) ? GEN6_SURFACE_DW4_MULTISAMPLECOUNT_4 : + GEN6_SURFACE_DW4_MULTISAMPLECOUNT_1); + + dw[5] = 0; + + assert(tex->image.align_j == 2 || tex->image.align_j == 4); + if (tex->image.align_j == 4) + dw[5] |= GEN6_SURFACE_DW5_VALIGN_4; +} + +static void +view_init_null_gen7(const struct ilo_dev *dev, + unsigned width, unsigned height, + unsigned depth, unsigned level, + struct ilo_view_surface *surf) +{ + uint32_t *dw; + + ILO_DEV_ASSERT(dev, 7, 8); + + assert(width >= 1 && height >= 1 && depth >= 1); + + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 62: + * + * "A null surface is used in instances where an actual surface is not + * bound. When a write message is generated to a null surface, no + * actual surface is written to. 
When a read message (including any + * sampling engine message) is generated to a null surface, the result + * is all zeros. Note that a null surface type is allowed to be used + * with all messages, even if it is not specificially indicated as + * supported. All of the remaining fields in surface state are ignored + * for null surfaces, with the following exceptions: + * + * * Width, Height, Depth, LOD, and Render Target View Extent fields + * must match the depth buffer's corresponding state for all render + * target surfaces, including null. + * * All sampling engine and data port messages support null surfaces + * with the above behavior, even if not mentioned as specifically + * supported, except for the following: + * * Data Port Media Block Read/Write messages. + * * The Surface Type of a surface used as a render target (accessed + * via the Data Port's Render Target Write message) must be the same + * as the Surface Type of all other render targets and of the depth + * buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth + * buffer or render targets are SURFTYPE_NULL." 
+ * + * From the Ivy Bridge PRM, volume 4 part 1, page 65: + * + * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be + * true" + */ + + STATIC_ASSERT(Elements(surf->payload) >= 13); + dw = surf->payload; + + dw[0] = GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT | + GEN6_FORMAT_B8G8R8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + dw[0] |= GEN6_TILING_X << GEN8_SURFACE_DW0_TILING__SHIFT; + else + dw[0] |= GEN6_TILING_X << GEN7_SURFACE_DW0_TILING__SHIFT; + + dw[1] = 0; + + dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) | + GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH); + + dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH); + + dw[4] = 0; + dw[5] = level; + + dw[6] = 0; + dw[7] = 0; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + memset(&dw[8], 0, sizeof(*dw) * (13 - 8)); +} + +static void +view_init_for_buffer_gen7(const struct ilo_dev *dev, + const struct ilo_buffer *buf, + unsigned offset, unsigned size, + unsigned struct_size, + enum pipe_format elem_format, + bool is_rt, bool render_cache_rw, + struct ilo_view_surface *surf) +{ + const bool typed = (elem_format != PIPE_FORMAT_NONE); + const bool structured = (!typed && struct_size > 1); + const int elem_size = (typed) ? + util_format_get_blocksize(elem_format) : 1; + int width, height, depth, pitch; + int surface_type, surface_format, num_entries; + uint32_t *dw; + + ILO_DEV_ASSERT(dev, 7, 8); + + surface_type = (structured) ? GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER; + + surface_format = (typed) ? + ilo_format_translate_color(dev, elem_format) : GEN6_FORMAT_RAW; + + num_entries = size / struct_size; + /* see if there is enough space to fit another element */ + if (size % struct_size >= elem_size && !structured) + num_entries++; + + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 67: + * + * "For SURFTYPE_BUFFER render targets, this field (Surface Base + * Address) specifies the base address of first element of the + * surface. 
The surface is interpreted as a simple array of that + * single element type. The address must be naturally-aligned to the + * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements + * must be 16-byte aligned) + * + * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies + * the base address of the first element of the surface, computed in + * software by adding the surface base address to the byte offset of + * the element in the buffer." + */ + if (is_rt) + assert(offset % elem_size == 0); + + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 68: + * + * "For typed buffer and structured buffer surfaces, the number of + * entries in the buffer ranges from 1 to 2^27. For raw buffer + * surfaces, the number of entries in the buffer is the number of + * bytes which can range from 1 to 2^30." + */ + assert(num_entries >= 1 && + num_entries <= 1 << ((typed || structured) ? 27 : 30)); + + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 69: + * + * "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be + * 11 if the Surface Format is RAW (the size of the buffer must be a + * multiple of 4 bytes)." + * + * From the Ivy Bridge PRM, volume 4 part 1, page 70: + * + * "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this + * field (Surface Pitch) indicates the size of the structure." + * + * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch + * must be a multiple of 4 bytes." 
+ */ + if (structured) + assert(struct_size % 4 == 0); + else if (!typed) + assert(num_entries % 4 == 0); + + pitch = struct_size; + + pitch--; + num_entries--; + /* bits [6:0] */ + width = (num_entries & 0x0000007f); + /* bits [20:7] */ + height = (num_entries & 0x001fff80) >> 7; + /* bits [30:21] */ + depth = (num_entries & 0x7fe00000) >> 21; + /* limit to [26:21] */ + if (typed || structured) + depth &= 0x3f; + + STATIC_ASSERT(Elements(surf->payload) >= 13); + dw = surf->payload; + + dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT | + surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT; + if (render_cache_rw) + dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + dw[8] = offset; + memset(&dw[9], 0, sizeof(*dw) * (13 - 9)); + } else { + dw[1] = offset; + } + + dw[2] = GEN_SHIFT32(height, GEN7_SURFACE_DW2_HEIGHT) | + GEN_SHIFT32(width, GEN7_SURFACE_DW2_WIDTH); + + dw[3] = GEN_SHIFT32(depth, GEN7_SURFACE_DW3_DEPTH) | + pitch; + + dw[4] = 0; + dw[5] = 0; + + dw[6] = 0; + dw[7] = 0; + + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { + dw[7] |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) | + GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) | + GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) | + GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A); + } +} + +static void +view_init_for_texture_gen7(const struct ilo_dev *dev, + const struct ilo_texture *tex, + enum pipe_format format, + unsigned first_level, + unsigned num_levels, + unsigned first_layer, + unsigned num_layers, + bool is_rt, + struct ilo_view_surface *surf) +{ + int surface_type, surface_format; + int width, height, depth, pitch, lod; + uint32_t *dw; + + ILO_DEV_ASSERT(dev, 7, 8); + + surface_type = ilo_gpe_gen6_translate_texture(tex->base.target); + assert(surface_type != GEN6_SURFTYPE_BUFFER); + + if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8) + format = PIPE_FORMAT_Z32_FLOAT; + + if (is_rt) + surface_format = 
ilo_format_translate_render(dev, format); + else + surface_format = ilo_format_translate_texture(dev, format); + assert(surface_format >= 0); + + width = tex->image.width0; + height = tex->image.height0; + depth = (tex->base.target == PIPE_TEXTURE_3D) ? + tex->base.depth0 : num_layers; + pitch = tex->image.bo_stride; + + if (surface_type == GEN6_SURFTYPE_CUBE) { + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 70: + * + * "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of + * this field is [0,340], indicating the number of cube array + * elements (equal to the number of underlying 2D array elements + * divided by 6). For other surfaces, this field must be zero." + * + * When is_rt is true, we treat the texture as a 2D one to avoid the + * restriction. + */ + if (is_rt) { + surface_type = GEN6_SURFTYPE_2D; + } + else { + assert(num_layers % 6 == 0); + depth = num_layers / 6; + } + } + + /* sanity check the size */ + assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1); + assert(first_layer < 2048 && num_layers <= 2048); + switch (surface_type) { + case GEN6_SURFTYPE_1D: + assert(width <= 16384 && height == 1 && depth <= 2048); + break; + case GEN6_SURFTYPE_2D: + assert(width <= 16384 && height <= 16384 && depth <= 2048); + break; + case GEN6_SURFTYPE_3D: + assert(width <= 2048 && height <= 2048 && depth <= 2048); + if (!is_rt) + assert(first_layer == 0); + break; + case GEN6_SURFTYPE_CUBE: + assert(width <= 16384 && height <= 16384 && depth <= 86); + assert(width == height); + if (is_rt) + assert(first_layer == 0); + break; + default: + assert(!"unexpected surface type"); + break; + } + + if (is_rt) { + assert(num_levels == 1); + lod = first_level; + } + else { + lod = num_levels - 1; + } + + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 68: + * + * "The Base Address for linear render target surfaces and surfaces + * accessed with the typed surface read/write data port messages must + * be element-size aligned, for non-YUV surface 
formats, or a multiple + * of 2 element-sizes for YUV surface formats. Other linear surfaces + * have no alignment requirements (byte alignment is sufficient)." + * + * From the Ivy Bridge PRM, volume 4 part 1, page 70: + * + * "For linear render target surfaces and surfaces accessed with the + * typed data port messages, the pitch must be a multiple of the + * element size for non-YUV surface formats. Pitch must be a multiple + * of 2 * element size for YUV surface formats. For linear surfaces + * with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple + * of 4 bytes.For other linear surfaces, the pitch can be any multiple + * of bytes." + * + * From the Ivy Bridge PRM, volume 4 part 1, page 74: + * + * "For linear surfaces, this field (X Offset) must be zero." + */ + if (tex->image.tiling == GEN6_TILING_NONE) { + if (is_rt) { + const int elem_size = util_format_get_blocksize(format); + assert(pitch % elem_size == 0); + } + } + + STATIC_ASSERT(Elements(surf->payload) >= 13); + dw = surf->payload; + + dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT | + surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT; + + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 63: + * + * "If this field (Surface Array) is enabled, the Surface Type must be + * SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is + * disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or + * SURFTYPE_CUBE, the Depth field must be set to zero." + * + * For non-3D sampler surfaces, resinfo (the sampler message) always + * returns zero for the number of layers when this field is not set. 
+ */ + if (surface_type != GEN6_SURFTYPE_3D) { + if (util_resource_is_array_texture(&tex->base)) + dw[0] |= GEN7_SURFACE_DW0_IS_ARRAY; + else + assert(depth == 1); + } + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + switch (tex->image.align_j) { + case 4: + dw[0] |= GEN7_SURFACE_DW0_VALIGN_4; + break; + case 8: + dw[0] |= GEN8_SURFACE_DW0_VALIGN_8; + break; + case 16: + dw[0] |= GEN8_SURFACE_DW0_VALIGN_16; + break; + default: + assert(!"unsupported valign"); + break; + } + + switch (tex->image.align_i) { + case 4: + dw[0] |= GEN8_SURFACE_DW0_HALIGN_4; + break; + case 8: + dw[0] |= GEN8_SURFACE_DW0_HALIGN_8; + break; + case 16: + dw[0] |= GEN8_SURFACE_DW0_HALIGN_16; + break; + default: + assert(!"unsupported halign"); + break; + } + + dw[0] |= tex->image.tiling << GEN8_SURFACE_DW0_TILING__SHIFT; + } else { + assert(tex->image.align_i == 4 || tex->image.align_i == 8); + assert(tex->image.align_j == 2 || tex->image.align_j == 4); + + if (tex->image.align_j == 4) + dw[0] |= GEN7_SURFACE_DW0_VALIGN_4; + + if (tex->image.align_i == 8) + dw[0] |= GEN7_SURFACE_DW0_HALIGN_8; + + assert(tex->image.tiling != GEN8_TILING_W); + dw[0] |= tex->image.tiling << GEN7_SURFACE_DW0_TILING__SHIFT; + + if (tex->image.walk == ILO_IMAGE_WALK_LOD) + dw[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0; + else + dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL; + } + + if (is_rt) + dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW; + + if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) + dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + assert(tex->image.layer_height % 4 == 0); + dw[1] = tex->image.layer_height / 4; + } else { + dw[1] = 0; + } + + dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) | + GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH); + + dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH) | + (pitch - 1); + + dw[4] = first_layer << 18 | + (num_layers - 1) << 7; + + /* + * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL + * means the 
samples are interleaved. The layouts are the same when the + * number of samples is 1. + */ + if (tex->image.interleaved_samples && tex->base.nr_samples > 1) { + assert(!is_rt); + dw[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL; + } + else { + dw[4] |= GEN7_SURFACE_DW4_MSFMT_MSS; + } + + switch (tex->base.nr_samples) { + case 0: + case 1: + default: + dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_1; + break; + case 2: + dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_2; + break; + case 4: + dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4; + break; + case 8: + dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8; + break; + case 16: + dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_16; + break; + } + + dw[5] = GEN_SHIFT32(first_level, GEN7_SURFACE_DW5_MIN_LOD) | + lod; + + dw[6] = 0; + dw[7] = 0; + + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { + dw[7] |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) | + GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) | + GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) | + GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A); + } + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + memset(&dw[8], 0, sizeof(*dw) * (13 - 8)); +} + +void +ilo_gpe_init_view_surface_null(const struct ilo_dev *dev, + unsigned width, unsigned height, + unsigned depth, unsigned level, + struct ilo_view_surface *surf) +{ + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + view_init_null_gen7(dev, + width, height, depth, level, surf); + } else { + view_init_null_gen6(dev, + width, height, depth, level, surf); + } + + surf->bo = NULL; + surf->scanout = false; +} + +void +ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev, + const struct ilo_buffer *buf, + unsigned offset, unsigned size, + unsigned struct_size, + enum pipe_format elem_format, + bool is_rt, bool render_cache_rw, + struct ilo_view_surface *surf) +{ + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + view_init_for_buffer_gen7(dev, buf, offset, size, + struct_size, elem_format, is_rt, render_cache_rw, surf); + } else { + 
view_init_for_buffer_gen6(dev, buf, offset, size, + struct_size, elem_format, is_rt, render_cache_rw, surf); + } + + /* do not increment reference count */ + surf->bo = buf->bo; + surf->scanout = false; +} + +void +ilo_gpe_init_view_surface_for_texture(const struct ilo_dev *dev, + const struct ilo_texture *tex, + enum pipe_format format, + unsigned first_level, + unsigned num_levels, + unsigned first_layer, + unsigned num_layers, + bool is_rt, + struct ilo_view_surface *surf) +{ + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + view_init_for_texture_gen7(dev, tex, format, + first_level, num_levels, first_layer, num_layers, + is_rt, surf); + } else { + view_init_for_texture_gen6(dev, tex, format, + first_level, num_levels, first_layer, num_layers, + is_rt, surf); + } + + /* do not increment reference count */ + surf->bo = tex->image.bo; + + /* assume imported RTs are scanouts */ + surf->scanout = ((tex->base.bind & PIPE_BIND_SCANOUT) || + (tex->imported && (tex->base.bind & PIPE_BIND_RENDER_TARGET))); +} + +static void +sampler_init_border_color_gen6(const struct ilo_dev *dev, + const union pipe_color_union *color, + uint32_t *dw, int num_dwords) +{ + float rgba[4] = { + color->f[0], color->f[1], color->f[2], color->f[3], + }; + + ILO_DEV_ASSERT(dev, 6, 6); + + assert(num_dwords >= 12); + + /* + * This state is not documented in the Sandy Bridge PRM, but in the + * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1. 
+ */ + + /* IEEE_FP */ + dw[1] = fui(rgba[0]); + dw[2] = fui(rgba[1]); + dw[3] = fui(rgba[2]); + dw[4] = fui(rgba[3]); + + /* FLOAT_16 */ + dw[5] = util_float_to_half(rgba[0]) | + util_float_to_half(rgba[1]) << 16; + dw[6] = util_float_to_half(rgba[2]) | + util_float_to_half(rgba[3]) << 16; + + /* clamp to [-1.0f, 1.0f] */ + rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f); + rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f); + rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f); + rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f); + + /* SNORM16 */ + dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) | + (int16_t) util_iround(rgba[1] * 32767.0f) << 16; + dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) | + (int16_t) util_iround(rgba[3] * 32767.0f) << 16; + + /* SNORM8 */ + dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) | + (int8_t) util_iround(rgba[1] * 127.0f) << 8 | + (int8_t) util_iround(rgba[2] * 127.0f) << 16 | + (int8_t) util_iround(rgba[3] * 127.0f) << 24; + + /* clamp to [0.0f, 1.0f] */ + rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f); + rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f); + rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f); + rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f); + + /* UNORM8 */ + dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) | + (uint8_t) util_iround(rgba[1] * 255.0f) << 8 | + (uint8_t) util_iround(rgba[2] * 255.0f) << 16 | + (uint8_t) util_iround(rgba[3] * 255.0f) << 24; + + /* UNORM16 */ + dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) | + (uint16_t) util_iround(rgba[1] * 65535.0f) << 16; + dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) | + (uint16_t) util_iround(rgba[3] * 65535.0f) << 16; +} + +/** + * Translate a pipe texture mipfilter to the matching hardware mipfilter. 
+ */ +static int +gen6_translate_tex_mipfilter(unsigned filter) +{ + switch (filter) { + case PIPE_TEX_MIPFILTER_NEAREST: return GEN6_MIPFILTER_NEAREST; + case PIPE_TEX_MIPFILTER_LINEAR: return GEN6_MIPFILTER_LINEAR; + case PIPE_TEX_MIPFILTER_NONE: return GEN6_MIPFILTER_NONE; + default: + assert(!"unknown mipfilter"); + return GEN6_MIPFILTER_NONE; + } +} + +/** + * Translate a pipe texture filter to the matching hardware mapfilter. + */ +static int +gen6_translate_tex_filter(unsigned filter) +{ + switch (filter) { + case PIPE_TEX_FILTER_NEAREST: return GEN6_MAPFILTER_NEAREST; + case PIPE_TEX_FILTER_LINEAR: return GEN6_MAPFILTER_LINEAR; + default: + assert(!"unknown sampler filter"); + return GEN6_MAPFILTER_NEAREST; + } +} + +/** + * Translate a pipe texture coordinate wrapping mode to the matching hardware + * wrapping mode. + */ +static int +gen6_translate_tex_wrap(unsigned wrap) +{ + switch (wrap) { + case PIPE_TEX_WRAP_CLAMP: return GEN8_TEXCOORDMODE_HALF_BORDER; + case PIPE_TEX_WRAP_REPEAT: return GEN6_TEXCOORDMODE_WRAP; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return GEN6_TEXCOORDMODE_CLAMP; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return GEN6_TEXCOORDMODE_CLAMP_BORDER; + case PIPE_TEX_WRAP_MIRROR_REPEAT: return GEN6_TEXCOORDMODE_MIRROR; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + default: + assert(!"unknown sampler wrap mode"); + return GEN6_TEXCOORDMODE_WRAP; + } +} + +/** + * Translate a pipe shadow compare function to the matching hardware shadow + * function. + */ +static int +gen6_translate_shadow_func(unsigned func) +{ + /* + * For PIPE_FUNC_x, the reference value is on the left-hand side of the + * comparison, and 1.0 is returned when the comparison is true. + * + * For GEN6_COMPAREFUNCTION_x, the reference value is on the right-hand side of + * the comparison, and 0.0 is returned when the comparison is true. 
+ */ + switch (func) { + case PIPE_FUNC_NEVER: return GEN6_COMPAREFUNCTION_ALWAYS; + case PIPE_FUNC_LESS: return GEN6_COMPAREFUNCTION_LEQUAL; + case PIPE_FUNC_EQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL; + case PIPE_FUNC_LEQUAL: return GEN6_COMPAREFUNCTION_LESS; + case PIPE_FUNC_GREATER: return GEN6_COMPAREFUNCTION_GEQUAL; + case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_EQUAL; + case PIPE_FUNC_GEQUAL: return GEN6_COMPAREFUNCTION_GREATER; + case PIPE_FUNC_ALWAYS: return GEN6_COMPAREFUNCTION_NEVER; + default: + assert(!"unknown shadow compare function"); + return GEN6_COMPAREFUNCTION_NEVER; + } +} + +void +ilo_gpe_init_sampler_cso(const struct ilo_dev *dev, + const struct pipe_sampler_state *state, + struct ilo_sampler_cso *sampler) +{ + int mip_filter, min_filter, mag_filter, max_aniso; + int lod_bias, max_lod, min_lod; + int wrap_s, wrap_t, wrap_r, wrap_cube; + uint32_t dw0, dw1, dw3; + + ILO_DEV_ASSERT(dev, 6, 8); + + memset(sampler, 0, sizeof(*sampler)); + + mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter); + min_filter = gen6_translate_tex_filter(state->min_img_filter); + mag_filter = gen6_translate_tex_filter(state->mag_img_filter); + + sampler->anisotropic = state->max_anisotropy; + + if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16) + max_aniso = state->max_anisotropy / 2 - 1; + else if (state->max_anisotropy > 16) + max_aniso = GEN6_ANISORATIO_16; + else + max_aniso = GEN6_ANISORATIO_2; + + /* + * + * Here is how the hardware calculates per-pixel LOD, from my reading of the + * PRMs: + * + * 1) LOD is set to log2(ratio of texels to pixels) if not specified in + * other ways. The number of texels is measured using level + * SurfMinLod. + * 2) Bias is added to LOD. + * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is + * compared with Base to determine whether magnification or + * minification is needed. 
(if preclamp is disabled, LOD is compared + * with Base before clamping) + * 4) If magnification is needed, or no mipmapping is requested, LOD is + * set to floor(MinLod). + * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD. + * + * With Gallium interface, Base is always zero and + * pipe_sampler_view::u.tex.first_level specifies SurfMinLod. + */ + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + const float scale = 256.0f; + + /* [-16.0, 16.0) in S4.8 */ + lod_bias = (int) + (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale); + lod_bias &= 0x1fff; + + /* [0.0, 14.0] in U4.8 */ + max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale); + min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale); + } + else { + const float scale = 64.0f; + + /* [-16.0, 16.0) in S4.6 */ + lod_bias = (int) + (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale); + lod_bias &= 0x7ff; + + /* [0.0, 13.0] in U4.6 */ + max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale); + min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale); + } + + /* + * We want LOD to be clamped to determine magnification/minification, and + * get set to zero when it is magnification or when mipmapping is disabled. + * The hardware would set LOD to floor(MinLod) and that is a problem when + * MinLod is greater than or equal to 1.0f. + * + * With Base being zero, it is always minification when MinLod is non-zero. + * To achieve our goal, we just need to set MinLod to zero and set + * MagFilter to MinFilter when mipmapping is disabled. 
+ */ + if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) { + min_lod = 0; + mag_filter = min_filter; + } + + /* determine wrap s/t/r */ + wrap_s = gen6_translate_tex_wrap(state->wrap_s); + wrap_t = gen6_translate_tex_wrap(state->wrap_t); + wrap_r = gen6_translate_tex_wrap(state->wrap_r); + if (ilo_dev_gen(dev) < ILO_GEN(8)) { + /* + * For nearest filtering, PIPE_TEX_WRAP_CLAMP means + * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering, + * PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while + * additionally clamping the texture coordinates to [0.0, 1.0]. + * + * PIPE_TEX_WRAP_CLAMP is not supported natively until Gen8. The + * clamping has to be taken care of in the shaders. There are two + * filters here, but let the minification one have a say. + */ + const bool clamp_is_to_edge = + (state->min_img_filter == PIPE_TEX_FILTER_NEAREST); + + if (clamp_is_to_edge) { + if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER) + wrap_s = GEN6_TEXCOORDMODE_CLAMP; + if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER) + wrap_t = GEN6_TEXCOORDMODE_CLAMP; + if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER) + wrap_r = GEN6_TEXCOORDMODE_CLAMP; + } else { + if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER) { + wrap_s = GEN6_TEXCOORDMODE_CLAMP_BORDER; + sampler->saturate_s = true; + } + if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER) { + wrap_t = GEN6_TEXCOORDMODE_CLAMP_BORDER; + sampler->saturate_t = true; + } + if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER) { + wrap_r = GEN6_TEXCOORDMODE_CLAMP_BORDER; + sampler->saturate_r = true; + } + } + } + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 107: + * + * "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP + * and TEXCOORDMODE_CUBE settings are valid, and each TC component + * must have the same Address Control mode." 
+ * + * From the Ivy Bridge PRM, volume 4 part 1, page 96: + * + * "This field (Cube Surface Control Mode) must be set to + * CUBECTRLMODE_PROGRAMMED" + * + * Therefore, we cannot use "Cube Surface Control Mode" for seamless cube + * map filtering. + */ + if (state->seamless_cube_map && + (state->min_img_filter != PIPE_TEX_FILTER_NEAREST || + state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) { + wrap_cube = GEN6_TEXCOORDMODE_CUBE; + } + else { + wrap_cube = GEN6_TEXCOORDMODE_CLAMP; + } + + if (!state->normalized_coords) { + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 98: + * + * "The following state must be set as indicated if this field + * (Non-normalized Coordinate Enable) is enabled: + * + * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP, + * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER. + * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D. + * - Mag Mode Filter must be MAPFILTER_NEAREST or + * MAPFILTER_LINEAR. + * - Min Mode Filter must be MAPFILTER_NEAREST or + * MAPFILTER_LINEAR. + * - Mip Mode Filter must be MIPFILTER_NONE. + * - Min LOD must be 0. + * - Max LOD must be 0. + * - MIP Count must be 0. + * - Surface Min LOD must be 0. + * - Texture LOD Bias must be 0." 
+ */ + assert(wrap_s == GEN6_TEXCOORDMODE_CLAMP || + wrap_s == GEN6_TEXCOORDMODE_CLAMP_BORDER); + assert(wrap_t == GEN6_TEXCOORDMODE_CLAMP || + wrap_t == GEN6_TEXCOORDMODE_CLAMP_BORDER); + assert(wrap_r == GEN6_TEXCOORDMODE_CLAMP || + wrap_r == GEN6_TEXCOORDMODE_CLAMP_BORDER); + + assert(mag_filter == GEN6_MAPFILTER_NEAREST || + mag_filter == GEN6_MAPFILTER_LINEAR); + assert(min_filter == GEN6_MAPFILTER_NEAREST || + min_filter == GEN6_MAPFILTER_LINEAR); + + /* work around a bug in util_blitter */ + mip_filter = GEN6_MIPFILTER_NONE; + + assert(mip_filter == GEN6_MIPFILTER_NONE); + } + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + dw0 = 1 << 28 | + mip_filter << 20 | + lod_bias << 1; + + sampler->dw_filter = mag_filter << 17 | + min_filter << 14; + + sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 | + GEN6_MAPFILTER_ANISOTROPIC << 14 | + 1; + + dw1 = min_lod << 20 | + max_lod << 8; + + if (state->compare_mode != PIPE_TEX_COMPARE_NONE) + dw1 |= gen6_translate_shadow_func(state->compare_func) << 1; + + dw3 = max_aniso << 19; + + /* round the coordinates for linear filtering */ + if (min_filter != GEN6_MAPFILTER_NEAREST) { + dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND | + GEN6_SAMPLER_DW3_V_MIN_ROUND | + GEN6_SAMPLER_DW3_R_MIN_ROUND); + } + if (mag_filter != GEN6_MAPFILTER_NEAREST) { + dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND | + GEN6_SAMPLER_DW3_V_MAG_ROUND | + GEN6_SAMPLER_DW3_R_MAG_ROUND); + } + + if (!state->normalized_coords) + dw3 |= 1 << 10; + + sampler->dw_wrap = wrap_s << 6 | + wrap_t << 3 | + wrap_r; + + /* + * As noted in the classic i965 driver, the HW may still reference + * wrap_t and wrap_r for 1D textures. 
We need to set them to a safe + * mode + */ + sampler->dw_wrap_1d = wrap_s << 6 | + GEN6_TEXCOORDMODE_WRAP << 3 | + GEN6_TEXCOORDMODE_WRAP; + + sampler->dw_wrap_cube = wrap_cube << 6 | + wrap_cube << 3 | + wrap_cube; + + STATIC_ASSERT(Elements(sampler->payload) >= 7); + + sampler->payload[0] = dw0; + sampler->payload[1] = dw1; + sampler->payload[2] = dw3; + + memcpy(&sampler->payload[3], + state->border_color.ui, sizeof(state->border_color.ui)); + } + else { + dw0 = 1 << 28 | + mip_filter << 20 | + lod_bias << 3; + + if (state->compare_mode != PIPE_TEX_COMPARE_NONE) + dw0 |= gen6_translate_shadow_func(state->compare_func); + + sampler->dw_filter = (min_filter != mag_filter) << 27 | + mag_filter << 17 | + min_filter << 14; + + sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 | + GEN6_MAPFILTER_ANISOTROPIC << 14; + + dw1 = min_lod << 22 | + max_lod << 12; + + sampler->dw_wrap = wrap_s << 6 | + wrap_t << 3 | + wrap_r; + + sampler->dw_wrap_1d = wrap_s << 6 | + GEN6_TEXCOORDMODE_WRAP << 3 | + GEN6_TEXCOORDMODE_WRAP; + + sampler->dw_wrap_cube = wrap_cube << 6 | + wrap_cube << 3 | + wrap_cube; + + dw3 = max_aniso << 19; + + /* round the coordinates for linear filtering */ + if (min_filter != GEN6_MAPFILTER_NEAREST) { + dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND | + GEN6_SAMPLER_DW3_V_MIN_ROUND | + GEN6_SAMPLER_DW3_R_MIN_ROUND); + } + if (mag_filter != GEN6_MAPFILTER_NEAREST) { + dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND | + GEN6_SAMPLER_DW3_V_MAG_ROUND | + GEN6_SAMPLER_DW3_R_MAG_ROUND); + } + + if (!state->normalized_coords) + dw3 |= 1; + + STATIC_ASSERT(Elements(sampler->payload) >= 15); + + sampler->payload[0] = dw0; + sampler->payload[1] = dw1; + sampler->payload[2] = dw3; + + sampler_init_border_color_gen6(dev, + &state->border_color, &sampler->payload[3], 12); + } +} diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index 3383eaf247a..02051299675 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ 
b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -25,12 +25,12 @@ * Chia-I Wu */ +#include "core/ilo_state_3d.h" #include "util/u_draw.h" #include "util/u_pack_color.h" #include "ilo_draw.h" #include "ilo_state.h" -#include "ilo_state_3d.h" #include "ilo_blit.h" #include "ilo_blitter.h" diff --git a/src/gallium/drivers/ilo/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/ilo_builder_3d_top.h index 9fa53050dff..bc6925db217 100644 --- a/src/gallium/drivers/ilo/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/ilo_builder_3d_top.h @@ -29,13 +29,13 @@ #define ILO_BUILDER_3D_TOP_H #include "genhw/genhw.h" +#include "core/ilo_state_3d.h" #include "core/intel_winsys.h" #include "ilo_common.h" #include "ilo_resource.h" #include "ilo_shader.h" #include "ilo_state.h" -#include "ilo_state_3d.h" #include "ilo_builder.h" static inline void diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c index 788a85887dd..8a2926c1945 100644 --- a/src/gallium/drivers/ilo/ilo_shader.c +++ b/src/gallium/drivers/ilo/ilo_shader.c @@ -27,12 +27,12 @@ #include "genhw/genhw.h" /* for SBE setup */ #include "tgsi/tgsi_parse.h" +#include "core/ilo_state_3d.h" #include "core/intel_winsys.h" #include "shader/ilo_shader_internal.h" #include "ilo_builder.h" #include "ilo_state.h" -#include "ilo_state_3d.h" #include "ilo_shader.h" struct ilo_shader_cache { diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 175e7c659d4..45f85e79f4b 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -25,6 +25,7 @@ * Chia-I Wu */ +#include "core/ilo_state_3d.h" #include "util/u_dynarray.h" #include "util/u_helpers.h" #include "util/u_upload_mgr.h" @@ -33,7 +34,6 @@ #include "ilo_resource.h" #include "ilo_shader.h" #include "ilo_state.h" -#include "ilo_state_3d.h" static void finalize_shader_states(struct ilo_state_vector *vec) diff --git a/src/gallium/drivers/ilo/ilo_state.h 
b/src/gallium/drivers/ilo/ilo_state.h index 78f2053b07c..fd0a3156ebc 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -28,24 +28,12 @@ #ifndef ILO_STATE_H #define ILO_STATE_H +#include "core/ilo_state_3d.h" #include "pipe/p_state.h" #include "util/u_dynarray.h" #include "ilo_common.h" -/** - * \see brw_context.h - */ -#define ILO_MAX_DRAW_BUFFERS 8 -#define ILO_MAX_CONST_BUFFERS (1 + 12) -#define ILO_MAX_SAMPLER_VIEWS 16 -#define ILO_MAX_SAMPLERS 16 -#define ILO_MAX_SO_BINDINGS 64 -#define ILO_MAX_SO_BUFFERS 4 -#define ILO_MAX_VIEWPORTS 1 - -#define ILO_MAX_SURFACES 256 - /** * States that we track. * @@ -131,246 +119,7 @@ enum ilo_dirty_flags { ILO_DIRTY_ALL = 0xffffffff, }; -struct intel_bo; -struct ilo_buffer; struct ilo_context; -struct ilo_shader_state; -struct ilo_texture; - -struct ilo_vb_state { - struct pipe_vertex_buffer states[PIPE_MAX_ATTRIBS]; - uint32_t enabled_mask; -}; - -struct ilo_ib_state { - struct pipe_resource *buffer; - const void *user_buffer; - unsigned offset; - unsigned index_size; - - /* these are not valid until the state is finalized */ - struct pipe_resource *hw_resource; - unsigned hw_index_size; - /* an offset to be added to pipe_draw_info::start */ - int64_t draw_start_offset; -}; - -struct ilo_ve_cso { - /* VERTEX_ELEMENT_STATE */ - uint32_t payload[2]; -}; - -struct ilo_ve_state { - struct ilo_ve_cso cso[PIPE_MAX_ATTRIBS]; - unsigned count; - - unsigned instance_divisors[PIPE_MAX_ATTRIBS]; - unsigned vb_mapping[PIPE_MAX_ATTRIBS]; - unsigned vb_count; - - /* these are not valid until the state is finalized */ - struct ilo_ve_cso edgeflag_cso; - bool last_cso_edgeflag; - - struct ilo_ve_cso nosrc_cso; - bool prepend_nosrc_cso; -}; - -struct ilo_so_state { - struct pipe_stream_output_target *states[ILO_MAX_SO_BUFFERS]; - unsigned count; - unsigned append_bitmask; - - bool enabled; -}; - -struct ilo_viewport_cso { - /* matrix form */ - float m00, m11, m22, m30, m31, m32; - - /* guardband 
in NDC space */ - float min_gbx, min_gby, max_gbx, max_gby; - - /* viewport in screen space */ - float min_x, min_y, min_z; - float max_x, max_y, max_z; -}; - -struct ilo_viewport_state { - struct ilo_viewport_cso cso[ILO_MAX_VIEWPORTS]; - unsigned count; - - struct pipe_viewport_state viewport0; -}; - -struct ilo_scissor_state { - /* SCISSOR_RECT */ - uint32_t payload[ILO_MAX_VIEWPORTS * 2]; - - struct pipe_scissor_state scissor0; -}; - -struct ilo_rasterizer_clip { - /* 3DSTATE_CLIP */ - uint32_t payload[3]; - - uint32_t can_enable_guardband; -}; - -struct ilo_rasterizer_sf { - /* 3DSTATE_SF */ - uint32_t payload[3]; - uint32_t dw_msaa; - - /* Global Depth Offset Constant/Scale/Clamp */ - uint32_t dw_depth_offset_const; - uint32_t dw_depth_offset_scale; - uint32_t dw_depth_offset_clamp; - - /* Gen8+ 3DSTATE_RASTER */ - uint32_t dw_raster; -}; - -struct ilo_rasterizer_wm { - /* 3DSTATE_WM */ - uint32_t payload[2]; - uint32_t dw_msaa_rast; - uint32_t dw_msaa_disp; -}; - -struct ilo_rasterizer_state { - struct pipe_rasterizer_state state; - - struct ilo_rasterizer_clip clip; - struct ilo_rasterizer_sf sf; - struct ilo_rasterizer_wm wm; -}; - -struct ilo_dsa_state { - /* DEPTH_STENCIL_STATE or Gen8+ 3DSTATE_WM_DEPTH_STENCIL */ - uint32_t payload[3]; - - uint32_t dw_blend_alpha; - uint32_t dw_ps_blend_alpha; - ubyte alpha_ref; -}; - -struct ilo_blend_cso { - /* BLEND_STATE */ - uint32_t payload[2]; - - uint32_t dw_blend; - uint32_t dw_blend_dst_alpha_forced_one; -}; - -struct ilo_blend_state { - struct ilo_blend_cso cso[ILO_MAX_DRAW_BUFFERS]; - - bool dual_blend; - bool alpha_to_coverage; - - uint32_t dw_shared; - uint32_t dw_alpha_mod; - uint32_t dw_logicop; - - /* a part of 3DSTATE_PS_BLEND */ - uint32_t dw_ps_blend; - uint32_t dw_ps_blend_dst_alpha_forced_one; -}; - -struct ilo_sampler_cso { - /* SAMPLER_STATE and SAMPLER_BORDER_COLOR_STATE */ - uint32_t payload[15]; - - uint32_t dw_filter; - uint32_t dw_filter_aniso; - uint32_t dw_wrap; - uint32_t dw_wrap_1d; - 
uint32_t dw_wrap_cube; - - bool anisotropic; - bool saturate_r; - bool saturate_s; - bool saturate_t; -}; - -struct ilo_sampler_state { - const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS]; -}; - -struct ilo_view_surface { - /* SURFACE_STATE */ - uint32_t payload[13]; - struct intel_bo *bo; - - uint32_t scanout; -}; - -struct ilo_view_cso { - struct pipe_sampler_view base; - - struct ilo_view_surface surface; -}; - -struct ilo_view_state { - struct pipe_sampler_view *states[ILO_MAX_SAMPLER_VIEWS]; - unsigned count; -}; - -struct ilo_cbuf_cso { - struct pipe_resource *resource; - struct ilo_view_surface surface; - - /* - * this CSO is not so constant because user buffer needs to be uploaded in - * finalize_constant_buffers() - */ - const void *user_buffer; - unsigned user_buffer_size; -}; - -struct ilo_cbuf_state { - struct ilo_cbuf_cso cso[ILO_MAX_CONST_BUFFERS]; - uint32_t enabled_mask; -}; - -struct ilo_resource_state { - struct pipe_surface *states[PIPE_MAX_SHADER_RESOURCES]; - unsigned count; -}; - -struct ilo_surface_cso { - struct pipe_surface base; - - bool is_rt; - union { - struct ilo_view_surface rt; - struct ilo_zs_surface { - uint32_t payload[12]; - uint32_t dw_aligned_8x4; - - struct intel_bo *bo; - struct intel_bo *hiz_bo; - struct intel_bo *separate_s8_bo; - } zs; - } u; -}; - -struct ilo_fb_state { - struct pipe_framebuffer_state state; - - struct ilo_view_surface null_rt; - struct ilo_zs_surface null_zs; - - struct ilo_fb_blend_caps { - bool can_logicop; - bool can_blend; - bool can_alpha_test; - bool dst_alpha_forced_one; - } blend_caps[PIPE_MAX_COLOR_BUFS]; - - unsigned num_samples; -}; struct ilo_global_binding_cso { struct pipe_resource *resource; @@ -396,10 +145,6 @@ struct ilo_global_binding { unsigned count; }; -struct ilo_shader_cso { - uint32_t payload[5]; -}; - struct ilo_state_vector { const struct pipe_draw_info *draw; diff --git a/src/gallium/drivers/ilo/ilo_state_3d.h b/src/gallium/drivers/ilo/ilo_state_3d.h deleted file mode 100644 
index b504390d04b..00000000000 --- a/src/gallium/drivers/ilo/ilo_state_3d.h +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright (C) 2012-2014 LunarG, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Chia-I Wu - */ - -#ifndef ILO_STATE_3D_H -#define ILO_STATE_3D_H - -#include "genhw/genhw.h" -#include "core/intel_winsys.h" - -#include "ilo_common.h" -#include "ilo_state.h" - -/** - * Translate a pipe texture target to the matching hardware surface type. 
- */ -static inline int -ilo_gpe_gen6_translate_texture(enum pipe_texture_target target) -{ - switch (target) { - case PIPE_BUFFER: - return GEN6_SURFTYPE_BUFFER; - case PIPE_TEXTURE_1D: - case PIPE_TEXTURE_1D_ARRAY: - return GEN6_SURFTYPE_1D; - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_RECT: - case PIPE_TEXTURE_2D_ARRAY: - return GEN6_SURFTYPE_2D; - case PIPE_TEXTURE_3D: - return GEN6_SURFTYPE_3D; - case PIPE_TEXTURE_CUBE: - case PIPE_TEXTURE_CUBE_ARRAY: - return GEN6_SURFTYPE_CUBE; - default: - assert(!"unknown texture target"); - return GEN6_SURFTYPE_BUFFER; - } -} - -void -ilo_gpe_init_ve(const struct ilo_dev *dev, - unsigned num_states, - const struct pipe_vertex_element *states, - struct ilo_ve_state *ve); - -void -ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev, - struct ilo_ve_cso *cso); - -void -ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev, - int comp0, int comp1, int comp2, int comp3, - struct ilo_ve_cso *cso); - -void -ilo_gpe_set_viewport_cso(const struct ilo_dev *dev, - const struct pipe_viewport_state *state, - struct ilo_viewport_cso *vp); - -void -ilo_gpe_set_scissor(const struct ilo_dev *dev, - unsigned start_slot, - unsigned num_states, - const struct pipe_scissor_state *states, - struct ilo_scissor_state *scissor); - -void -ilo_gpe_set_scissor_null(const struct ilo_dev *dev, - struct ilo_scissor_state *scissor); - -void -ilo_gpe_init_rasterizer(const struct ilo_dev *dev, - const struct pipe_rasterizer_state *state, - struct ilo_rasterizer_state *rasterizer); -void -ilo_gpe_init_dsa(const struct ilo_dev *dev, - const struct pipe_depth_stencil_alpha_state *state, - struct ilo_dsa_state *dsa); - -void -ilo_gpe_init_blend(const struct ilo_dev *dev, - const struct pipe_blend_state *state, - struct ilo_blend_state *blend); - -void -ilo_gpe_init_sampler_cso(const struct ilo_dev *dev, - const struct pipe_sampler_state *state, - struct ilo_sampler_cso *sampler); - -void -ilo_gpe_init_view_surface_null(const struct ilo_dev *dev, - unsigned width, 
unsigned height, - unsigned depth, unsigned level, - struct ilo_view_surface *surf); - -void -ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev, - const struct ilo_buffer *buf, - unsigned offset, unsigned size, - unsigned struct_size, - enum pipe_format elem_format, - bool is_rt, bool render_cache_rw, - struct ilo_view_surface *surf); - -void -ilo_gpe_init_view_surface_for_texture(const struct ilo_dev *dev, - const struct ilo_texture *tex, - enum pipe_format format, - unsigned first_level, - unsigned num_levels, - unsigned first_layer, - unsigned num_layers, - bool is_rt, - struct ilo_view_surface *surf); - -void -ilo_gpe_init_zs_surface(const struct ilo_dev *dev, - const struct ilo_texture *tex, - enum pipe_format format, unsigned level, - unsigned first_layer, unsigned num_layers, - struct ilo_zs_surface *zs); - -void -ilo_gpe_init_vs_cso(const struct ilo_dev *dev, - const struct ilo_shader_state *vs, - struct ilo_shader_cso *cso); - -void -ilo_gpe_init_gs_cso(const struct ilo_dev *dev, - const struct ilo_shader_state *gs, - struct ilo_shader_cso *cso); - -void -ilo_gpe_init_fs_cso(const struct ilo_dev *dev, - const struct ilo_shader_state *fs, - struct ilo_shader_cso *cso); - -void -ilo_gpe_set_fb(const struct ilo_dev *dev, - const struct pipe_framebuffer_state *state, - struct ilo_fb_state *fb); - -#endif /* ILO_STATE_3D_H */ diff --git a/src/gallium/drivers/ilo/ilo_state_3d_bottom.c b/src/gallium/drivers/ilo/ilo_state_3d_bottom.c deleted file mode 100644 index 13c1a7feead..00000000000 --- a/src/gallium/drivers/ilo/ilo_state_3d_bottom.c +++ /dev/null @@ -1,2225 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright (C) 2012-2014 LunarG, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Chia-I Wu - */ - -#include "genhw/genhw.h" -#include "core/ilo_format.h" -#include "util/u_dual_blend.h" -#include "util/u_framebuffer.h" -#include "util/u_half.h" - -#include "ilo_context.h" -#include "ilo_resource.h" -#include "ilo_shader.h" -#include "ilo_state.h" -#include "ilo_state_3d.h" - -static void -rasterizer_init_clip(const struct ilo_dev *dev, - const struct pipe_rasterizer_state *state, - struct ilo_rasterizer_clip *clip) -{ - uint32_t dw1, dw2, dw3; - - ILO_DEV_ASSERT(dev, 6, 8); - - dw1 = GEN6_CLIP_DW1_STATISTICS; - - if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 219: - * - * "Workaround : Due to Hardware issue "EarlyCull" needs to be - * enabled only for the cases where the incoming primitive topology - * into the clipper guaranteed to be Trilist." - * - * What does this mean? 
- */ - dw1 |= 0 << 19 | - GEN7_CLIP_DW1_EARLY_CULL_ENABLE; - - if (ilo_dev_gen(dev) < ILO_GEN(8)) { - if (state->front_ccw) - dw1 |= GEN7_CLIP_DW1_FRONTWINDING_CCW; - - switch (state->cull_face) { - case PIPE_FACE_NONE: - dw1 |= GEN7_CLIP_DW1_CULLMODE_NONE; - break; - case PIPE_FACE_FRONT: - dw1 |= GEN7_CLIP_DW1_CULLMODE_FRONT; - break; - case PIPE_FACE_BACK: - dw1 |= GEN7_CLIP_DW1_CULLMODE_BACK; - break; - case PIPE_FACE_FRONT_AND_BACK: - dw1 |= GEN7_CLIP_DW1_CULLMODE_BOTH; - break; - } - } - } - - dw2 = GEN6_CLIP_DW2_CLIP_ENABLE | - GEN6_CLIP_DW2_XY_TEST_ENABLE | - state->clip_plane_enable << GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT | - GEN6_CLIP_DW2_CLIPMODE_NORMAL; - - if (state->clip_halfz) - dw2 |= GEN6_CLIP_DW2_APIMODE_D3D; - else - dw2 |= GEN6_CLIP_DW2_APIMODE_OGL; - - if (ilo_dev_gen(dev) < ILO_GEN(8) && state->depth_clip) - dw2 |= GEN6_CLIP_DW2_Z_TEST_ENABLE; - - if (state->flatshade_first) { - dw2 |= 0 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT | - 0 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT | - 1 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT; - } - else { - dw2 |= 2 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT | - 1 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT | - 2 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT; - } - - dw3 = 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT | - 0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT; - - clip->payload[0] = dw1; - clip->payload[1] = dw2; - clip->payload[2] = dw3; - - clip->can_enable_guardband = true; - - /* - * There are several reasons that guard band test should be disabled - * - * - GL wide points (to avoid partially visible object) - * - GL wide or AA lines (to avoid partially visible object) - */ - if (state->point_size_per_vertex || state->point_size > 1.0f) - clip->can_enable_guardband = false; - if (state->line_smooth || state->line_width > 1.0f) - clip->can_enable_guardband = false; -} - -static void -rasterizer_init_sf_depth_offset_gen6(const struct ilo_dev *dev, - const struct pipe_rasterizer_state *state, - struct ilo_rasterizer_sf *sf) -{ -
ILO_DEV_ASSERT(dev, 6, 8); - - /* - * Scale the constant term. The minimum representable value used by the HW - * is not large enough to be the minimum resolvable difference. - */ - sf->dw_depth_offset_const = fui(state->offset_units * 2.0f); - sf->dw_depth_offset_scale = fui(state->offset_scale); - sf->dw_depth_offset_clamp = fui(state->offset_clamp); -} - -static void -rasterizer_init_sf_gen6(const struct ilo_dev *dev, - const struct pipe_rasterizer_state *state, - struct ilo_rasterizer_sf *sf) -{ - int line_width, point_width; - uint32_t dw1, dw2, dw3; - - ILO_DEV_ASSERT(dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 248: - * - * "This bit (Statistics Enable) should be set whenever clipping is - * enabled and the Statistics Enable bit is set in CLIP_STATE. It - * should be cleared if clipping is disabled or Statistics Enable in - * CLIP_STATE is clear." - */ - dw1 = GEN7_SF_DW1_STATISTICS | - GEN7_SF_DW1_VIEWPORT_ENABLE; - - /* XXX GEN6 path seems to work fine for GEN7 */ - if (false && ilo_dev_gen(dev) >= ILO_GEN(7)) { - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 258: - * - * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset - * Enable Solid , Global Depth Offset Enable Wireframe, and Global - * Depth Offset Enable Point) should be set whenever non zero depth - * bias (Slope, Bias) values are used. Setting this bit may have - * some degradation of performance for some workloads."
- */ - if (state->offset_tri || state->offset_line || state->offset_point) { - /* XXX need to scale offset_const according to the depth format */ - dw1 |= GEN7_SF_DW1_LEGACY_DEPTH_OFFSET; - - dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID | - GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME | - GEN7_SF_DW1_DEPTH_OFFSET_POINT; - } - } else { - if (state->offset_tri) - dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID; - if (state->offset_line) - dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME; - if (state->offset_point) - dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_POINT; - } - - switch (state->fill_front) { - case PIPE_POLYGON_MODE_FILL: - dw1 |= GEN7_SF_DW1_FRONTFACE_SOLID; - break; - case PIPE_POLYGON_MODE_LINE: - dw1 |= GEN7_SF_DW1_FRONTFACE_WIREFRAME; - break; - case PIPE_POLYGON_MODE_POINT: - dw1 |= GEN7_SF_DW1_FRONTFACE_POINT; - break; - } - - switch (state->fill_back) { - case PIPE_POLYGON_MODE_FILL: - dw1 |= GEN7_SF_DW1_BACKFACE_SOLID; - break; - case PIPE_POLYGON_MODE_LINE: - dw1 |= GEN7_SF_DW1_BACKFACE_WIREFRAME; - break; - case PIPE_POLYGON_MODE_POINT: - dw1 |= GEN7_SF_DW1_BACKFACE_POINT; - break; - } - - if (state->front_ccw) - dw1 |= GEN7_SF_DW1_FRONTWINDING_CCW; - - dw2 = 0; - - if (state->line_smooth) { - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 251: - * - * "This field (Anti-aliasing Enable) must be disabled if any of the - * render targets have integer (UINT or SINT) surface format." - * - * From the Sandy Bridge PRM, volume 2 part 1, page 317: - * - * "This field (Hierarchical Depth Buffer Enable) must be disabled - * if Anti-aliasing Enable in 3DSTATE_SF is enabled. - * - * TODO We do not check those yet. 
- */ - dw2 |= GEN7_SF_DW2_AA_LINE_ENABLE | - GEN7_SF_DW2_AA_LINE_CAP_1_0; - } - - switch (state->cull_face) { - case PIPE_FACE_NONE: - dw2 |= GEN7_SF_DW2_CULLMODE_NONE; - break; - case PIPE_FACE_FRONT: - dw2 |= GEN7_SF_DW2_CULLMODE_FRONT; - break; - case PIPE_FACE_BACK: - dw2 |= GEN7_SF_DW2_CULLMODE_BACK; - break; - case PIPE_FACE_FRONT_AND_BACK: - dw2 |= GEN7_SF_DW2_CULLMODE_BOTH; - break; - } - - /* - * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1) - * pixels in the minor direction. We have to make the lines slightly - * thicker, 0.5 pixel on both sides, so that they intersect that many - * pixels are considered into the lines. - * - * Line width is in U3.7. - */ - line_width = (int) - ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f); - line_width = CLAMP(line_width, 0, 1023); - - /* use GIQ rules */ - if (line_width == 128 && !state->line_smooth) - line_width = 0; - - dw2 |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT; - - if (ilo_dev_gen(dev) == ILO_GEN(7.5) && state->line_stipple_enable) - dw2 |= GEN75_SF_DW2_LINE_STIPPLE_ENABLE; - - if (state->scissor) - dw2 |= GEN7_SF_DW2_SCISSOR_ENABLE; - - dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE | - GEN7_SF_DW3_SUBPIXEL_8BITS; - - if (state->line_last_pixel) - dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE; - - if (state->flatshade_first) { - dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | - 0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | - 1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; - } else { - dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | - 1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | - 2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; - } - - if (!state->point_size_per_vertex) - dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH; - - /* in U8.3 */ - point_width = (int) (state->point_size * 8.0f + 0.5f); - point_width = CLAMP(point_width, 1, 2047); - - dw3 |= point_width; - - STATIC_ASSERT(Elements(sf->payload) >= 3); - sf->payload[0] = dw1; - sf->payload[1] = dw2; - sf->payload[2] = dw3; - - if (state->multisample) { - 
sf->dw_msaa = GEN7_SF_DW2_MSRASTMODE_ON_PATTERN; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 251: - * - * "Software must not program a value of 0.0 when running in - * MSRASTMODE_ON_xxx modes - zero-width lines are not available - * when multisampling rasterization is enabled." - */ - if (!line_width) { - line_width = 128; /* 1.0f */ - - sf->dw_msaa |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT; - } - } else { - sf->dw_msaa = 0; - } - - rasterizer_init_sf_depth_offset_gen6(dev, state, sf); - /* 3DSTATE_RASTER is Gen8+ only */ - sf->dw_raster = 0; -} - -static uint32_t -rasterizer_get_sf_raster_gen8(const struct ilo_dev *dev, - const struct pipe_rasterizer_state *state) -{ - uint32_t dw = 0; - - ILO_DEV_ASSERT(dev, 8, 8); - - if (state->front_ccw) - dw |= GEN8_RASTER_DW1_FRONTWINDING_CCW; - - switch (state->cull_face) { - case PIPE_FACE_NONE: - dw |= GEN8_RASTER_DW1_CULLMODE_NONE; - break; - case PIPE_FACE_FRONT: - dw |= GEN8_RASTER_DW1_CULLMODE_FRONT; - break; - case PIPE_FACE_BACK: - dw |= GEN8_RASTER_DW1_CULLMODE_BACK; - break; - case PIPE_FACE_FRONT_AND_BACK: - dw |= GEN8_RASTER_DW1_CULLMODE_BOTH; - break; - } - - if (state->point_smooth) - dw |= GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE; - - if (state->multisample) - dw |= GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE; - - if (state->offset_tri) - dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID; - if (state->offset_line) - dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME; - if (state->offset_point) - dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_POINT; - - switch (state->fill_front) { - case PIPE_POLYGON_MODE_FILL: - dw |= GEN8_RASTER_DW1_FRONTFACE_SOLID; - break; - case PIPE_POLYGON_MODE_LINE: - dw |= GEN8_RASTER_DW1_FRONTFACE_WIREFRAME; - break; - case PIPE_POLYGON_MODE_POINT: - dw |= GEN8_RASTER_DW1_FRONTFACE_POINT; - break; - } - - switch (state->fill_back) { - case PIPE_POLYGON_MODE_FILL: - dw |= GEN8_RASTER_DW1_BACKFACE_SOLID; - break; - case PIPE_POLYGON_MODE_LINE: - dw |= GEN8_RASTER_DW1_BACKFACE_WIREFRAME; - break; - case 
PIPE_POLYGON_MODE_POINT: - dw |= GEN8_RASTER_DW1_BACKFACE_POINT; - break; - } - - if (state->line_smooth) - dw |= GEN8_RASTER_DW1_AA_LINE_ENABLE; - - if (state->scissor) - dw |= GEN8_RASTER_DW1_SCISSOR_ENABLE; - - if (state->depth_clip) - dw |= GEN8_RASTER_DW1_Z_TEST_ENABLE; - - return dw; -} - -static void -rasterizer_init_sf_gen8(const struct ilo_dev *dev, - const struct pipe_rasterizer_state *state, - struct ilo_rasterizer_sf *sf) -{ - int line_width, point_width; - uint32_t dw1, dw2, dw3; - - ILO_DEV_ASSERT(dev, 8, 8); - - /* in U3.7 */ - line_width = (int) - ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f); - line_width = CLAMP(line_width, 0, 1023); - - /* use GIQ rules */ - if (line_width == 128 && !state->line_smooth) - line_width = 0; - - /* in U8.3 */ - point_width = (int) (state->point_size * 8.0f + 0.5f); - point_width = CLAMP(point_width, 1, 2047); - - dw1 = GEN7_SF_DW1_STATISTICS | - GEN7_SF_DW1_VIEWPORT_ENABLE; - - dw2 = line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT; - if (state->line_smooth) - dw2 |= GEN7_SF_DW2_AA_LINE_CAP_1_0; - - dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE | - GEN7_SF_DW3_SUBPIXEL_8BITS | - point_width; - - if (state->line_last_pixel) - dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE; - - if (state->flatshade_first) { - dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | - 0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | - 1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; - } else { - dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | - 1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | - 2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; - } - - if (!state->point_size_per_vertex) - dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH; - - dw3 |= point_width; - - STATIC_ASSERT(Elements(sf->payload) >= 3); - sf->payload[0] = dw1; - sf->payload[1] = dw2; - sf->payload[2] = dw3; - - rasterizer_init_sf_depth_offset_gen6(dev, state, sf); - - sf->dw_msaa = 0; - sf->dw_raster = rasterizer_get_sf_raster_gen8(dev, state); -} - -static void -rasterizer_init_wm_gen6(const struct ilo_dev *dev, - const struct 
pipe_rasterizer_state *state, - struct ilo_rasterizer_wm *wm) -{ - uint32_t dw5, dw6; - - ILO_DEV_ASSERT(dev, 6, 6); - - /* only the FF unit states are set, as in GEN7 */ - - dw5 = GEN6_WM_DW5_AA_LINE_WIDTH_2_0; - - /* same value as in 3DSTATE_SF */ - if (state->line_smooth) - dw5 |= GEN6_WM_DW5_AA_LINE_CAP_1_0; - - if (state->poly_stipple_enable) - dw5 |= GEN6_WM_DW5_POLY_STIPPLE_ENABLE; - if (state->line_stipple_enable) - dw5 |= GEN6_WM_DW5_LINE_STIPPLE_ENABLE; - - /* - * assertion that makes sure - * - * dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp; - * - * is valid - */ - STATIC_ASSERT(GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL == 0 && - GEN6_WM_DW6_MSDISPMODE_PERSAMPLE == 0); - dw6 = GEN6_WM_DW6_ZW_INTERP_PIXEL; - - if (state->bottom_edge_rule) - dw6 |= GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT; - - wm->dw_msaa_rast = - (state->multisample) ? GEN6_WM_DW6_MSRASTMODE_ON_PATTERN : 0; - wm->dw_msaa_disp = GEN6_WM_DW6_MSDISPMODE_PERPIXEL; - - STATIC_ASSERT(Elements(wm->payload) >= 2); - wm->payload[0] = dw5; - wm->payload[1] = dw6; -} - -static void -rasterizer_init_wm_gen7(const struct ilo_dev *dev, - const struct pipe_rasterizer_state *state, - struct ilo_rasterizer_wm *wm) -{ - uint32_t dw1, dw2; - - ILO_DEV_ASSERT(dev, 7, 7.5); - - /* - * assertion that makes sure - * - * dw1 |= wm->dw_msaa_rast; - * dw2 |= wm->dw_msaa_disp; - * - * is valid - */ - STATIC_ASSERT(GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL == 0 && - GEN7_WM_DW2_MSDISPMODE_PERSAMPLE == 0); - dw1 = GEN7_WM_DW1_ZW_INTERP_PIXEL | - GEN7_WM_DW1_AA_LINE_WIDTH_2_0; - dw2 = 0; - - /* same value as in 3DSTATE_SF */ - if (state->line_smooth) - dw1 |= GEN7_WM_DW1_AA_LINE_CAP_1_0; - - if (state->poly_stipple_enable) - dw1 |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE; - if (state->line_stipple_enable) - dw1 |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE; - - if (state->bottom_edge_rule) - dw1 |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT; - - wm->dw_msaa_rast = - (state->multisample) ? 
GEN7_WM_DW1_MSRASTMODE_ON_PATTERN : 0; - wm->dw_msaa_disp = GEN7_WM_DW2_MSDISPMODE_PERPIXEL; - - STATIC_ASSERT(Elements(wm->payload) >= 2); - wm->payload[0] = dw1; - wm->payload[1] = dw2; -} - -static uint32_t -rasterizer_get_wm_gen8(const struct ilo_dev *dev, - const struct pipe_rasterizer_state *state) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 8, 8); - - dw = GEN7_WM_DW1_ZW_INTERP_PIXEL | - GEN7_WM_DW1_AA_LINE_WIDTH_2_0; - - /* same value as in 3DSTATE_SF */ - if (state->line_smooth) - dw |= GEN7_WM_DW1_AA_LINE_CAP_1_0; - - if (state->poly_stipple_enable) - dw |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE; - if (state->line_stipple_enable) - dw |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE; - - if (state->bottom_edge_rule) - dw |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT; - - return dw; -} - -void -ilo_gpe_init_rasterizer(const struct ilo_dev *dev, - const struct pipe_rasterizer_state *state, - struct ilo_rasterizer_state *rasterizer) -{ - rasterizer_init_clip(dev, state, &rasterizer->clip); - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - memset(&rasterizer->wm, 0, sizeof(rasterizer->wm)); - rasterizer->wm.payload[0] = rasterizer_get_wm_gen8(dev, state); - - rasterizer_init_sf_gen8(dev, state, &rasterizer->sf); - } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - rasterizer_init_wm_gen7(dev, state, &rasterizer->wm); - rasterizer_init_sf_gen6(dev, state, &rasterizer->sf); - } else { - rasterizer_init_wm_gen6(dev, state, &rasterizer->wm); - rasterizer_init_sf_gen6(dev, state, &rasterizer->sf); - } -} - -static void -fs_init_cso_gen6(const struct ilo_dev *dev, - const struct ilo_shader_state *fs, - struct ilo_shader_cso *cso) -{ - int start_grf, input_count, sampler_count, interps, max_threads; - uint32_t dw2, dw4, dw5, dw6; - - ILO_DEV_ASSERT(dev, 6, 6); - - start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); - input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); - sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT); - interps = 
ilo_shader_get_kernel_param(fs, - ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS); - - /* see brwCreateContext() */ - max_threads = (dev->gt == 2) ? 80 : 40; - - dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; - dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - dw4 = start_grf << GEN6_WM_DW4_URB_GRF_START0__SHIFT | - 0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT | - 0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT; - - dw5 = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 275: - * - * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the - * PS kernel or color calculator has the ability to kill (discard) - * pixels or samples, other than due to depth or stencil testing. - * This bit is required to be ENABLED in the following situations: - * - * The API pixel shader program contains "killpix" or "discard" - * instructions, or other code in the pixel shader kernel that can - * cause the final pixel mask to differ from the pixel mask received - * on dispatch. - * - * A sampler with chroma key enabled with kill pixel mode is used by - * the pixel shader. - * - * Any render target has Alpha Test Enable or AlphaToCoverage Enable - * enabled. - * - * The pixel shader kernel generates and outputs oMask. - * - * Note: As ClipDistance clipping is fully supported in hardware and - * therefore not via PS instructions, there should be no need to - * ENABLE this bit due to ClipDistance clipping." - */ - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) - dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 275: - * - * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth - * field must be set to disabled." - * - * TODO This is not checked yet. 
- */ - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) - dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) - dw5 |= GEN6_WM_DW5_PS_USE_DEPTH; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) - dw5 |= GEN6_WM_DW5_PS_USE_W; - - /* - * TODO set this bit only when - * - * a) fs writes colors and color is not masked, or - * b) fs writes depth, or - * c) fs or cc kills - */ - if (true) - dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE; - - assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET)); - dw5 |= GEN6_PS_DISPATCH_8 << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT; - - dw6 = input_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT | - GEN6_WM_DW6_PS_POSOFFSET_NONE | - interps << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT; - - STATIC_ASSERT(Elements(cso->payload) >= 4); - cso->payload[0] = dw2; - cso->payload[1] = dw4; - cso->payload[2] = dw5; - cso->payload[3] = dw6; -} - -static uint32_t -fs_get_wm_gen7(const struct ilo_dev *dev, - const struct ilo_shader_state *fs) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 7, 7.5); - - dw = ilo_shader_get_kernel_param(fs, - ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) << - GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT; - - /* - * TODO set this bit only when - * - * a) fs writes colors and color is not masked, or - * b) fs writes depth, or - * c) fs or cc kills - */ - dw |= GEN7_WM_DW1_PS_DISPATCH_ENABLE; - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 278: - * - * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that - * the PS kernel or color calculator has the ability to kill - * (discard) pixels or samples, other than due to depth or stencil - * testing. This bit is required to be ENABLED in the following - * situations: - * - * - The API pixel shader program contains "killpix" or "discard" - * instructions, or other code in the pixel shader kernel that - * can cause the final pixel mask to differ from the pixel mask - * received on dispatch. 
- * - * - A sampler with chroma key enabled with kill pixel mode is used - * by the pixel shader. - * - * - Any render target has Alpha Test Enable or AlphaToCoverage - * Enable enabled. - * - * - The pixel shader kernel generates and outputs oMask. - * - * Note: As ClipDistance clipping is fully supported in hardware - * and therefore not via PS instructions, there should be no need - * to ENABLE this bit due to ClipDistance clipping." - */ - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) - dw |= GEN7_WM_DW1_PS_KILL_PIXEL; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) - dw |= GEN7_WM_DW1_PSCDEPTH_ON; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) - dw |= GEN7_WM_DW1_PS_USE_DEPTH; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) - dw |= GEN7_WM_DW1_PS_USE_W; - - return dw; -} - -static void -fs_init_cso_gen7(const struct ilo_dev *dev, - const struct ilo_shader_state *fs, - struct ilo_shader_cso *cso) -{ - int start_grf, sampler_count, max_threads; - uint32_t dw2, dw4, dw5; - - ILO_DEV_ASSERT(dev, 7, 7.5); - - start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); - sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT); - - dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; - dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - dw4 = GEN7_PS_DW4_POSOFFSET_NONE; - - /* see brwCreateContext() */ - switch (ilo_dev_gen(dev)) { - case ILO_GEN(7.5): - max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102; - dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT; - dw4 |= 1 << GEN75_PS_DW4_SAMPLE_MASK__SHIFT; - break; - case ILO_GEN(7): - default: - max_threads = (dev->gt == 2) ? 
172 : 48; - dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT; - break; - } - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE)) - dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT)) - dw4 |= GEN7_PS_DW4_ATTR_ENABLE; - - assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET)); - dw4 |= GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT; - - dw5 = start_grf << GEN7_PS_DW5_URB_GRF_START0__SHIFT | - 0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT | - 0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT; - - STATIC_ASSERT(Elements(cso->payload) >= 4); - cso->payload[0] = dw2; - cso->payload[1] = dw4; - cso->payload[2] = dw5; - cso->payload[3] = fs_get_wm_gen7(dev, fs); -} - -static uint32_t -fs_get_psx_gen8(const struct ilo_dev *dev, - const struct ilo_shader_state *fs) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 8, 8); - - dw = GEN8_PSX_DW1_DISPATCH_ENABLE; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) - dw |= GEN8_PSX_DW1_KILL_PIXEL; - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) - dw |= GEN8_PSX_DW1_PSCDEPTH_ON; - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) - dw |= GEN8_PSX_DW1_USE_DEPTH; - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) - dw |= GEN8_PSX_DW1_USE_W; - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT)) - dw |= GEN8_PSX_DW1_ATTR_ENABLE; - - return dw; -} - -static uint32_t -fs_get_wm_gen8(const struct ilo_dev *dev, - const struct ilo_shader_state *fs) -{ - ILO_DEV_ASSERT(dev, 8, 8); - - return ilo_shader_get_kernel_param(fs, - ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) << - GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT; -} - -static void -fs_init_cso_gen8(const struct ilo_dev *dev, - const struct ilo_shader_state *fs, - struct ilo_shader_cso *cso) -{ - int start_grf, sampler_count; - uint32_t dw3, dw6, dw7; - - ILO_DEV_ASSERT(dev, 8, 8); - - start_grf = ilo_shader_get_kernel_param(fs, 
ILO_KERNEL_URB_DATA_START_REG); - sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT); - - dw3 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; - dw3 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - /* always 64? */ - dw6 = (64 - 2) << GEN8_PS_DW6_MAX_THREADS__SHIFT | - GEN8_PS_DW6_POSOFFSET_NONE; - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE)) - dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE; - - assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET)); - dw6 |= GEN6_PS_DISPATCH_8 << GEN8_PS_DW6_DISPATCH_MODE__SHIFT; - - dw7 = start_grf << GEN8_PS_DW7_URB_GRF_START0__SHIFT | - 0 << GEN8_PS_DW7_URB_GRF_START1__SHIFT | - 0 << GEN8_PS_DW7_URB_GRF_START2__SHIFT; - - STATIC_ASSERT(Elements(cso->payload) >= 5); - cso->payload[0] = dw3; - cso->payload[1] = dw6; - cso->payload[2] = dw7; - cso->payload[3] = fs_get_psx_gen8(dev, fs); - cso->payload[4] = fs_get_wm_gen8(dev, fs); -} - -void -ilo_gpe_init_fs_cso(const struct ilo_dev *dev, - const struct ilo_shader_state *fs, - struct ilo_shader_cso *cso) -{ - if (ilo_dev_gen(dev) >= ILO_GEN(8)) - fs_init_cso_gen8(dev, fs, cso); - else if (ilo_dev_gen(dev) >= ILO_GEN(7)) - fs_init_cso_gen7(dev, fs, cso); - else - fs_init_cso_gen6(dev, fs, cso); -} - -struct ilo_zs_surface_info { - int surface_type; - int format; - - struct { - struct intel_bo *bo; - unsigned stride; - unsigned qpitch; - enum gen_surface_tiling tiling; - uint32_t offset; - } zs, stencil, hiz; - - unsigned width, height, depth; - unsigned lod, first_layer, num_layers; -}; - -static void -zs_init_info_null(const struct ilo_dev *dev, - struct ilo_zs_surface_info *info) -{ - ILO_DEV_ASSERT(dev, 6, 8); - - memset(info, 0, sizeof(*info)); - - info->surface_type = GEN6_SURFTYPE_NULL; - info->format = GEN6_ZFORMAT_D32_FLOAT; - info->width = 1; - info->height = 1; - info->depth = 1; - info->num_layers = 1; -} - -static void -zs_init_info(const struct ilo_dev *dev, - const struct ilo_texture *tex, - 
enum pipe_format format, unsigned level, - unsigned first_layer, unsigned num_layers, - struct ilo_zs_surface_info *info) -{ - bool separate_stencil; - - ILO_DEV_ASSERT(dev, 6, 8); - - memset(info, 0, sizeof(*info)); - - info->surface_type = ilo_gpe_gen6_translate_texture(tex->base.target); - - if (info->surface_type == GEN6_SURFTYPE_CUBE) { - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 325-326: - * - * "For Other Surfaces (Cube Surfaces): - * This field (Minimum Array Element) is ignored." - * - * "For Other Surfaces (Cube Surfaces): - * This field (Render Target View Extent) is ignored." - * - * As such, we cannot set first_layer and num_layers on cube surfaces. - * To work around that, treat it as a 2D surface. - */ - info->surface_type = GEN6_SURFTYPE_2D; - } - - if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - separate_stencil = true; - } - else { - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 317: - * - * "This field (Separate Stencil Buffer Enable) must be set to the - * same value (enabled or disabled) as Hierarchical Depth Buffer - * Enable." - */ - separate_stencil = - ilo_texture_can_enable_hiz(tex, level, first_layer, num_layers); - } - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 317: - * - * "If this field (Hierarchical Depth Buffer Enable) is enabled, the - * Surface Format of the depth buffer cannot be - * D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil - * requires the separate stencil buffer." - * - * From the Ironlake PRM, volume 2 part 1, page 330: - * - * "If this field (Separate Stencil Buffer Enable) is disabled, the - * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT." - * - * There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT - * is indeed used, the depth values output by the fragment shaders will - * be different when read back. - * - * As for GEN7+, separate_stencil is always true. 
- */ - switch (format) { - case PIPE_FORMAT_Z16_UNORM: - info->format = GEN6_ZFORMAT_D16_UNORM; - break; - case PIPE_FORMAT_Z32_FLOAT: - info->format = GEN6_ZFORMAT_D32_FLOAT; - break; - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - info->format = (separate_stencil) ? - GEN6_ZFORMAT_D24_UNORM_X8_UINT : - GEN6_ZFORMAT_D24_UNORM_S8_UINT; - break; - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - info->format = (separate_stencil) ? - GEN6_ZFORMAT_D32_FLOAT : - GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT; - break; - case PIPE_FORMAT_S8_UINT: - if (separate_stencil) { - info->format = GEN6_ZFORMAT_D32_FLOAT; - break; - } - /* fall through */ - default: - assert(!"unsupported depth/stencil format"); - zs_init_info_null(dev, info); - return; - break; - } - - if (format != PIPE_FORMAT_S8_UINT) { - info->zs.bo = tex->image.bo; - info->zs.stride = tex->image.bo_stride; - - assert(tex->image.layer_height % 4 == 0); - info->zs.qpitch = tex->image.layer_height / 4; - - info->zs.tiling = tex->image.tiling; - info->zs.offset = 0; - } - - if (tex->separate_s8 || format == PIPE_FORMAT_S8_UINT) { - const struct ilo_texture *s8_tex = - (tex->separate_s8) ? tex->separate_s8 : tex; - - info->stencil.bo = s8_tex->image.bo; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 329: - * - * "The pitch must be set to 2x the value computed based on width, - * as the stencil buffer is stored with two rows interleaved." - * - * For GEN7, we still dobule the stride because we did not double the - * slice widths when initializing the layout. 
- */ - info->stencil.stride = s8_tex->image.bo_stride * 2; - - assert(s8_tex->image.layer_height % 4 == 0); - info->stencil.qpitch = s8_tex->image.layer_height / 4; - - info->stencil.tiling = s8_tex->image.tiling; - - if (ilo_dev_gen(dev) == ILO_GEN(6)) { - unsigned x, y; - - assert(s8_tex->image.walk == ILO_IMAGE_WALK_LOD); - - /* offset to the level */ - ilo_image_get_slice_pos(&s8_tex->image, level, 0, &x, &y); - ilo_image_pos_to_mem(&s8_tex->image, x, y, &x, &y); - info->stencil.offset = ilo_image_mem_to_raw(&s8_tex->image, x, y); - } - } - - if (ilo_texture_can_enable_hiz(tex, level, first_layer, num_layers)) { - info->hiz.bo = tex->image.aux_bo; - info->hiz.stride = tex->image.aux_stride; - - assert(tex->image.aux_layer_height % 4 == 0); - info->hiz.qpitch = tex->image.aux_layer_height / 4; - - info->hiz.tiling = GEN6_TILING_Y; - - /* offset to the level */ - if (ilo_dev_gen(dev) == ILO_GEN(6)) - info->hiz.offset = tex->image.aux_offsets[level]; - } - - info->width = tex->image.width0; - info->height = tex->image.height0; - info->depth = (tex->base.target == PIPE_TEXTURE_3D) ? - tex->base.depth0 : num_layers; - - info->lod = level; - info->first_layer = first_layer; - info->num_layers = num_layers; -} - -void -ilo_gpe_init_zs_surface(const struct ilo_dev *dev, - const struct ilo_texture *tex, - enum pipe_format format, unsigned level, - unsigned first_layer, unsigned num_layers, - struct ilo_zs_surface *zs) -{ - const int max_2d_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192; - const int max_array_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 
2048 : 512; - struct ilo_zs_surface_info info; - uint32_t dw1, dw2, dw3, dw4, dw5, dw6; - int align_w = 8, align_h = 4; - - ILO_DEV_ASSERT(dev, 6, 8); - - if (tex) { - zs_init_info(dev, tex, format, level, first_layer, num_layers, &info); - - switch (tex->base.nr_samples) { - case 2: - align_w /= 2; - break; - case 4: - align_w /= 2; - align_h /= 2; - break; - case 8: - align_w /= 4; - align_h /= 2; - break; - case 16: - align_w /= 4; - align_h /= 4; - break; - default: - break; - } - } else { - zs_init_info_null(dev, &info); - } - - switch (info.surface_type) { - case GEN6_SURFTYPE_NULL: - break; - case GEN6_SURFTYPE_1D: - assert(info.width <= max_2d_size && info.height == 1 && - info.depth <= max_array_size); - assert(info.first_layer < max_array_size - 1 && - info.num_layers <= max_array_size); - break; - case GEN6_SURFTYPE_2D: - assert(info.width <= max_2d_size && info.height <= max_2d_size && - info.depth <= max_array_size); - assert(info.first_layer < max_array_size - 1 && - info.num_layers <= max_array_size); - break; - case GEN6_SURFTYPE_3D: - assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048); - assert(info.first_layer < 2048 && info.num_layers <= max_array_size); - break; - case GEN6_SURFTYPE_CUBE: - assert(info.width <= max_2d_size && info.height <= max_2d_size && - info.depth == 1); - assert(info.first_layer == 0 && info.num_layers == 1); - assert(info.width == info.height); - break; - default: - assert(!"unexpected depth surface type"); - break; - } - - dw1 = info.surface_type << GEN6_DEPTH_DW1_TYPE__SHIFT | - info.format << GEN6_DEPTH_DW1_FORMAT__SHIFT; - - if (info.zs.bo) { - /* required for GEN6+ */ - assert(info.zs.tiling == GEN6_TILING_Y); - assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 && - info.zs.stride % 128 == 0); - assert(info.width <= info.zs.stride); - - dw1 |= (info.zs.stride - 1); - dw2 = info.zs.offset; - } else { - dw2 = 0; - } - - if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - if (info.zs.bo) - dw1 |= 
GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE; - - if (info.stencil.bo) - dw1 |= GEN7_DEPTH_DW1_STENCIL_WRITE_ENABLE; - - if (info.hiz.bo) - dw1 |= GEN7_DEPTH_DW1_HIZ_ENABLE; - - dw3 = (info.height - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT | - (info.width - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT | - info.lod << GEN7_DEPTH_DW3_LOD__SHIFT; - - zs->dw_aligned_8x4 = - (align(info.height, align_h) - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT | - (align(info.width, align_w) - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT | - info.lod << GEN7_DEPTH_DW3_LOD__SHIFT; - - dw4 = (info.depth - 1) << GEN7_DEPTH_DW4_DEPTH__SHIFT | - info.first_layer << GEN7_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT; - - dw5 = 0; - - dw6 = (info.num_layers - 1) << GEN7_DEPTH_DW6_RT_VIEW_EXTENT__SHIFT; - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) - dw6 |= info.zs.qpitch; - } else { - /* always Y-tiled */ - dw1 |= GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT; - - if (info.hiz.bo) { - dw1 |= GEN6_DEPTH_DW1_HIZ_ENABLE | - GEN6_DEPTH_DW1_SEPARATE_STENCIL; - } - - dw3 = (info.height - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT | - (info.width - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT | - info.lod << GEN6_DEPTH_DW3_LOD__SHIFT | - GEN6_DEPTH_DW3_MIPLAYOUT_BELOW; - - zs->dw_aligned_8x4 = - (align(info.height, align_h) - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT | - (align(info.width, align_w) - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT | - info.lod << GEN6_DEPTH_DW3_LOD__SHIFT | - GEN6_DEPTH_DW3_MIPLAYOUT_BELOW; - - dw4 = (info.depth - 1) << GEN6_DEPTH_DW4_DEPTH__SHIFT | - info.first_layer << GEN6_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT | - (info.num_layers - 1) << GEN6_DEPTH_DW4_RT_VIEW_EXTENT__SHIFT; - - dw5 = 0; - - dw6 = 0; - } - - STATIC_ASSERT(Elements(zs->payload) >= 12); - - zs->payload[0] = dw1; - zs->payload[1] = dw2; - zs->payload[2] = dw3; - zs->payload[3] = dw4; - zs->payload[4] = dw5; - zs->payload[5] = dw6; - - /* do not increment reference count */ - zs->bo = info.zs.bo; - - /* separate stencil */ - if (info.stencil.bo) { - assert(info.stencil.stride > 0 && 
info.stencil.stride < 128 * 1024 && - info.stencil.stride % 128 == 0); - - dw1 = (info.stencil.stride - 1) << GEN6_STENCIL_DW1_PITCH__SHIFT; - if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) - dw1 |= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE; - - dw2 = info.stencil.offset; - dw4 = info.stencil.qpitch; - } else { - dw1 = 0; - dw2 = 0; - dw4 = 0; - } - - zs->payload[6] = dw1; - zs->payload[7] = dw2; - zs->payload[8] = dw4; - /* do not increment reference count */ - zs->separate_s8_bo = info.stencil.bo; - - /* hiz */ - if (info.hiz.bo) { - dw1 = (info.hiz.stride - 1) << GEN6_HIZ_DW1_PITCH__SHIFT; - dw2 = info.hiz.offset; - dw4 = info.hiz.qpitch; - } else { - dw1 = 0; - dw2 = 0; - dw4 = 0; - } - - zs->payload[9] = dw1; - zs->payload[10] = dw2; - zs->payload[11] = dw4; - /* do not increment reference count */ - zs->hiz_bo = info.hiz.bo; -} - -static void -viewport_get_guardband(const struct ilo_dev *dev, - int center_x, int center_y, - int *min_gbx, int *max_gbx, - int *min_gby, int *max_gby) -{ - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 234: - * - * "Per-Device Guardband Extents - * - * - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1] - * - Maximum Post-Clamp Delta (X or Y): 16K" - * - * "In addition, in order to be correctly rendered, objects must have a - * screenspace bounding box not exceeding 8K in the X or Y direction. - * This additional restriction must also be comprehended by software, - * i.e., enforced by use of clipping." - * - * From the Ivy Bridge PRM, volume 2 part 1, page 248: - * - * "Per-Device Guardband Extents - * - * - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1] - * - Maximum Post-Clamp Delta (X or Y): N/A" - * - * "In addition, in order to be correctly rendered, objects must have a - * screenspace bounding box not exceeding 8K in the X or Y direction. - * This additional restriction must also be comprehended by software, - * i.e., enforced by use of clipping." 
- * - * Combined, the bounding box of any object can not exceed 8K in both - * width and height. - * - * Below we set the guardband as a squre of length 8K, centered at where - * the viewport is. This makes sure all objects passing the GB test are - * valid to the renderer, and those failing the XY clipping have a - * better chance of passing the GB test. - */ - const int max_extent = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 32768 : 16384; - const int half_len = 8192 / 2; - - /* make sure the guardband is within the valid range */ - if (center_x - half_len < -max_extent) - center_x = -max_extent + half_len; - else if (center_x + half_len > max_extent - 1) - center_x = max_extent - half_len; - - if (center_y - half_len < -max_extent) - center_y = -max_extent + half_len; - else if (center_y + half_len > max_extent - 1) - center_y = max_extent - half_len; - - *min_gbx = (float) (center_x - half_len); - *max_gbx = (float) (center_x + half_len); - *min_gby = (float) (center_y - half_len); - *max_gby = (float) (center_y + half_len); -} - -void -ilo_gpe_set_viewport_cso(const struct ilo_dev *dev, - const struct pipe_viewport_state *state, - struct ilo_viewport_cso *vp) -{ - const float scale_x = fabs(state->scale[0]); - const float scale_y = fabs(state->scale[1]); - const float scale_z = fabs(state->scale[2]); - int min_gbx, max_gbx, min_gby, max_gby; - - ILO_DEV_ASSERT(dev, 6, 8); - - viewport_get_guardband(dev, - (int) state->translate[0], - (int) state->translate[1], - &min_gbx, &max_gbx, &min_gby, &max_gby); - - /* matrix form */ - vp->m00 = state->scale[0]; - vp->m11 = state->scale[1]; - vp->m22 = state->scale[2]; - vp->m30 = state->translate[0]; - vp->m31 = state->translate[1]; - vp->m32 = state->translate[2]; - - /* guardband in NDC space */ - vp->min_gbx = ((float) min_gbx - state->translate[0]) / scale_x; - vp->max_gbx = ((float) max_gbx - state->translate[0]) / scale_x; - vp->min_gby = ((float) min_gby - state->translate[1]) / scale_y; - vp->max_gby = ((float) max_gby 
- state->translate[1]) / scale_y; - - /* viewport in screen space */ - vp->min_x = scale_x * -1.0f + state->translate[0]; - vp->max_x = scale_x * 1.0f + state->translate[0]; - vp->min_y = scale_y * -1.0f + state->translate[1]; - vp->max_y = scale_y * 1.0f + state->translate[1]; - vp->min_z = scale_z * -1.0f + state->translate[2]; - vp->max_z = scale_z * 1.0f + state->translate[2]; -} - -/** - * Translate a pipe logicop to the matching hardware logicop. - */ -static int -gen6_translate_pipe_logicop(unsigned logicop) -{ - switch (logicop) { - case PIPE_LOGICOP_CLEAR: return GEN6_LOGICOP_CLEAR; - case PIPE_LOGICOP_NOR: return GEN6_LOGICOP_NOR; - case PIPE_LOGICOP_AND_INVERTED: return GEN6_LOGICOP_AND_INVERTED; - case PIPE_LOGICOP_COPY_INVERTED: return GEN6_LOGICOP_COPY_INVERTED; - case PIPE_LOGICOP_AND_REVERSE: return GEN6_LOGICOP_AND_REVERSE; - case PIPE_LOGICOP_INVERT: return GEN6_LOGICOP_INVERT; - case PIPE_LOGICOP_XOR: return GEN6_LOGICOP_XOR; - case PIPE_LOGICOP_NAND: return GEN6_LOGICOP_NAND; - case PIPE_LOGICOP_AND: return GEN6_LOGICOP_AND; - case PIPE_LOGICOP_EQUIV: return GEN6_LOGICOP_EQUIV; - case PIPE_LOGICOP_NOOP: return GEN6_LOGICOP_NOOP; - case PIPE_LOGICOP_OR_INVERTED: return GEN6_LOGICOP_OR_INVERTED; - case PIPE_LOGICOP_COPY: return GEN6_LOGICOP_COPY; - case PIPE_LOGICOP_OR_REVERSE: return GEN6_LOGICOP_OR_REVERSE; - case PIPE_LOGICOP_OR: return GEN6_LOGICOP_OR; - case PIPE_LOGICOP_SET: return GEN6_LOGICOP_SET; - default: - assert(!"unknown logicop function"); - return GEN6_LOGICOP_CLEAR; - } -} - -/** - * Translate a pipe blend function to the matching hardware blend function. 
- */ -static int -gen6_translate_pipe_blend(unsigned blend) -{ - switch (blend) { - case PIPE_BLEND_ADD: return GEN6_BLENDFUNCTION_ADD; - case PIPE_BLEND_SUBTRACT: return GEN6_BLENDFUNCTION_SUBTRACT; - case PIPE_BLEND_REVERSE_SUBTRACT: return GEN6_BLENDFUNCTION_REVERSE_SUBTRACT; - case PIPE_BLEND_MIN: return GEN6_BLENDFUNCTION_MIN; - case PIPE_BLEND_MAX: return GEN6_BLENDFUNCTION_MAX; - default: - assert(!"unknown blend function"); - return GEN6_BLENDFUNCTION_ADD; - }; -} - -/** - * Translate a pipe blend factor to the matching hardware blend factor. - */ -static int -gen6_translate_pipe_blendfactor(unsigned blendfactor) -{ - switch (blendfactor) { - case PIPE_BLENDFACTOR_ONE: return GEN6_BLENDFACTOR_ONE; - case PIPE_BLENDFACTOR_SRC_COLOR: return GEN6_BLENDFACTOR_SRC_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA: return GEN6_BLENDFACTOR_SRC_ALPHA; - case PIPE_BLENDFACTOR_DST_ALPHA: return GEN6_BLENDFACTOR_DST_ALPHA; - case PIPE_BLENDFACTOR_DST_COLOR: return GEN6_BLENDFACTOR_DST_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE; - case PIPE_BLENDFACTOR_CONST_COLOR: return GEN6_BLENDFACTOR_CONST_COLOR; - case PIPE_BLENDFACTOR_CONST_ALPHA: return GEN6_BLENDFACTOR_CONST_ALPHA; - case PIPE_BLENDFACTOR_SRC1_COLOR: return GEN6_BLENDFACTOR_SRC1_COLOR; - case PIPE_BLENDFACTOR_SRC1_ALPHA: return GEN6_BLENDFACTOR_SRC1_ALPHA; - case PIPE_BLENDFACTOR_ZERO: return GEN6_BLENDFACTOR_ZERO; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: return GEN6_BLENDFACTOR_INV_SRC_COLOR; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return GEN6_BLENDFACTOR_INV_SRC_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: return GEN6_BLENDFACTOR_INV_DST_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_COLOR: return GEN6_BLENDFACTOR_INV_DST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: return GEN6_BLENDFACTOR_INV_CONST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return GEN6_BLENDFACTOR_INV_CONST_ALPHA; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return GEN6_BLENDFACTOR_INV_SRC1_COLOR; - case 
PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return GEN6_BLENDFACTOR_INV_SRC1_ALPHA; - default: - assert(!"unknown blend factor"); - return GEN6_BLENDFACTOR_ONE; - }; -} - -/** - * Translate a pipe stencil op to the matching hardware stencil op. - */ -static int -gen6_translate_pipe_stencil_op(unsigned stencil_op) -{ - switch (stencil_op) { - case PIPE_STENCIL_OP_KEEP: return GEN6_STENCILOP_KEEP; - case PIPE_STENCIL_OP_ZERO: return GEN6_STENCILOP_ZERO; - case PIPE_STENCIL_OP_REPLACE: return GEN6_STENCILOP_REPLACE; - case PIPE_STENCIL_OP_INCR: return GEN6_STENCILOP_INCRSAT; - case PIPE_STENCIL_OP_DECR: return GEN6_STENCILOP_DECRSAT; - case PIPE_STENCIL_OP_INCR_WRAP: return GEN6_STENCILOP_INCR; - case PIPE_STENCIL_OP_DECR_WRAP: return GEN6_STENCILOP_DECR; - case PIPE_STENCIL_OP_INVERT: return GEN6_STENCILOP_INVERT; - default: - assert(!"unknown stencil op"); - return GEN6_STENCILOP_KEEP; - } -} - -static int -gen6_blend_factor_dst_alpha_forced_one(int factor) -{ - switch (factor) { - case GEN6_BLENDFACTOR_DST_ALPHA: - return GEN6_BLENDFACTOR_ONE; - case GEN6_BLENDFACTOR_INV_DST_ALPHA: - case GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE: - return GEN6_BLENDFACTOR_ZERO; - default: - return factor; - } -} - -static uint32_t -blend_get_rt_blend_enable_gen6(const struct ilo_dev *dev, - const struct pipe_rt_blend_state *rt, - bool dst_alpha_forced_one) -{ - int rgb_src, rgb_dst, a_src, a_dst; - uint32_t dw; - - ILO_DEV_ASSERT(dev, 6, 7.5); - - if (!rt->blend_enable) - return 0; - - rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor); - rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor); - a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor); - a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor); - - if (dst_alpha_forced_one) { - rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src); - rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst); - a_src = gen6_blend_factor_dst_alpha_forced_one(a_src); - a_dst = 
gen6_blend_factor_dst_alpha_forced_one(a_dst); - } - - dw = GEN6_RT_DW0_BLEND_ENABLE | - gen6_translate_pipe_blend(rt->alpha_func) << 26 | - a_src << 20 | - a_dst << 15 | - gen6_translate_pipe_blend(rt->rgb_func) << 11 | - rgb_src << 5 | - rgb_dst; - - if (rt->rgb_func != rt->alpha_func || - rgb_src != a_src || rgb_dst != a_dst) - dw |= GEN6_RT_DW0_INDEPENDENT_ALPHA_ENABLE; - - return dw; -} - -static uint32_t -blend_get_rt_blend_enable_gen8(const struct ilo_dev *dev, - const struct pipe_rt_blend_state *rt, - bool dst_alpha_forced_one, - bool *independent_alpha) -{ - int rgb_src, rgb_dst, a_src, a_dst; - uint32_t dw; - - ILO_DEV_ASSERT(dev, 8, 8); - - if (!rt->blend_enable) { - *independent_alpha = false; - return 0; - } - - rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor); - rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor); - a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor); - a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor); - - if (dst_alpha_forced_one) { - rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src); - rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst); - a_src = gen6_blend_factor_dst_alpha_forced_one(a_src); - a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst); - } - - dw = GEN8_RT_DW0_BLEND_ENABLE | - rgb_src << 26 | - rgb_dst << 21 | - gen6_translate_pipe_blend(rt->rgb_func) << 18 | - a_src << 13 | - a_dst << 8 | - gen6_translate_pipe_blend(rt->alpha_func) << 5; - - *independent_alpha = (rt->rgb_func != rt->alpha_func || - rgb_src != a_src || - rgb_dst != a_dst); - - return dw; -} - -static void -blend_init_cso_gen6(const struct ilo_dev *dev, - const struct pipe_blend_state *state, - struct ilo_blend_state *blend, - unsigned index) -{ - const struct pipe_rt_blend_state *rt = &state->rt[index]; - struct ilo_blend_cso *cso = &blend->cso[index]; - - ILO_DEV_ASSERT(dev, 6, 7.5); - - cso->payload[0] = 0; - cso->payload[1] = GEN6_RT_DW1_COLORCLAMP_RTFORMAT | - GEN6_RT_DW1_PRE_BLEND_CLAMP | 
- GEN6_RT_DW1_POST_BLEND_CLAMP; - - if (!(rt->colormask & PIPE_MASK_A)) - cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_A; - if (!(rt->colormask & PIPE_MASK_R)) - cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_R; - if (!(rt->colormask & PIPE_MASK_G)) - cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_G; - if (!(rt->colormask & PIPE_MASK_B)) - cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_B; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 365: - * - * "Color Buffer Blending and Logic Ops must not be enabled - * simultaneously, or behavior is UNDEFINED." - * - * Since state->logicop_enable takes precedence over rt->blend_enable, - * no special care is needed. - */ - if (state->logicop_enable) { - cso->dw_blend = 0; - cso->dw_blend_dst_alpha_forced_one = 0; - } else { - cso->dw_blend = blend_get_rt_blend_enable_gen6(dev, rt, false); - cso->dw_blend_dst_alpha_forced_one = - blend_get_rt_blend_enable_gen6(dev, rt, true); - } -} - -static bool -blend_init_cso_gen8(const struct ilo_dev *dev, - const struct pipe_blend_state *state, - struct ilo_blend_state *blend, - unsigned index) -{ - const struct pipe_rt_blend_state *rt = &state->rt[index]; - struct ilo_blend_cso *cso = &blend->cso[index]; - bool independent_alpha = false; - - ILO_DEV_ASSERT(dev, 8, 8); - - cso->payload[0] = 0; - cso->payload[1] = GEN8_RT_DW1_COLORCLAMP_RTFORMAT | - GEN8_RT_DW1_PRE_BLEND_CLAMP | - GEN8_RT_DW1_POST_BLEND_CLAMP; - - if (!(rt->colormask & PIPE_MASK_A)) - cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_A; - if (!(rt->colormask & PIPE_MASK_R)) - cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_R; - if (!(rt->colormask & PIPE_MASK_G)) - cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_G; - if (!(rt->colormask & PIPE_MASK_B)) - cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_B; - - if (state->logicop_enable) { - cso->dw_blend = 0; - cso->dw_blend_dst_alpha_forced_one = 0; - } else { - bool tmp[2]; - - cso->dw_blend = blend_get_rt_blend_enable_gen8(dev, rt, false, &tmp[0]); - 
cso->dw_blend_dst_alpha_forced_one = - blend_get_rt_blend_enable_gen8(dev, rt, true, &tmp[1]); - - if (tmp[0] || tmp[1]) - independent_alpha = true; - } - - return independent_alpha; -} - -static uint32_t -blend_get_logicop_enable_gen6(const struct ilo_dev *dev, - const struct pipe_blend_state *state) -{ - ILO_DEV_ASSERT(dev, 6, 7.5); - - if (!state->logicop_enable) - return 0; - - return GEN6_RT_DW1_LOGICOP_ENABLE | - gen6_translate_pipe_logicop(state->logicop_func) << 18; -} - -static uint32_t -blend_get_logicop_enable_gen8(const struct ilo_dev *dev, - const struct pipe_blend_state *state) -{ - ILO_DEV_ASSERT(dev, 8, 8); - - if (!state->logicop_enable) - return 0; - - return GEN8_RT_DW1_LOGICOP_ENABLE | - gen6_translate_pipe_logicop(state->logicop_func) << 27; -} - -static uint32_t -blend_get_alpha_mod_gen6(const struct ilo_dev *dev, - const struct pipe_blend_state *state, - bool dual_blend) -{ - uint32_t dw = 0; - - ILO_DEV_ASSERT(dev, 6, 7.5); - - if (state->alpha_to_coverage) { - dw |= GEN6_RT_DW1_ALPHA_TO_COVERAGE; - if (ilo_dev_gen(dev) >= ILO_GEN(7)) - dw |= GEN6_RT_DW1_ALPHA_TO_COVERAGE_DITHER; - } - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 378: - * - * "If Dual Source Blending is enabled, this bit (AlphaToOne Enable) - * must be disabled." 
- */ - if (state->alpha_to_one && !dual_blend) - dw |= GEN6_RT_DW1_ALPHA_TO_ONE; - - return dw; -} - -static uint32_t -blend_get_alpha_mod_gen8(const struct ilo_dev *dev, - const struct pipe_blend_state *state, - bool dual_blend) -{ - uint32_t dw = 0; - - ILO_DEV_ASSERT(dev, 8, 8); - - if (state->alpha_to_coverage) { - dw |= GEN8_BLEND_DW0_ALPHA_TO_COVERAGE | - GEN8_BLEND_DW0_ALPHA_TO_COVERAGE_DITHER; - } - - if (state->alpha_to_one && !dual_blend) - dw |= GEN8_BLEND_DW0_ALPHA_TO_ONE; - - return dw; -} - -static uint32_t -blend_get_ps_blend_gen8(const struct ilo_dev *dev, uint32_t rt_dw0) -{ - int rgb_src, rgb_dst, a_src, a_dst; - uint32_t dw; - - ILO_DEV_ASSERT(dev, 8, 8); - - if (!(rt_dw0 & GEN8_RT_DW0_BLEND_ENABLE)) - return 0; - - a_src = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_ALPHA_FACTOR); - a_dst = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_ALPHA_FACTOR); - rgb_src = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_COLOR_FACTOR); - rgb_dst = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_COLOR_FACTOR); - - dw = GEN8_PS_BLEND_DW1_BLEND_ENABLE; - dw |= GEN_SHIFT32(a_src, GEN8_PS_BLEND_DW1_SRC_ALPHA_FACTOR); - dw |= GEN_SHIFT32(a_dst, GEN8_PS_BLEND_DW1_DST_ALPHA_FACTOR); - dw |= GEN_SHIFT32(rgb_src, GEN8_PS_BLEND_DW1_SRC_COLOR_FACTOR); - dw |= GEN_SHIFT32(rgb_dst, GEN8_PS_BLEND_DW1_DST_COLOR_FACTOR); - - if (a_src != rgb_src || a_dst != rgb_dst) - dw |= GEN8_PS_BLEND_DW1_INDEPENDENT_ALPHA_ENABLE; - - return dw; -} - -void -ilo_gpe_init_blend(const struct ilo_dev *dev, - const struct pipe_blend_state *state, - struct ilo_blend_state *blend) -{ - unsigned i; - - ILO_DEV_ASSERT(dev, 6, 8); - - blend->dual_blend = (util_blend_state_is_dual(state, 0) && - state->rt[0].blend_enable && - !state->logicop_enable); - blend->alpha_to_coverage = state->alpha_to_coverage; - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - bool independent_alpha; - - blend->dw_alpha_mod = - blend_get_alpha_mod_gen8(dev, state, blend->dual_blend); - blend->dw_logicop = blend_get_logicop_enable_gen8(dev, state); - blend->dw_shared = 
(state->dither) ? GEN8_BLEND_DW0_DITHER_ENABLE : 0; - - independent_alpha = blend_init_cso_gen8(dev, state, blend, 0); - if (independent_alpha) - blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE; - - blend->dw_ps_blend = blend_get_ps_blend_gen8(dev, - blend->cso[0].dw_blend); - blend->dw_ps_blend_dst_alpha_forced_one = blend_get_ps_blend_gen8(dev, - blend->cso[0].dw_blend_dst_alpha_forced_one); - - if (state->independent_blend_enable) { - for (i = 1; i < Elements(blend->cso); i++) { - independent_alpha = blend_init_cso_gen8(dev, state, blend, i); - if (independent_alpha) - blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE; - } - } else { - for (i = 1; i < Elements(blend->cso); i++) - blend->cso[i] = blend->cso[0]; - } - } else { - blend->dw_alpha_mod = - blend_get_alpha_mod_gen6(dev, state, blend->dual_blend); - blend->dw_logicop = blend_get_logicop_enable_gen6(dev, state); - blend->dw_shared = (state->dither) ? GEN6_RT_DW1_DITHER_ENABLE : 0; - - blend->dw_ps_blend = 0; - blend->dw_ps_blend_dst_alpha_forced_one = 0; - - blend_init_cso_gen6(dev, state, blend, 0); - if (state->independent_blend_enable) { - for (i = 1; i < Elements(blend->cso); i++) - blend_init_cso_gen6(dev, state, blend, i); - } else { - for (i = 1; i < Elements(blend->cso); i++) - blend->cso[i] = blend->cso[0]; - } - } -} - -/** - * Translate a pipe DSA test function to the matching hardware compare - * function. 
- */ -static int -gen6_translate_dsa_func(unsigned func) -{ - switch (func) { - case PIPE_FUNC_NEVER: return GEN6_COMPAREFUNCTION_NEVER; - case PIPE_FUNC_LESS: return GEN6_COMPAREFUNCTION_LESS; - case PIPE_FUNC_EQUAL: return GEN6_COMPAREFUNCTION_EQUAL; - case PIPE_FUNC_LEQUAL: return GEN6_COMPAREFUNCTION_LEQUAL; - case PIPE_FUNC_GREATER: return GEN6_COMPAREFUNCTION_GREATER; - case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL; - case PIPE_FUNC_GEQUAL: return GEN6_COMPAREFUNCTION_GEQUAL; - case PIPE_FUNC_ALWAYS: return GEN6_COMPAREFUNCTION_ALWAYS; - default: - assert(!"unknown depth/stencil/alpha test function"); - return GEN6_COMPAREFUNCTION_NEVER; - } -} - -static uint32_t -dsa_get_stencil_enable_gen6(const struct ilo_dev *dev, - const struct pipe_stencil_state *stencil0, - const struct pipe_stencil_state *stencil1) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 6, 7.5); - - if (!stencil0->enabled) - return 0; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 359: - * - * "If the Depth Buffer is either undefined or does not have a surface - * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate - * stencil buffer is disabled, Stencil Test Enable must be DISABLED" - * - * From the Sandy Bridge PRM, volume 2 part 1, page 370: - * - * "This field (Stencil Test Enable) cannot be enabled if - * Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM." - * - * TODO We do not check these yet. 
- */ - dw = GEN6_ZS_DW0_STENCIL_TEST_ENABLE | - gen6_translate_dsa_func(stencil0->func) << 28 | - gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 | - gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 | - gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19; - if (stencil0->writemask) - dw |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE; - - if (stencil1->enabled) { - dw |= GEN6_ZS_DW0_STENCIL1_ENABLE | - gen6_translate_dsa_func(stencil1->func) << 12 | - gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 | - gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 | - gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3; - if (stencil1->writemask) - dw |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE; - } - - return dw; -} - -static uint32_t -dsa_get_stencil_enable_gen8(const struct ilo_dev *dev, - const struct pipe_stencil_state *stencil0, - const struct pipe_stencil_state *stencil1) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 8, 8); - - if (!stencil0->enabled) - return 0; - - dw = gen6_translate_pipe_stencil_op(stencil0->fail_op) << 29 | - gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 26 | - gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 23 | - gen6_translate_dsa_func(stencil0->func) << 8 | - GEN8_ZS_DW1_STENCIL_TEST_ENABLE; - if (stencil0->writemask) - dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE; - - if (stencil1->enabled) { - dw |= gen6_translate_dsa_func(stencil1->func) << 20 | - gen6_translate_pipe_stencil_op(stencil1->fail_op) << 17 | - gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 14 | - gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 11 | - GEN8_ZS_DW1_STENCIL1_ENABLE; - if (stencil1->writemask) - dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE; - } - - return dw; -} - -static uint32_t -dsa_get_depth_enable_gen6(const struct ilo_dev *dev, - const struct pipe_depth_state *state) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 360: - * - * "Enabling the Depth Test function without 
defining a Depth Buffer is - * UNDEFINED." - * - * From the Sandy Bridge PRM, volume 2 part 1, page 375: - * - * "A Depth Buffer must be defined before enabling writes to it, or - * operation is UNDEFINED." - * - * TODO We do not check these yet. - */ - if (state->enabled) { - dw = GEN6_ZS_DW2_DEPTH_TEST_ENABLE | - gen6_translate_dsa_func(state->func) << 27; - } else { - dw = GEN6_COMPAREFUNCTION_ALWAYS << 27; - } - - if (state->writemask) - dw |= GEN6_ZS_DW2_DEPTH_WRITE_ENABLE; - - return dw; -} - -static uint32_t -dsa_get_depth_enable_gen8(const struct ilo_dev *dev, - const struct pipe_depth_state *state) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 8, 8); - - if (state->enabled) { - dw = GEN8_ZS_DW1_DEPTH_TEST_ENABLE | - gen6_translate_dsa_func(state->func) << 5; - } else { - dw = GEN6_COMPAREFUNCTION_ALWAYS << 5; - } - - if (state->writemask) - dw |= GEN8_ZS_DW1_DEPTH_WRITE_ENABLE; - - return dw; -} - -static uint32_t -dsa_get_alpha_enable_gen6(const struct ilo_dev *dev, - const struct pipe_alpha_state *state) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 6, 7.5); - - if (!state->enabled) - return 0; - - /* this will be ORed to BLEND_STATE */ - dw = GEN6_RT_DW1_ALPHA_TEST_ENABLE | - gen6_translate_dsa_func(state->func) << 13; - - return dw; -} - -static uint32_t -dsa_get_alpha_enable_gen8(const struct ilo_dev *dev, - const struct pipe_alpha_state *state) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 8, 8); - - if (!state->enabled) - return 0; - - /* this will be ORed to BLEND_STATE */ - dw = GEN8_BLEND_DW0_ALPHA_TEST_ENABLE | - gen6_translate_dsa_func(state->func) << 24; - - return dw; -} - -void -ilo_gpe_init_dsa(const struct ilo_dev *dev, - const struct pipe_depth_stencil_alpha_state *state, - struct ilo_dsa_state *dsa) -{ - ILO_DEV_ASSERT(dev, 6, 8); - - STATIC_ASSERT(Elements(dsa->payload) >= 3); - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - const uint32_t dw_stencil = dsa_get_stencil_enable_gen8(dev, - &state->stencil[0], &state->stencil[1]); - const uint32_t dw_depth 
= dsa_get_depth_enable_gen8(dev, &state->depth); - - assert(!(dw_stencil & dw_depth)); - dsa->payload[0] = dw_stencil | dw_depth; - - dsa->dw_blend_alpha = dsa_get_alpha_enable_gen8(dev, &state->alpha); - dsa->dw_ps_blend_alpha = (state->alpha.enabled) ? - GEN8_PS_BLEND_DW1_ALPHA_TEST_ENABLE : 0; - } else { - dsa->payload[0] = dsa_get_stencil_enable_gen6(dev, - &state->stencil[0], &state->stencil[1]); - dsa->payload[2] = dsa_get_depth_enable_gen6(dev, &state->depth); - - dsa->dw_blend_alpha = dsa_get_alpha_enable_gen6(dev, &state->alpha); - dsa->dw_ps_blend_alpha = 0; - } - - dsa->payload[1] = state->stencil[0].valuemask << 24 | - state->stencil[0].writemask << 16 | - state->stencil[1].valuemask << 8 | - state->stencil[1].writemask; - - dsa->alpha_ref = float_to_ubyte(state->alpha.ref_value); -} - -void -ilo_gpe_set_scissor(const struct ilo_dev *dev, - unsigned start_slot, - unsigned num_states, - const struct pipe_scissor_state *states, - struct ilo_scissor_state *scissor) -{ - unsigned i; - - ILO_DEV_ASSERT(dev, 6, 8); - - for (i = 0; i < num_states; i++) { - uint16_t min_x, min_y, max_x, max_y; - - /* both max and min are inclusive in SCISSOR_RECT */ - if (states[i].minx < states[i].maxx && - states[i].miny < states[i].maxy) { - min_x = states[i].minx; - min_y = states[i].miny; - max_x = states[i].maxx - 1; - max_y = states[i].maxy - 1; - } - else { - /* we have to make min greater than max */ - min_x = 1; - min_y = 1; - max_x = 0; - max_y = 0; - } - - scissor->payload[(start_slot + i) * 2 + 0] = min_y << 16 | min_x; - scissor->payload[(start_slot + i) * 2 + 1] = max_y << 16 | max_x; - } - - if (!start_slot && num_states) - scissor->scissor0 = states[0]; -} - -void -ilo_gpe_set_scissor_null(const struct ilo_dev *dev, - struct ilo_scissor_state *scissor) -{ - unsigned i; - - for (i = 0; i < Elements(scissor->payload); i += 2) { - scissor->payload[i + 0] = 1 << 16 | 1; - scissor->payload[i + 1] = 0; - } -} - -static void -fb_set_blend_caps(const struct ilo_dev 
*dev, - enum pipe_format format, - struct ilo_fb_blend_caps *caps) -{ - const struct util_format_description *desc = - util_format_description(format); - const int ch = util_format_get_first_non_void_channel(format); - - memset(caps, 0, sizeof(*caps)); - - if (format == PIPE_FORMAT_NONE || desc->is_mixed) - return; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 365: - * - * "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB - * variants), otherwise Logic Ops must be DISABLED." - * - * According to the classic driver, this is lifted on Gen8+. - */ - if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - caps->can_logicop = true; - } else { - caps->can_logicop = (ch >= 0 && desc->channel[ch].normalized && - desc->channel[ch].type == UTIL_FORMAT_TYPE_UNSIGNED && - desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB); - } - - /* no blending for pure integer formats */ - caps->can_blend = !util_format_is_pure_integer(format); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 382: - * - * "Alpha Test can only be enabled if Pixel Shader outputs a float - * alpha value." - */ - caps->can_alpha_test = !util_format_is_pure_integer(format); - - caps->dst_alpha_forced_one = - (ilo_format_translate_render(dev, format) != - ilo_format_translate_color(dev, format)); - - /* sanity check */ - if (caps->dst_alpha_forced_one) { - enum pipe_format render_format; - - switch (format) { - case PIPE_FORMAT_B8G8R8X8_UNORM: - render_format = PIPE_FORMAT_B8G8R8A8_UNORM; - break; - default: - render_format = PIPE_FORMAT_NONE; - break; - } - - assert(ilo_format_translate_render(dev, format) == - ilo_format_translate_color(dev, render_format)); - } -} - -void -ilo_gpe_set_fb(const struct ilo_dev *dev, - const struct pipe_framebuffer_state *state, - struct ilo_fb_state *fb) -{ - const struct pipe_surface *first_surf = NULL; - int i; - - ILO_DEV_ASSERT(dev, 6, 8); - - util_copy_framebuffer_state(&fb->state, state); - - ilo_gpe_init_view_surface_null(dev, - (state->width) ? 
state->width : 1, - (state->height) ? state->height : 1, - 1, 0, &fb->null_rt); - - for (i = 0; i < state->nr_cbufs; i++) { - if (state->cbufs[i]) { - fb_set_blend_caps(dev, state->cbufs[i]->format, &fb->blend_caps[i]); - - if (!first_surf) - first_surf = state->cbufs[i]; - } else { - fb_set_blend_caps(dev, PIPE_FORMAT_NONE, &fb->blend_caps[i]); - } - } - - if (!first_surf && state->zsbuf) - first_surf = state->zsbuf; - - fb->num_samples = (first_surf) ? first_surf->texture->nr_samples : 1; - if (!fb->num_samples) - fb->num_samples = 1; - - /* - * The PRMs list several restrictions when the framebuffer has more than - * one surface. It seems they are actually lifted on GEN6+. - */ -} diff --git a/src/gallium/drivers/ilo/ilo_state_3d_top.c b/src/gallium/drivers/ilo/ilo_state_3d_top.c deleted file mode 100644 index f022972414e..00000000000 --- a/src/gallium/drivers/ilo/ilo_state_3d_top.c +++ /dev/null @@ -1,1713 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright (C) 2012-2014 LunarG, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Chia-I Wu - */ - -#include "genhw/genhw.h" -#include "core/ilo_format.h" -#include "util/u_dual_blend.h" -#include "util/u_framebuffer.h" -#include "util/u_half.h" -#include "util/u_resource.h" - -#include "ilo_context.h" -#include "ilo_resource.h" -#include "ilo_shader.h" -#include "ilo_state.h" -#include "ilo_state_3d.h" - -static void -ve_init_cso(const struct ilo_dev *dev, - const struct pipe_vertex_element *state, - unsigned vb_index, - struct ilo_ve_cso *cso) -{ - int comp[4] = { - GEN6_VFCOMP_STORE_SRC, - GEN6_VFCOMP_STORE_SRC, - GEN6_VFCOMP_STORE_SRC, - GEN6_VFCOMP_STORE_SRC, - }; - int format; - - ILO_DEV_ASSERT(dev, 6, 8); - - switch (util_format_get_nr_components(state->src_format)) { - case 1: comp[1] = GEN6_VFCOMP_STORE_0; - case 2: comp[2] = GEN6_VFCOMP_STORE_0; - case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ? 
- GEN6_VFCOMP_STORE_1_INT : - GEN6_VFCOMP_STORE_1_FP; - } - - format = ilo_format_translate_vertex(dev, state->src_format); - - STATIC_ASSERT(Elements(cso->payload) >= 2); - cso->payload[0] = - vb_index << GEN6_VE_DW0_VB_INDEX__SHIFT | - GEN6_VE_DW0_VALID | - format << GEN6_VE_DW0_FORMAT__SHIFT | - state->src_offset << GEN6_VE_DW0_VB_OFFSET__SHIFT; - - cso->payload[1] = - comp[0] << GEN6_VE_DW1_COMP0__SHIFT | - comp[1] << GEN6_VE_DW1_COMP1__SHIFT | - comp[2] << GEN6_VE_DW1_COMP2__SHIFT | - comp[3] << GEN6_VE_DW1_COMP3__SHIFT; -} - -void -ilo_gpe_init_ve(const struct ilo_dev *dev, - unsigned num_states, - const struct pipe_vertex_element *states, - struct ilo_ve_state *ve) -{ - unsigned i; - - ILO_DEV_ASSERT(dev, 6, 8); - - ve->count = num_states; - ve->vb_count = 0; - - for (i = 0; i < num_states; i++) { - const unsigned pipe_idx = states[i].vertex_buffer_index; - const unsigned instance_divisor = states[i].instance_divisor; - unsigned hw_idx; - - /* - * map the pipe vb to the hardware vb, which has a fixed instance - * divisor - */ - for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { - if (ve->vb_mapping[hw_idx] == pipe_idx && - ve->instance_divisors[hw_idx] == instance_divisor) - break; - } - - /* create one if there is no matching hardware vb */ - if (hw_idx >= ve->vb_count) { - hw_idx = ve->vb_count++; - - ve->vb_mapping[hw_idx] = pipe_idx; - ve->instance_divisors[hw_idx] = instance_divisor; - } - - ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]); - } -} - -void -ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev, - struct ilo_ve_cso *cso) -{ - int format; - - ILO_DEV_ASSERT(dev, 6, 8); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 94: - * - * "- This bit (Edge Flag Enable) must only be ENABLED on the last - * valid VERTEX_ELEMENT structure. - * - * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC, - * and Component 1-3 Control must be set to VFCOMP_NOSTORE. - * - * - The Source Element Format must be set to the UINT format. 
- * - * - [DevSNB]: Edge Flags are not supported for QUADLIST - * primitives. Software may elect to convert QUADLIST primitives - * to some set of corresponding edge-flag-supported primitive - * types (e.g., POLYGONs) prior to submission to the 3D pipeline." - */ - cso->payload[0] |= GEN6_VE_DW0_EDGE_FLAG_ENABLE; - - /* - * Edge flags have format GEN6_FORMAT_R8_USCALED when defined via - * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined - * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h. - * - * Since all the hardware cares about is whether the flags are zero or not, - * we can treat them as the corresponding _UINT formats. - */ - format = GEN_EXTRACT(cso->payload[0], GEN6_VE_DW0_FORMAT); - cso->payload[0] &= ~GEN6_VE_DW0_FORMAT__MASK; - - switch (format) { - case GEN6_FORMAT_R32_FLOAT: - format = GEN6_FORMAT_R32_UINT; - break; - case GEN6_FORMAT_R8_USCALED: - format = GEN6_FORMAT_R8_UINT; - break; - default: - break; - } - - cso->payload[0] |= GEN_SHIFT32(format, GEN6_VE_DW0_FORMAT); - - cso->payload[1] = - GEN6_VFCOMP_STORE_SRC << GEN6_VE_DW1_COMP0__SHIFT | - GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP1__SHIFT | - GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP2__SHIFT | - GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP3__SHIFT; -} - -void -ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev, - int comp0, int comp1, int comp2, int comp3, - struct ilo_ve_cso *cso) -{ - ILO_DEV_ASSERT(dev, 6, 8); - - STATIC_ASSERT(Elements(cso->payload) >= 2); - - assert(comp0 != GEN6_VFCOMP_STORE_SRC && - comp1 != GEN6_VFCOMP_STORE_SRC && - comp2 != GEN6_VFCOMP_STORE_SRC && - comp3 != GEN6_VFCOMP_STORE_SRC); - - cso->payload[0] = GEN6_VE_DW0_VALID; - cso->payload[1] = - comp0 << GEN6_VE_DW1_COMP0__SHIFT | - comp1 << GEN6_VE_DW1_COMP1__SHIFT | - comp2 << GEN6_VE_DW1_COMP2__SHIFT | - comp3 << GEN6_VE_DW1_COMP3__SHIFT; -} - -void -ilo_gpe_init_vs_cso(const struct ilo_dev *dev, - const struct ilo_shader_state *vs, - struct ilo_shader_cso *cso) -{ - int start_grf, vue_read_len, 
sampler_count, max_threads; - uint32_t dw2, dw4, dw5; - - ILO_DEV_ASSERT(dev, 6, 8); - - start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG); - vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT); - sampler_count = ilo_shader_get_kernel_param(vs, ILO_KERNEL_SAMPLER_COUNT); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 135: - * - * "(Vertex URB Entry Read Length) Specifies the number of pairs of - * 128-bit vertex elements to be passed into the payload for each - * vertex." - * - * "It is UNDEFINED to set this field to 0 indicating no Vertex URB - * data to be read and passed to the thread." - */ - vue_read_len = (vue_read_len + 1) / 2; - if (!vue_read_len) - vue_read_len = 1; - - max_threads = dev->thread_count; - if (ilo_dev_gen(dev) == ILO_GEN(7.5) && dev->gt == 2) - max_threads *= 2; - - dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; - dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - dw4 = start_grf << GEN6_VS_DW4_URB_GRF_START__SHIFT | - vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT | - 0 << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT; - - dw5 = GEN6_VS_DW5_STATISTICS | - GEN6_VS_DW5_VS_ENABLE; - - if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) - dw5 |= (max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT; - else - dw5 |= (max_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT; - - STATIC_ASSERT(Elements(cso->payload) >= 3); - cso->payload[0] = dw2; - cso->payload[1] = dw4; - cso->payload[2] = dw5; -} - -static void -gs_init_cso_gen6(const struct ilo_dev *dev, - const struct ilo_shader_state *gs, - struct ilo_shader_cso *cso) -{ - int start_grf, vue_read_len, max_threads; - uint32_t dw2, dw4, dw5, dw6; - - ILO_DEV_ASSERT(dev, 6, 6); - - if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) { - start_grf = ilo_shader_get_kernel_param(gs, - ILO_KERNEL_URB_DATA_START_REG); - - vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT); - } - else { - start_grf = 
ilo_shader_get_kernel_param(gs, - ILO_KERNEL_VS_GEN6_SO_START_REG); - - vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT); - } - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 153: - * - * "Specifies the amount of URB data read and passed in the thread - * payload for each Vertex URB entry, in 256-bit register increments. - * - * It is UNDEFINED to set this field (Vertex URB Entry Read Length) to - * 0 indicating no Vertex URB data to be read and passed to the - * thread." - */ - vue_read_len = (vue_read_len + 1) / 2; - if (!vue_read_len) - vue_read_len = 1; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 154: - * - * "Maximum Number of Threads valid range is [0,27] when Rendering - * Enabled bit is set." - * - * From the Sandy Bridge PRM, volume 2 part 1, page 173: - * - * "Programming Note: If the GS stage is enabled, software must always - * allocate at least one GS URB Entry. This is true even if the GS - * thread never needs to output vertices to the pipeline, e.g., when - * only performing stream output. This is an artifact of the need to - * pass the GS thread an initial destination URB handle." - * - * As such, we always enable rendering, and limit the number of threads. 
- */ - if (dev->gt == 2) { - /* maximum is 60, but limited to 28 */ - max_threads = 28; - } - else { - /* maximum is 24, but limited to 21 (see brwCreateContext()) */ - max_threads = 21; - } - - dw2 = GEN6_THREADDISP_SPF; - - dw4 = vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT | - 0 << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT | - start_grf << GEN6_GS_DW4_URB_GRF_START__SHIFT; - - dw5 = (max_threads - 1) << GEN6_GS_DW5_MAX_THREADS__SHIFT | - GEN6_GS_DW5_STATISTICS | - GEN6_GS_DW5_SO_STATISTICS | - GEN6_GS_DW5_RENDER_ENABLE; - - /* - * we cannot make use of GEN6_GS_REORDER because it will reorder - * triangle strips according to D3D rules (triangle 2N+1 uses vertices - * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices - * (2N+2, 2N+1, 2N+3)). - */ - dw6 = GEN6_GS_DW6_GS_ENABLE; - - if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY)) - dw6 |= GEN6_GS_DW6_DISCARD_ADJACENCY; - - if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) { - const uint32_t svbi_post_inc = - ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC); - - dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE; - if (svbi_post_inc) { - dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE | - svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT; - } - } - - STATIC_ASSERT(Elements(cso->payload) >= 4); - cso->payload[0] = dw2; - cso->payload[1] = dw4; - cso->payload[2] = dw5; - cso->payload[3] = dw6; -} - -static void -gs_init_cso_gen7(const struct ilo_dev *dev, - const struct ilo_shader_state *gs, - struct ilo_shader_cso *cso) -{ - int start_grf, vue_read_len, sampler_count, max_threads; - uint32_t dw2, dw4, dw5; - - ILO_DEV_ASSERT(dev, 7, 7.5); - - start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG); - vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT); - sampler_count = ilo_shader_get_kernel_param(gs, ILO_KERNEL_SAMPLER_COUNT); - - /* in pairs */ - vue_read_len = (vue_read_len + 1) / 2; - - switch (ilo_dev_gen(dev)) { - case 
ILO_GEN(7.5): - max_threads = (dev->gt >= 2) ? 256 : 70; - break; - case ILO_GEN(7): - max_threads = (dev->gt == 2) ? 128 : 36; - break; - default: - max_threads = 1; - break; - } - - dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; - dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - dw4 = vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT | - GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES | - 0 << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT | - start_grf << GEN7_GS_DW4_URB_GRF_START__SHIFT; - - dw5 = (max_threads - 1) << GEN7_GS_DW5_MAX_THREADS__SHIFT | - GEN7_GS_DW5_STATISTICS | - GEN7_GS_DW5_GS_ENABLE; - - STATIC_ASSERT(Elements(cso->payload) >= 3); - cso->payload[0] = dw2; - cso->payload[1] = dw4; - cso->payload[2] = dw5; -} - -void -ilo_gpe_init_gs_cso(const struct ilo_dev *dev, - const struct ilo_shader_state *gs, - struct ilo_shader_cso *cso) -{ - if (ilo_dev_gen(dev) >= ILO_GEN(7)) - gs_init_cso_gen7(dev, gs, cso); - else - gs_init_cso_gen6(dev, gs, cso); -} - -static void -view_init_null_gen6(const struct ilo_dev *dev, - unsigned width, unsigned height, - unsigned depth, unsigned level, - struct ilo_view_surface *surf) -{ - uint32_t *dw; - - ILO_DEV_ASSERT(dev, 6, 6); - - assert(width >= 1 && height >= 1 && depth >= 1); - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 71: - * - * "A null surface will be used in instances where an actual surface is - * not bound. When a write message is generated to a null surface, no - * actual surface is written to. When a read message (including any - * sampling engine message) is generated to a null surface, the result - * is all zeros. Note that a null surface type is allowed to be used - * with all messages, even if it is not specificially indicated as - * supported. 
All of the remaining fields in surface state are ignored - * for null surfaces, with the following exceptions: - * - * * [DevSNB+]: Width, Height, Depth, and LOD fields must match the - * depth buffer's corresponding state for all render target - * surfaces, including null. - * * Surface Format must be R8G8B8A8_UNORM." - * - * From the Sandy Bridge PRM, volume 4 part 1, page 82: - * - * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be - * true" - */ - - STATIC_ASSERT(Elements(surf->payload) >= 6); - dw = surf->payload; - - dw[0] = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT | - GEN6_FORMAT_B8G8R8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT; - - dw[1] = 0; - - dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT | - (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT | - level << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT; - - dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT | - GEN6_TILING_X; - - dw[4] = 0; - dw[5] = 0; -} - -static void -view_init_for_buffer_gen6(const struct ilo_dev *dev, - const struct ilo_buffer *buf, - unsigned offset, unsigned size, - unsigned struct_size, - enum pipe_format elem_format, - bool is_rt, bool render_cache_rw, - struct ilo_view_surface *surf) -{ - const int elem_size = util_format_get_blocksize(elem_format); - int width, height, depth, pitch; - int surface_format, num_entries; - uint32_t *dw; - - ILO_DEV_ASSERT(dev, 6, 6); - - /* - * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a - * structure in a buffer. - */ - - surface_format = ilo_format_translate_color(dev, elem_format); - - num_entries = size / struct_size; - /* see if there is enough space to fit another element */ - if (size % struct_size >= elem_size) - num_entries++; - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 76: - * - * "For SURFTYPE_BUFFER render targets, this field (Surface Base - * Address) specifies the base address of first element of the - * surface. 
The surface is interpreted as a simple array of that - * single element type. The address must be naturally-aligned to the - * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements - * must be 16-byte aligned). - * - * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies - * the base address of the first element of the surface, computed in - * software by adding the surface base address to the byte offset of - * the element in the buffer." - */ - if (is_rt) - assert(offset % elem_size == 0); - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 77: - * - * "For buffer surfaces, the number of entries in the buffer ranges - * from 1 to 2^27." - */ - assert(num_entries >= 1 && num_entries <= 1 << 27); - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 81: - * - * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch) - * indicates the size of the structure." - */ - pitch = struct_size; - - pitch--; - num_entries--; - /* bits [6:0] */ - width = (num_entries & 0x0000007f); - /* bits [19:7] */ - height = (num_entries & 0x000fff80) >> 7; - /* bits [26:20] */ - depth = (num_entries & 0x07f00000) >> 20; - - STATIC_ASSERT(Elements(surf->payload) >= 6); - dw = surf->payload; - - dw[0] = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT | - surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT; - if (render_cache_rw) - dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW; - - dw[1] = offset; - - dw[2] = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT | - width << GEN6_SURFACE_DW2_WIDTH__SHIFT; - - dw[3] = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT | - pitch << GEN6_SURFACE_DW3_PITCH__SHIFT; - - dw[4] = 0; - dw[5] = 0; -} - -static void -view_init_for_texture_gen6(const struct ilo_dev *dev, - const struct ilo_texture *tex, - enum pipe_format format, - unsigned first_level, - unsigned num_levels, - unsigned first_layer, - unsigned num_layers, - bool is_rt, - struct ilo_view_surface *surf) -{ - int surface_type, surface_format; - int width, height, 
depth, pitch, lod; - uint32_t *dw; - - ILO_DEV_ASSERT(dev, 6, 6); - - surface_type = ilo_gpe_gen6_translate_texture(tex->base.target); - assert(surface_type != GEN6_SURFTYPE_BUFFER); - - if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8) - format = PIPE_FORMAT_Z32_FLOAT; - - if (is_rt) - surface_format = ilo_format_translate_render(dev, format); - else - surface_format = ilo_format_translate_texture(dev, format); - assert(surface_format >= 0); - - width = tex->image.width0; - height = tex->image.height0; - depth = (tex->base.target == PIPE_TEXTURE_3D) ? - tex->base.depth0 : num_layers; - pitch = tex->image.bo_stride; - - if (surface_type == GEN6_SURFTYPE_CUBE) { - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 81: - * - * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the - * range of this field (Depth) is [0,84], indicating the number of - * cube array elements (equal to the number of underlying 2D array - * elements divided by 6). For other surfaces, this field must be - * zero." - * - * When is_rt is true, we treat the texture as a 2D one to avoid the - * restriction. 
- */ - if (is_rt) { - surface_type = GEN6_SURFTYPE_2D; - } - else { - assert(num_layers % 6 == 0); - depth = num_layers / 6; - } - } - - /* sanity check the size */ - assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1); - switch (surface_type) { - case GEN6_SURFTYPE_1D: - assert(width <= 8192 && height == 1 && depth <= 512); - assert(first_layer < 512 && num_layers <= 512); - break; - case GEN6_SURFTYPE_2D: - assert(width <= 8192 && height <= 8192 && depth <= 512); - assert(first_layer < 512 && num_layers <= 512); - break; - case GEN6_SURFTYPE_3D: - assert(width <= 2048 && height <= 2048 && depth <= 2048); - assert(first_layer < 2048 && num_layers <= 512); - if (!is_rt) - assert(first_layer == 0); - break; - case GEN6_SURFTYPE_CUBE: - assert(width <= 8192 && height <= 8192 && depth <= 85); - assert(width == height); - assert(first_layer < 512 && num_layers <= 512); - if (is_rt) - assert(first_layer == 0); - break; - default: - assert(!"unexpected surface type"); - break; - } - - /* non-full array spacing is supported only on GEN7+ */ - assert(tex->image.walk != ILO_IMAGE_WALK_LOD); - /* non-interleaved samples are supported only on GEN7+ */ - if (tex->base.nr_samples > 1) - assert(tex->image.interleaved_samples); - - if (is_rt) { - assert(num_levels == 1); - lod = first_level; - } - else { - lod = num_levels - 1; - } - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 76: - * - * "Linear render target surface base addresses must be element-size - * aligned, for non-YUV surface formats, or a multiple of 2 - * element-sizes for YUV surface formats. Other linear surfaces have - * no alignment requirements (byte alignment is sufficient.)" - * - * From the Sandy Bridge PRM, volume 4 part 1, page 81: - * - * "For linear render target surfaces, the pitch must be a multiple - * of the element size for non-YUV surface formats. Pitch must be a - * multiple of 2 * element size for YUV surface formats." 
- * - * From the Sandy Bridge PRM, volume 4 part 1, page 86: - * - * "For linear surfaces, this field (X Offset) must be zero" - */ - if (tex->image.tiling == GEN6_TILING_NONE) { - if (is_rt) { - const int elem_size = util_format_get_blocksize(format); - assert(pitch % elem_size == 0); - } - } - - STATIC_ASSERT(Elements(surf->payload) >= 6); - dw = surf->payload; - - dw[0] = surface_type << GEN6_SURFACE_DW0_TYPE__SHIFT | - surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT | - GEN6_SURFACE_DW0_MIPLAYOUT_BELOW; - - if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) { - dw[0] |= 1 << 9 | - GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; - } - - if (is_rt) - dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW; - - dw[1] = 0; - - dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT | - (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT | - lod << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT; - - assert(tex->image.tiling != GEN8_TILING_W); - dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT | - (pitch - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT | - tex->image.tiling; - - dw[4] = first_level << GEN6_SURFACE_DW4_MIN_LOD__SHIFT | - first_layer << 17 | - (num_layers - 1) << 8 | - ((tex->base.nr_samples > 1) ? GEN6_SURFACE_DW4_MULTISAMPLECOUNT_4 : - GEN6_SURFACE_DW4_MULTISAMPLECOUNT_1); - - dw[5] = 0; - - assert(tex->image.align_j == 2 || tex->image.align_j == 4); - if (tex->image.align_j == 4) - dw[5] |= GEN6_SURFACE_DW5_VALIGN_4; -} - -static void -view_init_null_gen7(const struct ilo_dev *dev, - unsigned width, unsigned height, - unsigned depth, unsigned level, - struct ilo_view_surface *surf) -{ - uint32_t *dw; - - ILO_DEV_ASSERT(dev, 7, 8); - - assert(width >= 1 && height >= 1 && depth >= 1); - - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 62: - * - * "A null surface is used in instances where an actual surface is not - * bound. When a write message is generated to a null surface, no - * actual surface is written to. 
When a read message (including any - * sampling engine message) is generated to a null surface, the result - * is all zeros. Note that a null surface type is allowed to be used - * with all messages, even if it is not specificially indicated as - * supported. All of the remaining fields in surface state are ignored - * for null surfaces, with the following exceptions: - * - * * Width, Height, Depth, LOD, and Render Target View Extent fields - * must match the depth buffer's corresponding state for all render - * target surfaces, including null. - * * All sampling engine and data port messages support null surfaces - * with the above behavior, even if not mentioned as specifically - * supported, except for the following: - * * Data Port Media Block Read/Write messages. - * * The Surface Type of a surface used as a render target (accessed - * via the Data Port's Render Target Write message) must be the same - * as the Surface Type of all other render targets and of the depth - * buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth - * buffer or render targets are SURFTYPE_NULL." 
- * - * From the Ivy Bridge PRM, volume 4 part 1, page 65: - * - * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be - * true" - */ - - STATIC_ASSERT(Elements(surf->payload) >= 13); - dw = surf->payload; - - dw[0] = GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT | - GEN6_FORMAT_B8G8R8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT; - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) - dw[0] |= GEN6_TILING_X << GEN8_SURFACE_DW0_TILING__SHIFT; - else - dw[0] |= GEN6_TILING_X << GEN7_SURFACE_DW0_TILING__SHIFT; - - dw[1] = 0; - - dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) | - GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH); - - dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH); - - dw[4] = 0; - dw[5] = level; - - dw[6] = 0; - dw[7] = 0; - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) - memset(&dw[8], 0, sizeof(*dw) * (13 - 8)); -} - -static void -view_init_for_buffer_gen7(const struct ilo_dev *dev, - const struct ilo_buffer *buf, - unsigned offset, unsigned size, - unsigned struct_size, - enum pipe_format elem_format, - bool is_rt, bool render_cache_rw, - struct ilo_view_surface *surf) -{ - const bool typed = (elem_format != PIPE_FORMAT_NONE); - const bool structured = (!typed && struct_size > 1); - const int elem_size = (typed) ? - util_format_get_blocksize(elem_format) : 1; - int width, height, depth, pitch; - int surface_type, surface_format, num_entries; - uint32_t *dw; - - ILO_DEV_ASSERT(dev, 7, 8); - - surface_type = (structured) ? GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER; - - surface_format = (typed) ? - ilo_format_translate_color(dev, elem_format) : GEN6_FORMAT_RAW; - - num_entries = size / struct_size; - /* see if there is enough space to fit another element */ - if (size % struct_size >= elem_size && !structured) - num_entries++; - - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 67: - * - * "For SURFTYPE_BUFFER render targets, this field (Surface Base - * Address) specifies the base address of first element of the - * surface. 
The surface is interpreted as a simple array of that - * single element type. The address must be naturally-aligned to the - * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements - * must be 16-byte aligned) - * - * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies - * the base address of the first element of the surface, computed in - * software by adding the surface base address to the byte offset of - * the element in the buffer." - */ - if (is_rt) - assert(offset % elem_size == 0); - - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 68: - * - * "For typed buffer and structured buffer surfaces, the number of - * entries in the buffer ranges from 1 to 2^27. For raw buffer - * surfaces, the number of entries in the buffer is the number of - * bytes which can range from 1 to 2^30." - */ - assert(num_entries >= 1 && - num_entries <= 1 << ((typed || structured) ? 27 : 30)); - - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 69: - * - * "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be - * 11 if the Surface Format is RAW (the size of the buffer must be a - * multiple of 4 bytes)." - * - * From the Ivy Bridge PRM, volume 4 part 1, page 70: - * - * "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this - * field (Surface Pitch) indicates the size of the structure." - * - * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch - * must be a multiple of 4 bytes." 
- */ - if (structured) - assert(struct_size % 4 == 0); - else if (!typed) - assert(num_entries % 4 == 0); - - pitch = struct_size; - - pitch--; - num_entries--; - /* bits [6:0] */ - width = (num_entries & 0x0000007f); - /* bits [20:7] */ - height = (num_entries & 0x001fff80) >> 7; - /* bits [30:21] */ - depth = (num_entries & 0x7fe00000) >> 21; - /* limit to [26:21] */ - if (typed || structured) - depth &= 0x3f; - - STATIC_ASSERT(Elements(surf->payload) >= 13); - dw = surf->payload; - - dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT | - surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT; - if (render_cache_rw) - dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW; - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - dw[8] = offset; - memset(&dw[9], 0, sizeof(*dw) * (13 - 9)); - } else { - dw[1] = offset; - } - - dw[2] = GEN_SHIFT32(height, GEN7_SURFACE_DW2_HEIGHT) | - GEN_SHIFT32(width, GEN7_SURFACE_DW2_WIDTH); - - dw[3] = GEN_SHIFT32(depth, GEN7_SURFACE_DW3_DEPTH) | - pitch; - - dw[4] = 0; - dw[5] = 0; - - dw[6] = 0; - dw[7] = 0; - - if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { - dw[7] |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) | - GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) | - GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) | - GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A); - } -} - -static void -view_init_for_texture_gen7(const struct ilo_dev *dev, - const struct ilo_texture *tex, - enum pipe_format format, - unsigned first_level, - unsigned num_levels, - unsigned first_layer, - unsigned num_layers, - bool is_rt, - struct ilo_view_surface *surf) -{ - int surface_type, surface_format; - int width, height, depth, pitch, lod; - uint32_t *dw; - - ILO_DEV_ASSERT(dev, 7, 8); - - surface_type = ilo_gpe_gen6_translate_texture(tex->base.target); - assert(surface_type != GEN6_SURFTYPE_BUFFER); - - if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8) - format = PIPE_FORMAT_Z32_FLOAT; - - if (is_rt) - surface_format = 
ilo_format_translate_render(dev, format); - else - surface_format = ilo_format_translate_texture(dev, format); - assert(surface_format >= 0); - - width = tex->image.width0; - height = tex->image.height0; - depth = (tex->base.target == PIPE_TEXTURE_3D) ? - tex->base.depth0 : num_layers; - pitch = tex->image.bo_stride; - - if (surface_type == GEN6_SURFTYPE_CUBE) { - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 70: - * - * "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of - * this field is [0,340], indicating the number of cube array - * elements (equal to the number of underlying 2D array elements - * divided by 6). For other surfaces, this field must be zero." - * - * When is_rt is true, we treat the texture as a 2D one to avoid the - * restriction. - */ - if (is_rt) { - surface_type = GEN6_SURFTYPE_2D; - } - else { - assert(num_layers % 6 == 0); - depth = num_layers / 6; - } - } - - /* sanity check the size */ - assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1); - assert(first_layer < 2048 && num_layers <= 2048); - switch (surface_type) { - case GEN6_SURFTYPE_1D: - assert(width <= 16384 && height == 1 && depth <= 2048); - break; - case GEN6_SURFTYPE_2D: - assert(width <= 16384 && height <= 16384 && depth <= 2048); - break; - case GEN6_SURFTYPE_3D: - assert(width <= 2048 && height <= 2048 && depth <= 2048); - if (!is_rt) - assert(first_layer == 0); - break; - case GEN6_SURFTYPE_CUBE: - assert(width <= 16384 && height <= 16384 && depth <= 86); - assert(width == height); - if (is_rt) - assert(first_layer == 0); - break; - default: - assert(!"unexpected surface type"); - break; - } - - if (is_rt) { - assert(num_levels == 1); - lod = first_level; - } - else { - lod = num_levels - 1; - } - - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 68: - * - * "The Base Address for linear render target surfaces and surfaces - * accessed with the typed surface read/write data port messages must - * be element-size aligned, for non-YUV surface 
formats, or a multiple - * of 2 element-sizes for YUV surface formats. Other linear surfaces - * have no alignment requirements (byte alignment is sufficient)." - * - * From the Ivy Bridge PRM, volume 4 part 1, page 70: - * - * "For linear render target surfaces and surfaces accessed with the - * typed data port messages, the pitch must be a multiple of the - * element size for non-YUV surface formats. Pitch must be a multiple - * of 2 * element size for YUV surface formats. For linear surfaces - * with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple - * of 4 bytes.For other linear surfaces, the pitch can be any multiple - * of bytes." - * - * From the Ivy Bridge PRM, volume 4 part 1, page 74: - * - * "For linear surfaces, this field (X Offset) must be zero." - */ - if (tex->image.tiling == GEN6_TILING_NONE) { - if (is_rt) { - const int elem_size = util_format_get_blocksize(format); - assert(pitch % elem_size == 0); - } - } - - STATIC_ASSERT(Elements(surf->payload) >= 13); - dw = surf->payload; - - dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT | - surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT; - - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 63: - * - * "If this field (Surface Array) is enabled, the Surface Type must be - * SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is - * disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or - * SURFTYPE_CUBE, the Depth field must be set to zero." - * - * For non-3D sampler surfaces, resinfo (the sampler message) always - * returns zero for the number of layers when this field is not set. 
- */ - if (surface_type != GEN6_SURFTYPE_3D) { - if (util_resource_is_array_texture(&tex->base)) - dw[0] |= GEN7_SURFACE_DW0_IS_ARRAY; - else - assert(depth == 1); - } - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - switch (tex->image.align_j) { - case 4: - dw[0] |= GEN7_SURFACE_DW0_VALIGN_4; - break; - case 8: - dw[0] |= GEN8_SURFACE_DW0_VALIGN_8; - break; - case 16: - dw[0] |= GEN8_SURFACE_DW0_VALIGN_16; - break; - default: - assert(!"unsupported valign"); - break; - } - - switch (tex->image.align_i) { - case 4: - dw[0] |= GEN8_SURFACE_DW0_HALIGN_4; - break; - case 8: - dw[0] |= GEN8_SURFACE_DW0_HALIGN_8; - break; - case 16: - dw[0] |= GEN8_SURFACE_DW0_HALIGN_16; - break; - default: - assert(!"unsupported halign"); - break; - } - - dw[0] |= tex->image.tiling << GEN8_SURFACE_DW0_TILING__SHIFT; - } else { - assert(tex->image.align_i == 4 || tex->image.align_i == 8); - assert(tex->image.align_j == 2 || tex->image.align_j == 4); - - if (tex->image.align_j == 4) - dw[0] |= GEN7_SURFACE_DW0_VALIGN_4; - - if (tex->image.align_i == 8) - dw[0] |= GEN7_SURFACE_DW0_HALIGN_8; - - assert(tex->image.tiling != GEN8_TILING_W); - dw[0] |= tex->image.tiling << GEN7_SURFACE_DW0_TILING__SHIFT; - - if (tex->image.walk == ILO_IMAGE_WALK_LOD) - dw[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0; - else - dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL; - } - - if (is_rt) - dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW; - - if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) - dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - assert(tex->image.layer_height % 4 == 0); - dw[1] = tex->image.layer_height / 4; - } else { - dw[1] = 0; - } - - dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) | - GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH); - - dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH) | - (pitch - 1); - - dw[4] = first_layer << 18 | - (num_layers - 1) << 7; - - /* - * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL - * means the 
samples are interleaved. The layouts are the same when the - * number of samples is 1. - */ - if (tex->image.interleaved_samples && tex->base.nr_samples > 1) { - assert(!is_rt); - dw[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL; - } - else { - dw[4] |= GEN7_SURFACE_DW4_MSFMT_MSS; - } - - switch (tex->base.nr_samples) { - case 0: - case 1: - default: - dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_1; - break; - case 2: - dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_2; - break; - case 4: - dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4; - break; - case 8: - dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8; - break; - case 16: - dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_16; - break; - } - - dw[5] = GEN_SHIFT32(first_level, GEN7_SURFACE_DW5_MIN_LOD) | - lod; - - dw[6] = 0; - dw[7] = 0; - - if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { - dw[7] |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) | - GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) | - GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) | - GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A); - } - - if (ilo_dev_gen(dev) >= ILO_GEN(8)) - memset(&dw[8], 0, sizeof(*dw) * (13 - 8)); -} - -void -ilo_gpe_init_view_surface_null(const struct ilo_dev *dev, - unsigned width, unsigned height, - unsigned depth, unsigned level, - struct ilo_view_surface *surf) -{ - if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - view_init_null_gen7(dev, - width, height, depth, level, surf); - } else { - view_init_null_gen6(dev, - width, height, depth, level, surf); - } - - surf->bo = NULL; - surf->scanout = false; -} - -void -ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev, - const struct ilo_buffer *buf, - unsigned offset, unsigned size, - unsigned struct_size, - enum pipe_format elem_format, - bool is_rt, bool render_cache_rw, - struct ilo_view_surface *surf) -{ - if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - view_init_for_buffer_gen7(dev, buf, offset, size, - struct_size, elem_format, is_rt, render_cache_rw, surf); - } else { - 
view_init_for_buffer_gen6(dev, buf, offset, size, - struct_size, elem_format, is_rt, render_cache_rw, surf); - } - - /* do not increment reference count */ - surf->bo = buf->bo; - surf->scanout = false; -} - -void -ilo_gpe_init_view_surface_for_texture(const struct ilo_dev *dev, - const struct ilo_texture *tex, - enum pipe_format format, - unsigned first_level, - unsigned num_levels, - unsigned first_layer, - unsigned num_layers, - bool is_rt, - struct ilo_view_surface *surf) -{ - if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - view_init_for_texture_gen7(dev, tex, format, - first_level, num_levels, first_layer, num_layers, - is_rt, surf); - } else { - view_init_for_texture_gen6(dev, tex, format, - first_level, num_levels, first_layer, num_layers, - is_rt, surf); - } - - /* do not increment reference count */ - surf->bo = tex->image.bo; - - /* assume imported RTs are scanouts */ - surf->scanout = ((tex->base.bind & PIPE_BIND_SCANOUT) || - (tex->imported && (tex->base.bind & PIPE_BIND_RENDER_TARGET))); -} - -static void -sampler_init_border_color_gen6(const struct ilo_dev *dev, - const union pipe_color_union *color, - uint32_t *dw, int num_dwords) -{ - float rgba[4] = { - color->f[0], color->f[1], color->f[2], color->f[3], - }; - - ILO_DEV_ASSERT(dev, 6, 6); - - assert(num_dwords >= 12); - - /* - * This state is not documented in the Sandy Bridge PRM, but in the - * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1. 
- */ - - /* IEEE_FP */ - dw[1] = fui(rgba[0]); - dw[2] = fui(rgba[1]); - dw[3] = fui(rgba[2]); - dw[4] = fui(rgba[3]); - - /* FLOAT_16 */ - dw[5] = util_float_to_half(rgba[0]) | - util_float_to_half(rgba[1]) << 16; - dw[6] = util_float_to_half(rgba[2]) | - util_float_to_half(rgba[3]) << 16; - - /* clamp to [-1.0f, 1.0f] */ - rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f); - rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f); - rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f); - rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f); - - /* SNORM16 */ - dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) | - (int16_t) util_iround(rgba[1] * 32767.0f) << 16; - dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) | - (int16_t) util_iround(rgba[3] * 32767.0f) << 16; - - /* SNORM8 */ - dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) | - (int8_t) util_iround(rgba[1] * 127.0f) << 8 | - (int8_t) util_iround(rgba[2] * 127.0f) << 16 | - (int8_t) util_iround(rgba[3] * 127.0f) << 24; - - /* clamp to [0.0f, 1.0f] */ - rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f); - rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f); - rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f); - rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f); - - /* UNORM8 */ - dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) | - (uint8_t) util_iround(rgba[1] * 255.0f) << 8 | - (uint8_t) util_iround(rgba[2] * 255.0f) << 16 | - (uint8_t) util_iround(rgba[3] * 255.0f) << 24; - - /* UNORM16 */ - dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) | - (uint16_t) util_iround(rgba[1] * 65535.0f) << 16; - dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) | - (uint16_t) util_iround(rgba[3] * 65535.0f) << 16; -} - -/** - * Translate a pipe texture mipfilter to the matching hardware mipfilter. 
- */ -static int -gen6_translate_tex_mipfilter(unsigned filter) -{ - switch (filter) { - case PIPE_TEX_MIPFILTER_NEAREST: return GEN6_MIPFILTER_NEAREST; - case PIPE_TEX_MIPFILTER_LINEAR: return GEN6_MIPFILTER_LINEAR; - case PIPE_TEX_MIPFILTER_NONE: return GEN6_MIPFILTER_NONE; - default: - assert(!"unknown mipfilter"); - return GEN6_MIPFILTER_NONE; - } -} - -/** - * Translate a pipe texture filter to the matching hardware mapfilter. - */ -static int -gen6_translate_tex_filter(unsigned filter) -{ - switch (filter) { - case PIPE_TEX_FILTER_NEAREST: return GEN6_MAPFILTER_NEAREST; - case PIPE_TEX_FILTER_LINEAR: return GEN6_MAPFILTER_LINEAR; - default: - assert(!"unknown sampler filter"); - return GEN6_MAPFILTER_NEAREST; - } -} - -/** - * Translate a pipe texture coordinate wrapping mode to the matching hardware - * wrapping mode. - */ -static int -gen6_translate_tex_wrap(unsigned wrap) -{ - switch (wrap) { - case PIPE_TEX_WRAP_CLAMP: return GEN8_TEXCOORDMODE_HALF_BORDER; - case PIPE_TEX_WRAP_REPEAT: return GEN6_TEXCOORDMODE_WRAP; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return GEN6_TEXCOORDMODE_CLAMP; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return GEN6_TEXCOORDMODE_CLAMP_BORDER; - case PIPE_TEX_WRAP_MIRROR_REPEAT: return GEN6_TEXCOORDMODE_MIRROR; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - default: - assert(!"unknown sampler wrap mode"); - return GEN6_TEXCOORDMODE_WRAP; - } -} - -/** - * Translate a pipe shadow compare function to the matching hardware shadow - * function. - */ -static int -gen6_translate_shadow_func(unsigned func) -{ - /* - * For PIPE_FUNC_x, the reference value is on the left-hand side of the - * comparison, and 1.0 is returned when the comparison is true. - * - * For GEN6_COMPAREFUNCTION_x, the reference value is on the right-hand side of - * the comparison, and 0.0 is returned when the comparison is true. 
- */ - switch (func) { - case PIPE_FUNC_NEVER: return GEN6_COMPAREFUNCTION_ALWAYS; - case PIPE_FUNC_LESS: return GEN6_COMPAREFUNCTION_LEQUAL; - case PIPE_FUNC_EQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL; - case PIPE_FUNC_LEQUAL: return GEN6_COMPAREFUNCTION_LESS; - case PIPE_FUNC_GREATER: return GEN6_COMPAREFUNCTION_GEQUAL; - case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_EQUAL; - case PIPE_FUNC_GEQUAL: return GEN6_COMPAREFUNCTION_GREATER; - case PIPE_FUNC_ALWAYS: return GEN6_COMPAREFUNCTION_NEVER; - default: - assert(!"unknown shadow compare function"); - return GEN6_COMPAREFUNCTION_NEVER; - } -} - -void -ilo_gpe_init_sampler_cso(const struct ilo_dev *dev, - const struct pipe_sampler_state *state, - struct ilo_sampler_cso *sampler) -{ - int mip_filter, min_filter, mag_filter, max_aniso; - int lod_bias, max_lod, min_lod; - int wrap_s, wrap_t, wrap_r, wrap_cube; - uint32_t dw0, dw1, dw3; - - ILO_DEV_ASSERT(dev, 6, 8); - - memset(sampler, 0, sizeof(*sampler)); - - mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter); - min_filter = gen6_translate_tex_filter(state->min_img_filter); - mag_filter = gen6_translate_tex_filter(state->mag_img_filter); - - sampler->anisotropic = state->max_anisotropy; - - if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16) - max_aniso = state->max_anisotropy / 2 - 1; - else if (state->max_anisotropy > 16) - max_aniso = GEN6_ANISORATIO_16; - else - max_aniso = GEN6_ANISORATIO_2; - - /* - * - * Here is how the hardware calculate per-pixel LOD, from my reading of the - * PRMs: - * - * 1) LOD is set to log2(ratio of texels to pixels) if not specified in - * other ways. The number of texels is measured using level - * SurfMinLod. - * 2) Bias is added to LOD. - * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is - * compared with Base to determine whether magnification or - * minification is needed. 
(if preclamp is disabled, LOD is compared - * with Base before clamping) - * 4) If magnification is needed, or no mipmapping is requested, LOD is - * set to floor(MinLod). - * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD. - * - * With Gallium interface, Base is always zero and - * pipe_sampler_view::u.tex.first_level specifies SurfMinLod. - */ - if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - const float scale = 256.0f; - - /* [-16.0, 16.0) in S4.8 */ - lod_bias = (int) - (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale); - lod_bias &= 0x1fff; - - /* [0.0, 14.0] in U4.8 */ - max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale); - min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale); - } - else { - const float scale = 64.0f; - - /* [-16.0, 16.0) in S4.6 */ - lod_bias = (int) - (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale); - lod_bias &= 0x7ff; - - /* [0.0, 13.0] in U4.6 */ - max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale); - min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale); - } - - /* - * We want LOD to be clamped to determine magnification/minification, and - * get set to zero when it is magnification or when mipmapping is disabled. - * The hardware would set LOD to floor(MinLod) and that is a problem when - * MinLod is greater than or equal to 1.0f. - * - * With Base being zero, it is always minification when MinLod is non-zero. - * To achieve our goal, we just need to set MinLod to zero and set - * MagFilter to MinFilter when mipmapping is disabled. 
- */ - if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) { - min_lod = 0; - mag_filter = min_filter; - } - - /* determine wrap s/t/r */ - wrap_s = gen6_translate_tex_wrap(state->wrap_s); - wrap_t = gen6_translate_tex_wrap(state->wrap_t); - wrap_r = gen6_translate_tex_wrap(state->wrap_r); - if (ilo_dev_gen(dev) < ILO_GEN(8)) { - /* - * For nearest filtering, PIPE_TEX_WRAP_CLAMP means - * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering, - * PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while - * additionally clamping the texture coordinates to [0.0, 1.0]. - * - * PIPE_TEX_WRAP_CLAMP is not supported natively until Gen8. The - * clamping has to be taken care of in the shaders. There are two - * filters here, but let the minification one has a say. - */ - const bool clamp_is_to_edge = - (state->min_img_filter == PIPE_TEX_FILTER_NEAREST); - - if (clamp_is_to_edge) { - if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER) - wrap_s = GEN6_TEXCOORDMODE_CLAMP; - if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER) - wrap_t = GEN6_TEXCOORDMODE_CLAMP; - if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER) - wrap_r = GEN6_TEXCOORDMODE_CLAMP; - } else { - if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER) { - wrap_s = GEN6_TEXCOORDMODE_CLAMP_BORDER; - sampler->saturate_s = true; - } - if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER) { - wrap_t = GEN6_TEXCOORDMODE_CLAMP_BORDER; - sampler->saturate_t = true; - } - if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER) { - wrap_r = GEN6_TEXCOORDMODE_CLAMP_BORDER; - sampler->saturate_r = true; - } - } - } - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 107: - * - * "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP - * and TEXCOORDMODE_CUBE settings are valid, and each TC component - * must have the same Address Control mode." 
- * - * From the Ivy Bridge PRM, volume 4 part 1, page 96: - * - * "This field (Cube Surface Control Mode) must be set to - * CUBECTRLMODE_PROGRAMMED" - * - * Therefore, we cannot use "Cube Surface Control Mode" for semless cube - * map filtering. - */ - if (state->seamless_cube_map && - (state->min_img_filter != PIPE_TEX_FILTER_NEAREST || - state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) { - wrap_cube = GEN6_TEXCOORDMODE_CUBE; - } - else { - wrap_cube = GEN6_TEXCOORDMODE_CLAMP; - } - - if (!state->normalized_coords) { - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 98: - * - * "The following state must be set as indicated if this field - * (Non-normalized Coordinate Enable) is enabled: - * - * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP, - * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER. - * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D. - * - Mag Mode Filter must be MAPFILTER_NEAREST or - * MAPFILTER_LINEAR. - * - Min Mode Filter must be MAPFILTER_NEAREST or - * MAPFILTER_LINEAR. - * - Mip Mode Filter must be MIPFILTER_NONE. - * - Min LOD must be 0. - * - Max LOD must be 0. - * - MIP Count must be 0. - * - Surface Min LOD must be 0. - * - Texture LOD Bias must be 0." 
- */ - assert(wrap_s == GEN6_TEXCOORDMODE_CLAMP || - wrap_s == GEN6_TEXCOORDMODE_CLAMP_BORDER); - assert(wrap_t == GEN6_TEXCOORDMODE_CLAMP || - wrap_t == GEN6_TEXCOORDMODE_CLAMP_BORDER); - assert(wrap_r == GEN6_TEXCOORDMODE_CLAMP || - wrap_r == GEN6_TEXCOORDMODE_CLAMP_BORDER); - - assert(mag_filter == GEN6_MAPFILTER_NEAREST || - mag_filter == GEN6_MAPFILTER_LINEAR); - assert(min_filter == GEN6_MAPFILTER_NEAREST || - min_filter == GEN6_MAPFILTER_LINEAR); - - /* work around a bug in util_blitter */ - mip_filter = GEN6_MIPFILTER_NONE; - - assert(mip_filter == GEN6_MIPFILTER_NONE); - } - - if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - dw0 = 1 << 28 | - mip_filter << 20 | - lod_bias << 1; - - sampler->dw_filter = mag_filter << 17 | - min_filter << 14; - - sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 | - GEN6_MAPFILTER_ANISOTROPIC << 14 | - 1; - - dw1 = min_lod << 20 | - max_lod << 8; - - if (state->compare_mode != PIPE_TEX_COMPARE_NONE) - dw1 |= gen6_translate_shadow_func(state->compare_func) << 1; - - dw3 = max_aniso << 19; - - /* round the coordinates for linear filtering */ - if (min_filter != GEN6_MAPFILTER_NEAREST) { - dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND | - GEN6_SAMPLER_DW3_V_MIN_ROUND | - GEN6_SAMPLER_DW3_R_MIN_ROUND); - } - if (mag_filter != GEN6_MAPFILTER_NEAREST) { - dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND | - GEN6_SAMPLER_DW3_V_MAG_ROUND | - GEN6_SAMPLER_DW3_R_MAG_ROUND); - } - - if (!state->normalized_coords) - dw3 |= 1 << 10; - - sampler->dw_wrap = wrap_s << 6 | - wrap_t << 3 | - wrap_r; - - /* - * As noted in the classic i965 driver, the HW may still reference - * wrap_t and wrap_r for 1D textures. 
We need to set them to a safe - * mode - */ - sampler->dw_wrap_1d = wrap_s << 6 | - GEN6_TEXCOORDMODE_WRAP << 3 | - GEN6_TEXCOORDMODE_WRAP; - - sampler->dw_wrap_cube = wrap_cube << 6 | - wrap_cube << 3 | - wrap_cube; - - STATIC_ASSERT(Elements(sampler->payload) >= 7); - - sampler->payload[0] = dw0; - sampler->payload[1] = dw1; - sampler->payload[2] = dw3; - - memcpy(&sampler->payload[3], - state->border_color.ui, sizeof(state->border_color.ui)); - } - else { - dw0 = 1 << 28 | - mip_filter << 20 | - lod_bias << 3; - - if (state->compare_mode != PIPE_TEX_COMPARE_NONE) - dw0 |= gen6_translate_shadow_func(state->compare_func); - - sampler->dw_filter = (min_filter != mag_filter) << 27 | - mag_filter << 17 | - min_filter << 14; - - sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 | - GEN6_MAPFILTER_ANISOTROPIC << 14; - - dw1 = min_lod << 22 | - max_lod << 12; - - sampler->dw_wrap = wrap_s << 6 | - wrap_t << 3 | - wrap_r; - - sampler->dw_wrap_1d = wrap_s << 6 | - GEN6_TEXCOORDMODE_WRAP << 3 | - GEN6_TEXCOORDMODE_WRAP; - - sampler->dw_wrap_cube = wrap_cube << 6 | - wrap_cube << 3 | - wrap_cube; - - dw3 = max_aniso << 19; - - /* round the coordinates for linear filtering */ - if (min_filter != GEN6_MAPFILTER_NEAREST) { - dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND | - GEN6_SAMPLER_DW3_V_MIN_ROUND | - GEN6_SAMPLER_DW3_R_MIN_ROUND); - } - if (mag_filter != GEN6_MAPFILTER_NEAREST) { - dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND | - GEN6_SAMPLER_DW3_V_MAG_ROUND | - GEN6_SAMPLER_DW3_R_MAG_ROUND); - } - - if (!state->normalized_coords) - dw3 |= 1; - - STATIC_ASSERT(Elements(sampler->payload) >= 15); - - sampler->payload[0] = dw0; - sampler->payload[1] = dw1; - sampler->payload[2] = dw3; - - sampler_init_border_color_gen6(dev, - &state->border_color, &sampler->payload[3], 12); - } -} -- cgit v1.2.3