diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r-- | src/mesa/drivers/dri/i965/Android.mk | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.am | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.sources | 20 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/blorp.c | 290 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/blorp.h | 153 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/blorp_blit.c | 1649 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/blorp_clear.c | 344 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/blorp_priv.h | 291 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_blorp.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/genX_blorp_exec.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/genX_blorp_exec.h | 1176 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_resolve_map.h | 2 |
13 files changed, 11 insertions, 3922 deletions
diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index 971c3c58e8a..335850a8d88 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -183,6 +183,7 @@ LOCAL_SRC_FILES := \ LOCAL_WHOLE_STATIC_LIBRARIES := \ $(MESA_DRI_WHOLE_STATIC_LIBRARIES) \ $(I965_PERGEN_LIBS) \ + libmesa_blorp \ libmesa_isl LOCAL_SHARED_LIBRARIES := \ diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index d6bafed3000..14dbb9cd6d9 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -79,6 +79,7 @@ libi965_dri_la_SOURCES = $(i965_FILES) libi965_dri_la_LIBADD = \ $(top_builddir)/src/intel/isl/libisl.la \ libi965_compiler.la \ + $(top_builddir)/src/intel/blorp/libblorp.la \ $(I965_PERGEN_LIBS) \ $(INTEL_LIBS) diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index c97486c2de0..9ff5cebeab2 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -95,11 +95,6 @@ i965_compiler_GENERATED_FILES = \ brw_nir_trig_workarounds.c i965_FILES = \ - blorp.c \ - blorp.h \ - blorp_blit.c \ - blorp_clear.c \ - blorp_priv.h \ brw_binding_tables.c \ brw_blorp.c \ brw_blorp.h \ @@ -259,21 +254,16 @@ i965_FILES = \ intel_upload.c i965_gen6_FILES = \ - genX_blorp_exec.c \ - genX_blorp_exec.h + genX_blorp_exec.c i965_gen7_FILES = \ - genX_blorp_exec.c \ - genX_blorp_exec.h + genX_blorp_exec.c i965_gen75_FILES = \ - genX_blorp_exec.c \ - genX_blorp_exec.h + genX_blorp_exec.c i965_gen8_FILES = \ - genX_blorp_exec.c \ - genX_blorp_exec.h + genX_blorp_exec.c i965_gen9_FILES = \ - genX_blorp_exec.c \ - genX_blorp_exec.h + genX_blorp_exec.c diff --git a/src/mesa/drivers/dri/i965/blorp.c b/src/mesa/drivers/dri/i965/blorp.c deleted file mode 100644 index 8640abe146c..00000000000 --- a/src/mesa/drivers/dri/i965/blorp.c +++ /dev/null @@ -1,290 +0,0 @@ -/* - * 
Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include <errno.h> - -#include "blorp_priv.h" -#include "brw_compiler.h" -#include "brw_nir.h" - -void -blorp_init(struct blorp_context *blorp, void *driver_ctx, - struct isl_device *isl_dev) -{ - blorp->driver_ctx = driver_ctx; - blorp->isl_dev = isl_dev; -} - -void -blorp_finish(struct blorp_context *blorp) -{ - blorp->driver_ctx = NULL; -} - -void -blorp_batch_init(struct blorp_context *blorp, - struct blorp_batch *batch, void *driver_batch) -{ - batch->blorp = blorp; - batch->driver_batch = driver_batch; -} - -void -blorp_batch_finish(struct blorp_batch *batch) -{ - batch->blorp = NULL; -} - -void -brw_blorp_surface_info_init(struct blorp_context *blorp, - struct brw_blorp_surface_info *info, - const struct blorp_surf *surf, - unsigned int level, unsigned int layer, - enum isl_format format, bool is_render_target) -{ - /* Layer is a physical layer, so if this is a 2D multisample array texture - * using INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, then it had better - * be a multiple of num_samples. - */ - unsigned layer_multiplier = 1; - if (surf->surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY) { - assert(layer % surf->surf->samples == 0); - layer_multiplier = surf->surf->samples; - } - - if (format == ISL_FORMAT_UNSUPPORTED) - format = surf->surf->format; - - if (format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) { - /* Unfortunately, ISL_FORMAT_R24_UNORM_X8_TYPELESS it isn't supported as - * a render target, which would prevent us from blitting to 24-bit - * depth. The miptree consists of 32 bits per pixel, arranged as 24-bit - * depth values interleaved with 8 "don't care" bits. Since depth - * values don't require any blending, it doesn't matter how we interpret - * the bit pattern as long as we copy the right amount of data, so just - * map it as 8-bit BGRA. 
- */ - format = ISL_FORMAT_B8G8R8A8_UNORM; - } else if (surf->surf->usage & ISL_SURF_USAGE_STENCIL_BIT) { - assert(surf->surf->format == ISL_FORMAT_R8_UINT); - /* Prior to Broadwell, we can't render to R8_UINT */ - if (blorp->isl_dev->info->gen < 8) - format = ISL_FORMAT_R8_UNORM; - } - - info->surf = *surf->surf; - info->addr = surf->addr; - - info->aux_usage = surf->aux_usage; - if (info->aux_usage != ISL_AUX_USAGE_NONE) { - info->aux_surf = *surf->aux_surf; - info->aux_addr = surf->aux_addr; - } - - info->clear_color = surf->clear_color; - - info->view = (struct isl_view) { - .usage = is_render_target ? ISL_SURF_USAGE_RENDER_TARGET_BIT : - ISL_SURF_USAGE_TEXTURE_BIT, - .format = format, - .base_level = level, - .levels = 1, - .channel_select = { - ISL_CHANNEL_SELECT_RED, - ISL_CHANNEL_SELECT_GREEN, - ISL_CHANNEL_SELECT_BLUE, - ISL_CHANNEL_SELECT_ALPHA, - }, - }; - - if (!is_render_target && - (info->surf.dim == ISL_SURF_DIM_3D || - info->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY)) { - /* 3-D textures don't support base_array layer and neither do 2-D - * multisampled textures on IVB so we need to pass it through the - * sampler in those cases. These are also two cases where we are - * guaranteed that we won't be doing any funny surface hacks. 
- */ - info->view.base_array_layer = 0; - info->view.array_len = MAX2(info->surf.logical_level0_px.depth, - info->surf.logical_level0_px.array_len); - info->z_offset = layer / layer_multiplier; - } else { - info->view.base_array_layer = layer / layer_multiplier; - info->view.array_len = 1; - info->z_offset = 0; - } -} - - -void -blorp_params_init(struct blorp_params *params) -{ - memset(params, 0, sizeof(*params)); - params->num_draw_buffers = 1; - params->num_layers = 1; -} - -void -brw_blorp_init_wm_prog_key(struct brw_wm_prog_key *wm_key) -{ - memset(wm_key, 0, sizeof(*wm_key)); - wm_key->nr_color_regions = 1; - for (int i = 0; i < MAX_SAMPLERS; i++) - wm_key->tex.swizzles[i] = SWIZZLE_XYZW; -} - -static int -nir_uniform_type_size(const struct glsl_type *type) -{ - /* Only very basic types are allowed */ - assert(glsl_type_is_vector_or_scalar(type)); - assert(glsl_get_bit_size(type) == 32); - - return glsl_get_vector_elements(type) * 4; -} - -const unsigned * -brw_blorp_compile_nir_shader(struct blorp_context *blorp, struct nir_shader *nir, - const struct brw_wm_prog_key *wm_key, - bool use_repclear, - struct brw_blorp_prog_data *prog_data, - unsigned *program_size) -{ - const struct brw_compiler *compiler = blorp->compiler; - - void *mem_ctx = ralloc_context(NULL); - - /* Calling brw_preprocess_nir and friends is destructive and, if cloning is - * enabled, may end up completely replacing the nir_shader. Therefore, we - * own it and might as well put it in our context for easy cleanup. 
- */ - ralloc_steal(mem_ctx, nir); - nir->options = - compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions; - - struct brw_wm_prog_data wm_prog_data; - memset(&wm_prog_data, 0, sizeof(wm_prog_data)); - - wm_prog_data.base.nr_params = 0; - wm_prog_data.base.param = NULL; - - /* BLORP always just uses the first two binding table entries */ - wm_prog_data.binding_table.render_target_start = BLORP_RENDERBUFFER_BT_INDEX; - wm_prog_data.base.binding_table.texture_start = BLORP_TEXTURE_BT_INDEX; - - nir = brw_preprocess_nir(compiler, nir); - nir_remove_dead_variables(nir, nir_var_shader_in); - nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); - - /* Uniforms are required to be lowered before going into compile_fs. For - * BLORP, we'll assume that whoever builds the shader sets the location - * they want so we just need to lower them and figure out how many we have - * in total. - */ - nir->num_uniforms = 0; - nir_foreach_variable(var, &nir->uniforms) { - var->data.driver_location = var->data.location; - unsigned end = var->data.location + nir_uniform_type_size(var->type); - nir->num_uniforms = MAX2(nir->num_uniforms, end); - } - nir_lower_io(nir, nir_var_uniform, nir_uniform_type_size); - - const unsigned *program = - brw_compile_fs(compiler, blorp->driver_ctx, mem_ctx, - wm_key, &wm_prog_data, nir, - NULL, -1, -1, false, use_repclear, program_size, NULL); - - /* Copy the relavent bits of wm_prog_data over into the blorp prog data */ - prog_data->dispatch_8 = wm_prog_data.dispatch_8; - prog_data->dispatch_16 = wm_prog_data.dispatch_16; - prog_data->first_curbe_grf_0 = wm_prog_data.base.dispatch_grf_start_reg; - prog_data->first_curbe_grf_2 = wm_prog_data.dispatch_grf_start_reg_2; - prog_data->ksp_offset_2 = wm_prog_data.prog_offset_2; - prog_data->persample_msaa_dispatch = wm_prog_data.persample_dispatch; - prog_data->flat_inputs = wm_prog_data.flat_inputs; - prog_data->num_varying_inputs = wm_prog_data.num_varying_inputs; - prog_data->inputs_read = 
nir->info.inputs_read; - - assert(wm_prog_data.base.nr_params == 0); - - return program; -} - -void -blorp_gen6_hiz_op(struct blorp_batch *batch, - struct blorp_surf *surf, unsigned level, unsigned layer, - enum blorp_hiz_op op) -{ - struct blorp_params params; - blorp_params_init(&params); - - params.hiz_op = op; - - brw_blorp_surface_info_init(batch->blorp, &params.depth, surf, level, layer, - surf->surf->format, true); - - /* Align the rectangle primitive to 8x4 pixels. - * - * During fast depth clears, the emitted rectangle primitive must be - * aligned to 8x4 pixels. From the Ivybridge PRM, Vol 2 Part 1 Section - * 11.5.3.1 Depth Buffer Clear (and the matching section in the Sandybridge - * PRM): - * If Number of Multisamples is NUMSAMPLES_1, the rectangle must be - * aligned to an 8x4 pixel block relative to the upper left corner - * of the depth buffer [...] - * - * For hiz resolves, the rectangle must also be 8x4 aligned. Item - * WaHizAmbiguate8x4Aligned from the Haswell workarounds page and the - * Ivybridge simulator require the alignment. - * - * To be safe, let's just align the rect for all hiz operations and all - * hardware generations. - * - * However, for some miptree slices of a Z24 texture, emitting an 8x4 - * aligned rectangle that covers the slice may clobber adjacent slices if - * we strictly adhered to the texture alignments specified in the PRM. The - * Ivybridge PRM, Section "Alignment Unit Size", states that - * SURFACE_STATE.Surface_Horizontal_Alignment should be 4 for Z24 surfaces, - * not 8. But commit 1f112cc increased the alignment from 4 to 8, which - * prevents the clobbering. - */ - params.x1 = minify(params.depth.surf.logical_level0_px.width, - params.depth.view.base_level); - params.y1 = minify(params.depth.surf.logical_level0_px.height, - params.depth.view.base_level); - params.x1 = ALIGN(params.x1, 8); - params.y1 = ALIGN(params.y1, 4); - - if (params.depth.view.base_level == 0) { - /* TODO: What about MSAA? 
*/ - params.depth.surf.logical_level0_px.width = params.x1; - params.depth.surf.logical_level0_px.height = params.y1; - } - - params.dst.surf.samples = params.depth.surf.samples; - params.dst.surf.logical_level0_px = params.depth.surf.logical_level0_px; - params.depth_format = isl_format_get_depth_format(surf->surf->format, false); - - batch->blorp->exec(batch, &params); -} diff --git a/src/mesa/drivers/dri/i965/blorp.h b/src/mesa/drivers/dri/i965/blorp.h deleted file mode 100644 index a4fcfdfcf70..00000000000 --- a/src/mesa/drivers/dri/i965/blorp.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#pragma once - -#include <stdint.h> -#include <stdbool.h> - -#include "isl/isl.h" - -struct brw_context; -struct brw_wm_prog_key; - -#ifdef __cplusplus -extern "C" { -#endif - -struct blorp_batch; -struct blorp_params; - -struct blorp_context { - void *driver_ctx; - - const struct isl_device *isl_dev; - - const struct brw_compiler *compiler; - - struct { - uint32_t tex; - uint32_t rb; - uint32_t vb; - } mocs; - - bool (*lookup_shader)(struct blorp_context *blorp, - const void *key, uint32_t key_size, - uint32_t *kernel_out, void *prog_data_out); - void (*upload_shader)(struct blorp_context *blorp, - const void *key, uint32_t key_size, - const void *kernel, uint32_t kernel_size, - const void *prog_data, uint32_t prog_data_size, - uint32_t *kernel_out, void *prog_data_out); - void (*exec)(struct blorp_batch *batch, const struct blorp_params *params); -}; - -void blorp_init(struct blorp_context *blorp, void *driver_ctx, - struct isl_device *isl_dev); -void blorp_finish(struct blorp_context *blorp); - -struct blorp_batch { - struct blorp_context *blorp; - void *driver_batch; -}; - -void blorp_batch_init(struct blorp_context *blorp, struct blorp_batch *batch, - void *driver_batch); -void blorp_batch_finish(struct blorp_batch *batch); - -struct blorp_address { - void *buffer; - uint32_t read_domains; - uint32_t write_domain; - uint32_t offset; -}; - -struct blorp_surf -{ - const struct isl_surf *surf; - struct blorp_address addr; - - const struct isl_surf *aux_surf; - struct blorp_address aux_addr; - enum isl_aux_usage aux_usage; - - union isl_color_value clear_color; -}; - -void -blorp_blit(struct blorp_batch *batch, - const struct blorp_surf *src_surf, - unsigned src_level, unsigned src_layer, - enum isl_format src_format, int src_swizzle, - const struct blorp_surf *dst_surf, - unsigned dst_level, unsigned dst_layer, - enum isl_format dst_format, - float src_x0, float src_y0, - float src_x1, float src_y1, - float dst_x0, float dst_y0, - float dst_x1, float 
dst_y1, - uint32_t filter, bool mirror_x, bool mirror_y); - -void -blorp_fast_clear(struct blorp_batch *batch, - const struct blorp_surf *surf, - uint32_t level, uint32_t layer, - uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1); - -void -blorp_clear(struct blorp_batch *batch, - const struct blorp_surf *surf, - uint32_t level, uint32_t layer, - uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1, - enum isl_format format, union isl_color_value clear_color, - bool color_write_disable[4]); - -void -blorp_ccs_resolve(struct blorp_batch *batch, - struct blorp_surf *surf, enum isl_format format); - -/** - * For an overview of the HiZ operations, see the following sections of the - * Sandy Bridge PRM, Volume 1, Part2: - * - 7.5.3.1 Depth Buffer Clear - * - 7.5.3.2 Depth Buffer Resolve - * - 7.5.3.3 Hierarchical Depth Buffer Resolve - * - * Of these, two get entered in the resolve map as needing to be done to the - * buffer: depth resolve and hiz resolve. - */ -enum blorp_hiz_op { - BLORP_HIZ_OP_NONE, - BLORP_HIZ_OP_DEPTH_CLEAR, - BLORP_HIZ_OP_DEPTH_RESOLVE, - BLORP_HIZ_OP_HIZ_RESOLVE, -}; - -void -blorp_gen6_hiz_op(struct blorp_batch *batch, - struct blorp_surf *surf, unsigned level, unsigned layer, - enum blorp_hiz_op op); - -#ifdef __cplusplus -} /* end extern "C" */ -#endif /* __cplusplus */ diff --git a/src/mesa/drivers/dri/i965/blorp_blit.c b/src/mesa/drivers/dri/i965/blorp_blit.c deleted file mode 100644 index 170c3816e38..00000000000 --- a/src/mesa/drivers/dri/i965/blorp_blit.c +++ /dev/null @@ -1,1649 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to 
do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "program/prog_instruction.h" -#include "compiler/nir/nir_builder.h" - -#include "blorp_priv.h" -#include "brw_meta_util.h" - -#define FILE_DEBUG_FLAG DEBUG_BLORP - -/** - * Enum to specify the order of arguments in a sampler message - */ -enum sampler_message_arg -{ - SAMPLER_MESSAGE_ARG_U_FLOAT, - SAMPLER_MESSAGE_ARG_V_FLOAT, - SAMPLER_MESSAGE_ARG_U_INT, - SAMPLER_MESSAGE_ARG_V_INT, - SAMPLER_MESSAGE_ARG_R_INT, - SAMPLER_MESSAGE_ARG_SI_INT, - SAMPLER_MESSAGE_ARG_MCS_INT, - SAMPLER_MESSAGE_ARG_ZERO_INT, -}; - -struct brw_blorp_blit_vars { - /* Input values from brw_blorp_wm_inputs */ - nir_variable *v_discard_rect; - nir_variable *v_rect_grid; - nir_variable *v_coord_transform; - nir_variable *v_src_z; - - /* gl_FragCoord */ - nir_variable *frag_coord; - - /* gl_FragColor */ - nir_variable *color_out; -}; - -static void -brw_blorp_blit_vars_init(nir_builder *b, struct brw_blorp_blit_vars *v, - const struct brw_blorp_blit_prog_key *key) -{ - /* Blended and scaled blits never use pixel discard. 
*/ - assert(!key->use_kill || !(key->blend && key->blit_scaled)); - -#define LOAD_INPUT(name, type)\ - v->v_##name = nir_variable_create(b->shader, nir_var_shader_in, \ - type, #name); \ - v->v_##name->data.interpolation = INTERP_MODE_FLAT; \ - v->v_##name->data.location = VARYING_SLOT_VAR0 + \ - offsetof(struct brw_blorp_wm_inputs, name) / (4 * sizeof(float)); - - LOAD_INPUT(discard_rect, glsl_vec4_type()) - LOAD_INPUT(rect_grid, glsl_vec4_type()) - LOAD_INPUT(coord_transform, glsl_vec4_type()) - LOAD_INPUT(src_z, glsl_uint_type()) - -#undef LOAD_INPUT - - v->frag_coord = nir_variable_create(b->shader, nir_var_shader_in, - glsl_vec4_type(), "gl_FragCoord"); - v->frag_coord->data.location = VARYING_SLOT_POS; - v->frag_coord->data.origin_upper_left = true; - - v->color_out = nir_variable_create(b->shader, nir_var_shader_out, - glsl_vec4_type(), "gl_FragColor"); - v->color_out->data.location = FRAG_RESULT_COLOR; -} - -static nir_ssa_def * -blorp_blit_get_frag_coords(nir_builder *b, - const struct brw_blorp_blit_prog_key *key, - struct brw_blorp_blit_vars *v) -{ - nir_ssa_def *coord = nir_f2i(b, nir_load_var(b, v->frag_coord)); - - if (key->persample_msaa_dispatch) { - return nir_vec3(b, nir_channel(b, coord, 0), nir_channel(b, coord, 1), - nir_load_sample_id(b)); - } else { - return nir_vec2(b, nir_channel(b, coord, 0), nir_channel(b, coord, 1)); - } -} - -/** - * Emit code to translate from destination (X, Y) coordinates to source (X, Y) - * coordinates. 
- */ -static nir_ssa_def * -blorp_blit_apply_transform(nir_builder *b, nir_ssa_def *src_pos, - struct brw_blorp_blit_vars *v) -{ - nir_ssa_def *coord_transform = nir_load_var(b, v->v_coord_transform); - - nir_ssa_def *offset = nir_vec2(b, nir_channel(b, coord_transform, 1), - nir_channel(b, coord_transform, 3)); - nir_ssa_def *mul = nir_vec2(b, nir_channel(b, coord_transform, 0), - nir_channel(b, coord_transform, 2)); - - return nir_ffma(b, src_pos, mul, offset); -} - -static inline void -blorp_nir_discard_if_outside_rect(nir_builder *b, nir_ssa_def *pos, - struct brw_blorp_blit_vars *v) -{ - nir_ssa_def *c0, *c1, *c2, *c3; - nir_ssa_def *discard_rect = nir_load_var(b, v->v_discard_rect); - nir_ssa_def *dst_x0 = nir_channel(b, discard_rect, 0); - nir_ssa_def *dst_x1 = nir_channel(b, discard_rect, 1); - nir_ssa_def *dst_y0 = nir_channel(b, discard_rect, 2); - nir_ssa_def *dst_y1 = nir_channel(b, discard_rect, 3); - - c0 = nir_ult(b, nir_channel(b, pos, 0), dst_x0); - c1 = nir_uge(b, nir_channel(b, pos, 0), dst_x1); - c2 = nir_ult(b, nir_channel(b, pos, 1), dst_y0); - c3 = nir_uge(b, nir_channel(b, pos, 1), dst_y1); - - nir_ssa_def *oob = nir_ior(b, nir_ior(b, c0, c1), nir_ior(b, c2, c3)); - - nir_intrinsic_instr *discard = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if); - discard->src[0] = nir_src_for_ssa(oob); - nir_builder_instr_insert(b, &discard->instr); -} - -static nir_tex_instr * -blorp_create_nir_tex_instr(nir_builder *b, struct brw_blorp_blit_vars *v, - nir_texop op, nir_ssa_def *pos, unsigned num_srcs, - nir_alu_type dst_type) -{ - nir_tex_instr *tex = nir_tex_instr_create(b->shader, num_srcs); - - tex->op = op; - - tex->dest_type = dst_type; - tex->is_array = false; - tex->is_shadow = false; - - /* Blorp only has one texture and it's bound at unit 0 */ - tex->texture = NULL; - tex->sampler = NULL; - tex->texture_index = 0; - tex->sampler_index = 0; - - /* To properly handle 3-D and 2-D array textures, we pull the Z component - * from an 
input. TODO: This is a bit magic; we should probably make this - * more explicit in the future. - */ - assert(pos->num_components >= 2); - pos = nir_vec3(b, nir_channel(b, pos, 0), nir_channel(b, pos, 1), - nir_load_var(b, v->v_src_z)); - - tex->src[0].src_type = nir_tex_src_coord; - tex->src[0].src = nir_src_for_ssa(pos); - tex->coord_components = 3; - - nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); - - return tex; -} - -static nir_ssa_def * -blorp_nir_tex(nir_builder *b, struct brw_blorp_blit_vars *v, - nir_ssa_def *pos, nir_alu_type dst_type) -{ - nir_tex_instr *tex = - blorp_create_nir_tex_instr(b, v, nir_texop_tex, pos, 2, dst_type); - - assert(pos->num_components == 2); - tex->sampler_dim = GLSL_SAMPLER_DIM_2D; - tex->src[1].src_type = nir_tex_src_lod; - tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0)); - - nir_builder_instr_insert(b, &tex->instr); - - return &tex->dest.ssa; -} - -static nir_ssa_def * -blorp_nir_txf(nir_builder *b, struct brw_blorp_blit_vars *v, - nir_ssa_def *pos, nir_alu_type dst_type) -{ - nir_tex_instr *tex = - blorp_create_nir_tex_instr(b, v, nir_texop_txf, pos, 2, dst_type); - - tex->sampler_dim = GLSL_SAMPLER_DIM_3D; - tex->src[1].src_type = nir_tex_src_lod; - tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0)); - - nir_builder_instr_insert(b, &tex->instr); - - return &tex->dest.ssa; -} - -static nir_ssa_def * -blorp_nir_txf_ms(nir_builder *b, struct brw_blorp_blit_vars *v, - nir_ssa_def *pos, nir_ssa_def *mcs, nir_alu_type dst_type) -{ - nir_tex_instr *tex = - blorp_create_nir_tex_instr(b, v, nir_texop_txf_ms, pos, - mcs != NULL ? 
3 : 2, dst_type); - - tex->sampler_dim = GLSL_SAMPLER_DIM_MS; - - tex->src[1].src_type = nir_tex_src_ms_index; - if (pos->num_components == 2) { - tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0)); - } else { - assert(pos->num_components == 3); - tex->src[1].src = nir_src_for_ssa(nir_channel(b, pos, 2)); - } - - if (mcs) { - tex->src[2].src_type = nir_tex_src_ms_mcs; - tex->src[2].src = nir_src_for_ssa(mcs); - } - - nir_builder_instr_insert(b, &tex->instr); - - return &tex->dest.ssa; -} - -static nir_ssa_def * -blorp_nir_txf_ms_mcs(nir_builder *b, struct brw_blorp_blit_vars *v, nir_ssa_def *pos) -{ - nir_tex_instr *tex = - blorp_create_nir_tex_instr(b, v, nir_texop_txf_ms_mcs, - pos, 1, nir_type_int); - - tex->sampler_dim = GLSL_SAMPLER_DIM_MS; - - nir_builder_instr_insert(b, &tex->instr); - - return &tex->dest.ssa; -} - -static nir_ssa_def * -nir_mask_shift_or(struct nir_builder *b, nir_ssa_def *dst, nir_ssa_def *src, - uint32_t src_mask, int src_left_shift) -{ - nir_ssa_def *masked = nir_iand(b, src, nir_imm_int(b, src_mask)); - - nir_ssa_def *shifted; - if (src_left_shift > 0) { - shifted = nir_ishl(b, masked, nir_imm_int(b, src_left_shift)); - } else if (src_left_shift < 0) { - shifted = nir_ushr(b, masked, nir_imm_int(b, -src_left_shift)); - } else { - assert(src_left_shift == 0); - shifted = masked; - } - - return nir_ior(b, dst, shifted); -} - -/** - * Emit code to compensate for the difference between Y and W tiling. - * - * This code modifies the X and Y coordinates according to the formula: - * - * (X', Y', S') = detile(W-MAJOR, tile(Y-MAJOR, X, Y, S)) - * - * (See brw_blorp_build_nir_shader). 
- */ -static inline nir_ssa_def * -blorp_nir_retile_y_to_w(nir_builder *b, nir_ssa_def *pos) -{ - assert(pos->num_components == 2); - nir_ssa_def *x_Y = nir_channel(b, pos, 0); - nir_ssa_def *y_Y = nir_channel(b, pos, 1); - - /* Given X and Y coordinates that describe an address using Y tiling, - * translate to the X and Y coordinates that describe the same address - * using W tiling. - * - * If we break down the low order bits of X and Y, using a - * single letter to represent each low-order bit: - * - * X = A << 7 | 0bBCDEFGH - * Y = J << 5 | 0bKLMNP (1) - * - * Then we can apply the Y tiling formula to see the memory offset being - * addressed: - * - * offset = (J * tile_pitch + A) << 12 | 0bBCDKLMNPEFGH (2) - * - * If we apply the W detiling formula to this memory location, that the - * corresponding X' and Y' coordinates are: - * - * X' = A << 6 | 0bBCDPFH (3) - * Y' = J << 6 | 0bKLMNEG - * - * Combining (1) and (3), we see that to transform (X, Y) to (X', Y'), - * we need to make the following computation: - * - * X' = (X & ~0b1011) >> 1 | (Y & 0b1) << 2 | X & 0b1 (4) - * Y' = (Y & ~0b1) << 1 | (X & 0b1000) >> 2 | (X & 0b10) >> 1 - */ - nir_ssa_def *x_W = nir_imm_int(b, 0); - x_W = nir_mask_shift_or(b, x_W, x_Y, 0xfffffff4, -1); - x_W = nir_mask_shift_or(b, x_W, y_Y, 0x1, 2); - x_W = nir_mask_shift_or(b, x_W, x_Y, 0x1, 0); - - nir_ssa_def *y_W = nir_imm_int(b, 0); - y_W = nir_mask_shift_or(b, y_W, y_Y, 0xfffffffe, 1); - y_W = nir_mask_shift_or(b, y_W, x_Y, 0x8, -2); - y_W = nir_mask_shift_or(b, y_W, x_Y, 0x2, -1); - - return nir_vec2(b, x_W, y_W); -} - -/** - * Emit code to compensate for the difference between Y and W tiling. - * - * This code modifies the X and Y coordinates according to the formula: - * - * (X', Y', S') = detile(Y-MAJOR, tile(W-MAJOR, X, Y, S)) - * - * (See brw_blorp_build_nir_shader). 
- */ -static inline nir_ssa_def * -blorp_nir_retile_w_to_y(nir_builder *b, nir_ssa_def *pos) -{ - assert(pos->num_components == 2); - nir_ssa_def *x_W = nir_channel(b, pos, 0); - nir_ssa_def *y_W = nir_channel(b, pos, 1); - - /* Applying the same logic as above, but in reverse, we obtain the - * formulas: - * - * X' = (X & ~0b101) << 1 | (Y & 0b10) << 2 | (Y & 0b1) << 1 | X & 0b1 - * Y' = (Y & ~0b11) >> 1 | (X & 0b100) >> 2 - */ - nir_ssa_def *x_Y = nir_imm_int(b, 0); - x_Y = nir_mask_shift_or(b, x_Y, x_W, 0xfffffffa, 1); - x_Y = nir_mask_shift_or(b, x_Y, y_W, 0x2, 2); - x_Y = nir_mask_shift_or(b, x_Y, y_W, 0x1, 1); - x_Y = nir_mask_shift_or(b, x_Y, x_W, 0x1, 0); - - nir_ssa_def *y_Y = nir_imm_int(b, 0); - y_Y = nir_mask_shift_or(b, y_Y, y_W, 0xfffffffc, -1); - y_Y = nir_mask_shift_or(b, y_Y, x_W, 0x4, -2); - - return nir_vec2(b, x_Y, y_Y); -} - -/** - * Emit code to compensate for the difference between MSAA and non-MSAA - * surfaces. - * - * This code modifies the X and Y coordinates according to the formula: - * - * (X', Y', S') = encode_msaa(num_samples, IMS, X, Y, S) - * - * (See brw_blorp_blit_program). - */ -static inline nir_ssa_def * -blorp_nir_encode_msaa(nir_builder *b, nir_ssa_def *pos, - unsigned num_samples, enum isl_msaa_layout layout) -{ - assert(pos->num_components == 2 || pos->num_components == 3); - - switch (layout) { - case ISL_MSAA_LAYOUT_NONE: - assert(pos->num_components == 2); - return pos; - case ISL_MSAA_LAYOUT_ARRAY: - /* No translation needed */ - return pos; - case ISL_MSAA_LAYOUT_INTERLEAVED: { - nir_ssa_def *x_in = nir_channel(b, pos, 0); - nir_ssa_def *y_in = nir_channel(b, pos, 1); - nir_ssa_def *s_in = pos->num_components == 2 ? 
nir_imm_int(b, 0) : - nir_channel(b, pos, 2); - - nir_ssa_def *x_out = nir_imm_int(b, 0); - nir_ssa_def *y_out = nir_imm_int(b, 0); - switch (num_samples) { - case 2: - case 4: - /* encode_msaa(2, IMS, X, Y, S) = (X', Y', 0) - * where X' = (X & ~0b1) << 1 | (S & 0b1) << 1 | (X & 0b1) - * Y' = Y - * - * encode_msaa(4, IMS, X, Y, S) = (X', Y', 0) - * where X' = (X & ~0b1) << 1 | (S & 0b1) << 1 | (X & 0b1) - * Y' = (Y & ~0b1) << 1 | (S & 0b10) | (Y & 0b1) - */ - x_out = nir_mask_shift_or(b, x_out, x_in, 0xfffffffe, 1); - x_out = nir_mask_shift_or(b, x_out, s_in, 0x1, 1); - x_out = nir_mask_shift_or(b, x_out, x_in, 0x1, 0); - if (num_samples == 2) { - y_out = y_in; - } else { - y_out = nir_mask_shift_or(b, y_out, y_in, 0xfffffffe, 1); - y_out = nir_mask_shift_or(b, y_out, s_in, 0x2, 0); - y_out = nir_mask_shift_or(b, y_out, y_in, 0x1, 0); - } - break; - - case 8: - /* encode_msaa(8, IMS, X, Y, S) = (X', Y', 0) - * where X' = (X & ~0b1) << 2 | (S & 0b100) | (S & 0b1) << 1 - * | (X & 0b1) - * Y' = (Y & ~0b1) << 1 | (S & 0b10) | (Y & 0b1) - */ - x_out = nir_mask_shift_or(b, x_out, x_in, 0xfffffffe, 2); - x_out = nir_mask_shift_or(b, x_out, s_in, 0x4, 0); - x_out = nir_mask_shift_or(b, x_out, s_in, 0x1, 1); - x_out = nir_mask_shift_or(b, x_out, x_in, 0x1, 0); - y_out = nir_mask_shift_or(b, y_out, y_in, 0xfffffffe, 1); - y_out = nir_mask_shift_or(b, y_out, s_in, 0x2, 0); - y_out = nir_mask_shift_or(b, y_out, y_in, 0x1, 0); - break; - - case 16: - /* encode_msaa(16, IMS, X, Y, S) = (X', Y', 0) - * where X' = (X & ~0b1) << 2 | (S & 0b100) | (S & 0b1) << 1 - * | (X & 0b1) - * Y' = (Y & ~0b1) << 2 | (S & 0b1000) >> 1 (S & 0b10) - * | (Y & 0b1) - */ - x_out = nir_mask_shift_or(b, x_out, x_in, 0xfffffffe, 2); - x_out = nir_mask_shift_or(b, x_out, s_in, 0x4, 0); - x_out = nir_mask_shift_or(b, x_out, s_in, 0x1, 1); - x_out = nir_mask_shift_or(b, x_out, x_in, 0x1, 0); - y_out = nir_mask_shift_or(b, y_out, y_in, 0xfffffffe, 2); - y_out = nir_mask_shift_or(b, y_out, s_in, 0x8, -1); - 
y_out = nir_mask_shift_or(b, y_out, s_in, 0x2, 0); - y_out = nir_mask_shift_or(b, y_out, y_in, 0x1, 0); - break; - - default: - unreachable("Invalid number of samples for IMS layout"); - } - - return nir_vec2(b, x_out, y_out); - } - - default: - unreachable("Invalid MSAA layout"); - } -} - -/** - * Emit code to compensate for the difference between MSAA and non-MSAA - * surfaces. - * - * This code modifies the X and Y coordinates according to the formula: - * - * (X', Y', S) = decode_msaa(num_samples, IMS, X, Y, S) - * - * (See brw_blorp_blit_program). - */ -static inline nir_ssa_def * -blorp_nir_decode_msaa(nir_builder *b, nir_ssa_def *pos, - unsigned num_samples, enum isl_msaa_layout layout) -{ - assert(pos->num_components == 2 || pos->num_components == 3); - - switch (layout) { - case ISL_MSAA_LAYOUT_NONE: - /* No translation necessary, and S should already be zero. */ - assert(pos->num_components == 2); - return pos; - case ISL_MSAA_LAYOUT_ARRAY: - /* No translation necessary. */ - return pos; - case ISL_MSAA_LAYOUT_INTERLEAVED: { - assert(pos->num_components == 2); - - nir_ssa_def *x_in = nir_channel(b, pos, 0); - nir_ssa_def *y_in = nir_channel(b, pos, 1); - - nir_ssa_def *x_out = nir_imm_int(b, 0); - nir_ssa_def *y_out = nir_imm_int(b, 0); - nir_ssa_def *s_out = nir_imm_int(b, 0); - switch (num_samples) { - case 2: - case 4: - /* decode_msaa(2, IMS, X, Y, 0) = (X', Y', S) - * where X' = (X & ~0b11) >> 1 | (X & 0b1) - * S = (X & 0b10) >> 1 - * - * decode_msaa(4, IMS, X, Y, 0) = (X', Y', S) - * where X' = (X & ~0b11) >> 1 | (X & 0b1) - * Y' = (Y & ~0b11) >> 1 | (Y & 0b1) - * S = (Y & 0b10) | (X & 0b10) >> 1 - */ - x_out = nir_mask_shift_or(b, x_out, x_in, 0xfffffffc, -1); - x_out = nir_mask_shift_or(b, x_out, x_in, 0x1, 0); - if (num_samples == 2) { - y_out = y_in; - s_out = nir_mask_shift_or(b, s_out, x_in, 0x2, -1); - } else { - y_out = nir_mask_shift_or(b, y_out, y_in, 0xfffffffc, -1); - y_out = nir_mask_shift_or(b, y_out, y_in, 0x1, 0); - s_out = 
nir_mask_shift_or(b, s_out, x_in, 0x2, -1); - s_out = nir_mask_shift_or(b, s_out, y_in, 0x2, 0); - } - break; - - case 8: - /* decode_msaa(8, IMS, X, Y, 0) = (X', Y', S) - * where X' = (X & ~0b111) >> 2 | (X & 0b1) - * Y' = (Y & ~0b11) >> 1 | (Y & 0b1) - * S = (X & 0b100) | (Y & 0b10) | (X & 0b10) >> 1 - */ - x_out = nir_mask_shift_or(b, x_out, x_in, 0xfffffff8, -2); - x_out = nir_mask_shift_or(b, x_out, x_in, 0x1, 0); - y_out = nir_mask_shift_or(b, y_out, y_in, 0xfffffffc, -1); - y_out = nir_mask_shift_or(b, y_out, y_in, 0x1, 0); - s_out = nir_mask_shift_or(b, s_out, x_in, 0x4, 0); - s_out = nir_mask_shift_or(b, s_out, y_in, 0x2, 0); - s_out = nir_mask_shift_or(b, s_out, x_in, 0x2, -1); - break; - - case 16: - /* decode_msaa(16, IMS, X, Y, 0) = (X', Y', S) - * where X' = (X & ~0b111) >> 2 | (X & 0b1) - * Y' = (Y & ~0b111) >> 2 | (Y & 0b1) - * S = (Y & 0b100) << 1 | (X & 0b100) | - * (Y & 0b10) | (X & 0b10) >> 1 - */ - x_out = nir_mask_shift_or(b, x_out, x_in, 0xfffffff8, -2); - x_out = nir_mask_shift_or(b, x_out, x_in, 0x1, 0); - y_out = nir_mask_shift_or(b, y_out, y_in, 0xfffffff8, -2); - y_out = nir_mask_shift_or(b, y_out, y_in, 0x1, 0); - s_out = nir_mask_shift_or(b, s_out, y_in, 0x4, 1); - s_out = nir_mask_shift_or(b, s_out, x_in, 0x4, 0); - s_out = nir_mask_shift_or(b, s_out, y_in, 0x2, 0); - s_out = nir_mask_shift_or(b, s_out, x_in, 0x2, -1); - break; - - default: - unreachable("Invalid number of samples for IMS layout"); - } - - return nir_vec3(b, x_out, y_out, s_out); - } - - default: - unreachable("Invalid MSAA layout"); - } -} - -/** - * Count the number of trailing 1 bits in the given value. 
For example: - * - * count_trailing_one_bits(0) == 0 - * count_trailing_one_bits(7) == 3 - * count_trailing_one_bits(11) == 2 - */ -static inline int count_trailing_one_bits(unsigned value) -{ -#ifdef HAVE___BUILTIN_CTZ - return __builtin_ctz(~value); -#else - return _mesa_bitcount(value & ~(value + 1)); -#endif -} - -static nir_ssa_def * -blorp_nir_manual_blend_average(nir_builder *b, struct brw_blorp_blit_vars *v, - nir_ssa_def *pos, unsigned tex_samples, - enum isl_aux_usage tex_aux_usage, - nir_alu_type dst_type) -{ - /* If non-null, this is the outer-most if statement */ - nir_if *outer_if = NULL; - - nir_variable *color = - nir_local_variable_create(b->impl, glsl_vec4_type(), "color"); - - nir_ssa_def *mcs = NULL; - if (tex_aux_usage == ISL_AUX_USAGE_MCS) - mcs = blorp_nir_txf_ms_mcs(b, v, pos); - - /* We add together samples using a binary tree structure, e.g. for 4x MSAA: - * - * result = ((sample[0] + sample[1]) + (sample[2] + sample[3])) / 4 - * - * This ensures that when all samples have the same value, no numerical - * precision is lost, since each addition operation always adds two equal - * values, and summing two equal floating point values does not lose - * precision. - * - * We perform this computation by treating the texture_data array as a - * stack and performing the following operations: - * - * - push sample 0 onto stack - * - push sample 1 onto stack - * - add top two stack entries - * - push sample 2 onto stack - * - push sample 3 onto stack - * - add top two stack entries - * - add top two stack entries - * - divide top stack entry by 4 - * - * Note that after pushing sample i onto the stack, the number of add - * operations we do is equal to the number of trailing 1 bits in i. This - * works provided the total number of samples is a power of two, which it - * always is for i965. - * - * For integer formats, we replace the add operations with average - * operations and skip the final division. 
- */ - nir_ssa_def *texture_data[5]; - unsigned stack_depth = 0; - for (unsigned i = 0; i < tex_samples; ++i) { - assert(stack_depth == _mesa_bitcount(i)); /* Loop invariant */ - - /* Push sample i onto the stack */ - assert(stack_depth < ARRAY_SIZE(texture_data)); - - nir_ssa_def *ms_pos = nir_vec3(b, nir_channel(b, pos, 0), - nir_channel(b, pos, 1), - nir_imm_int(b, i)); - texture_data[stack_depth++] = blorp_nir_txf_ms(b, v, ms_pos, mcs, dst_type); - - if (i == 0 && tex_aux_usage == ISL_AUX_USAGE_MCS) { - /* The Ivy Bridge PRM, Vol4 Part1 p27 (Multisample Control Surface) - * suggests an optimization: - * - * "A simple optimization with probable large return in - * performance is to compare the MCS value to zero (indicating - * all samples are on sample slice 0), and sample only from - * sample slice 0 using ld2dss if MCS is zero." - * - * Note that in the case where the MCS value is zero, sampling from - * sample slice 0 using ld2dss and sampling from sample 0 using - * ld2dms are equivalent (since all samples are on sample slice 0). - * Since we have already sampled from sample 0, all we need to do is - * skip the remaining fetches and averaging if MCS is zero. 
- */ - nir_ssa_def *mcs_zero = - nir_ieq(b, nir_channel(b, mcs, 0), nir_imm_int(b, 0)); - if (tex_samples == 16) { - mcs_zero = nir_iand(b, mcs_zero, - nir_ieq(b, nir_channel(b, mcs, 1), nir_imm_int(b, 0))); - } - - nir_if *if_stmt = nir_if_create(b->shader); - if_stmt->condition = nir_src_for_ssa(mcs_zero); - nir_cf_node_insert(b->cursor, &if_stmt->cf_node); - - b->cursor = nir_after_cf_list(&if_stmt->then_list); - nir_store_var(b, color, texture_data[0], 0xf); - - b->cursor = nir_after_cf_list(&if_stmt->else_list); - outer_if = if_stmt; - } - - for (int j = 0; j < count_trailing_one_bits(i); j++) { - assert(stack_depth >= 2); - --stack_depth; - - assert(dst_type == nir_type_float); - texture_data[stack_depth - 1] = - nir_fadd(b, texture_data[stack_depth - 1], - texture_data[stack_depth]); - } - } - - /* We should have just 1 sample on the stack now. */ - assert(stack_depth == 1); - - texture_data[0] = nir_fmul(b, texture_data[0], - nir_imm_float(b, 1.0 / tex_samples)); - - nir_store_var(b, color, texture_data[0], 0xf); - - if (outer_if) - b->cursor = nir_after_cf_node(&outer_if->cf_node); - - return nir_load_var(b, color); -} - -static inline nir_ssa_def * -nir_imm_vec2(nir_builder *build, float x, float y) -{ - nir_const_value v; - - memset(&v, 0, sizeof(v)); - v.f32[0] = x; - v.f32[1] = y; - - return nir_build_imm(build, 4, 32, v); -} - -static nir_ssa_def * -blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos, - unsigned tex_samples, - const struct brw_blorp_blit_prog_key *key, - struct brw_blorp_blit_vars *v) -{ - nir_ssa_def *pos_xy = nir_channels(b, pos, 0x3); - nir_ssa_def *rect_grid = nir_load_var(b, v->v_rect_grid); - nir_ssa_def *scale = nir_imm_vec2(b, key->x_scale, key->y_scale); - - /* Translate coordinates to lay out the samples in a rectangular grid - * roughly corresponding to sample locations. - */ - pos_xy = nir_fmul(b, pos_xy, scale); - /* Adjust coordinates so that integers represent pixel centers rather - * than pixel edges. 
- */ - pos_xy = nir_fadd(b, pos_xy, nir_imm_float(b, -0.5)); - /* Clamp the X, Y texture coordinates to properly handle the sampling of - * texels on texture edges. - */ - pos_xy = nir_fmin(b, nir_fmax(b, pos_xy, nir_imm_float(b, 0.0)), - nir_vec2(b, nir_channel(b, rect_grid, 0), - nir_channel(b, rect_grid, 1))); - - /* Store the fractional parts to be used as bilinear interpolation - * coefficients. - */ - nir_ssa_def *frac_xy = nir_ffract(b, pos_xy); - /* Round the float coordinates down to nearest integer */ - pos_xy = nir_fdiv(b, nir_ftrunc(b, pos_xy), scale); - - nir_ssa_def *tex_data[4]; - for (unsigned i = 0; i < 4; ++i) { - float sample_off_x = (float)(i & 0x1) / key->x_scale; - float sample_off_y = (float)((i >> 1) & 0x1) / key->y_scale; - nir_ssa_def *sample_off = nir_imm_vec2(b, sample_off_x, sample_off_y); - - nir_ssa_def *sample_coords = nir_fadd(b, pos_xy, sample_off); - nir_ssa_def *sample_coords_int = nir_f2i(b, sample_coords); - - /* The MCS value we fetch has to match up with the pixel that we're - * sampling from. Since we sample from different pixels in each - * iteration of this "for" loop, the call to mcs_fetch() should be - * here inside the loop after computing the pixel coordinates. - */ - nir_ssa_def *mcs = NULL; - if (key->tex_aux_usage == ISL_AUX_USAGE_MCS) - mcs = blorp_nir_txf_ms_mcs(b, v, sample_coords_int); - - /* Compute sample index and map the sample index to a sample number. - * Sample index layout shows the numbering of slots in a rectangular - * grid of samples with in a pixel. Sample number layout shows the - * rectangular grid of samples roughly corresponding to the real sample - * locations with in a pixel. - * In case of 4x MSAA, layout of sample indices matches the layout of - * sample numbers: - * --------- - * | 0 | 1 | - * --------- - * | 2 | 3 | - * --------- - * - * In case of 8x MSAA the two layouts don't match. 
- * sample index layout : --------- sample number layout : --------- - * | 0 | 1 | | 3 | 7 | - * --------- --------- - * | 2 | 3 | | 5 | 0 | - * --------- --------- - * | 4 | 5 | | 1 | 2 | - * --------- --------- - * | 6 | 7 | | 4 | 6 | - * --------- --------- - * - * Fortunately, this can be done fairly easily as: - * S' = (0x17306425 >> (S * 4)) & 0xf - * - * In the case of 16x MSAA the two layouts don't match. - * Sample index layout: Sample number layout: - * --------------------- --------------------- - * | 0 | 1 | 2 | 3 | | 15 | 10 | 9 | 7 | - * --------------------- --------------------- - * | 4 | 5 | 6 | 7 | | 4 | 1 | 3 | 13 | - * --------------------- --------------------- - * | 8 | 9 | 10 | 11 | | 12 | 2 | 0 | 6 | - * --------------------- --------------------- - * | 12 | 13 | 14 | 15 | | 11 | 8 | 5 | 14 | - * --------------------- --------------------- - * - * This is equivalent to - * S' = (0xe58b602cd31479af >> (S * 4)) & 0xf - */ - nir_ssa_def *frac = nir_ffract(b, sample_coords); - nir_ssa_def *sample = - nir_fdot2(b, frac, nir_imm_vec2(b, key->x_scale, - key->x_scale * key->y_scale)); - sample = nir_f2i(b, sample); - - if (tex_samples == 8) { - sample = nir_iand(b, nir_ishr(b, nir_imm_int(b, 0x64210573), - nir_ishl(b, sample, nir_imm_int(b, 2))), - nir_imm_int(b, 0xf)); - } else if (tex_samples == 16) { - nir_ssa_def *sample_low = - nir_iand(b, nir_ishr(b, nir_imm_int(b, 0xd31479af), - nir_ishl(b, sample, nir_imm_int(b, 2))), - nir_imm_int(b, 0xf)); - nir_ssa_def *sample_high = - nir_iand(b, nir_ishr(b, nir_imm_int(b, 0xe58b602c), - nir_ishl(b, nir_iadd(b, sample, - nir_imm_int(b, -8)), - nir_imm_int(b, 2))), - nir_imm_int(b, 0xf)); - - sample = nir_bcsel(b, nir_ilt(b, sample, nir_imm_int(b, 8)), - sample_low, sample_high); - } - nir_ssa_def *pos_ms = nir_vec3(b, nir_channel(b, sample_coords_int, 0), - nir_channel(b, sample_coords_int, 1), - sample); - tex_data[i] = blorp_nir_txf_ms(b, v, pos_ms, mcs, key->texture_data_type); - } - - nir_ssa_def 
*frac_x = nir_channel(b, frac_xy, 0); - nir_ssa_def *frac_y = nir_channel(b, frac_xy, 1); - return nir_flrp(b, nir_flrp(b, tex_data[0], tex_data[1], frac_x), - nir_flrp(b, tex_data[2], tex_data[3], frac_x), - frac_y); -} - -/** - * Generator for WM programs used in BLORP blits. - * - * The bulk of the work done by the WM program is to wrap and unwrap the - * coordinate transformations used by the hardware to store surfaces in - * memory. The hardware transforms a pixel location (X, Y, S) (where S is the - * sample index for a multisampled surface) to a memory offset by the - * following formulas: - * - * offset = tile(tiling_format, encode_msaa(num_samples, layout, X, Y, S)) - * (X, Y, S) = decode_msaa(num_samples, layout, detile(tiling_format, offset)) - * - * For a single-sampled surface, or for a multisampled surface using - * INTEL_MSAA_LAYOUT_UMS, encode_msaa() and decode_msaa are the identity - * function: - * - * encode_msaa(1, NONE, X, Y, 0) = (X, Y, 0) - * decode_msaa(1, NONE, X, Y, 0) = (X, Y, 0) - * encode_msaa(n, UMS, X, Y, S) = (X, Y, S) - * decode_msaa(n, UMS, X, Y, S) = (X, Y, S) - * - * For a 4x multisampled surface using INTEL_MSAA_LAYOUT_IMS, encode_msaa() - * embeds the sample number into bit 1 of the X and Y coordinates: - * - * encode_msaa(4, IMS, X, Y, S) = (X', Y', 0) - * where X' = (X & ~0b1) << 1 | (S & 0b1) << 1 | (X & 0b1) - * Y' = (Y & ~0b1 ) << 1 | (S & 0b10) | (Y & 0b1) - * decode_msaa(4, IMS, X, Y, 0) = (X', Y', S) - * where X' = (X & ~0b11) >> 1 | (X & 0b1) - * Y' = (Y & ~0b11) >> 1 | (Y & 0b1) - * S = (Y & 0b10) | (X & 0b10) >> 1 - * - * For an 8x multisampled surface using INTEL_MSAA_LAYOUT_IMS, encode_msaa() - * embeds the sample number into bits 1 and 2 of the X coordinate and bit 1 of - * the Y coordinate: - * - * encode_msaa(8, IMS, X, Y, S) = (X', Y', 0) - * where X' = (X & ~0b1) << 2 | (S & 0b100) | (S & 0b1) << 1 | (X & 0b1) - * Y' = (Y & ~0b1) << 1 | (S & 0b10) | (Y & 0b1) - * decode_msaa(8, IMS, X, Y, 0) = (X', Y', S) - * 
where X' = (X & ~0b111) >> 2 | (X & 0b1) - * Y' = (Y & ~0b11) >> 1 | (Y & 0b1) - * S = (X & 0b100) | (Y & 0b10) | (X & 0b10) >> 1 - * - * For X tiling, tile() combines together the low-order bits of the X and Y - * coordinates in the pattern 0byyyxxxxxxxxx, creating 4k tiles that are 512 - * bytes wide and 8 rows high: - * - * tile(x_tiled, X, Y, S) = A - * where A = tile_num << 12 | offset - * tile_num = (Y' >> 3) * tile_pitch + (X' >> 9) - * offset = (Y' & 0b111) << 9 - * | (X' & 0b111111111) - * X' = X * cpp - * Y' = Y + S * qpitch - * detile(x_tiled, A) = (X, Y, S) - * where X = X' / cpp - * Y = Y' % qpitch - * S = Y' / qpitch - * Y' = (tile_num / tile_pitch) << 3 - * | (A & 0b111000000000) >> 9 - * X' = (tile_num % tile_pitch) << 9 - * | (A & 0b111111111) - * - * (In all tiling formulas, cpp is the number of bytes occupied by a single - * sample ("chars per pixel"), tile_pitch is the number of 4k tiles required - * to fill the width of the surface, and qpitch is the spacing (in rows) - * between array slices). 
- * - * For Y tiling, tile() combines together the low-order bits of the X and Y - * coordinates in the pattern 0bxxxyyyyyxxxx, creating 4k tiles that are 128 - * bytes wide and 32 rows high: - * - * tile(y_tiled, X, Y, S) = A - * where A = tile_num << 12 | offset - * tile_num = (Y' >> 5) * tile_pitch + (X' >> 7) - * offset = (X' & 0b1110000) << 5 - * | (Y' & 0b11111) << 4 - * | (X' & 0b1111) - * X' = X * cpp - * Y' = Y + S * qpitch - * detile(y_tiled, A) = (X, Y, S) - * where X = X' / cpp - * Y = Y' % qpitch - * S = Y' / qpitch - * Y' = (tile_num / tile_pitch) << 5 - * | (A & 0b111110000) >> 4 - * X' = (tile_num % tile_pitch) << 7 - * | (A & 0b111000000000) >> 5 - * | (A & 0b1111) - * - * For W tiling, tile() combines together the low-order bits of the X and Y - * coordinates in the pattern 0bxxxyyyyxyxyx, creating 4k tiles that are 64 - * bytes wide and 64 rows high (note that W tiling is only used for stencil - * buffers, which always have cpp = 1 and S=0): - * - * tile(w_tiled, X, Y, S) = A - * where A = tile_num << 12 | offset - * tile_num = (Y' >> 6) * tile_pitch + (X' >> 6) - * offset = (X' & 0b111000) << 6 - * | (Y' & 0b111100) << 3 - * | (X' & 0b100) << 2 - * | (Y' & 0b10) << 2 - * | (X' & 0b10) << 1 - * | (Y' & 0b1) << 1 - * | (X' & 0b1) - * X' = X * cpp = X - * Y' = Y + S * qpitch - * detile(w_tiled, A) = (X, Y, S) - * where X = X' / cpp = X' - * Y = Y' % qpitch = Y' - * S = Y' / qpitch = 0 - * Y' = (tile_num / tile_pitch) << 6 - * | (A & 0b111100000) >> 3 - * | (A & 0b1000) >> 2 - * | (A & 0b10) >> 1 - * X' = (tile_num % tile_pitch) << 6 - * | (A & 0b111000000000) >> 6 - * | (A & 0b10000) >> 2 - * | (A & 0b100) >> 1 - * | (A & 0b1) - * - * Finally, for a non-tiled surface, tile() simply combines together the X and - * Y coordinates in the natural way: - * - * tile(untiled, X, Y, S) = A - * where A = Y' * pitch + X' - * X' = X * cpp - * Y' = Y + S * qpitch - * detile(untiled, A) = (X, Y, S) - * where X = X' / cpp - * Y = Y' % qpitch - * S = Y' / qpitch - * 
X' = A % pitch - * Y' = A / pitch - * - * (In these formulas, pitch is the number of bytes occupied by a single row - * of samples). - */ -static nir_shader * -brw_blorp_build_nir_shader(struct blorp_context *blorp, - const struct brw_blorp_blit_prog_key *key) -{ - const struct brw_device_info *devinfo = blorp->isl_dev->info; - nir_ssa_def *src_pos, *dst_pos, *color; - - /* Sanity checks */ - if (key->dst_tiled_w && key->rt_samples > 1) { - /* If the destination image is W tiled and multisampled, then the thread - * must be dispatched once per sample, not once per pixel. This is - * necessary because after conversion between W and Y tiling, there's no - * guarantee that all samples corresponding to a single pixel will still - * be together. - */ - assert(key->persample_msaa_dispatch); - } - - if (key->blend) { - /* We are blending, which means we won't have an opportunity to - * translate the tiling and sample count for the texture surface. So - * the surface state for the texture must be configured with the correct - * tiling and sample count. - */ - assert(!key->src_tiled_w); - assert(key->tex_samples == key->src_samples); - assert(key->tex_layout == key->src_layout); - assert(key->tex_samples > 0); - } - - if (key->persample_msaa_dispatch) { - /* It only makes sense to do persample dispatch if the render target is - * configured as multisampled. 
- */ - assert(key->rt_samples > 0); - } - - /* Make sure layout is consistent with sample count */ - assert((key->tex_layout == ISL_MSAA_LAYOUT_NONE) == - (key->tex_samples <= 1)); - assert((key->rt_layout == ISL_MSAA_LAYOUT_NONE) == - (key->rt_samples <= 1)); - assert((key->src_layout == ISL_MSAA_LAYOUT_NONE) == - (key->src_samples <= 1)); - assert((key->dst_layout == ISL_MSAA_LAYOUT_NONE) == - (key->dst_samples <= 1)); - - nir_builder b; - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); - - struct brw_blorp_blit_vars v; - brw_blorp_blit_vars_init(&b, &v, key); - - dst_pos = blorp_blit_get_frag_coords(&b, key, &v); - - /* Render target and texture hardware don't support W tiling until Gen8. */ - const bool rt_tiled_w = false; - const bool tex_tiled_w = devinfo->gen >= 8 && key->src_tiled_w; - - /* The address that data will be written to is determined by the - * coordinates supplied to the WM thread and the tiling and sample count of - * the render target, according to the formula: - * - * (X, Y, S) = decode_msaa(rt_samples, detile(rt_tiling, offset)) - * - * If the actual tiling and sample count of the destination surface are not - * the same as the configuration of the render target, then these - * coordinates are wrong and we have to adjust them to compensate for the - * difference. - */ - if (rt_tiled_w != key->dst_tiled_w || - key->rt_samples != key->dst_samples || - key->rt_layout != key->dst_layout) { - dst_pos = blorp_nir_encode_msaa(&b, dst_pos, key->rt_samples, - key->rt_layout); - /* Now (X, Y, S) = detile(rt_tiling, offset) */ - if (rt_tiled_w != key->dst_tiled_w) - dst_pos = blorp_nir_retile_y_to_w(&b, dst_pos); - /* Now (X, Y, S) = detile(rt_tiling, offset) */ - dst_pos = blorp_nir_decode_msaa(&b, dst_pos, key->dst_samples, - key->dst_layout); - } - - /* Now (X, Y, S) = decode_msaa(dst_samples, detile(dst_tiling, offset)). 
- * - * That is: X, Y and S now contain the true coordinates and sample index of - * the data that the WM thread should output. - * - * If we need to kill pixels that are outside the destination rectangle, - * now is the time to do it. - */ - if (key->use_kill) { - assert(!(key->blend && key->blit_scaled)); - blorp_nir_discard_if_outside_rect(&b, dst_pos, &v); - } - - src_pos = blorp_blit_apply_transform(&b, nir_i2f(&b, dst_pos), &v); - if (dst_pos->num_components == 3) { - /* The sample coordinate is an integer that we want left alone but - * blorp_blit_apply_transform() blindly applies the transform to all - * three coordinates. Grab the original sample index. - */ - src_pos = nir_vec3(&b, nir_channel(&b, src_pos, 0), - nir_channel(&b, src_pos, 1), - nir_channel(&b, dst_pos, 2)); - } - - /* If the source image is not multisampled, then we want to fetch sample - * number 0, because that's the only sample there is. - */ - if (key->src_samples == 1) - src_pos = nir_channels(&b, src_pos, 0x3); - - /* X, Y, and S are now the coordinates of the pixel in the source image - * that we want to texture from. Exception: if we are blending, then S is - * irrelevant, because we are going to fetch all samples. - */ - if (key->blend && !key->blit_scaled) { - /* Resolves (effecively) use texelFetch, so we need integers and we - * don't care about the sample index if we got one. - */ - src_pos = nir_f2i(&b, nir_channels(&b, src_pos, 0x3)); - - if (devinfo->gen == 6) { - /* Because gen6 only supports 4x interleved MSAA, we can do all the - * blending we need with a single linear-interpolated texture lookup - * at the center of the sample. The texture coordinates to be odd - * integers so that they correspond to the center of a 2x2 block - * representing the four samples that maxe up a pixel. So we need - * to multiply our X and Y coordinates each by 2 and then add 1. 
- */ - src_pos = nir_ishl(&b, src_pos, nir_imm_int(&b, 1)); - src_pos = nir_iadd(&b, src_pos, nir_imm_int(&b, 1)); - src_pos = nir_i2f(&b, src_pos); - color = blorp_nir_tex(&b, &v, src_pos, key->texture_data_type); - } else { - /* Gen7+ hardware doesn't automaticaly blend. */ - color = blorp_nir_manual_blend_average(&b, &v, src_pos, key->src_samples, - key->tex_aux_usage, - key->texture_data_type); - } - } else if (key->blend && key->blit_scaled) { - assert(!key->use_kill); - color = blorp_nir_manual_blend_bilinear(&b, src_pos, key->src_samples, key, &v); - } else { - if (key->bilinear_filter) { - color = blorp_nir_tex(&b, &v, src_pos, key->texture_data_type); - } else { - /* We're going to use texelFetch, so we need integers */ - if (src_pos->num_components == 2) { - src_pos = nir_f2i(&b, src_pos); - } else { - assert(src_pos->num_components == 3); - src_pos = nir_vec3(&b, nir_channel(&b, nir_f2i(&b, src_pos), 0), - nir_channel(&b, nir_f2i(&b, src_pos), 1), - nir_channel(&b, src_pos, 2)); - } - - /* We aren't blending, which means we just want to fetch a single - * sample from the source surface. The address that we want to fetch - * from is related to the X, Y and S values according to the formula: - * - * (X, Y, S) = decode_msaa(src_samples, detile(src_tiling, offset)). - * - * If the actual tiling and sample count of the source surface are - * not the same as the configuration of the texture, then we need to - * adjust the coordinates to compensate for the difference. 
- */ - if (tex_tiled_w != key->src_tiled_w || - key->tex_samples != key->src_samples || - key->tex_layout != key->src_layout) { - src_pos = blorp_nir_encode_msaa(&b, src_pos, key->src_samples, - key->src_layout); - /* Now (X, Y, S) = detile(src_tiling, offset) */ - if (tex_tiled_w != key->src_tiled_w) - src_pos = blorp_nir_retile_w_to_y(&b, src_pos); - /* Now (X, Y, S) = detile(tex_tiling, offset) */ - src_pos = blorp_nir_decode_msaa(&b, src_pos, key->tex_samples, - key->tex_layout); - } - - /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling, offset)). - * - * In other words: X, Y, and S now contain values which, when passed to - * the texturing unit, will cause data to be read from the correct - * memory location. So we can fetch the texel now. - */ - if (key->src_samples == 1) { - color = blorp_nir_txf(&b, &v, src_pos, key->texture_data_type); - } else { - nir_ssa_def *mcs = NULL; - if (key->tex_aux_usage == ISL_AUX_USAGE_MCS) - mcs = blorp_nir_txf_ms_mcs(&b, &v, src_pos); - - color = blorp_nir_txf_ms(&b, &v, src_pos, mcs, key->texture_data_type); - } - } - } - - nir_store_var(&b, v.color_out, color, 0xf); - - return b.shader; -} - -static void -brw_blorp_get_blit_kernel(struct blorp_context *blorp, - struct blorp_params *params, - const struct brw_blorp_blit_prog_key *prog_key) -{ - if (blorp->lookup_shader(blorp, prog_key, sizeof(*prog_key), - ¶ms->wm_prog_kernel, ¶ms->wm_prog_data)) - return; - - const unsigned *program; - unsigned program_size; - struct brw_blorp_prog_data prog_data; - - /* Try and compile with NIR first. If that fails, fall back to the old - * method of building shaders manually. 
- */ - nir_shader *nir = brw_blorp_build_nir_shader(blorp, prog_key); - struct brw_wm_prog_key wm_key; - brw_blorp_init_wm_prog_key(&wm_key); - wm_key.tex.compressed_multisample_layout_mask = - prog_key->tex_aux_usage == ISL_AUX_USAGE_MCS; - wm_key.tex.msaa_16 = prog_key->tex_samples == 16; - wm_key.multisample_fbo = prog_key->rt_samples > 1; - - program = brw_blorp_compile_nir_shader(blorp, nir, &wm_key, false, - &prog_data, &program_size); - - blorp->upload_shader(blorp, prog_key, sizeof(*prog_key), - program, program_size, - &prog_data, sizeof(prog_data), - ¶ms->wm_prog_kernel, ¶ms->wm_prog_data); -} - -static void -brw_blorp_setup_coord_transform(struct brw_blorp_coord_transform *xform, - GLfloat src0, GLfloat src1, - GLfloat dst0, GLfloat dst1, - bool mirror) -{ - float scale = (src1 - src0) / (dst1 - dst0); - if (!mirror) { - /* When not mirroring a coordinate (say, X), we need: - * src_x - src_x0 = (dst_x - dst_x0 + 0.5) * scale - * Therefore: - * src_x = src_x0 + (dst_x - dst_x0 + 0.5) * scale - * - * blorp program uses "round toward zero" to convert the - * transformed floating point coordinates to integer coordinates, - * whereas the behaviour we actually want is "round to nearest", - * so 0.5 provides the necessary correction. - */ - xform->multiplier = scale; - xform->offset = src0 + (-dst0 + 0.5f) * scale; - } else { - /* When mirroring X we need: - * src_x - src_x0 = dst_x1 - dst_x - 0.5 - * Therefore: - * src_x = src_x0 + (dst_x1 -dst_x - 0.5) * scale - */ - xform->multiplier = -scale; - xform->offset = src0 + (dst1 - 0.5f) * scale; - } -} - -/** - * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+ - * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are - * - * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE - * 0 1 2 3 4 5 - * 4 5 6 7 0 1 - * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE - * - * which is simply adding 4 then modding by 8 (or anding with 7). 
- * - * We then may need to apply workarounds for textureGather hardware bugs. - */ -static enum isl_channel_select -swizzle_to_scs(GLenum swizzle) -{ - return (enum isl_channel_select)((swizzle + 4) & 7); -} - -static void -surf_convert_to_single_slice(const struct isl_device *isl_dev, - struct brw_blorp_surface_info *info) -{ - /* This only makes sense for a single level and array slice */ - assert(info->view.levels == 1 && info->view.array_len == 1); - - /* Just bail if we have nothing to do. */ - if (info->surf.dim == ISL_SURF_DIM_2D && - info->view.base_level == 0 && info->view.base_array_layer == 0 && - info->surf.levels == 0 && info->surf.logical_level0_px.array_len == 0) - return; - - uint32_t x_offset_sa, y_offset_sa; - isl_surf_get_image_offset_sa(&info->surf, info->view.base_level, - info->view.base_array_layer, 0, - &x_offset_sa, &y_offset_sa); - - uint32_t byte_offset; - isl_tiling_get_intratile_offset_sa(isl_dev, info->surf.tiling, - info->view.format, info->surf.row_pitch, - x_offset_sa, y_offset_sa, - &byte_offset, - &info->tile_x_sa, &info->tile_y_sa); - info->addr.offset += byte_offset; - - /* TODO: Once this file gets converted to C, we shouls just use designated - * initializers. - */ - struct isl_surf_init_info init_info = { 0, }; - - init_info.dim = ISL_SURF_DIM_2D; - init_info.format = ISL_FORMAT_R8_UINT; - init_info.width = - minify(info->surf.logical_level0_px.width, info->view.base_level); - init_info.height = - minify(info->surf.logical_level0_px.height, info->view.base_level); - init_info.depth = 1; - init_info.levels = 1; - init_info.array_len = 1; - init_info.samples = info->surf.samples; - init_info.min_pitch = info->surf.row_pitch; - init_info.usage = info->surf.usage; - init_info.tiling_flags = 1 << info->surf.tiling; - - isl_surf_init_s(isl_dev, &info->surf, &init_info); - assert(info->surf.row_pitch == init_info.min_pitch); - - /* The view is also different now. 
*/ - info->view.base_level = 0; - info->view.levels = 1; - info->view.base_array_layer = 0; - info->view.array_len = 1; -} - -static void -surf_fake_interleaved_msaa(const struct isl_device *isl_dev, - struct brw_blorp_surface_info *info) -{ - assert(info->surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED); - - /* First, we need to convert it to a simple 1-level 1-layer 2-D surface */ - surf_convert_to_single_slice(isl_dev, info); - - info->surf.logical_level0_px = info->surf.phys_level0_sa; - info->surf.samples = 1; - info->surf.msaa_layout = ISL_MSAA_LAYOUT_NONE; -} - -static void -surf_retile_w_to_y(const struct isl_device *isl_dev, - struct brw_blorp_surface_info *info) -{ - assert(info->surf.tiling == ISL_TILING_W); - - /* First, we need to convert it to a simple 1-level 1-layer 2-D surface */ - surf_convert_to_single_slice(isl_dev, info); - - /* On gen7+, we don't have interleaved multisampling for color render - * targets so we have to fake it. - * - * TODO: Are we sure we don't also need to fake it on gen6? - */ - if (isl_dev->info->gen > 6 && - info->surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) { - info->surf.logical_level0_px = info->surf.phys_level0_sa; - info->surf.samples = 1; - info->surf.msaa_layout = ISL_MSAA_LAYOUT_NONE; - } - - if (isl_dev->info->gen == 6) { - /* Gen6 stencil buffers have a very large alignment coming in from the - * miptree. It's out-of-bounds for what the surface state can handle. - * Since we have a single layer and level, it doesn't really matter as - * long as we don't pass a bogus value into isl_surf_fill_state(). - */ - info->surf.image_alignment_el = isl_extent3d(4, 2, 1); - } - - /* Now that we've converted everything to a simple 2-D surface with only - * one miplevel, we can go about retiling it. - */ - const unsigned x_align = 8, y_align = info->surf.samples != 0 ? 
8 : 4; - info->surf.tiling = ISL_TILING_Y0; - info->surf.logical_level0_px.width = - ALIGN(info->surf.logical_level0_px.width, x_align) * 2; - info->surf.logical_level0_px.height = - ALIGN(info->surf.logical_level0_px.height, y_align) / 2; - info->tile_x_sa *= 2; - info->tile_y_sa /= 2; -} - -void -blorp_blit(struct blorp_batch *batch, - const struct blorp_surf *src_surf, - unsigned src_level, unsigned src_layer, - enum isl_format src_format, int src_swizzle, - const struct blorp_surf *dst_surf, - unsigned dst_level, unsigned dst_layer, - enum isl_format dst_format, - float src_x0, float src_y0, - float src_x1, float src_y1, - float dst_x0, float dst_y0, - float dst_x1, float dst_y1, - GLenum filter, bool mirror_x, bool mirror_y) -{ - const struct brw_device_info *devinfo = batch->blorp->isl_dev->info; - - struct blorp_params params; - blorp_params_init(¶ms); - - brw_blorp_surface_info_init(batch->blorp, ¶ms.src, src_surf, src_level, - src_layer, src_format, false); - brw_blorp_surface_info_init(batch->blorp, ¶ms.dst, dst_surf, dst_level, - dst_layer, dst_format, true); - - struct brw_blorp_blit_prog_key wm_prog_key; - memset(&wm_prog_key, 0, sizeof(wm_prog_key)); - - if (isl_format_has_sint_channel(params.src.view.format)) { - wm_prog_key.texture_data_type = nir_type_int; - } else if (isl_format_has_uint_channel(params.src.view.format)) { - wm_prog_key.texture_data_type = nir_type_uint; - } else { - wm_prog_key.texture_data_type = nir_type_float; - } - - /* Scaled blitting or not. */ - wm_prog_key.blit_scaled = - ((dst_x1 - dst_x0) == (src_x1 - src_x0) && - (dst_y1 - dst_y0) == (src_y1 - src_y0)) ? false : true; - - /* Scaling factors used for bilinear filtering in multisample scaled - * blits. 
- */ - if (params.src.surf.samples == 16) - wm_prog_key.x_scale = 4.0f; - else - wm_prog_key.x_scale = 2.0f; - wm_prog_key.y_scale = params.src.surf.samples / wm_prog_key.x_scale; - - if (filter == GL_LINEAR && - params.src.surf.samples <= 1 && params.dst.surf.samples <= 1) - wm_prog_key.bilinear_filter = true; - - if ((params.src.surf.usage & ISL_SURF_USAGE_DEPTH_BIT) == 0 && - (params.src.surf.usage & ISL_SURF_USAGE_STENCIL_BIT) == 0 && - !isl_format_has_int_channel(params.src.surf.format) && - params.src.surf.samples > 1 && params.dst.surf.samples <= 1) { - /* We are downsampling a non-integer color buffer, so blend. - * - * Regarding integer color buffers, the OpenGL ES 3.2 spec says: - * - * "If the source formats are integer types or stencil values, a - * single sample's value is selected for each pixel." - * - * This implies we should not blend in that case. - */ - wm_prog_key.blend = true; - } - - /* src_samples and dst_samples are the true sample counts */ - wm_prog_key.src_samples = params.src.surf.samples; - wm_prog_key.dst_samples = params.dst.surf.samples; - - wm_prog_key.tex_aux_usage = params.src.aux_usage; - - /* src_layout and dst_layout indicate the true MSAA layout used by src and - * dst. - */ - wm_prog_key.src_layout = params.src.surf.msaa_layout; - wm_prog_key.dst_layout = params.dst.surf.msaa_layout; - - /* Round floating point values to nearest integer to avoid "off by one texel" - * kind of errors when blitting. 
- */ - params.x0 = params.wm_inputs.discard_rect.x0 = roundf(dst_x0); - params.y0 = params.wm_inputs.discard_rect.y0 = roundf(dst_y0); - params.x1 = params.wm_inputs.discard_rect.x1 = roundf(dst_x1); - params.y1 = params.wm_inputs.discard_rect.y1 = roundf(dst_y1); - - params.wm_inputs.rect_grid.x1 = - minify(params.src.surf.logical_level0_px.width, src_level) * - wm_prog_key.x_scale - 1.0f; - params.wm_inputs.rect_grid.y1 = - minify(params.src.surf.logical_level0_px.height, src_level) * - wm_prog_key.y_scale - 1.0f; - - brw_blorp_setup_coord_transform(¶ms.wm_inputs.coord_transform[0], - src_x0, src_x1, dst_x0, dst_x1, mirror_x); - brw_blorp_setup_coord_transform(¶ms.wm_inputs.coord_transform[1], - src_y0, src_y1, dst_y0, dst_y1, mirror_y); - - /* For some texture types, we need to pass the layer through the sampler. */ - params.wm_inputs.src_z = params.src.z_offset; - - if (devinfo->gen > 6 && - params.dst.surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) { - assert(params.dst.surf.samples > 1); - - /* We must expand the rectangle we send through the rendering pipeline, - * to account for the fact that we are mapping the destination region as - * single-sampled when it is in fact multisampled. We must also align - * it to a multiple of the multisampling pattern, because the - * differences between multisampled and single-sampled surface formats - * will mean that pixels are scrambled within the multisampling pattern. - * TODO: what if this makes the coordinates too large? - * - * Note: this only works if the destination surface uses the IMS layout. - * If it's UMS, then we have no choice but to set up the rendering - * pipeline as multisampled. 
- */ - switch (params.dst.surf.samples) { - case 2: - params.x0 = ROUND_DOWN_TO(params.x0 * 2, 4); - params.y0 = ROUND_DOWN_TO(params.y0, 4); - params.x1 = ALIGN(params.x1 * 2, 4); - params.y1 = ALIGN(params.y1, 4); - break; - case 4: - params.x0 = ROUND_DOWN_TO(params.x0 * 2, 4); - params.y0 = ROUND_DOWN_TO(params.y0 * 2, 4); - params.x1 = ALIGN(params.x1 * 2, 4); - params.y1 = ALIGN(params.y1 * 2, 4); - break; - case 8: - params.x0 = ROUND_DOWN_TO(params.x0 * 4, 8); - params.y0 = ROUND_DOWN_TO(params.y0 * 2, 4); - params.x1 = ALIGN(params.x1 * 4, 8); - params.y1 = ALIGN(params.y1 * 2, 4); - break; - case 16: - params.x0 = ROUND_DOWN_TO(params.x0 * 4, 8); - params.y0 = ROUND_DOWN_TO(params.y0 * 4, 8); - params.x1 = ALIGN(params.x1 * 4, 8); - params.y1 = ALIGN(params.y1 * 4, 8); - break; - default: - unreachable("Unrecognized sample count in brw_blorp_blit_params ctor"); - } - - surf_fake_interleaved_msaa(batch->blorp->isl_dev, ¶ms.dst); - - wm_prog_key.use_kill = true; - } - - if (params.dst.surf.tiling == ISL_TILING_W) { - /* We must modify the rectangle we send through the rendering pipeline - * (and the size and x/y offset of the destination surface), to account - * for the fact that we are mapping it as Y-tiled when it is in fact - * W-tiled. - * - * Both Y tiling and W tiling can be understood as organizations of - * 32-byte sub-tiles; within each 32-byte sub-tile, the layout of pixels - * is different, but the layout of the 32-byte sub-tiles within the 4k - * tile is the same (8 sub-tiles across by 16 sub-tiles down, in - * column-major order). In Y tiling, the sub-tiles are 16 bytes wide - * and 2 rows high; in W tiling, they are 8 bytes wide and 4 rows high. - * - * Therefore, to account for the layout differences within the 32-byte - * sub-tiles, we must expand the rectangle so the X coordinates of its - * edges are multiples of 8 (the W sub-tile width), and its Y - * coordinates of its edges are multiples of 4 (the W sub-tile height). 
- * Then we need to scale the X and Y coordinates of the rectangle to - * account for the differences in aspect ratio between the Y and W - * sub-tiles. We need to modify the layer width and height similarly. - * - * A correction needs to be applied when MSAA is in use: since - * INTEL_MSAA_LAYOUT_IMS uses an interleaving pattern whose height is 4, - * we need to align the Y coordinates to multiples of 8, so that when - * they are divided by two they are still multiples of 4. - * - * Note: Since the x/y offset of the surface will be applied using the - * SURFACE_STATE command packet, it will be invisible to the swizzling - * code in the shader; therefore it needs to be in a multiple of the - * 32-byte sub-tile size. Fortunately it is, since the sub-tile is 8 - * pixels wide and 4 pixels high (when viewed as a W-tiled stencil - * buffer), and the miplevel alignment used for stencil buffers is 8 - * pixels horizontally and either 4 or 8 pixels vertically (see - * intel_horizontal_texture_alignment_unit() and - * intel_vertical_texture_alignment_unit()). - * - * Note: Also, since the SURFACE_STATE command packet can only apply - * offsets that are multiples of 4 pixels horizontally and 2 pixels - * vertically, it is important that the offsets will be multiples of - * these sizes after they are converted into Y-tiled coordinates. - * Fortunately they will be, since we know from above that the offsets - * are a multiple of the 32-byte sub-tile size, and in Y-tiled - * coordinates the sub-tile is 16 pixels wide and 2 pixels high. - * - * TODO: what if this makes the coordinates (or the texture size) too - * large? - */ - const unsigned x_align = 8, y_align = params.dst.surf.samples != 0 ? 
8 : 4; - params.x0 = ROUND_DOWN_TO(params.x0, x_align) * 2; - params.y0 = ROUND_DOWN_TO(params.y0, y_align) / 2; - params.x1 = ALIGN(params.x1, x_align) * 2; - params.y1 = ALIGN(params.y1, y_align) / 2; - - /* Retile the surface to Y-tiled */ - surf_retile_w_to_y(batch->blorp->isl_dev, ¶ms.dst); - - wm_prog_key.dst_tiled_w = true; - wm_prog_key.use_kill = true; - - if (params.dst.surf.samples > 1) { - /* If the destination surface is a W-tiled multisampled stencil - * buffer that we're mapping as Y tiled, then we need to arrange for - * the WM program to run once per sample rather than once per pixel, - * because the memory layout of related samples doesn't match between - * W and Y tiling. - */ - wm_prog_key.persample_msaa_dispatch = true; - } - } - - if (devinfo->gen < 8 && params.src.surf.tiling == ISL_TILING_W) { - /* On Haswell and earlier, we have to fake W-tiled sources as Y-tiled. - * Broadwell adds support for sampling from stencil. - * - * See the comments above concerning x/y offset alignment for the - * destination surface. - * - * TODO: what if this makes the texture size too large? - */ - surf_retile_w_to_y(batch->blorp->isl_dev, ¶ms.src); - - wm_prog_key.src_tiled_w = true; - } - - /* tex_samples and rt_samples are the sample counts that are set up in - * SURFACE_STATE. - */ - wm_prog_key.tex_samples = params.src.surf.samples; - wm_prog_key.rt_samples = params.dst.surf.samples; - - /* tex_layout and rt_layout indicate the MSAA layout the GPU pipeline will - * use to access the source and destination surfaces. - */ - wm_prog_key.tex_layout = params.src.surf.msaa_layout; - wm_prog_key.rt_layout = params.dst.surf.msaa_layout; - - if (params.src.surf.samples > 0 && params.dst.surf.samples > 1) { - /* We are blitting from a multisample buffer to a multisample buffer, so - * we must preserve samples within a pixel. This means we have to - * arrange for the WM program to run once per sample rather than once - * per pixel. 
- */ - wm_prog_key.persample_msaa_dispatch = true; - } - - brw_blorp_get_blit_kernel(batch->blorp, ¶ms, &wm_prog_key); - - for (unsigned i = 0; i < 4; i++) { - params.src.view.channel_select[i] = - swizzle_to_scs(GET_SWZ(src_swizzle, i)); - } - - batch->blorp->exec(batch, ¶ms); -} diff --git a/src/mesa/drivers/dri/i965/blorp_clear.c b/src/mesa/drivers/dri/i965/blorp_clear.c deleted file mode 100644 index 4ab0fe137a3..00000000000 --- a/src/mesa/drivers/dri/i965/blorp_clear.c +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "util/ralloc.h" - -#include "blorp_priv.h" -#include "brw_defines.h" - -#include "nir_builder.h" - -#define FILE_DEBUG_FLAG DEBUG_BLORP - -struct brw_blorp_const_color_prog_key -{ - bool use_simd16_replicated_data; - bool pad[3]; -}; - -static void -blorp_params_get_clear_kernel(struct blorp_context *blorp, - struct blorp_params *params, - bool use_replicated_data) -{ - struct brw_blorp_const_color_prog_key blorp_key; - memset(&blorp_key, 0, sizeof(blorp_key)); - blorp_key.use_simd16_replicated_data = use_replicated_data; - - if (blorp->lookup_shader(blorp, &blorp_key, sizeof(blorp_key), - ¶ms->wm_prog_kernel, ¶ms->wm_prog_data)) - return; - - void *mem_ctx = ralloc_context(NULL); - - nir_builder b; - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); - b.shader->info.name = ralloc_strdup(b.shader, "BLORP-clear"); - - nir_variable *v_color = nir_variable_create(b.shader, nir_var_shader_in, - glsl_vec4_type(), "v_color"); - v_color->data.location = VARYING_SLOT_VAR0; - v_color->data.interpolation = INTERP_MODE_FLAT; - - nir_variable *frag_color = nir_variable_create(b.shader, nir_var_shader_out, - glsl_vec4_type(), - "gl_FragColor"); - frag_color->data.location = FRAG_RESULT_COLOR; - - nir_copy_var(&b, frag_color, v_color); - - struct brw_wm_prog_key wm_key; - brw_blorp_init_wm_prog_key(&wm_key); - - struct brw_blorp_prog_data prog_data; - unsigned program_size; - const unsigned *program = - brw_blorp_compile_nir_shader(blorp, b.shader, &wm_key, use_replicated_data, - &prog_data, &program_size); - - blorp->upload_shader(blorp, &blorp_key, sizeof(blorp_key), - program, program_size, - &prog_data, sizeof(prog_data), - ¶ms->wm_prog_kernel, ¶ms->wm_prog_data); - - ralloc_free(mem_ctx); -} - -/* The x0, y0, x1, and y1 parameters must already be populated with the render - * area of the framebuffer to be cleared. 
- */ -static void -get_fast_clear_rect(const struct isl_device *dev, - const struct isl_surf *aux_surf, - unsigned *x0, unsigned *y0, - unsigned *x1, unsigned *y1) -{ - unsigned int x_align, y_align; - unsigned int x_scaledown, y_scaledown; - - /* Only single sampled surfaces need to (and actually can) be resolved. */ - if (aux_surf->usage == ISL_SURF_USAGE_CCS_BIT) { - /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render - * Target(s)", beneath the "Fast Color Clear" bullet (p327): - * - * Clear pass must have a clear rectangle that must follow - * alignment rules in terms of pixels and lines as shown in the - * table below. Further, the clear-rectangle height and width - * must be multiple of the following dimensions. If the height - * and width of the render target being cleared do not meet these - * requirements, an MCS buffer can be created such that it - * follows the requirement and covers the RT. - * - * The alignment size in the table that follows is related to the - * alignment size that is baked into the CCS surface format but with X - * alignment multiplied by 16 and Y alignment multiplied by 32. - */ - x_align = isl_format_get_layout(aux_surf->format)->bw; - y_align = isl_format_get_layout(aux_surf->format)->bh; - - x_align *= 16; - - /* SKL+ line alignment requirement for Y-tiled are half those of the prior - * generations. - */ - if (dev->info->gen >= 9) - y_align *= 16; - else - y_align *= 32; - - /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render - * Target(s)", beneath the "Fast Color Clear" bullet (p327): - * - * In order to optimize the performance MCS buffer (when bound to - * 1X RT) clear similarly to MCS buffer clear for MSRT case, - * clear rect is required to be scaled by the following factors - * in the horizontal and vertical directions: - * - * The X and Y scale down factors in the table that follows are each - * equal to half the alignment value computed above. 
- */ - x_scaledown = x_align / 2; - y_scaledown = y_align / 2; - - /* From BSpec: 3D-Media-GPGPU Engine > 3D Pipeline > Pixel > Pixel - * Backend > MCS Buffer for Render Target(s) [DevIVB+] > Table "Color - * Clear of Non-MultiSampled Render Target Restrictions": - * - * Clear rectangle must be aligned to two times the number of - * pixels in the table shown below due to 16x16 hashing across the - * slice. - */ - x_align *= 2; - y_align *= 2; - } else { - assert(aux_surf->usage == ISL_SURF_USAGE_MCS_BIT); - - /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render - * Target(s)", beneath the "MSAA Compression" bullet (p326): - * - * Clear pass for this case requires that scaled down primitive - * is sent down with upper left co-ordinate to coincide with - * actual rectangle being cleared. For MSAA, clear rectangle’s - * height and width need to as show in the following table in - * terms of (width,height) of the RT. - * - * MSAA Width of Clear Rect Height of Clear Rect - * 2X Ceil(1/8*width) Ceil(1/2*height) - * 4X Ceil(1/8*width) Ceil(1/2*height) - * 8X Ceil(1/2*width) Ceil(1/2*height) - * 16X width Ceil(1/2*height) - * - * The text "with upper left co-ordinate to coincide with actual - * rectangle being cleared" is a little confusing--it seems to imply - * that to clear a rectangle from (x,y) to (x+w,y+h), one needs to - * feed the pipeline using the rectangle (x,y) to - * (x+Ceil(w/N),y+Ceil(h/2)), where N is either 2 or 8 depending on - * the number of samples. Experiments indicate that this is not - * quite correct; actually, what the hardware appears to do is to - * align whatever rectangle is sent down the pipeline to the nearest - * multiple of 2x2 blocks, and then scale it up by a factor of N - * horizontally and 2 vertically. So the resulting alignment is 4 - * vertically and either 4 or 16 horizontally, and the scaledown - * factor is 2 vertically and either 2 or 8 horizontally. 
- */ - switch (aux_surf->format) { - case ISL_FORMAT_MCS_2X: - case ISL_FORMAT_MCS_4X: - x_scaledown = 8; - break; - case ISL_FORMAT_MCS_8X: - x_scaledown = 2; - break; - case ISL_FORMAT_MCS_16X: - x_scaledown = 1; - break; - default: - unreachable("Unexpected MCS format for fast clear"); - } - y_scaledown = 2; - x_align = x_scaledown * 2; - y_align = y_scaledown * 2; - } - - *x0 = ROUND_DOWN_TO(*x0, x_align) / x_scaledown; - *y0 = ROUND_DOWN_TO(*y0, y_align) / y_scaledown; - *x1 = ALIGN(*x1, x_align) / x_scaledown; - *y1 = ALIGN(*y1, y_align) / y_scaledown; -} - -void -blorp_fast_clear(struct blorp_batch *batch, - const struct blorp_surf *surf, - uint32_t level, uint32_t layer, - uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1) -{ - struct blorp_params params; - blorp_params_init(¶ms); - - params.x0 = x0; - params.y0 = y0; - params.x1 = x1; - params.y1 = y1; - - memset(¶ms.wm_inputs, 0xff, 4*sizeof(float)); - params.fast_clear_op = BLORP_FAST_CLEAR_OP_CLEAR; - - get_fast_clear_rect(batch->blorp->isl_dev, surf->aux_surf, - ¶ms.x0, ¶ms.y0, ¶ms.x1, ¶ms.y1); - - blorp_params_get_clear_kernel(batch->blorp, ¶ms, true); - - brw_blorp_surface_info_init(batch->blorp, ¶ms.dst, surf, level, layer, - surf->surf->format, true); - - batch->blorp->exec(batch, ¶ms); -} - - -void -blorp_clear(struct blorp_batch *batch, - const struct blorp_surf *surf, - uint32_t level, uint32_t layer, - uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1, - enum isl_format format, union isl_color_value clear_color, - bool color_write_disable[4]) -{ - struct blorp_params params; - blorp_params_init(¶ms); - - params.x0 = x0; - params.y0 = y0; - params.x1 = x1; - params.y1 = y1; - - memcpy(¶ms.wm_inputs, clear_color.f32, sizeof(float) * 4); - - bool use_simd16_replicated_data = true; - - /* From the SNB PRM (Vol4_Part1): - * - * "Replicated data (Message Type = 111) is only supported when - * accessing tiled memory. Using this Message Type to access linear - * (untiled) memory is UNDEFINED." 
- */ - if (surf->surf->tiling == ISL_TILING_LINEAR) - use_simd16_replicated_data = false; - - /* Constant color writes ignore everyting in blend and color calculator - * state. This is not documented. - */ - for (unsigned i = 0; i < 4; i++) { - params.color_write_disable[i] = color_write_disable[i]; - if (color_write_disable[i]) - use_simd16_replicated_data = false; - } - - blorp_params_get_clear_kernel(batch->blorp, ¶ms, - use_simd16_replicated_data); - - brw_blorp_surface_info_init(batch->blorp, ¶ms.dst, surf, level, layer, - format, true); - - batch->blorp->exec(batch, ¶ms); -} - -void -blorp_ccs_resolve(struct blorp_batch *batch, - struct blorp_surf *surf, enum isl_format format) -{ - struct blorp_params params; - blorp_params_init(¶ms); - - brw_blorp_surface_info_init(batch->blorp, ¶ms.dst, surf, - 0 /* level */, 0 /* layer */, format, true); - - /* From the Ivy Bridge PRM, Vol2 Part1 11.9 "Render Target Resolve": - * - * A rectangle primitive must be scaled down by the following factors - * with respect to render target being resolved. - * - * The scaledown factors in the table that follows are related to the block - * size of the CCS format. For IVB and HSW, we divide by two, for BDW we - * multiply by 8 and 16. On Sky Lake, we multiply by 8. 
- */ - const struct isl_format_layout *aux_fmtl = - isl_format_get_layout(params.dst.aux_surf.format); - assert(aux_fmtl->txc == ISL_TXC_CCS); - - unsigned x_scaledown, y_scaledown; - if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 9) { - x_scaledown = aux_fmtl->bw * 8; - y_scaledown = aux_fmtl->bh * 8; - } else if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 8) { - x_scaledown = aux_fmtl->bw * 8; - y_scaledown = aux_fmtl->bh * 16; - } else { - x_scaledown = aux_fmtl->bw / 2; - y_scaledown = aux_fmtl->bh / 2; - } - params.x0 = params.y0 = 0; - params.x1 = params.dst.aux_surf.logical_level0_px.width; - params.y1 = params.dst.aux_surf.logical_level0_px.height; - params.x1 = ALIGN(params.x1, x_scaledown) / x_scaledown; - params.y1 = ALIGN(params.y1, y_scaledown) / y_scaledown; - - if (batch->blorp->isl_dev->info->gen >= 9) { - if (params.dst.aux_usage == ISL_AUX_USAGE_CCS_E) - params.fast_clear_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL; - else - params.fast_clear_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL; - } else { - /* Broadwell and earlier do not have a partial resolve */ - params.fast_clear_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL; - } - - /* Note: there is no need to initialize push constants because it doesn't - * matter what data gets dispatched to the render target. However, we must - * ensure that the fragment shader delivers the data using the "replicated - * color" message. 
- */ - - blorp_params_get_clear_kernel(batch->blorp, ¶ms, true); - - batch->blorp->exec(batch, ¶ms); -} diff --git a/src/mesa/drivers/dri/i965/blorp_priv.h b/src/mesa/drivers/dri/i965/blorp_priv.h deleted file mode 100644 index 33f197b523d..00000000000 --- a/src/mesa/drivers/dri/i965/blorp_priv.h +++ /dev/null @@ -1,291 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#pragma once - -#include <stdint.h> - -#include "compiler/nir/nir.h" -#include "brw_compiler.h" - -#include "blorp.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Binding table indices used by BLORP. 
- */ -enum { - BLORP_RENDERBUFFER_BT_INDEX, - BLORP_TEXTURE_BT_INDEX, - BLORP_NUM_BT_ENTRIES -}; - -enum blorp_fast_clear_op { - BLORP_FAST_CLEAR_OP_NONE = 0, - BLORP_FAST_CLEAR_OP_CLEAR, - BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL, - BLORP_FAST_CLEAR_OP_RESOLVE_FULL, -}; - -struct brw_blorp_surface_info -{ - struct isl_surf surf; - struct blorp_address addr; - - struct isl_surf aux_surf; - struct blorp_address aux_addr; - enum isl_aux_usage aux_usage; - - union isl_color_value clear_color; - - struct isl_view view; - - /* Z offset into a 3-D texture or slice of a 2-D array texture. */ - uint32_t z_offset; - - uint32_t tile_x_sa, tile_y_sa; -}; - -void -brw_blorp_surface_info_init(struct blorp_context *blorp, - struct brw_blorp_surface_info *info, - const struct blorp_surf *surf, - unsigned int level, unsigned int layer, - enum isl_format format, bool is_render_target); - - -struct brw_blorp_coord_transform -{ - float multiplier; - float offset; -}; - -/** - * Bounding rectangle telling pixel discard which pixels are not to be - * touched. This is needed in when surfaces are configured as something else - * what they really are: - * - * - writing W-tiled stencil as Y-tiled - * - writing interleaved multisampled as single sampled. - * - * See blorp_nir_discard_if_outside_rect(). - */ -struct brw_blorp_discard_rect -{ - uint32_t x0; - uint32_t x1; - uint32_t y0; - uint32_t y1; -}; - -/** - * Grid needed for blended and scaled blits of integer formats, see - * blorp_nir_manual_blend_bilinear(). - */ -struct brw_blorp_rect_grid -{ - float x1; - float y1; - float pad[2]; -}; - -struct brw_blorp_wm_inputs -{ - struct brw_blorp_discard_rect discard_rect; - struct brw_blorp_rect_grid rect_grid; - struct brw_blorp_coord_transform coord_transform[2]; - - /* Minimum layer setting works for all the textures types but texture_3d - * for which the setting has no effect. Use the z-coordinate instead. 
/* Program data recorded for a compiled BLORP fragment shader. */
struct brw_blorp_prog_data
{
   bool dispatch_8;
   bool dispatch_16;

   uint8_t first_curbe_grf_0;
   uint8_t first_curbe_grf_2;

   uint32_t ksp_offset_2;

   /**
    * True if the WM program should be run in MSDISPMODE_PERSAMPLE with more
    * than one sample per pixel.
    */
   bool persample_msaa_dispatch;

   /**
    * Mask of which FS inputs are marked flat by the shader source.  This is
    * needed for setting up 3DSTATE_SF/SBE.
    */
   uint32_t flat_inputs;
   unsigned num_varying_inputs;
   uint64_t inputs_read;
};

/**
 * Compute the URB read length for a BLORP program.
 *
 * \param prog_data  program data, or NULL
 * \return 1 when \p prog_data is NULL; otherwise
 *         (num_varying_inputs + 1) / 2, but never less than 1.
 */
static inline unsigned
brw_blorp_get_urb_length(const struct brw_blorp_prog_data *prog_data)
{
   if (!prog_data)
      return 1;

   /* From the BSpec: 3D Pipeline - Strips and Fans - 3DSTATE_SBE
    *
    * read_length = ceiling((max_source_attr+1)/2)
    */
   const unsigned read_length = (prog_data->num_varying_inputs + 1) / 2;

   /* The result is clamped so that it is at least one. */
   return read_length > 1 ? read_length : 1;
}
*/ - enum isl_msaa_layout src_layout; - - /* Number of samples per pixel that have been configured in the render - * target. - */ - unsigned rt_samples; - - /* MSAA layout that has been configured in the render target. */ - enum isl_msaa_layout rt_layout; - - /* Actual number of samples per pixel in the destination image. */ - unsigned dst_samples; - - /* Actual MSAA layout used by the destination image. */ - enum isl_msaa_layout dst_layout; - - /* Type of the data to be read from the texture (one of - * nir_type_(int|uint|float)). - */ - nir_alu_type texture_data_type; - - /* True if the source image is W tiled. If true, the surface state for the - * source image must be configured as Y tiled, and tex_samples must be 0. - */ - bool src_tiled_w; - - /* True if the destination image is W tiled. If true, the surface state - * for the render target must be configured as Y tiled, and rt_samples must - * be 0. - */ - bool dst_tiled_w; - - /* True if all source samples should be blended together to produce each - * destination pixel. If true, src_tiled_w must be false, tex_samples must - * equal src_samples, and tex_samples must be nonzero. - */ - bool blend; - - /* True if the rectangle being sent through the rendering pipeline might be - * larger than the destination rectangle, so the WM program should kill any - * pixels that are outside the destination rectangle. - */ - bool use_kill; - - /** - * True if the WM program should be run in MSDISPMODE_PERSAMPLE with more - * than one sample per pixel. - */ - bool persample_msaa_dispatch; - - /* True for scaled blitting. */ - bool blit_scaled; - - /* Scale factors between the pixel grid and the grid of samples. We're - * using grid of samples for bilinear filetring in multisample scaled blits. - */ - float x_scale; - float y_scale; - - /* True for blits with filter = GL_LINEAR. */ - bool bilinear_filter; -}; - -/** - * \name BLORP internals - * \{ - * - * Used internally by gen6_blorp_exec() and gen7_blorp_exec(). 
- */ - -void brw_blorp_init_wm_prog_key(struct brw_wm_prog_key *wm_key); - -const unsigned * -brw_blorp_compile_nir_shader(struct blorp_context *blorp, struct nir_shader *nir, - const struct brw_wm_prog_key *wm_key, - bool use_repclear, - struct brw_blorp_prog_data *prog_data, - unsigned *program_size); - -/** \} */ - -#ifdef __cplusplus -} /* end extern "C" */ -#endif /* __cplusplus */ diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index f66f13e7624..afbf68f419b 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -23,7 +23,7 @@ #pragma once -#include "blorp.h" +#include "blorp/blorp.h" #include "intel_mipmap_tree.h" #include "program/prog_instruction.h" diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index a76b25f0b96..bcda0ab74c8 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -41,7 +41,7 @@ #include "intel_aub.h" #include "isl/isl.h" -#include "blorp.h" +#include "blorp/blorp.h" #ifdef __cplusplus extern "C" { diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c index a4a4af94efd..2e62c70ee67 100644 --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c @@ -29,7 +29,7 @@ #include "brw_context.h" #include "brw_state.h" -#include "genX_blorp_exec.h" +#include "blorp/blorp_genX_exec.h" #include "brw_blorp.h" diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.h b/src/mesa/drivers/dri/i965/genX_blorp_exec.h deleted file mode 100644 index f44076e129f..00000000000 --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.h +++ /dev/null @@ -1,1176 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without 
restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "blorp_priv.h" -#include "brw_device_info.h" -#include "intel_aub.h" - -/** - * This file provides the blorp pipeline setup and execution functionality. - * It defines the following function: - * - * static void - * blorp_exec(struct blorp_context *blorp, void *batch_data, - * const struct blorp_params *params); - * - * It is the job of whoever includes this header to wrap this in something - * to get an externally visible symbol. - * - * In order for the blorp_exec function to work, the driver must provide - * implementations of the following static helper functions. 
- */ - -static void * -blorp_emit_dwords(struct blorp_batch *batch, unsigned n); - -static uint64_t -blorp_emit_reloc(struct blorp_batch *batch, - void *location, struct blorp_address address, uint32_t delta); - -static void * -blorp_alloc_dynamic_state(struct blorp_batch *batch, - enum aub_state_struct_type type, - uint32_t size, - uint32_t alignment, - uint32_t *offset); -static void * -blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size, - struct blorp_address *addr); - -static void -blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries, - unsigned state_size, unsigned state_alignment, - uint32_t *bt_offset, uint32_t **bt_map, - void **surface_maps); -static void -blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset, - struct blorp_address address, uint32_t delta); - -static void -blorp_emit_urb_config(struct blorp_batch *batch, unsigned vs_entry_size); -static void -blorp_emit_3dstate_multisample(struct blorp_batch *batch, unsigned samples); - -/***** BEGIN blorp_exec implementation ******/ - -#include "genxml/gen_macros.h" - -#define __gen_address_type struct blorp_address -#define __gen_user_data struct blorp_batch - -static uint64_t -__gen_combine_address(struct blorp_batch *batch, void *location, - struct blorp_address address, uint32_t delta) -{ - if (address.buffer == NULL) { - return address.offset + delta; - } else { - return blorp_emit_reloc(batch, location, address, delta); - } -} - -#include "genxml/genX_pack.h" - -#define _blorp_cmd_length(cmd) cmd ## _length -#define _blorp_cmd_length_bias(cmd) cmd ## _length_bias -#define _blorp_cmd_header(cmd) cmd ## _header -#define _blorp_cmd_pack(cmd) cmd ## _pack - -#define blorp_emit(batch, cmd, name) \ - for (struct cmd name = { _blorp_cmd_header(cmd) }, \ - *_dst = blorp_emit_dwords(batch, _blorp_cmd_length(cmd)); \ - __builtin_expect(_dst != NULL, 1); \ - _blorp_cmd_pack(cmd)(batch, (void *)_dst, &name), \ - _dst = NULL) - -#define blorp_emitn(batch, cmd, n) 
({ \ - uint32_t *_dw = blorp_emit_dwords(batch, n); \ - struct cmd template = { \ - _blorp_cmd_header(cmd), \ - .DWordLength = n - _blorp_cmd_length_bias(cmd), \ - }; \ - _blorp_cmd_pack(cmd)(batch, _dw, &template); \ - _dw + 1; /* Array starts at dw[1] */ \ - }) - -/* Once vertex fetcher has written full VUE entries with complete - * header the space requirement is as follows per vertex (in bytes): - * - * Header Position Program constants - * +--------+------------+-------------------+ - * | 16 | 16 | n x 16 | - * +--------+------------+-------------------+ - * - * where 'n' stands for number of varying inputs expressed as vec4s. - * - * The URB size is in turn expressed in 64 bytes (512 bits). - */ -static inline unsigned -gen7_blorp_get_vs_entry_size(const struct blorp_params *params) -{ - const unsigned num_varyings = - params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; - const unsigned total_needed = 16 + 16 + num_varyings * 16; - - return DIV_ROUND_UP(total_needed, 64); -} - -/* 3DSTATE_URB - * 3DSTATE_URB_VS - * 3DSTATE_URB_HS - * 3DSTATE_URB_DS - * 3DSTATE_URB_GS - * - * Assign the entire URB to the VS. Even though the VS disabled, URB space - * is still needed because the clipper loads the VUE's from the URB. From - * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE, - * Dword 1.15:0 "VS Number of URB Entries": - * This field is always used (even if VS Function Enable is DISABLED). - * - * The warning below appears in the PRM (Section 3DSTATE_URB), but we can - * safely ignore it because this batch contains only one draw call. - * Because of URB corruption caused by allocating a previous GS unit - * URB entry to the VS unit, software is required to send a “GS NULL - * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0) - * plus a dummy DRAW call before any case where VS will be taking over - * GS URB space. - * - * If the 3DSTATE_URB_VS is emitted, than the others must be also. 
- * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS: - * - * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be - * programmed in order for the programming of this state to be - * valid. - */ -static void -emit_urb_config(struct blorp_batch *batch, - const struct blorp_params *params) -{ - blorp_emit_urb_config(batch, gen7_blorp_get_vs_entry_size(params)); -} - -static void -blorp_emit_vertex_data(struct blorp_batch *batch, - const struct blorp_params *params, - struct blorp_address *addr, - uint32_t *size) -{ - const float vertices[] = { - /* v0 */ (float)params->x0, (float)params->y1, - /* v1 */ (float)params->x1, (float)params->y1, - /* v2 */ (float)params->x0, (float)params->y0, - }; - - void *data = blorp_alloc_vertex_buffer(batch, sizeof(vertices), addr); - memcpy(data, vertices, sizeof(vertices)); - *size = sizeof(vertices); -} - -static void -blorp_emit_input_varying_data(struct blorp_batch *batch, - const struct blorp_params *params, - struct blorp_address *addr, - uint32_t *size) -{ - const unsigned vec4_size_in_bytes = 4 * sizeof(float); - const unsigned max_num_varyings = - DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes); - const unsigned num_varyings = params->wm_prog_data->num_varying_inputs; - - *size = num_varyings * vec4_size_in_bytes; - - const float *const inputs_src = (const float *)&params->wm_inputs; - float *inputs = blorp_alloc_vertex_buffer(batch, *size, addr); - - /* Walk over the attribute slots, determine if the attribute is used by - * the program and when necessary copy the values from the input storage to - * the vertex data buffer.
- */ - for (unsigned i = 0; i < max_num_varyings; i++) { - const gl_varying_slot attr = VARYING_SLOT_VAR0 + i; - - if (!(params->wm_prog_data->inputs_read & (1ull << attr))) - continue; - - memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes); - - inputs += 4; - } -} - -static void -blorp_emit_vertex_buffers(struct blorp_batch *batch, - const struct blorp_params *params) -{ - struct GENX(VERTEX_BUFFER_STATE) vb[2]; - memset(vb, 0, sizeof(vb)); - - unsigned num_buffers = 1; - - uint32_t size; - blorp_emit_vertex_data(batch, params, &vb[0].BufferStartingAddress, &size); - vb[0].VertexBufferIndex = 0; - vb[0].BufferPitch = 2 * sizeof(float); - vb[0].VertexBufferMOCS = batch->blorp->mocs.vb; -#if GEN_GEN >= 7 - vb[0].AddressModifyEnable = true; -#endif -#if GEN_GEN >= 8 - vb[0].BufferSize = size; -#else - vb[0].BufferAccessType = VERTEXDATA; - vb[0].EndAddress = vb[0].BufferStartingAddress; - vb[0].EndAddress.offset += size - 1; -#endif - - if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs) { - blorp_emit_input_varying_data(batch, params, - &vb[1].BufferStartingAddress, &size); - vb[1].VertexBufferIndex = 1; - vb[1].BufferPitch = 0; - vb[1].VertexBufferMOCS = batch->blorp->mocs.vb; -#if GEN_GEN >= 7 - vb[1].AddressModifyEnable = true; -#endif -#if GEN_GEN >= 8 - vb[1].BufferSize = size; -#else - vb[1].BufferAccessType = INSTANCEDATA; - vb[1].EndAddress = vb[1].BufferStartingAddress; - vb[1].EndAddress.offset += size - 1; -#endif - num_buffers++; - } - - const unsigned num_dwords = - 1 + GENX(VERTEX_BUFFER_STATE_length) * num_buffers; - uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords); - - for (unsigned i = 0; i < num_buffers; i++) { - GENX(VERTEX_BUFFER_STATE_pack)(batch, dw, &vb[i]); - dw += GENX(VERTEX_BUFFER_STATE_length); - } -} - -static void -blorp_emit_vertex_elements(struct blorp_batch *batch, - const struct blorp_params *params) -{ - const unsigned num_varyings = - params->wm_prog_data ? 
params->wm_prog_data->num_varying_inputs : 0; - const unsigned num_elements = 2 + num_varyings; - - struct GENX(VERTEX_ELEMENT_STATE) ve[num_elements]; - memset(ve, 0, num_elements * sizeof(*ve)); - - /* Setup VBO for the rectangle primitive.. - * - * A rectangle primitive (3DPRIM_RECTLIST) consists of only three - * vertices. The vertices reside in screen space with DirectX - * coordinates (that is, (0, 0) is the upper left corner). - * - * v2 ------ implied - * | | - * | | - * v0 ----- v1 - * - * Since the VS is disabled, the clipper loads each VUE directly from - * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and - * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows: - * dw0: Reserved, MBZ. - * dw1: Render Target Array Index. The HiZ op does not use indexed - * vertices, so set the dword to 0. - * dw2: Viewport Index. The HiZ op disables viewport mapping and - * scissoring, so set the dword to 0. - * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, - * so set the dword to 0. - * dw4: Vertex Position X. - * dw5: Vertex Position Y. - * dw6: Vertex Position Z. - * dw7: Vertex Position W. - * - * dw8: Flat vertex input 0 - * dw9: Flat vertex input 1 - * ... - * dwn: Flat vertex input n - 8 - * - * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1 - * "Vertex URB Entry (VUE) Formats". - * - * Only vertex position X and Y are going to be variable, Z is fixed to - * zero and W to one. Header words dw0-3 are all zero. There is no need to - * include the fixed values in the vertex buffer. Vertex fetcher can be - * instructed to fill vertex elements with constant values of one and zero - * instead of reading them from the buffer. - * Flat inputs are program constants that are not interpolated. Moreover - * their values will be the same between vertices. - * - * See the vertex element setup below. 
- */ - ve[0].VertexBufferIndex = 0; - ve[0].Valid = true; - ve[0].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT; - ve[0].SourceElementOffset = 0; - ve[0].Component0Control = VFCOMP_STORE_0; - ve[0].Component1Control = VFCOMP_STORE_0; - ve[0].Component2Control = VFCOMP_STORE_0; - ve[0].Component3Control = VFCOMP_STORE_0; - - ve[1].VertexBufferIndex = 0; - ve[1].Valid = true; - ve[1].SourceElementFormat = ISL_FORMAT_R32G32_FLOAT; - ve[1].SourceElementOffset = 0; - ve[1].Component0Control = VFCOMP_STORE_SRC; - ve[1].Component1Control = VFCOMP_STORE_SRC; - ve[1].Component2Control = VFCOMP_STORE_0; - ve[1].Component3Control = VFCOMP_STORE_1_FP; - - for (unsigned i = 0; i < num_varyings; ++i) { - ve[i + 2].VertexBufferIndex = 1; - ve[i + 2].Valid = true; - ve[i + 2].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT; - ve[i + 2].SourceElementOffset = i * 4 * sizeof(float); - ve[i + 2].Component0Control = VFCOMP_STORE_SRC; - ve[i + 2].Component1Control = VFCOMP_STORE_SRC; - ve[i + 2].Component2Control = VFCOMP_STORE_SRC; - ve[i + 2].Component3Control = VFCOMP_STORE_SRC; - } - - const unsigned num_dwords = - 1 + GENX(VERTEX_ELEMENT_STATE_length) * num_elements; - uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_ELEMENTS), num_dwords); - - for (unsigned i = 0; i < num_elements; i++) { - GENX(VERTEX_ELEMENT_STATE_pack)(batch, dw, &ve[i]); - dw += GENX(VERTEX_ELEMENT_STATE_length); - } - -#if GEN_GEN >= 8 - blorp_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs); - - for (unsigned i = 0; i < num_elements; i++) { - blorp_emit(batch, GENX(3DSTATE_VF_INSTANCING), vf) { - vf.VertexElementIndex = i; - vf.InstancingEnable = false; - } - } - - blorp_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) { - topo.PrimitiveTopologyType = _3DPRIM_RECTLIST; - } -#endif -} - -static void -blorp_emit_sf_config(struct blorp_batch *batch, - const struct blorp_params *params) -{ - const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; - - /* 3DSTATE_SF - * - * Disable 
ViewportTransformEnable (dw2.1) - * - * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D - * Primitives Overview": - * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the - * use of screen- space coordinates). - * - * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3) - * and BackFaceFillMode (dw2.5:6) to SOLID(0). - * - * From the Sandy Bridge PRM, Volume 2, Part 1, Section - * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: - * SOLID: Any triangle or rectangle object found to be front-facing - * is rendered as a solid object. This setting is required when - * (rendering rectangle (RECTLIST) objects. - */ - -#if GEN_GEN >= 8 - - blorp_emit(batch, GENX(3DSTATE_SF), sf); - - blorp_emit(batch, GENX(3DSTATE_RASTER), raster) { - raster.CullMode = CULLMODE_NONE; - } - - blorp_emit(batch, GENX(3DSTATE_SBE), sbe) { - sbe.VertexURBEntryReadOffset = 1; - sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs; - sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); - sbe.ForceVertexURBEntryReadLength = true; - sbe.ForceVertexURBEntryReadOffset = true; - sbe.ConstantInterpolationEnable = prog_data->flat_inputs; - -#if GEN_GEN >= 9 - for (unsigned i = 0; i < 32; i++) - sbe.AttributeActiveComponentFormat[i] = ACF_XYZW; -#endif - } - -#elif GEN_GEN >= 7 - - blorp_emit(batch, GENX(3DSTATE_SF), sf) { - sf.FrontFaceFillMode = FILL_MODE_SOLID; - sf.BackFaceFillMode = FILL_MODE_SOLID; - - sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ? 
- MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL; - -#if GEN_GEN == 7 - sf.DepthBufferSurfaceFormat = params->depth_format; -#endif - } - - blorp_emit(batch, GENX(3DSTATE_SBE), sbe) { - sbe.VertexURBEntryReadOffset = 1; - if (prog_data) { - sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs; - sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); - sbe.ConstantInterpolationEnable = prog_data->flat_inputs; - } else { - sbe.NumberofSFOutputAttributes = 0; - sbe.VertexURBEntryReadLength = 1; - } - } - -#else /* GEN_GEN <= 6 */ - - blorp_emit(batch, GENX(3DSTATE_SF), sf) { - sf.FrontFaceFillMode = FILL_MODE_SOLID; - sf.BackFaceFillMode = FILL_MODE_SOLID; - - sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ? - MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL; - - sf.VertexURBEntryReadOffset = 1; - if (prog_data) { - sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs; - sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); - sf.ConstantInterpolationEnable = prog_data->flat_inputs; - } else { - sf.NumberofSFOutputAttributes = 0; - sf.VertexURBEntryReadLength = 1; - } - } - -#endif /* GEN_GEN */ -} - -static void -blorp_emit_ps_config(struct blorp_batch *batch, - const struct blorp_params *params) -{ - const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; - - /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be - * nonzero to prevent the GPU from hanging. While the documentation doesn't - * mention this explicitly, it notes that the valid range for the field is - * [1,39] = [2,40] threads, which excludes zero. - * - * To be safe (and to minimize extraneous code) we go ahead and fully - * configure the WM state whether or not there is a WM program. 
- */ - -#if GEN_GEN >= 8 - - blorp_emit(batch, GENX(3DSTATE_WM), wm); - - blorp_emit(batch, GENX(3DSTATE_PS), ps) { - if (params->src.addr.buffer) { - ps.SamplerCount = 1; /* Up to 4 samplers */ - ps.BindingTableEntryCount = 2; - } else { - ps.BindingTableEntryCount = 1; - } - - ps.DispatchGRFStartRegisterForConstantSetupData0 = - prog_data->first_curbe_grf_0; - ps.DispatchGRFStartRegisterForConstantSetupData2 = - prog_data->first_curbe_grf_2; - - ps._8PixelDispatchEnable = prog_data->dispatch_8; - ps._16PixelDispatchEnable = prog_data->dispatch_16; - - ps.KernelStartPointer0 = params->wm_prog_kernel; - ps.KernelStartPointer2 = - params->wm_prog_kernel + prog_data->ksp_offset_2; - - /* 3DSTATE_PS expects the number of threads per PSD, which is always 64; - * it implicitly scales for different GT levels (which have some # of - * PSDs). - * - * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1. - */ - if (GEN_GEN >= 9) - ps.MaximumNumberofThreadsPerPSD = 64 - 1; - else - ps.MaximumNumberofThreadsPerPSD = 64 - 2; - - switch (params->fast_clear_op) { - case BLORP_FAST_CLEAR_OP_NONE: - break; -#if GEN_GEN >= 9 - case BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL: - ps.RenderTargetResolveType = RESOLVE_PARTIAL; - break; - case BLORP_FAST_CLEAR_OP_RESOLVE_FULL: - ps.RenderTargetResolveType = RESOLVE_FULL; - break; -#else - case BLORP_FAST_CLEAR_OP_RESOLVE_FULL: - ps.RenderTargetResolveEnable = true; - break; -#endif - case BLORP_FAST_CLEAR_OP_CLEAR: - ps.RenderTargetFastClearEnable = true; - break; - default: - unreachable("Invalid fast clear op"); - } - } - - blorp_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) { - psx.PixelShaderValid = true; - - if (params->src.addr.buffer) - psx.PixelShaderKillsPixel = true; - - psx.AttributeEnable = prog_data->num_varying_inputs > 0; - - if (prog_data && prog_data->persample_msaa_dispatch) - psx.PixelShaderIsPerSample = true; - } - -#elif GEN_GEN >= 7 - - blorp_emit(batch, GENX(3DSTATE_WM), wm) { - switch (params->hiz_op) { - case 
BLORP_HIZ_OP_DEPTH_CLEAR: - wm.DepthBufferClear = true; - break; - case BLORP_HIZ_OP_DEPTH_RESOLVE: - wm.DepthBufferResolveEnable = true; - break; - case BLORP_HIZ_OP_HIZ_RESOLVE: - wm.HierarchicalDepthBufferResolveEnable = true; - break; - case BLORP_HIZ_OP_NONE: - break; - default: - unreachable("not reached"); - } - - if (prog_data) - wm.ThreadDispatchEnable = true; - - if (params->src.addr.buffer) - wm.PixelShaderKillPixel = true; - - if (params->dst.surf.samples > 1) { - wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; - wm.MultisampleDispatchMode = - (prog_data && prog_data->persample_msaa_dispatch) ? - MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL; - } else { - wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; - wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; - } - } - - blorp_emit(batch, GENX(3DSTATE_PS), ps) { - ps.MaximumNumberofThreads = - batch->blorp->isl_dev->info->max_wm_threads - 1; - -#if GEN_IS_HASWELL - ps.SampleMask = 1; -#endif - - if (prog_data) { - ps.DispatchGRFStartRegisterforConstantSetupData0 = - prog_data->first_curbe_grf_0; - ps.DispatchGRFStartRegisterforConstantSetupData2 = - prog_data->first_curbe_grf_2; - - ps.KernelStartPointer0 = params->wm_prog_kernel; - ps.KernelStartPointer2 = - params->wm_prog_kernel + prog_data->ksp_offset_2; - - ps._8PixelDispatchEnable = prog_data->dispatch_8; - ps._16PixelDispatchEnable = prog_data->dispatch_16; - - ps.AttributeEnable = prog_data->num_varying_inputs > 0; - } else { - /* Gen7 hardware gets angry if we don't enable at least one dispatch - * mode, so just enable 16-pixel dispatch if we don't have a program. 
- */ - ps._16PixelDispatchEnable = true; - } - - if (params->src.addr.buffer) - ps.SamplerCount = 1; /* Up to 4 samplers */ - - switch (params->fast_clear_op) { - case BLORP_FAST_CLEAR_OP_NONE: - break; - case BLORP_FAST_CLEAR_OP_RESOLVE_FULL: - ps.RenderTargetResolveEnable = true; - break; - case BLORP_FAST_CLEAR_OP_CLEAR: - ps.RenderTargetFastClearEnable = true; - break; - default: - unreachable("Invalid fast clear op"); - } - } - -#else /* GEN_GEN <= 6 */ - - blorp_emit(batch, GENX(3DSTATE_WM), wm) { - wm.MaximumNumberofThreads = - batch->blorp->isl_dev->info->max_wm_threads - 1; - - switch (params->hiz_op) { - case BLORP_HIZ_OP_DEPTH_CLEAR: - wm.DepthBufferClear = true; - break; - case BLORP_HIZ_OP_DEPTH_RESOLVE: - wm.DepthBufferResolveEnable = true; - break; - case BLORP_HIZ_OP_HIZ_RESOLVE: - wm.HierarchicalDepthBufferResolveEnable = true; - break; - case BLORP_HIZ_OP_NONE: - break; - default: - unreachable("not reached"); - } - - if (prog_data) { - wm.ThreadDispatchEnable = true; - - wm.DispatchGRFStartRegisterforConstantSetupData0 = - prog_data->first_curbe_grf_0; - wm.DispatchGRFStartRegisterforConstantSetupData2 = - prog_data->first_curbe_grf_2; - - wm.KernelStartPointer0 = params->wm_prog_kernel; - wm.KernelStartPointer2 = - params->wm_prog_kernel + prog_data->ksp_offset_2; - - wm._8PixelDispatchEnable = prog_data->dispatch_8; - wm._16PixelDispatchEnable = prog_data->dispatch_16; - - wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs; - } - - if (params->src.addr.buffer) { - wm.SamplerCount = 1; /* Up to 4 samplers */ - wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on */ - } - - if (params->dst.surf.samples > 1) { - wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; - wm.MultisampleDispatchMode = - (prog_data && prog_data->persample_msaa_dispatch) ? 
- MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL; - } else { - wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; - wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; - } - } - -#endif /* GEN_GEN */ -} - - -static void -blorp_emit_depth_stencil_config(struct blorp_batch *batch, - const struct blorp_params *params) -{ -#if GEN_GEN >= 7 - const uint32_t mocs = 1; /* GEN7_MOCS_L3 */ -#else - const uint32_t mocs = 0; -#endif - - blorp_emit(batch, GENX(3DSTATE_DEPTH_BUFFER), db) { - switch (params->depth.surf.dim) { - case ISL_SURF_DIM_1D: - db.SurfaceType = SURFTYPE_1D; - break; - case ISL_SURF_DIM_2D: - db.SurfaceType = SURFTYPE_2D; - break; - case ISL_SURF_DIM_3D: - db.SurfaceType = SURFTYPE_3D; - break; - } - - db.SurfaceFormat = params->depth_format; - -#if GEN_GEN >= 7 - db.DepthWriteEnable = true; -#endif - -#if GEN_GEN <= 6 - db.TiledSurface = true; - db.TileWalk = TILEWALK_YMAJOR; - db.MIPMapLayoutMode = MIPLAYOUT_BELOW; - db.SeparateStencilBufferEnable = true; -#endif - - db.HierarchicalDepthBufferEnable = true; - - db.Width = params->depth.surf.logical_level0_px.width - 1; - db.Height = params->depth.surf.logical_level0_px.height - 1; - db.RenderTargetViewExtent = db.Depth = - MAX2(params->depth.surf.logical_level0_px.depth, - params->depth.surf.logical_level0_px.array_len) - 1; - - db.LOD = params->depth.view.base_level; - db.MinimumArrayElement = params->depth.view.base_array_layer; - - db.SurfacePitch = params->depth.surf.row_pitch - 1; - db.SurfaceBaseAddress = params->depth.addr; - db.DepthBufferMOCS = mocs; - } - - blorp_emit(batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) { - hiz.SurfacePitch = params->depth.aux_surf.row_pitch - 1; - hiz.SurfaceBaseAddress = params->depth.aux_addr; - hiz.HierarchicalDepthBufferMOCS = mocs; - } - - blorp_emit(batch, GENX(3DSTATE_STENCIL_BUFFER), sb); -} - -static uint32_t -blorp_emit_blend_state(struct blorp_batch *batch, - const struct blorp_params *params) -{ - struct GENX(BLEND_STATE) blend; - memset(&blend, 0, 
sizeof(blend)); - - for (unsigned i = 0; i < params->num_draw_buffers; ++i) { - blend.Entry[i].PreBlendColorClampEnable = true; - blend.Entry[i].PostBlendColorClampEnable = true; - blend.Entry[i].ColorClampRange = COLORCLAMP_RTFORMAT; - - blend.Entry[i].WriteDisableRed = params->color_write_disable[0]; - blend.Entry[i].WriteDisableGreen = params->color_write_disable[1]; - blend.Entry[i].WriteDisableBlue = params->color_write_disable[2]; - blend.Entry[i].WriteDisableAlpha = params->color_write_disable[3]; - } - - uint32_t offset; - void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_BLEND_STATE, - GENX(BLEND_STATE_length) * 4, - 64, &offset); - GENX(BLEND_STATE_pack)(NULL, state, &blend); - -#if GEN_GEN >= 7 - blorp_emit(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) { - sp.BlendStatePointer = offset; -#if GEN_GEN >= 8 - sp.BlendStatePointerValid = true; -#endif - } -#endif - -#if GEN_GEN >= 8 - blorp_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) { - ps_blend.HasWriteableRT = true; - } -#endif - - return offset; -} - -static uint32_t -blorp_emit_color_calc_state(struct blorp_batch *batch, - const struct blorp_params *params) -{ - uint32_t offset; - void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_CC_STATE, - GENX(COLOR_CALC_STATE_length) * 4, - 64, &offset); - memset(state, 0, GENX(COLOR_CALC_STATE_length) * 4); - -#if GEN_GEN >= 7 - blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), sp) { - sp.ColorCalcStatePointer = offset; -#if GEN_GEN >= 8 - sp.ColorCalcStatePointerValid = true; -#endif - } -#endif - - return offset; -} - -static uint32_t -blorp_emit_depth_stencil_state(struct blorp_batch *batch, - const struct blorp_params *params) -{ -#if GEN_GEN >= 8 - - /* On gen8+, DEPTH_STENCIL state is simply an instruction */ - blorp_emit(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds); - return 0; - -#else /* GEN_GEN <= 7 */ - - /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2: - * - 7.5.3.1 Depth Buffer Clear - * - 7.5.3.2 Depth Buffer 
Resolve - * - 7.5.3.3 Hierarchical Depth Buffer Resolve - */ - struct GENX(DEPTH_STENCIL_STATE) ds = { - .DepthBufferWriteEnable = true, - }; - - if (params->hiz_op == BLORP_HIZ_OP_DEPTH_RESOLVE) { - ds.DepthTestEnable = true; - ds.DepthTestFunction = COMPAREFUNCTION_NEVER; - } - - uint32_t offset; - void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_DEPTH_STENCIL_STATE, - GENX(DEPTH_STENCIL_STATE_length) * 4, - 64, &offset); - GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds); - -#if GEN_GEN >= 7 - blorp_emit(batch, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), sp) { - sp.PointertoDEPTH_STENCIL_STATE = offset; - } -#endif - - return offset; - -#endif /* GEN_GEN */ -} - -struct surface_state_info { - unsigned num_dwords; - unsigned ss_align; /* Required alignment of RENDER_SURFACE_STATE in bytes */ - unsigned reloc_dw; - unsigned aux_reloc_dw; -}; - -static const struct surface_state_info surface_state_infos[] = { - [6] = {6, 32, 1, 0}, - [7] = {8, 32, 1, 6}, - [8] = {13, 64, 8, 10}, - [9] = {16, 64, 8, 10}, -}; - -static void -blorp_emit_surface_state(struct blorp_batch *batch, - const struct brw_blorp_surface_info *surface, - uint32_t *state, uint32_t state_offset, - bool is_render_target) -{ - const struct surface_state_info ss_info = surface_state_infos[GEN_GEN]; - - struct isl_surf surf = surface->surf; - - if (surf.dim == ISL_SURF_DIM_1D && - surf.dim_layout == ISL_DIM_LAYOUT_GEN4_2D) { - assert(surf.logical_level0_px.height == 1); - surf.dim = ISL_SURF_DIM_2D; - } - - /* Blorp doesn't support HiZ in any of the blit or slow-clear paths */ - enum isl_aux_usage aux_usage = surface->aux_usage; - if (aux_usage == ISL_AUX_USAGE_HIZ) - aux_usage = ISL_AUX_USAGE_NONE; - - const uint32_t mocs = - is_render_target ? 
batch->blorp->mocs.rb : batch->blorp->mocs.tex; - - isl_surf_fill_state(batch->blorp->isl_dev, state, - .surf = &surf, .view = &surface->view, - .aux_surf = &surface->aux_surf, .aux_usage = aux_usage, - .mocs = mocs, .clear_color = surface->clear_color, - .x_offset_sa = surface->tile_x_sa, - .y_offset_sa = surface->tile_y_sa); - - blorp_surface_reloc(batch, state_offset + ss_info.reloc_dw * 4, - surface->addr, 0); - - if (aux_usage != ISL_AUX_USAGE_NONE) { - /* On gen7 and prior, the bottom 12 bits of the MCS base address are - * used to store other information. This should be ok, however, because - * surface buffer addresses are always 4K page alinged. - */ - assert((surface->aux_addr.offset & 0xfff) == 0); - blorp_surface_reloc(batch, state_offset + ss_info.aux_reloc_dw * 4, - surface->aux_addr, state[ss_info.aux_reloc_dw]); - } -} - -static void -blorp_emit_surface_states(struct blorp_batch *batch, - const struct blorp_params *params) -{ - uint32_t bind_offset, *bind_map; - void *surface_maps[2]; - - const unsigned ss_size = GENX(RENDER_SURFACE_STATE_length) * 4; - const unsigned ss_align = GENX(RENDER_SURFACE_STATE_length) > 8 ? 
64 : 32; - - unsigned num_surfaces = 1 + (params->src.addr.buffer != NULL); - blorp_alloc_binding_table(batch, num_surfaces, ss_size, ss_align, - &bind_offset, &bind_map, surface_maps); - - blorp_emit_surface_state(batch, &params->dst, - surface_maps[BLORP_RENDERBUFFER_BT_INDEX], - bind_map[BLORP_RENDERBUFFER_BT_INDEX], true); - if (params->src.addr.buffer) { - blorp_emit_surface_state(batch, &params->src, - surface_maps[BLORP_TEXTURE_BT_INDEX], - bind_map[BLORP_TEXTURE_BT_INDEX], false); - } - -#if GEN_GEN >= 7 - blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), bt) { - bt.PointertoPSBindingTable = bind_offset; - } -#else - blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) { - bt.PSBindingTableChange = true; - bt.PointertoPSBindingTable = bind_offset; - } -#endif -} - -static void -blorp_emit_sampler_state(struct blorp_batch *batch, - const struct blorp_params *params) -{ - struct GENX(SAMPLER_STATE) sampler = { - .MipModeFilter = MIPFILTER_NONE, - .MagModeFilter = MAPFILTER_LINEAR, - .MinModeFilter = MAPFILTER_LINEAR, - .MinLOD = 0, - .MaxLOD = 0, - .TCXAddressControlMode = TCM_CLAMP, - .TCYAddressControlMode = TCM_CLAMP, - .TCZAddressControlMode = TCM_CLAMP, - .MaximumAnisotropy = RATIO21, - .RAddressMinFilterRoundingEnable = true, - .RAddressMagFilterRoundingEnable = true, - .VAddressMinFilterRoundingEnable = true, - .VAddressMagFilterRoundingEnable = true, - .UAddressMinFilterRoundingEnable = true, - .UAddressMagFilterRoundingEnable = true, - .NonnormalizedCoordinateEnable = true, - }; - - uint32_t offset; - void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_SAMPLER_STATE, - GENX(SAMPLER_STATE_length) * 4, - 32, &offset); - GENX(SAMPLER_STATE_pack)(NULL, state, &sampler); - -#if GEN_GEN >= 7 - blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) { - ssp.PointertoPSSamplerState = offset; - } -#else - blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) { - ssp.VSSamplerStateChange = true; - ssp.GSSamplerStateChange = true; -
ssp.PSSamplerStateChange = true; - ssp.PointertoPSSamplerState = offset; - } -#endif -} - -/* 3DSTATE_VIEWPORT_STATE_POINTERS */ -static void -blorp_emit_viewport_state(struct blorp_batch *batch, - const struct blorp_params *params) -{ - uint32_t cc_vp_offset; - - void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_CC_VP_STATE, - GENX(CC_VIEWPORT_length) * 4, 32, - &cc_vp_offset); - - GENX(CC_VIEWPORT_pack)(batch, state, - &(struct GENX(CC_VIEWPORT)) { - .MinimumDepth = 0.0, - .MaximumDepth = 1.0, - }); - -#if GEN_GEN >= 7 - blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) { - vsp.CCViewportPointer = cc_vp_offset; - } -#else - blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) { - vsp.CCViewportStateChange = true; - vsp.PointertoCC_VIEWPORT = cc_vp_offset; - } -#endif -} - - -/** - * \brief Execute a blit or render pass operation. - * - * To execute the operation, this function manually constructs and emits a - * batch to draw a rectangle primitive. The batchbuffer is flushed before - * constructing and after emitting the batch. - * - * This function alters no GL state. - */ -static void -blorp_exec(struct blorp_batch *batch, const struct blorp_params *params) -{ - uint32_t blend_state_offset = 0; - uint32_t color_calc_state_offset = 0; - uint32_t depth_stencil_state_offset; - - blorp_emit_vertex_buffers(batch, params); - blorp_emit_vertex_elements(batch, params); - - emit_urb_config(batch, params); - - if (params->wm_prog_data) { - blend_state_offset = blorp_emit_blend_state(batch, params); - color_calc_state_offset = blorp_emit_color_calc_state(batch, params); - } - depth_stencil_state_offset = blorp_emit_depth_stencil_state(batch, params); - -#if GEN_GEN <= 6 - /* 3DSTATE_CC_STATE_POINTERS - * - * The pointer offsets are relative to - * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. - * - * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE. 
- * - * The dynamic state emit helpers emit their own STATE_POINTERS packets on - * gen7+. However, on gen6 and earlier, they're all lumpped together in - * one CC_STATE_POINTERS packet so we have to emit that here. - */ - blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), cc) { - cc.BLEND_STATEChange = true; - cc.COLOR_CALC_STATEChange = true; - cc.DEPTH_STENCIL_STATEChange = true; - cc.PointertoBLEND_STATE = blend_state_offset; - cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset; - cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset; - } -#else - (void)blend_state_offset; - (void)color_calc_state_offset; - (void)depth_stencil_state_offset; -#endif - - blorp_emit(batch, GENX(3DSTATE_CONSTANT_VS), vs); -#if GEN_GEN >= 7 - blorp_emit(batch, GENX(3DSTATE_CONSTANT_HS), hs); - blorp_emit(batch, GENX(3DSTATE_CONSTANT_DS), DS); -#endif - blorp_emit(batch, GENX(3DSTATE_CONSTANT_GS), gs); - blorp_emit(batch, GENX(3DSTATE_CONSTANT_PS), ps); - - if (params->wm_prog_data) - blorp_emit_surface_states(batch, params); - - if (params->src.addr.buffer) - blorp_emit_sampler_state(batch, params); - - blorp_emit_3dstate_multisample(batch, params->dst.surf.samples); - - blorp_emit(batch, GENX(3DSTATE_SAMPLE_MASK), mask) { - mask.SampleMask = (1 << params->dst.surf.samples) - 1; - } - - /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State, - * 3DSTATE_VS, Dword 5.0 "VS Function Enable": - * - * [DevSNB] A pipeline flush must be programmed prior to a - * 3DSTATE_VS command that causes the VS Function Enable to - * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL - * command with CS stall bit set and a post sync operation. - * - * We've already done one at the start of the BLORP operation. 
- */ - blorp_emit(batch, GENX(3DSTATE_VS), vs); -#if GEN_GEN >= 7 - blorp_emit(batch, GENX(3DSTATE_HS), hs); - blorp_emit(batch, GENX(3DSTATE_TE), te); - blorp_emit(batch, GENX(3DSTATE_DS), DS); - blorp_emit(batch, GENX(3DSTATE_STREAMOUT), so); -#endif - blorp_emit(batch, GENX(3DSTATE_GS), gs); - - blorp_emit(batch, GENX(3DSTATE_CLIP), clip) { - clip.PerspectiveDivideDisable = true; - } - - blorp_emit_sf_config(batch, params); - blorp_emit_ps_config(batch, params); - - blorp_emit_viewport_state(batch, params); - - if (params->depth.addr.buffer) { - blorp_emit_depth_stencil_config(batch, params); - } else { - blorp_emit(batch, GENX(3DSTATE_DEPTH_BUFFER), db) { - db.SurfaceType = SURFTYPE_NULL; - db.SurfaceFormat = D32_FLOAT; - } - blorp_emit(batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz); - blorp_emit(batch, GENX(3DSTATE_STENCIL_BUFFER), sb); - } - - /* 3DSTATE_CLEAR_PARAMS - * - * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS: - * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE - * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes. 
- */ - blorp_emit(batch, GENX(3DSTATE_CLEAR_PARAMS), clear) { - clear.DepthClearValueValid = true; - clear.DepthClearValue = params->depth.clear_color.u32[0]; - } - - blorp_emit(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { - rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1; - rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1; - } - - blorp_emit(batch, GENX(3DPRIMITIVE), prim) { - prim.VertexAccessType = SEQUENTIAL; - prim.PrimitiveTopologyType = _3DPRIM_RECTLIST; - prim.VertexCountPerInstance = 3; - prim.InstanceCount = params->num_layers; - } -} diff --git a/src/mesa/drivers/dri/i965/intel_resolve_map.h b/src/mesa/drivers/dri/i965/intel_resolve_map.h index 672a4aaba7c..e998e97e6da 100644 --- a/src/mesa/drivers/dri/i965/intel_resolve_map.h +++ b/src/mesa/drivers/dri/i965/intel_resolve_map.h @@ -24,7 +24,7 @@ #pragma once #include <stdint.h> -#include "blorp.h" +#include "blorp/blorp.h" #include "compiler/glsl/list.h" #ifdef __cplusplus |