diff options
Diffstat (limited to 'src')
179 files changed, 74934 insertions, 1304 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index 9b23cf58f4f..272e68ce875 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -56,6 +56,11 @@ EXTRA_DIST = \ AM_CFLAGS = $(VISIBILITY_CFLAGS) AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS) +if HAVE_VULKAN +SUBDIRS += isl +SUBDIRS += vulkan +endif + AM_CPPFLAGS = \ -I$(top_srcdir)/include/ \ -I$(top_srcdir)/src/mapi/ \ diff --git a/src/compiler/Makefile.am b/src/compiler/Makefile.am index 0bc8e48efa6..e3d297fe299 100644 --- a/src/compiler/Makefile.am +++ b/src/compiler/Makefile.am @@ -84,7 +84,7 @@ check_PROGRAMS += \ glsl/tests/sampler-types-test \ glsl/tests/uniform-initializer-test -noinst_PROGRAMS = glsl_compiler +noinst_PROGRAMS = glsl_compiler spirv2nir glsl_tests_blob_test_SOURCES = \ glsl/tests/blob_test.c @@ -176,6 +176,15 @@ glsl_glsl_test_LDADD = \ $(top_builddir)/src/libglsl_util.la \ $(PTHREAD_LIBS) +spirv2nir_SOURCES = \ + nir/spirv2nir.c + +spirv2nir_LDADD = \ + nir/libnir.la \ + $(top_builddir)/src/util/libmesautil.la \ + -lm -lstdc++ \ + $(PTHREAD_LIBS) + # We write our own rules for yacc and lex below. We'd rather use automake, # but automake makes it especially difficult for a number of reasons: # @@ -262,6 +271,7 @@ nir_libnir_la_LIBADD = \ nir_libnir_la_SOURCES = \ $(NIR_FILES) \ + $(SPIRV_FILES) \ $(NIR_GENERATED_FILES) PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index c9780d6d6f7..2a4568aa679 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -175,7 +175,9 @@ NIR_FILES = \ nir/nir_control_flow_private.h \ nir/nir_dominance.c \ nir/nir_from_ssa.c \ + nir/nir_gather_info.c \ nir/nir_gs_count_vertices.c \ + nir/nir_inline_functions.c \ nir/nir_intrinsics.c \ nir/nir_intrinsics.h \ nir/nir_instr_set.c \ @@ -186,12 +188,14 @@ NIR_FILES = \ nir/nir_lower_clip.c \ nir/nir_lower_global_vars_to_local.c \ nir/nir_lower_gs_intrinsics.c \ + nir/nir_lower_indirect_derefs.c \ nir/nir_lower_load_const_to_scalar.c \ nir/nir_lower_locals_to_regs.c \ nir/nir_lower_idiv.c \ nir/nir_lower_io.c \ nir/nir_lower_outputs_to_temporaries.c \ nir/nir_lower_phis_to_scalar.c \ + nir/nir_lower_returns.c \ nir/nir_lower_samplers.c \ nir/nir_lower_system_values.c \ nir/nir_lower_tex.c \ @@ -213,8 +217,11 @@ NIR_FILES = \ nir/nir_opt_peephole_select.c \ nir/nir_opt_remove_phis.c \ nir/nir_opt_undef.c \ + nir/nir_phi_builder.c \ + nir/nir_phi_builder.h \ nir/nir_print.c \ nir/nir_remove_dead_variables.c \ + nir/nir_repair_ssa.c \ nir/nir_search.c \ nir/nir_search.h \ nir/nir_split_var_copies.c \ @@ -224,3 +231,12 @@ NIR_FILES = \ nir/nir_vla.h \ nir/nir_worklist.c \ nir/nir_worklist.h + +SPIRV_FILES = \ + nir/spirv/nir_spirv.h \ + nir/spirv/spirv_to_nir.c \ + nir/spirv/vtn_alu.c \ + nir/spirv/vtn_cfg.c \ + nir/spirv/vtn_glsl450.c \ + nir/spirv/vtn_private.h \ + nir/spirv/vtn_variables.c diff --git a/src/compiler/glsl/.gitignore b/src/compiler/glsl/.gitignore index dda423f83db..e80f8af6bfc 100644 --- a/src/compiler/glsl/.gitignore +++ b/src/compiler/glsl/.gitignore @@ -4,6 +4,7 @@ glsl_parser.cpp glsl_parser.h glsl_parser.output glsl_test +spirv2nir subtest-cr/ subtest-lf/ subtest-cr-lf/ diff --git a/src/compiler/glsl/Makefile.am b/src/compiler/glsl/Makefile.am index 9954b812403..d6b1f9ed695 100644 --- a/src/compiler/glsl/Makefile.am +++ b/src/compiler/glsl/Makefile.am @@ -66,7 +66,7 @@ check_PROGRAMS = \ tests/sampler-types-test \ tests/uniform-initializer-test -noinst_PROGRAMS = glsl_compiler +noinst_PROGRAMS = glsl_compiler spirv2nir tests_blob_test_SOURCES = \ tests/blob_test.c @@ -135,7 +135,6 @@ libglsl_la_SOURCES = \ glsl_parser.h \ $(LIBGLSL_FILES) - glsl_compiler_SOURCES = \ $(GLSL_COMPILER_CXX_FILES) @@ -145,6 +144,16 @@ glsl_compiler_LDADD = \ $(top_builddir)/src/util/libmesautil.la \ $(PTHREAD_LIBS) +spirv2nir_SOURCES = \ + standalone_scaffolding.cpp \ + standalone_scaffolding.h \ + nir/spirv2nir.c + +spirv2nir_LDADD = \ + libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + glsl_test_SOURCES = \ standalone_scaffolding.cpp \ test.cpp \ diff --git a/src/compiler/glsl/Makefile.sources b/src/compiler/glsl/Makefile.sources index 08b40c5cc8f..3f537d5b37a 100644 --- a/src/compiler/glsl/Makefile.sources +++ b/src/compiler/glsl/Makefile.sources @@ -29,7 +29,9 @@ NIR_FILES = \ nir/nir_control_flow_private.h \ nir/nir_dominance.c \ nir/nir_from_ssa.c \ + nir/nir_gather_info.c \ nir/nir_gs_count_vertices.c \ + nir/nir_inline_functions.c \ nir/nir_intrinsics.c \ nir/nir_intrinsics.h \ nir/nir_instr_set.c \ @@ -38,8 +40,10 @@ NIR_FILES = \ nir/nir_lower_alu_to_scalar.c \ nir/nir_lower_atomics.c \ nir/nir_lower_clip.c \ + nir/nir_lower_returns.c \ nir/nir_lower_global_vars_to_local.c \ nir/nir_lower_gs_intrinsics.c \ + nir/nir_lower_indirect_derefs.c \ nir/nir_lower_load_const_to_scalar.c \ nir/nir_lower_locals_to_regs.c \ nir/nir_lower_idiv.c \ @@ -67,8 +71,11 @@ NIR_FILES = \ nir/nir_opt_peephole_select.c \ nir/nir_opt_remove_phis.c \ nir/nir_opt_undef.c \ + nir/nir_phi_builder.c \ + nir/nir_phi_builder.h \ nir/nir_print.c \ nir/nir_remove_dead_variables.c \ + nir/nir_repair_ssa.c \ nir/nir_search.c \ nir/nir_search.h \ nir/nir_split_var_copies.c \ @@ -79,6 +86,15 @@ NIR_FILES = \ nir/nir_worklist.c \ nir/nir_worklist.h +SPIRV_FILES = \ + nir/spirv/nir_spirv.h \ + nir/spirv/spirv_to_nir.c \ + nir/spirv/vtn_alu.c \ + nir/spirv/vtn_cfg.c \ + nir/spirv/vtn_glsl450.c \ + nir/spirv/vtn_private.h \ + nir/spirv/vtn_variables.c + # libglsl LIBGLSL_FILES = \ diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index dfd31966eb0..98d8bc5f268 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -1129,6 +1129,7 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1) case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_ATOMIC_UINT: case GLSL_TYPE_SUBROUTINE: /* I assume a comparison of a struct containing a sampler just diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp index 603895497d1..ecf0d7f76e5 100644 --- a/src/compiler/glsl/glsl_parser_extras.cpp +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@ -87,6 +87,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->extensions = &ctx->Extensions; + this->ARB_compute_shader_enable = true; + this->Const.MaxLights = ctx->Const.MaxLights; this->Const.MaxClipPlanes = ctx->Const.MaxClipPlanes; this->Const.MaxTextureUnits = ctx->Const.MaxTextureUnits; diff --git a/src/compiler/glsl/ir.cpp b/src/compiler/glsl/ir.cpp index de9d314bae4..5debca32411 100644 --- a/src/compiler/glsl/ir.cpp +++ b/src/compiler/glsl/ir.cpp @@ -298,8 +298,6 @@ ir_expression::ir_expression(int op, ir_rvalue *op0) break; case ir_unop_noise: - case ir_unop_unpack_half_2x16_split_x: - case ir_unop_unpack_half_2x16_split_y: this->type = glsl_type::float_type; break; @@ -422,10 +420,6 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1) this->type = op0->type->get_base_type(); break; - case ir_binop_pack_half_2x16_split: - this->type = glsl_type::uint_type; - break; - case ir_binop_imul_high: case ir_binop_carry: case ir_binop_borrow: @@ -555,8 +549,6 @@ static const char *const operator_strs[] = { "unpackUnorm2x16", "unpackUnorm4x8", "unpackHalf2x16", - "unpackHalf2x16_split_x", - "unpackHalf2x16_split_y", "bitfield_reverse", "bit_count", "find_msb", @@ -599,7 +591,6 @@ static const char *const operator_strs[] = { "min", "max", "pow", - "packHalf2x16_split", "ubo_load", "ldexp", "vector_extract", diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h index bd7b5506343..09e21b22188 100644 --- a/src/compiler/glsl/ir.h +++ b/src/compiler/glsl/ir.h @@ -1402,16 +1402,6 @@ enum ir_expression_operation { /*@}*/ /** - * \name Lowered floating point unpacking operations. - * - * \see lower_packing_builtins_visitor::split_unpack_half_2x16 - */ - /*@{*/ - ir_unop_unpack_half_2x16_split_x, - ir_unop_unpack_half_2x16_split_y, - /*@}*/ - - /** * \name Bit operations, part of ARB_gpu_shader5. */ /*@{*/ @@ -1542,15 +1532,6 @@ enum ir_expression_operation { ir_binop_pow, /** - * \name Lowered floating point packing operations. - * - * \see lower_packing_builtins_visitor::split_pack_half_2x16 - */ - /*@{*/ - ir_binop_pack_half_2x16_split, - /*@}*/ - - /** * Load a value the size of a given GLSL type from a uniform block. * * operand0 is the ir_constant uniform block index in the linked shader. diff --git a/src/compiler/glsl/ir_clone.cpp b/src/compiler/glsl/ir_clone.cpp index 0965b0d3719..b32ec17f1af 100644 --- a/src/compiler/glsl/ir_clone.cpp +++ b/src/compiler/glsl/ir_clone.cpp @@ -366,6 +366,7 @@ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const return c; } + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: case GLSL_TYPE_ATOMIC_UINT: diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index be86f547f77..b56413a1500 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -58,17 +58,14 @@ enum lower_packing_builtins_op { LOWER_PACK_HALF_2x16 = 0x0010, LOWER_UNPACK_HALF_2x16 = 0x0020, - LOWER_PACK_HALF_2x16_TO_SPLIT = 0x0040, - LOWER_UNPACK_HALF_2x16_TO_SPLIT = 0x0080, + LOWER_PACK_SNORM_4x8 = 0x0040, + LOWER_UNPACK_SNORM_4x8 = 0x0080, - LOWER_PACK_SNORM_4x8 = 0x0100, - LOWER_UNPACK_SNORM_4x8 = 0x0200, + LOWER_PACK_UNORM_4x8 = 0x0100, + LOWER_UNPACK_UNORM_4x8 = 0x0200, - LOWER_PACK_UNORM_4x8 = 0x0400, - LOWER_UNPACK_UNORM_4x8 = 0x0800, - - LOWER_PACK_USE_BFI = 0x1000, - LOWER_PACK_USE_BFE = 0x2000, + LOWER_PACK_USE_BFI = 0x0400, + LOWER_PACK_USE_BFE = 0x0800, }; bool do_common_optimization(exec_list *ir, bool linked, diff --git a/src/compiler/glsl/ir_validate.cpp b/src/compiler/glsl/ir_validate.cpp index cad7069bf98..2ec5a3f73f7 100644 --- a/src/compiler/glsl/ir_validate.cpp +++ b/src/compiler/glsl/ir_validate.cpp @@ -372,12 +372,6 @@ ir_validate::visit_leave(ir_expression *ir) assert(ir->operands[0]->type == glsl_type::uint_type); break; - case ir_unop_unpack_half_2x16_split_x: - case ir_unop_unpack_half_2x16_split_y: - assert(ir->type == glsl_type::float_type); - assert(ir->operands[0]->type == glsl_type::uint_type); - break; - case ir_unop_unpack_double_2x32: assert(ir->type == glsl_type::uvec2_type); assert(ir->operands[0]->type == glsl_type::double_type); @@ -567,12 +561,6 @@ ir_validate::visit_leave(ir_expression *ir) assert(ir->operands[0]->type == ir->operands[1]->type); break; - case ir_binop_pack_half_2x16_split: - assert(ir->type == glsl_type::uint_type); - assert(ir->operands[0]->type == glsl_type::float_type); - assert(ir->operands[1]->type == glsl_type::float_type); - break; - case ir_binop_ubo_load: assert(ir->operands[0]->type == glsl_type::uint_type); diff --git a/src/compiler/glsl/link_uniform_initializers.cpp b/src/compiler/glsl/link_uniform_initializers.cpp index 58d21e5125e..cdc1d3ac7be 100644 --- a/src/compiler/glsl/link_uniform_initializers.cpp +++ b/src/compiler/glsl/link_uniform_initializers.cpp @@ -88,6 +88,7 @@ copy_constant_to_storage(union gl_constant_value *storage, case GLSL_TYPE_IMAGE: case GLSL_TYPE_ATOMIC_UINT: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_VOID: case GLSL_TYPE_SUBROUTINE: case GLSL_TYPE_ERROR: diff --git a/src/compiler/glsl/lower_packing_builtins.cpp b/src/compiler/glsl/lower_packing_builtins.cpp index 7f18238bc6e..a41627bd561 100644 --- a/src/compiler/glsl/lower_packing_builtins.cpp +++ b/src/compiler/glsl/lower_packing_builtins.cpp @@ -43,13 +43,6 @@ public: : op_mask(op_mask), progress(false) { - /* Mutually exclusive options. */ - assert(!((op_mask & LOWER_PACK_HALF_2x16) && - (op_mask & LOWER_PACK_HALF_2x16_TO_SPLIT))); - - assert(!((op_mask & LOWER_UNPACK_HALF_2x16) && - (op_mask & LOWER_UNPACK_HALF_2x16_TO_SPLIT))); - factory.instructions = &factory_instructions; } @@ -96,9 +89,6 @@ public: case LOWER_PACK_HALF_2x16: *rvalue = lower_pack_half_2x16(op0); break; - case LOWER_PACK_HALF_2x16_TO_SPLIT: - *rvalue = split_pack_half_2x16(op0); - break; case LOWER_UNPACK_SNORM_2x16: *rvalue = lower_unpack_snorm_2x16(op0); break; @@ -114,9 +104,6 @@ public: case LOWER_UNPACK_HALF_2x16: *rvalue = lower_unpack_half_2x16(op0); break; - case LOWER_UNPACK_HALF_2x16_TO_SPLIT: - *rvalue = split_unpack_half_2x16(op0); - break; case LOWER_PACK_UNPACK_NONE: case LOWER_PACK_USE_BFI: case LOWER_PACK_USE_BFE: @@ -161,7 +148,7 @@ private: result = op_mask & LOWER_PACK_UNORM_4x8; break; case ir_unop_pack_half_2x16: - result = op_mask & (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT); + result = op_mask & LOWER_PACK_HALF_2x16; break; case ir_unop_unpack_snorm_2x16: result = op_mask & LOWER_UNPACK_SNORM_2x16; @@ -176,7 +163,7 @@ private: result = op_mask & LOWER_UNPACK_UNORM_4x8; break; case ir_unop_unpack_half_2x16: - result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT); + result = op_mask & LOWER_UNPACK_HALF_2x16; break; default: result = LOWER_PACK_UNPACK_NONE; @@ -1093,41 +1080,6 @@ private: } /** - * \brief Split packHalf2x16's vec2 operand into two floats. - * - * \param vec2_rval is packHalf2x16's input - * \return a uint rvalue - * - * Some code generators, such as the i965 fragment shader, require that all - * vector expressions be lowered to a sequence of scalar expressions. - * However, packHalf2x16 cannot be scalarized by the same mechanism as - * a true vector operation because its input and output have a differing - * number of vector components. - * - * This method scalarizes packHalf2x16 by transforming it from an unary - * operation having vector input to a binary operation having scalar input. - * That is, it transforms - * - * packHalf2x16(VEC2_RVAL); - * - * into - * - * vec2 v = VEC2_RVAL; - * return packHalf2x16_split(v.x, v.y); - */ - ir_rvalue* - split_pack_half_2x16(ir_rvalue *vec2_rval) - { - assert(vec2_rval->type == glsl_type::vec2_type); - - ir_variable *v = factory.make_temp(glsl_type::vec2_type, - "tmp_split_pack_half_2x16_v"); - factory.emit(assign(v, vec2_rval)); - - return expr(ir_binop_pack_half_2x16_split, swizzle_x(v), swizzle_y(v)); - } - - /** * \brief Lower the component-wise calculation of unpackHalf2x16. * * Given a uint that encodes a float16 in its lower 16 bits, this function @@ -1341,59 +1293,6 @@ private: assert(result->type == glsl_type::vec2_type); return result; } - - /** - * \brief Split unpackHalf2x16 into two operations. - * - * \param uint_rval is unpackHalf2x16's input - * \return a vec2 rvalue - * - * Some code generators, such as the i965 fragment shader, require that all - * vector expressions be lowered to a sequence of scalar expressions. - * However, unpackHalf2x16 cannot be scalarized by the same method as - * a true vector operation because the number of components of its input - * and output differ. - * - * This method scalarizes unpackHalf2x16 by transforming it from a single - * operation having vec2 output to a pair of operations each having float - * output. That is, it transforms - * - * unpackHalf2x16(UINT_RVAL) - * - * into - * - * uint u = UINT_RVAL; - * vec2 v; - * - * v.x = unpackHalf2x16_split_x(u); - * v.y = unpackHalf2x16_split_y(u); - * - * return v; - */ - ir_rvalue* - split_unpack_half_2x16(ir_rvalue *uint_rval) - { - assert(uint_rval->type == glsl_type::uint_type); - - /* uint u = uint_rval; */ - ir_variable *u = factory.make_temp(glsl_type::uint_type, - "tmp_split_unpack_half_2x16_u"); - factory.emit(assign(u, uint_rval)); - - /* vec2 v; */ - ir_variable *v = factory.make_temp(glsl_type::vec2_type, - "tmp_split_unpack_half_2x16_v"); - - /* v.x = unpack_half_2x16_split_x(u); */ - factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_x, u), - WRITEMASK_X)); - - /* v.y = unpack_half_2x16_split_y(u); */ - factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_y, u), - WRITEMASK_Y)); - - return deref(v).val; - } }; } // namespace anonymous diff --git a/src/compiler/glsl/standalone_scaffolding.cpp b/src/compiler/glsl/standalone_scaffolding.cpp index d5d214b57cc..0f7a16a5e6f 100644 --- a/src/compiler/glsl/standalone_scaffolding.cpp +++ b/src/compiler/glsl/standalone_scaffolding.cpp @@ -35,6 +35,12 @@ #include "util/ralloc.h" #include "util/strtod.h" +extern "C" void +_mesa_error_no_memory(const char *caller) +{ + fprintf(stderr, "Mesa error: out of memory in %s", caller); +} + void _mesa_warning(struct gl_context *ctx, const char *fmt, ...) { diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp index 17ebf07acbc..5920c2e2611 100644 --- a/src/compiler/glsl_types.cpp +++ b/src/compiler/glsl_types.cpp @@ -32,6 +32,7 @@ mtx_t glsl_type::mutex = _MTX_INITIALIZER_NP; hash_table *glsl_type::array_types = NULL; hash_table *glsl_type::record_types = NULL; hash_table *glsl_type::interface_types = NULL; +hash_table *glsl_type::function_types = NULL; hash_table *glsl_type::subroutine_types = NULL; void *glsl_type::mem_ctx = NULL; @@ -169,6 +170,39 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, mtx_unlock(&glsl_type::mutex); } +glsl_type::glsl_type(const glsl_type *return_type, + const glsl_function_param *params, unsigned num_params) : + gl_type(0), + base_type(GLSL_TYPE_FUNCTION), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing(0), + vector_elements(0), matrix_columns(0), + length(num_params) +{ + unsigned int i; + + mtx_lock(&glsl_type::mutex); + + init_ralloc_type_ctx(); + + this->fields.parameters = rzalloc_array(this->mem_ctx, + glsl_function_param, num_params + 1); + + /* We store the return type as the first parameter */ + this->fields.parameters[0].type = return_type; + this->fields.parameters[0].in = false; + this->fields.parameters[0].out = true; + + /* We store the i'th parameter in slot i+1 */ + for (i = 0; i < length; i++) { + this->fields.parameters[i + 1].type = params[i].type; + this->fields.parameters[i + 1].in = params[i].in; + this->fields.parameters[i + 1].out = params[i].out; + } + + mtx_unlock(&glsl_type::mutex); +} + glsl_type::glsl_type(const char *subroutine_name) : gl_type(0), base_type(GLSL_TYPE_SUBROUTINE), @@ -681,6 +715,93 @@ glsl_type::get_sampler_instance(enum glsl_sampler_dim dim, } const glsl_type * +glsl_type::get_image_instance(enum glsl_sampler_dim dim, + bool array, glsl_base_type type) +{ + switch (type) { + case GLSL_TYPE_FLOAT: + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + return (array ? image1DArray_type : image1D_type); + case GLSL_SAMPLER_DIM_2D: + return (array ? image2DArray_type : image2D_type); + case GLSL_SAMPLER_DIM_3D: + return image3D_type; + case GLSL_SAMPLER_DIM_CUBE: + return (array ? imageCubeArray_type : imageCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + else + return image2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (array) + return error_type; + else + return imageBuffer_type; + case GLSL_SAMPLER_DIM_MS: + return (array ? image2DMSArray_type : image2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + return error_type; + } + case GLSL_TYPE_INT: + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + return (array ? iimage1DArray_type : iimage1D_type); + case GLSL_SAMPLER_DIM_2D: + return (array ? iimage2DArray_type : iimage2D_type); + case GLSL_SAMPLER_DIM_3D: + if (array) + return error_type; + return iimage3D_type; + case GLSL_SAMPLER_DIM_CUBE: + return (array ? iimageCubeArray_type : iimageCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + return iimage2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (array) + return error_type; + return iimageBuffer_type; + case GLSL_SAMPLER_DIM_MS: + return (array ? iimage2DMSArray_type : iimage2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + return error_type; + } + case GLSL_TYPE_UINT: + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + return (array ? uimage1DArray_type : uimage1D_type); + case GLSL_SAMPLER_DIM_2D: + return (array ? uimage2DArray_type : uimage2D_type); + case GLSL_SAMPLER_DIM_3D: + if (array) + return error_type; + return uimage3D_type; + case GLSL_SAMPLER_DIM_CUBE: + return (array ? uimageCubeArray_type : uimageCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + return uimage2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (array) + return error_type; + return uimageBuffer_type; + case GLSL_SAMPLER_DIM_MS: + return (array ? uimage2DMSArray_type : uimage2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + return error_type; + } + default: + return error_type; + } + + unreachable("switch statement above should be complete"); +} + +const glsl_type * glsl_type::get_array_instance(const glsl_type *base, unsigned array_size) { /* Generate a name using the base type pointer in the key. This is @@ -924,6 +1045,74 @@ glsl_type::get_subroutine_instance(const char *subroutine_name) } +static bool +function_key_compare(const void *a, const void *b) +{ + const glsl_type *const key1 = (glsl_type *) a; + const glsl_type *const key2 = (glsl_type *) b; + + if (key1->length != key2->length) + return 1; + + return memcmp(key1->fields.parameters, key2->fields.parameters, + (key1->length + 1) * sizeof(*key1->fields.parameters)) == 0; +} + + +static uint32_t +function_key_hash(const void *a) +{ + const glsl_type *const key = (glsl_type *) a; + char hash_key[128]; + unsigned size = 0; + + size = snprintf(hash_key, sizeof(hash_key), "%08x", key->length); + + for (unsigned i = 0; i < key->length; i++) { + if (size >= sizeof(hash_key)) + break; + + size += snprintf(& hash_key[size], sizeof(hash_key) - size, + "%p", (void *) key->fields.structure[i].type); + } + + return _mesa_hash_string(hash_key); +} + +const glsl_type * +glsl_type::get_function_instance(const glsl_type *return_type, + const glsl_function_param *params, + unsigned num_params) +{ + const glsl_type key(return_type, params, num_params); + + mtx_lock(&glsl_type::mutex); + + if (function_types == NULL) { + function_types = _mesa_hash_table_create(NULL, function_key_hash, + function_key_compare); + } + + struct hash_entry *entry = _mesa_hash_table_search(function_types, &key); + if (entry == NULL) { + mtx_unlock(&glsl_type::mutex); + const glsl_type *t = new glsl_type(return_type, params, num_params); + mtx_lock(&glsl_type::mutex); + + entry = _mesa_hash_table_insert(function_types, t, (void *) t); + } + + const glsl_type *t = (const glsl_type *)entry->data; + + assert(t->base_type == GLSL_TYPE_FUNCTION); + assert(t->length == num_params); + + mtx_unlock(&glsl_type::mutex); + + return t; +} + + const glsl_type * glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b) { @@ -1053,6 +1242,8 @@ glsl_type::component_slots() const return 1; case GLSL_TYPE_SUBROUTINE: return 1; + + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_SAMPLER: case GLSL_TYPE_ATOMIC_UINT: case GLSL_TYPE_VOID: @@ -1691,6 +1882,7 @@ glsl_type::count_attribute_slots(bool vertex_input_slots) const case GLSL_TYPE_ARRAY: return this->length * this->fields.array->count_attribute_slots(vertex_input_slots); + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: case GLSL_TYPE_ATOMIC_UINT: diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h index e63d7945c9f..a9b5281e774 100644 --- a/src/compiler/glsl_types.h +++ b/src/compiler/glsl_types.h @@ -56,6 +56,7 @@ enum glsl_base_type { GLSL_TYPE_IMAGE, GLSL_TYPE_ATOMIC_UINT, GLSL_TYPE_STRUCT, + GLSL_TYPE_FUNCTION, GLSL_TYPE_INTERFACE, GLSL_TYPE_ARRAY, GLSL_TYPE_VOID, @@ -187,7 +188,7 @@ struct glsl_type { */ union { const struct glsl_type *array; /**< Type of array elements. */ - const struct glsl_type *parameters; /**< Parameters to function. */ + struct glsl_function_param *parameters; /**< Parameters to function. */ struct glsl_struct_field *structure; /**< List of struct fields. */ } fields; @@ -250,6 +251,8 @@ struct glsl_type { bool array, glsl_base_type type); + static const glsl_type *get_image_instance(enum glsl_sampler_dim dim, + bool array, glsl_base_type type); /** * Get the instance of an array type @@ -278,6 +281,13 @@ struct glsl_type { static const glsl_type *get_subroutine_instance(const char *subroutine_name); /** + * Get the instance of a function type + */ + static const glsl_type *get_function_instance(const struct glsl_type *return_type, + const glsl_function_param *parameters, + unsigned num_params); + + /** * Get the type resulting from a multiplication of \p type_a * \p type_b */ static const glsl_type *get_mul_type(const glsl_type *type_a, @@ -758,6 +768,10 @@ private: glsl_type(const glsl_struct_field *fields, unsigned num_fields, enum glsl_interface_packing packing, const char *name); + /** Constructor for interface types */ + glsl_type(const glsl_type *return_type, + const glsl_function_param *params, unsigned num_params); + /** Constructor for array types */ glsl_type(const glsl_type *array, unsigned length); @@ -776,6 +790,9 @@ private: /** Hash table containing the known subroutine types. */ static struct hash_table *subroutine_types; + /** Hash table containing the known function types. */ + static struct hash_table *function_types; + static bool record_key_compare(const void *a, const void *b); static unsigned record_key_hash(const void *key); @@ -803,6 +820,10 @@ private: /*@}*/ }; +#undef DECL_TYPE +#undef STRUCT_TYPE +#endif /* __cplusplus */ + struct glsl_struct_field { const struct glsl_type *type; const char *name; @@ -860,6 +881,7 @@ struct glsl_struct_field { unsigned image_volatile:1; unsigned image_restrict:1; +#ifdef __cplusplus glsl_struct_field(const struct glsl_type *_type, const char *_name) : type(_type), name(_name), location(-1), interpolation(0), centroid(0), sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0), @@ -872,6 +894,14 @@ struct glsl_struct_field { { /* empty */ } +#endif +}; + +struct glsl_function_param { + const struct glsl_type *type; + + bool in; + bool out; }; static inline unsigned int @@ -880,8 +910,4 @@ glsl_align(unsigned int a, unsigned int align) return (a + align - 1) / align * align; } -#undef DECL_TYPE -#undef STRUCT_TYPE -#endif /* __cplusplus */ - #endif /* GLSL_TYPES_H */ diff --git a/src/compiler/nir/Makefile.sources b/src/compiler/nir/Makefile.sources index 0755a100e65..04e8ab88a35 100644 --- a/src/compiler/nir/Makefile.sources +++ b/src/compiler/nir/Makefile.sources @@ -19,7 +19,9 @@ NIR_FILES = \ nir_control_flow_private.h \ nir_dominance.c \ nir_from_ssa.c \ + nir_gather_info.c \ nir_gs_count_vertices.c \ + nir_inline_functions.c \ nir_intrinsics.c \ nir_intrinsics.h \ nir_instr_set.c \ @@ -30,12 +32,14 @@ NIR_FILES = \ nir_lower_clip.c \ nir_lower_global_vars_to_local.c \ nir_lower_gs_intrinsics.c \ + nir_lower_indirect_derefs.c \ nir_lower_load_const_to_scalar.c \ nir_lower_locals_to_regs.c \ nir_lower_idiv.c \ nir_lower_io.c \ nir_lower_outputs_to_temporaries.c \ nir_lower_phis_to_scalar.c \ + nir_lower_returns.c \ nir_lower_samplers.c \ nir_lower_system_values.c \ nir_lower_tex.c \ @@ -57,8 +61,11 @@ NIR_FILES = \ nir_opt_peephole_select.c \ nir_opt_remove_phis.c \ nir_opt_undef.c \ + nir_phi_builder.c \ + nir_phi_builder.h \ nir_print.c \ nir_remove_dead_variables.c \ + nir_repair_ssa.c \ nir_search.c \ nir_search.h \ nir_split_var_copies.c \ @@ -69,3 +76,12 @@ NIR_FILES = \ nir_worklist.c \ nir_worklist.h +SPIRV_FILES = \ + spirv/nir_spirv.h \ + spirv/spirv_to_nir.c \ + spirv/vtn_alu.c \ + spirv/vtn_cfg.c \ + spirv/vtn_glsl450.c \ + spirv/vtn_private.h \ + spirv/vtn_variables.c + diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp index 4b76d234420..2a3047dd33c 100644 --- a/src/compiler/nir/glsl_to_nir.cpp +++ b/src/compiler/nir/glsl_to_nir.cpp @@ -46,7 +46,7 @@ namespace { class nir_visitor : public ir_visitor { public: - nir_visitor(nir_shader *shader); + nir_visitor(nir_shader *shader, gl_shader *sh); ~nir_visitor(); virtual void visit(ir_variable *); @@ -85,6 +85,8 @@ private: bool supports_ints; + struct gl_shader *sh; + nir_shader *shader; nir_function_impl *impl; nir_builder b; @@ -138,12 +140,21 @@ glsl_to_nir(const struct gl_shader_program *shader_prog, nir_shader *shader = nir_shader_create(NULL, stage, options); - nir_visitor v1(shader); + nir_visitor v1(shader, sh); nir_function_visitor v2(&v1); v2.run(sh->ir); visit_exec_list(sh->ir, &v1); - nir_lower_outputs_to_temporaries(shader); + nir_function *main = NULL; + nir_foreach_function(shader, func) { + if (strcmp(func->name, "main") == 0) { + main = func; + break; + } + } + assert(main); + + nir_lower_outputs_to_temporaries(shader, main); shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name); if (shader_prog->Label) @@ -204,10 +215,11 @@ glsl_to_nir(const struct gl_shader_program *shader_prog, return shader; } -nir_visitor::nir_visitor(nir_shader *shader) +nir_visitor::nir_visitor(nir_shader *shader, gl_shader *sh) { this->supports_ints = shader->options->native_integers; this->shader = shader; + this->sh = sh; this->is_global = true; this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); @@ -388,6 +400,7 @@ nir_visitor::visit(ir_variable *ir) } var->data.index = ir->data.index; + var->data.descriptor_set = 0; var->data.binding = ir->data.binding; var->data.offset = ir->data.offset; var->data.image.read_only = ir->data.image_read_only; @@ -1429,12 +1442,6 @@ nir_visitor::visit(ir_expression *ir) case ir_unop_unpack_half_2x16: result = nir_unpack_half_2x16(&b, srcs[0]); break; - case ir_unop_unpack_half_2x16_split_x: - result = nir_unpack_half_2x16_split_x(&b, srcs[0]); - break; - case ir_unop_unpack_half_2x16_split_y: - result = nir_unpack_half_2x16_split_y(&b, srcs[0]); - break; case ir_unop_bitfield_reverse: result = nir_bitfield_reverse(&b, srcs[0]); break; @@ -1718,9 +1725,6 @@ nir_visitor::visit(ir_expression *ir) } break; - case ir_binop_pack_half_2x16_split: - result = nir_pack_half_2x16_split(&b, srcs[0], srcs[1]); - break; case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break; case ir_triop_fma: result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]); diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 21bf678c04e..42a53f6f3db 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -39,6 +39,7 @@ nir_shader_create(void *mem_ctx, exec_list_make_empty(&shader->uniforms); exec_list_make_empty(&shader->inputs); exec_list_make_empty(&shader->outputs); + exec_list_make_empty(&shader->shared); shader->options = options; memset(&shader->info, 0, sizeof(shader->info)); @@ -52,6 +53,7 @@ nir_shader_create(void *mem_ctx, shader->num_inputs = 0; shader->num_outputs = 0; shader->num_uniforms = 0; + shader->num_shared = 0; shader->stage = stage; @@ -132,6 +134,11 @@ nir_shader_add_variable(nir_shader *shader, nir_variable *var) exec_list_push_tail(&shader->uniforms, &var->node); break; + case nir_var_shared: + assert(shader->stage == MESA_SHADER_COMPUTE); + exec_list_push_tail(&shader->shared, &var->node); + break; + case nir_var_system_value: exec_list_push_tail(&shader->system_values, &var->node); break; @@ -254,16 +261,11 @@ cf_init(nir_cf_node *node, nir_cf_node_type type) } nir_function_impl * -nir_function_impl_create(nir_function *function) +nir_function_impl_create_bare(nir_shader *shader) { - assert(function->impl == NULL); - - void *mem_ctx = ralloc_parent(function); - - nir_function_impl *impl = ralloc(mem_ctx, nir_function_impl); + nir_function_impl *impl = ralloc(shader, nir_function_impl); - function->impl = impl; - impl->function = function; + impl->function = NULL; cf_init(&impl->cf_node, nir_cf_node_function); @@ -278,8 +280,8 @@ nir_function_impl_create(nir_function *function) impl->valid_metadata = nir_metadata_none; /* create start & end blocks */ - nir_block *start_block = nir_block_create(mem_ctx); - nir_block *end_block = nir_block_create(mem_ctx); + nir_block *start_block = nir_block_create(shader); + nir_block *end_block = nir_block_create(shader); start_block->cf_node.parent = &impl->cf_node; end_block->cf_node.parent = &impl->cf_node; impl->end_block = end_block; @@ -291,6 +293,23 @@ nir_function_impl_create(nir_function *function) return impl; } +nir_function_impl * +nir_function_impl_create(nir_function *function) +{ + assert(function->impl == NULL); + + nir_function_impl *impl = nir_function_impl_create_bare(function->shader); + + function->impl = impl; + impl->function = function; + + impl->num_params = function->num_params; + impl->params = ralloc_array(function->shader, + nir_variable *, impl->num_params); + + return impl; +} + nir_block * nir_block_create(nir_shader *shader) { @@ -486,8 +505,10 @@ nir_tex_instr_create(nir_shader *shader, unsigned num_srcs) for (unsigned i = 0; i < num_srcs; i++) src_init(&instr->src[i].src); + instr->texture_index = 0; + instr->texture_array_size = 0; + instr->texture = NULL; instr->sampler_index = 0; - instr->sampler_array_size = 0; instr->sampler = NULL; return instr; @@ -682,6 +703,69 @@ nir_cf_node_get_function(nir_cf_node *node) return nir_cf_node_as_function(node); } +/* Reduces a cursor by trying to convert everything to after and trying to + * go up to block granularity when possible. + */ +static nir_cursor +reduce_cursor(nir_cursor cursor) +{ + switch (cursor.option) { + case nir_cursor_before_block: + if (exec_list_is_empty(&cursor.block->instr_list)) { + /* Empty block. After is as good as before. */ + cursor.option = nir_cursor_after_block; + } else { + /* Try to switch to after the previous block if there is one. + * (This isn't likely, but it can happen.) + */ + nir_cf_node *prev_node = nir_cf_node_prev(&cursor.block->cf_node); + if (prev_node && prev_node->type == nir_cf_node_block) { + cursor.block = nir_cf_node_as_block(prev_node); + cursor.option = nir_cursor_after_block; + } + } + return cursor; + + case nir_cursor_after_block: + return cursor; + + case nir_cursor_before_instr: { + nir_instr *prev_instr = nir_instr_prev(cursor.instr); + if (prev_instr) { + /* Before this instruction is after the previous */ + cursor.instr = prev_instr; + cursor.option = nir_cursor_after_instr; + } else { + /* No previous instruction. Switch to before block */ + cursor.block = cursor.instr->block; + cursor.option = nir_cursor_before_block; + } + return reduce_cursor(cursor); + } + + case nir_cursor_after_instr: + if (nir_instr_next(cursor.instr) == NULL) { + /* This is the last instruction, switch to after block */ + cursor.option = nir_cursor_after_block; + cursor.block = cursor.instr->block; + } + return cursor; + + default: + unreachable("Inavlid cursor option"); + } +} + +bool +nir_cursors_equal(nir_cursor a, nir_cursor b) +{ + /* Reduced cursors should be unique */ + a = reduce_cursor(a); + b = reduce_cursor(b); + + return a.block == b.block && a.option == b.option; +} + static bool add_use_cb(nir_src *src, void *state) { @@ -1005,6 +1089,10 @@ visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state) if (!visit_src(&instr->src[i].src, cb, state)) return false; + if (instr->texture != NULL) + if (!visit_deref_src(instr->texture, cb, state)) + return false; + if (instr->sampler != NULL) if (!visit_deref_src(instr->sampler, cb, state)) return false; diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index aec75fb930c..f130e5e0eb1 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -88,6 +88,7 @@ typedef enum { nir_var_local, nir_var_uniform, nir_var_shader_storage, + nir_var_shared, nir_var_system_value } nir_variable_mode; @@ -282,6 +283,11 @@ typedef struct nir_variable { int index; /** + * Descriptor set binding for sampler or UBO. + */ + int descriptor_set; + + /** * Initial binding point for a sampler or UBO. * * For array types, this represents the binding point for the first element. @@ -349,6 +355,34 @@ typedef struct nir_variable { #define nir_foreach_variable(var, var_list) \ foreach_list_typed(nir_variable, var, node, var_list) +/** + * Returns the bits in the inputs_read, outputs_written, or + * system_values_read bitfield corresponding to this variable. + */ +static inline uint64_t +nir_variable_get_io_mask(nir_variable *var, gl_shader_stage stage) +{ + assert(var->data.mode == nir_var_shader_in || + var->data.mode == nir_var_shader_out || + var->data.mode == nir_var_system_value); + assert(var->data.location >= 0); + + const struct glsl_type *var_type = var->type; + if (stage == MESA_SHADER_GEOMETRY && var->data.mode == nir_var_shader_in) { + /* Most geometry shader inputs are per-vertex arrays */ + if (var->data.location >= VARYING_SLOT_VAR0) + assert(glsl_type_is_array(var_type)); + + if (glsl_type_is_array(var_type)) + var_type = glsl_get_array_element(var_type); + } + + bool is_vertex_input = (var->data.mode == nir_var_shader_in && + stage == MESA_SHADER_VERTEX); + unsigned slots = glsl_count_attribute_slots(var_type, is_vertex_input); + return ((1ull << slots) - 1) << var->data.location; +} + typedef struct nir_register { struct exec_node node; @@ -503,7 +537,11 @@ typedef struct nir_src { bool is_ssa; } nir_src; -#define NIR_SRC_INIT (nir_src) { { NULL } } +#ifdef __cplusplus +# define NIR_SRC_INIT nir_src() +#else +# define NIR_SRC_INIT (nir_src) { { NULL } } +#endif #define nir_foreach_use(reg_or_ssa_def, src) \ list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link) @@ -526,7 +564,11 @@ typedef struct { bool is_ssa; } nir_dest; -#define NIR_DEST_INIT (nir_dest) { { { NULL } } } +#ifdef __cplusplus +# define NIR_DEST_INIT nir_dest() +#else +# define NIR_DEST_INIT (nir_dest) { { { NULL } } } +#endif #define nir_foreach_def(reg, dest) \ list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link) @@ -923,6 +965,7 @@ typedef enum { nir_tex_src_ms_index, /* MSAA sample index */ nir_tex_src_ddx, nir_tex_src_ddy, + nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */ nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */ nir_num_tex_src_types } nir_tex_src_type; @@ -973,6 +1016,24 @@ typedef struct { /* gather component selector */ unsigned component : 2; + /** The texture index + * + * If this texture instruction has a nir_tex_src_texture_offset source, + * then the texture index is given by texture_index + texture_offset. + */ + unsigned texture_index; + + /** The size of the texture array or 0 if it's not an array */ + unsigned texture_array_size; + + /** The texture deref + * + * If both this and `sampler` are both NULL, use texture_index instead. + * If `texture` is NULL, but `sampler` is non-NULL, then the texture is + * implied from the sampler. + */ + nir_deref_var *texture; + /** The sampler index * * If this texture instruction has a nir_tex_src_sampler_offset source, @@ -980,10 +1041,11 @@ typedef struct { */ unsigned sampler_index; - /** The size of the sampler array or 0 if it's not an array */ - unsigned sampler_array_size; - - nir_deref_var *sampler; /* if this is NULL, use sampler_index instead */ + /** The sampler deref + * + * If this is null, use sampler_index instead. + */ + nir_deref_var *sampler; } nir_tex_instr; static inline unsigned @@ -1468,11 +1530,28 @@ typedef struct nir_shader_compiler_options { /** lowers ffract to fsub+ffloor: */ bool lower_ffract; + bool lower_pack_half_2x16; + bool lower_pack_unorm_2x16; + bool lower_pack_snorm_2x16; + bool lower_pack_unorm_4x8; + bool lower_pack_snorm_4x8; + bool lower_unpack_half_2x16; + bool lower_unpack_unorm_2x16; + bool lower_unpack_snorm_2x16; + bool lower_unpack_unorm_4x8; + bool lower_unpack_snorm_4x8; + + bool lower_extract_byte; + bool lower_extract_word; + /** * Does the driver support real 32-bit integers? (Otherwise, integers * are simulated by floats.) */ bool native_integers; + + /* Indicates that the driver only has zero-based vertex id */ + bool vertex_id_zero_based; } nir_shader_compiler_options; typedef struct nir_shader_info { @@ -1571,6 +1650,9 @@ typedef struct nir_shader { /** list of outputs (nir_variable) */ struct exec_list outputs; + /** list of shared compute variables (nir_variable) */ + struct exec_list shared; + /** Set of driver-specific options for the shader. * * The memory for the options is expected to be kept in a single static @@ -1599,7 +1681,7 @@ typedef struct nir_shader { * the highest index a load_input_*, load_uniform_*, etc. intrinsic can * access plus one */ - unsigned num_inputs, num_uniforms, num_outputs; + unsigned num_inputs, num_uniforms, num_outputs, num_shared; /** The shader stage, such as MESA_SHADER_VERTEX. */ gl_shader_stage stage; @@ -1643,6 +1725,8 @@ nir_variable *nir_local_variable_create(nir_function_impl *impl, nir_function *nir_function_create(nir_shader *shader, const char *name); nir_function_impl *nir_function_impl_create(nir_function *func); +/** creates a function_impl that isn't tied to any particular function */ +nir_function_impl *nir_function_impl_create_bare(nir_shader *shader); nir_block *nir_block_create(nir_shader *shader); nir_if *nir_if_create(nir_shader *shader); @@ -1712,6 +1796,19 @@ typedef struct { }; } nir_cursor; +static inline nir_block * +nir_cursor_current_block(nir_cursor cursor) +{ + if (cursor.option == nir_cursor_before_instr || + cursor.option == nir_cursor_after_instr) { + return cursor.instr->block; + } else { + return cursor.block; + } +} + +bool nir_cursors_equal(nir_cursor a, nir_cursor b); + static inline nir_cursor nir_before_block(nir_block *block) { @@ -1778,6 +1875,22 @@ nir_after_cf_node(nir_cf_node *node) } static inline nir_cursor +nir_after_cf_node_and_phis(nir_cf_node *node) +{ + if (node->type == nir_cf_node_block) + return nir_after_block(nir_cf_node_as_block(node)); + + nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node)); + assert(block->cf_node.type == nir_cf_node_block); + + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_phi) + return nir_before_instr(instr); + } + return nir_after_block(block); +} + +static inline nir_cursor nir_before_cf_list(struct exec_list *cf_list) { nir_cf_node *first_node = exec_node_data(nir_cf_node, @@ -1903,7 +2016,9 @@ void nir_index_blocks(nir_function_impl *impl); void nir_print_shader(nir_shader *shader, FILE *fp); void nir_print_instr(const nir_instr *instr, FILE *fp); -nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s); +nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s); +nir_function_impl *nir_function_impl_clone(const nir_function_impl *impl); +nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var); #ifdef DEBUG void nir_validate_shader(nir_shader *shader); @@ -1968,14 +2083,24 @@ int nir_gs_count_vertices(const nir_shader *shader); bool nir_split_var_copies(nir_shader *shader); +bool nir_lower_returns_impl(nir_function_impl *impl); +bool nir_lower_returns(nir_shader *shader); + +bool nir_inline_functions(nir_shader *shader); + void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx); void nir_lower_var_copies(nir_shader *shader); bool nir_lower_global_vars_to_local(nir_shader *shader); +bool nir_lower_indirect_derefs(nir_shader *shader, uint32_t mode_mask); + bool nir_lower_locals_to_regs(nir_shader *shader); -void nir_lower_outputs_to_temporaries(nir_shader *shader); +void nir_lower_outputs_to_temporaries(nir_shader *shader, + nir_function *entrypoint); + +void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint); void nir_assign_var_locations(struct exec_list *var_list, unsigned *size, @@ -1989,7 +2114,7 @@ nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr); void nir_lower_vars_to_ssa(nir_shader *shader); -bool nir_remove_dead_variables(nir_shader *shader); +bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode mode); void nir_move_vec_src_uses_to_dest(nir_shader *shader); bool nir_lower_vec_to_movs(nir_shader *shader); @@ -2073,6 +2198,9 @@ bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b); void nir_convert_to_ssa_impl(nir_function_impl *impl); void nir_convert_to_ssa(nir_shader *shader); +bool nir_repair_ssa_impl(nir_function_impl *impl); +bool nir_repair_ssa(nir_shader *shader); + /* If phi_webs_only is true, only convert SSA values involved in phi nodes to * registers. If false, convert all values (even those not involved in a phi * node) to registers. diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py index a30652f2afd..14c0e822ad8 100644 --- a/src/compiler/nir/nir_algebraic.py +++ b/src/compiler/nir/nir_algebraic.py @@ -108,7 +108,7 @@ class Constant(Value): if isinstance(self.value, (bool)): return 'NIR_TRUE' if self.value else 'NIR_FALSE' if isinstance(self.value, (int, long)): - return hex(struct.unpack('I', struct.pack('i', self.value))[0]) + return hex(struct.unpack('I', struct.pack('i' if self.value < 0 else 'I', self.value))[0]) elif isinstance(self.value, float): return hex(struct.unpack('I', struct.pack('f', self.value))[0]) else: diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index 88ba3a1c269..1c7c78acae8 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -70,6 +70,20 @@ nir_builder_cf_insert(nir_builder *build, nir_cf_node *cf) } static inline nir_ssa_def * +nir_ssa_undef(nir_builder *build, unsigned num_components) +{ + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(build->shader, num_components); + if (!undef) + return NULL; + + nir_instr_insert(nir_before_block(nir_start_block(build->impl)), + &undef->instr); + + return &undef->def; +} + +static inline nir_ssa_def * nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value) { nir_load_const_instr *load_const = @@ -274,6 +288,23 @@ nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4], nir_imov_alu(build, alu_src, num_components); } +/* Selects the right fdot given the number of components in each source. */ +static inline nir_ssa_def * +nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1) +{ + assert(src0->num_components == src1->num_components); + switch (src0->num_components) { + case 1: return nir_fmul(build, src0, src1); + case 2: return nir_fdot2(build, src0, src1); + case 3: return nir_fdot3(build, src0, src1); + case 4: return nir_fdot4(build, src0, src1); + default: + unreachable("bad component size"); + } + + return NULL; +} + static inline nir_ssa_def * nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c) { @@ -349,6 +380,45 @@ nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value, nir_builder_instr_insert(build, &store->instr); } +static inline void +nir_store_deref_var(nir_builder *build, nir_deref_var *deref, + nir_ssa_def *value, unsigned writemask) +{ + const unsigned num_components = + glsl_get_vector_elements(nir_deref_tail(&deref->deref)->type); + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var); + store->num_components = num_components; + store->const_index[0] = writemask & ((1 << num_components) - 1); + store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &deref->deref)); + store->src[0] = nir_src_for_ssa(value); + nir_builder_instr_insert(build, &store->instr); +} + +static inline void +nir_copy_deref_var(nir_builder *build, nir_deref_var *dest, nir_deref_var *src) +{ + assert(nir_deref_tail(&dest->deref)->type == + nir_deref_tail(&src->deref)->type); + + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(build->shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); + copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref)); + nir_builder_instr_insert(build, ©->instr); +} + +static inline void +nir_copy_var(nir_builder *build, nir_variable *dest, nir_variable *src) +{ + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(build->shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_var_create(copy, dest); + copy->variables[1] = nir_deref_var_create(copy, src); + nir_builder_instr_insert(build, ©->instr); +} + static inline nir_ssa_def * nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index) { @@ -361,4 +431,11 @@ nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index) return &load->dest.ssa; } +static inline void +nir_jump(nir_builder *build, nir_jump_type jump_type) +{ + nir_jump_instr *jump = nir_jump_instr_create(build->shader, jump_type); + nir_builder_instr_insert(build, &jump->instr); +} + #endif /* NIR_BUILDER_H */ diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c index 5eff743d835..bc6df56b753 100644 --- a/src/compiler/nir/nir_clone.c +++ b/src/compiler/nir/nir_clone.c @@ -32,8 +32,11 @@ */ typedef struct { + /* True if we are cloning an entire shader. */ + bool global_clone; + /* maps orig ptr -> cloned ptr: */ - struct hash_table *ptr_table; + struct hash_table *remap_table; /* List of phi sources. */ struct list_head phi_srcs; @@ -43,28 +46,32 @@ typedef struct { } clone_state; static void -init_clone_state(clone_state *state) +init_clone_state(clone_state *state, bool global) { - state->ptr_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); + state->global_clone = global; + state->remap_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); list_inithead(&state->phi_srcs); } static void free_clone_state(clone_state *state) { - _mesa_hash_table_destroy(state->ptr_table, NULL); + _mesa_hash_table_destroy(state->remap_table, NULL); } -static void * -lookup_ptr(clone_state *state, const void *ptr) +static inline void * +_lookup_ptr(clone_state *state, const void *ptr, bool global) { struct hash_entry *entry; if (!ptr) return NULL; - entry = _mesa_hash_table_search(state->ptr_table, ptr); + if (!state->global_clone && global) + return (void *)ptr; + + entry = _mesa_hash_table_search(state->remap_table, ptr); assert(entry && "Failed to find pointer!"); if (!entry) return NULL; @@ -73,13 +80,37 @@ lookup_ptr(clone_state *state, const void *ptr) } static void -store_ptr(clone_state *state, void *nptr, const void *ptr) +add_remap(clone_state *state, void *nptr, const void *ptr) +{ + _mesa_hash_table_insert(state->remap_table, ptr, nptr); +} + +static void * +remap_local(clone_state *state, const void *ptr) { - _mesa_hash_table_insert(state->ptr_table, ptr, nptr); + return _lookup_ptr(state, ptr, false); } -static nir_constant * -clone_constant(clone_state *state, const nir_constant *c, nir_variable *nvar) +static void * +remap_global(clone_state *state, const void *ptr) +{ + return _lookup_ptr(state, ptr, true); +} + +static nir_register * +remap_reg(clone_state *state, const nir_register *reg) +{ + return _lookup_ptr(state, reg, reg->is_global); +} + +static nir_variable * +remap_var(clone_state *state, const nir_variable *var) +{ + return _lookup_ptr(state, var, var->data.mode != nir_var_local); +} + +nir_constant * +nir_constant_clone(const nir_constant *c, nir_variable *nvar) { nir_constant *nc = ralloc(nvar, nir_constant); @@ -87,7 +118,7 @@ clone_constant(clone_state *state, const nir_constant *c, nir_variable *nvar) nc->num_elements = c->num_elements; nc->elements = ralloc_array(nvar, nir_constant *, c->num_elements); for (unsigned i = 0; i < c->num_elements; i++) { - nc->elements[i] = clone_constant(state, c->elements[i], nvar); + nc->elements[i] = nir_constant_clone(c->elements[i], nvar); } return nc; @@ -100,7 +131,7 @@ static nir_variable * clone_variable(clone_state *state, const nir_variable *var) { nir_variable *nvar = rzalloc(state->ns, nir_variable); - store_ptr(state, nvar, var); + add_remap(state, nvar, var); nvar->type = var->type; nvar->name = ralloc_strdup(nvar, var->name); @@ -111,7 +142,7 @@ clone_variable(clone_state *state, const nir_variable *var) var->num_state_slots * sizeof(nir_state_slot)); if (var->constant_initializer) { nvar->constant_initializer = - clone_constant(state, var->constant_initializer, nvar); + nir_constant_clone(var->constant_initializer, nvar); } nvar->interface_type = var->interface_type; @@ -137,7 +168,7 @@ static nir_register * clone_register(clone_state *state, const nir_register *reg) { nir_register *nreg = rzalloc(state->ns, nir_register); - store_ptr(state, nreg, reg); + add_remap(state, nreg, reg); nreg->num_components = reg->num_components; nreg->num_array_elems = reg->num_array_elems; @@ -172,9 +203,9 @@ __clone_src(clone_state *state, void *ninstr_or_if, { nsrc->is_ssa = src->is_ssa; if (src->is_ssa) { - nsrc->ssa = lookup_ptr(state, src->ssa); + nsrc->ssa = remap_local(state, src->ssa); } else { - nsrc->reg.reg = lookup_ptr(state, src->reg.reg); + nsrc->reg.reg = remap_reg(state, src->reg.reg); if (src->reg.indirect) { nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src); __clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect); @@ -190,9 +221,9 @@ __clone_dst(clone_state *state, nir_instr *ninstr, ndst->is_ssa = dst->is_ssa; if (dst->is_ssa) { nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, dst->ssa.name); - store_ptr(state, &ndst->ssa, &dst->ssa); + add_remap(state, &ndst->ssa, &dst->ssa); } else { - ndst->reg.reg = lookup_ptr(state, dst->reg.reg); + ndst->reg.reg = remap_reg(state, dst->reg.reg); if (dst->reg.indirect) { ndst->reg.indirect = ralloc(ninstr, nir_src); __clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect); @@ -208,7 +239,7 @@ static nir_deref_var * clone_deref_var(clone_state *state, const nir_deref_var *dvar, nir_instr *ninstr) { - nir_variable *nvar = lookup_ptr(state, dvar->var); + nir_variable *nvar = remap_var(state, dvar->var); nir_deref_var *ndvar = nir_deref_var_create(ninstr, nvar); if (dvar->deref.child) @@ -322,7 +353,7 @@ clone_load_const(clone_state *state, const nir_load_const_instr *lc) memcpy(&nlc->value, &lc->value, sizeof(nlc->value)); - store_ptr(state, &nlc->def, &lc->def); + add_remap(state, &nlc->def, &lc->def); return nlc; } @@ -333,7 +364,7 @@ clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa) nir_ssa_undef_instr *nsa = nir_ssa_undef_instr_create(state->ns, sa->def.num_components); - store_ptr(state, &nsa->def, &sa->def); + add_remap(state, &nsa->def, &sa->def); return nsa; } @@ -357,8 +388,11 @@ clone_tex(clone_state *state, const nir_tex_instr *tex) ntex->is_new_style_shadow = tex->is_new_style_shadow; memcpy(ntex->const_offset, tex->const_offset, sizeof(ntex->const_offset)); ntex->component = tex->component; + ntex->texture_index = tex->texture_index; + ntex->texture_array_size = tex->texture_array_size; + if (tex->texture) + ntex->texture = clone_deref_var(state, tex->texture, &ntex->instr); ntex->sampler_index = tex->sampler_index; - ntex->sampler_array_size = tex->sampler_array_size; if (tex->sampler) ntex->sampler = clone_deref_var(state, tex->sampler, &ntex->instr); @@ -420,7 +454,7 @@ clone_jump(clone_state *state, const nir_jump_instr *jmp) static nir_call_instr * clone_call(clone_state *state, const nir_call_instr *call) { - nir_function *ncallee = lookup_ptr(state, call->callee); + nir_function *ncallee = remap_global(state, call->callee); nir_call_instr *ncall = nir_call_instr_create(state->ns, ncallee); for (unsigned i = 0; i < ncall->num_params; i++) @@ -473,7 +507,7 @@ clone_block(clone_state *state, struct exec_list *cf_list, const nir_block *blk) assert(exec_list_is_empty(&nblk->instr_list)); /* We need this for phi sources */ - store_ptr(state, nblk, blk); + add_remap(state, nblk, blk); nir_foreach_instr(blk, instr) { if (instr->type == nir_instr_type_phi) { @@ -546,10 +580,9 @@ clone_cf_list(clone_state *state, struct exec_list *dst, } static nir_function_impl * -clone_function_impl(clone_state *state, const nir_function_impl *fi, - nir_function *nfxn) +clone_function_impl(clone_state *state, const nir_function_impl *fi) { - nir_function_impl *nfi = nir_function_impl_create(nfxn); + nir_function_impl *nfi = nir_function_impl_create_bare(state->ns); clone_var_list(state, &nfi->locals, &fi->locals); clone_reg_list(state, &nfi->registers, &fi->registers); @@ -558,9 +591,9 @@ clone_function_impl(clone_state *state, const nir_function_impl *fi, nfi->num_params = fi->num_params; nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params); for (unsigned i = 0; i < fi->num_params; i++) { - nfi->params[i] = lookup_ptr(state, fi->params[i]); + nfi->params[i] = remap_local(state, fi->params[i]); } - nfi->return_var = lookup_ptr(state, fi->return_var); + nfi->return_var = remap_local(state, fi->return_var); assert(list_empty(&state->phi_srcs)); @@ -572,9 +605,9 @@ clone_function_impl(clone_state *state, const nir_function_impl *fi, * add it to the phi_srcs list and we fix it up here. */ list_for_each_entry_safe(nir_phi_src, src, &state->phi_srcs, src.use_link) { - src->pred = lookup_ptr(state, src->pred); + src->pred = remap_local(state, src->pred); assert(src->src.is_ssa); - src->src.ssa = lookup_ptr(state, src->src.ssa); + src->src.ssa = remap_local(state, src->src.ssa); /* Remove from this list and place in the uses of the SSA def */ list_del(&src->src.use_link); @@ -588,6 +621,22 @@ clone_function_impl(clone_state *state, const nir_function_impl *fi, return nfi; } +nir_function_impl * +nir_function_impl_clone(const nir_function_impl *fi) +{ + clone_state state; + init_clone_state(&state, false); + + /* We use the same shader */ + state.ns = fi->function->shader; + + nir_function_impl *nfi = clone_function_impl(&state, fi); + + free_clone_state(&state); + + return nfi; +} + static nir_function * clone_function(clone_state *state, const nir_function *fxn, nir_shader *ns) { @@ -595,7 +644,7 @@ clone_function(clone_state *state, const nir_function *fxn, nir_shader *ns) nir_function *nfxn = nir_function_create(ns, fxn->name); /* Needed for call instructions */ - store_ptr(state, nfxn, fxn); + add_remap(state, nfxn, fxn); nfxn->num_params = fxn->num_params; nfxn->params = ralloc_array(state->ns, nir_parameter, fxn->num_params); @@ -616,7 +665,7 @@ nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s) { clone_state state; - init_clone_state(&state); + init_clone_state(&state, true); nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options); state.ns = ns; @@ -624,6 +673,7 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s) clone_var_list(&state, &ns->uniforms, &s->uniforms); clone_var_list(&state, &ns->inputs, &s->inputs); clone_var_list(&state, &ns->outputs, &s->outputs); + clone_var_list(&state, &ns->shared, &s->shared); clone_var_list(&state, &ns->globals, &s->globals); clone_var_list(&state, &ns->system_values, &s->system_values); @@ -637,8 +687,9 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s) * will have in the list. */ nir_foreach_function(s, fxn) { - nir_function *nfxn = lookup_ptr(&state, fxn); - clone_function_impl(&state, fxn->impl, nfxn); + nir_function *nfxn = remap_global(&state, fxn); + nfxn->impl = clone_function_impl(&state, fxn->impl); + nfxn->impl->function = nfxn; } clone_reg_list(&state, &ns->registers, &s->registers); @@ -652,6 +703,7 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s) ns->num_inputs = s->num_inputs; ns->num_uniforms = s->num_uniforms; ns->num_outputs = s->num_outputs; + ns->num_shared = s->num_shared; free_clone_state(&state); diff --git a/src/compiler/nir/nir_control_flow.c b/src/compiler/nir/nir_control_flow.c index 96395a41615..33b06d0cc84 100644 --- a/src/compiler/nir/nir_control_flow.c +++ b/src/compiler/nir/nir_control_flow.c @@ -336,8 +336,7 @@ block_add_normal_succs(nir_block *block) nir_block *next_block = nir_cf_node_as_block(next); link_blocks(block, next_block, NULL); - } else { - assert(parent->type == nir_cf_node_loop); + } else if (parent->type == nir_cf_node_loop) { nir_loop *loop = nir_cf_node_as_loop(parent); nir_cf_node *head = nir_loop_first_cf_node(loop); @@ -346,6 +345,10 @@ block_add_normal_succs(nir_block *block) link_blocks(block, head_block, NULL); insert_phi_undef(head_block, block); + } else { + assert(parent->type == nir_cf_node_function); + nir_function_impl *impl = nir_cf_node_as_function(parent); + link_blocks(block, impl->end_block, NULL); } } else { nir_cf_node *next = nir_cf_node_next(&block->cf_node); @@ -746,6 +749,12 @@ nir_cf_extract(nir_cf_list *extracted, nir_cursor begin, nir_cursor end) { nir_block *block_begin, *block_end, *block_before, *block_after; + if (nir_cursors_equal(begin, end)) { + exec_list_make_empty(&extracted->list); + extracted->impl = NULL; /* we shouldn't need this */ + return; + } + /* In the case where begin points to an instruction in some basic block and * end points to the end of the same basic block, we rely on the fact that * splitting on an instruction moves earlier instructions into a new basic @@ -785,6 +794,9 @@ nir_cf_reinsert(nir_cf_list *cf_list, nir_cursor cursor) { nir_block *before, *after; + if (exec_list_is_empty(&cf_list->list)) + return; + split_block_cursor(cursor, &before, &after); foreach_list_typed_safe(nir_cf_node, node, node, &cf_list->list) { diff --git a/src/compiler/nir/nir_dominance.c b/src/compiler/nir/nir_dominance.c index b345b85e8a0..d95f3968074 100644 --- a/src/compiler/nir/nir_dominance.c +++ b/src/compiler/nir/nir_dominance.c @@ -94,7 +94,6 @@ calc_dominance_cb(nir_block *block, void *_state) } } - assert(new_idom); if (block->imm_dom != new_idom) { block->imm_dom = new_idom; state->progress = true; @@ -112,6 +111,11 @@ calc_dom_frontier_cb(nir_block *block, void *state) struct set_entry *entry; set_foreach(block->predecessors, entry) { nir_block *runner = (nir_block *) entry->key; + + /* Skip unreachable predecessors */ + if (runner->imm_dom == NULL) + continue; + while (runner != block->imm_dom) { _mesa_set_add(runner->dom_frontier, block); runner = runner->imm_dom; diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c new file mode 100644 index 00000000000..b84915c2d2b --- /dev/null +++ b/src/compiler/nir/nir_gather_info.c @@ -0,0 +1,109 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" + +static void +gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader) +{ + switch (instr->intrinsic) { + case nir_intrinsic_discard: + assert(shader->stage == MESA_SHADER_FRAGMENT); + shader->info.fs.uses_discard = true; + break; + + case nir_intrinsic_load_front_face: + case nir_intrinsic_load_vertex_id: + case nir_intrinsic_load_vertex_id_zero_base: + case nir_intrinsic_load_base_vertex: + case nir_intrinsic_load_instance_id: + case nir_intrinsic_load_sample_id: + case nir_intrinsic_load_sample_pos: + case nir_intrinsic_load_sample_mask_in: + case nir_intrinsic_load_primitive_id: + case nir_intrinsic_load_invocation_id: + case nir_intrinsic_load_local_invocation_id: + case nir_intrinsic_load_work_group_id: + case nir_intrinsic_load_num_work_groups: + shader->info.system_values_read |= + (1 << nir_system_value_from_intrinsic(instr->intrinsic)); + break; + + case nir_intrinsic_end_primitive: + case nir_intrinsic_end_primitive_with_counter: + assert(shader->stage == MESA_SHADER_GEOMETRY); + shader->info.gs.uses_end_primitive = 1; + break; + + default: + break; + } +} + +static void +gather_tex_info(nir_tex_instr *instr, nir_shader *shader) +{ + if (instr->op == nir_texop_tg4) + shader->info.uses_texture_gather = true; +} + +static bool +gather_info_block(nir_block *block, void *shader) +{ + nir_foreach_instr(block, instr) { + switch (instr->type) { + case nir_instr_type_intrinsic: + gather_intrinsic_info(nir_instr_as_intrinsic(instr), shader); + break; + case nir_instr_type_tex: + gather_tex_info(nir_instr_as_tex(instr), shader); + break; + case nir_instr_type_call: + assert(!"nir_shader_gather_info only works if functions are inlined"); + break; + default: + break; + } + } + + return true; +} + +void +nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint) +{ + shader->info.inputs_read = 0; + foreach_list_typed(nir_variable, var, node, &shader->inputs) + shader->info.inputs_read |= nir_variable_get_io_mask(var, shader->stage); + + /* TODO: Some day we may need to add stream support to NIR */ + shader->info.outputs_written = 0; + foreach_list_typed(nir_variable, var, node, &shader->outputs) + shader->info.outputs_written |= nir_variable_get_io_mask(var, shader->stage); + + shader->info.system_values_read = 0; + foreach_list_typed(nir_variable, var, node, &shader->system_values) + shader->info.system_values_read |= nir_variable_get_io_mask(var, shader->stage); + + nir_foreach_block(entrypoint, gather_info_block, shader); +} diff --git a/src/compiler/nir/nir_inline_functions.c b/src/compiler/nir/nir_inline_functions.c new file mode 100644 index 00000000000..3cf83279053 --- /dev/null +++ b/src/compiler/nir/nir_inline_functions.c @@ -0,0 +1,153 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_builder.h" +#include "nir_control_flow.h" + +struct inline_functions_state { + struct set *inlined; + nir_builder builder; + bool progress; +}; + +static bool inline_function_impl(nir_function_impl *impl, struct set *inlined); + +static bool +inline_functions_block(nir_block *block, void *void_state) +{ + struct inline_functions_state *state = void_state; + + nir_builder *b = &state->builder; + + /* This is tricky. We're iterating over instructions in a block but, as + * we go, the block and its instruction list are being split into + * pieces. However, this *should* be safe since foreach_safe always + * stashes the next thing in the iteration. That next thing will + * properly get moved to the next block when it gets split, and we + * continue iterating there. + */ + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_call) + continue; + + state->progress = true; + + nir_call_instr *call = nir_instr_as_call(instr); + assert(call->callee->impl); + + inline_function_impl(call->callee->impl, state->inlined); + + nir_function_impl *callee_copy = + nir_function_impl_clone(call->callee->impl); + + exec_list_append(&b->impl->locals, &callee_copy->locals); + exec_list_append(&b->impl->registers, &callee_copy->registers); + + b->cursor = nir_before_instr(&call->instr); + + /* Add copies of all in parameters */ + assert(call->num_params == callee_copy->num_params); + for (unsigned i = 0; i < callee_copy->num_params; i++) { + /* Only in or inout parameters */ + if (call->callee->params[i].param_type == nir_parameter_out) + continue; + + nir_copy_deref_var(b, nir_deref_var_create(b->shader, + callee_copy->params[i]), + call->params[i]); + } + + /* Pluck the body out of the function and place it here */ + nir_cf_list body; + nir_cf_list_extract(&body, &callee_copy->body); + nir_cf_reinsert(&body, b->cursor); + + b->cursor = nir_before_instr(&call->instr); + + /* Add copies of all out parameters and the return */ + assert(call->num_params == callee_copy->num_params); + for (unsigned i = 0; i < callee_copy->num_params; i++) { + /* Only out or inout parameters */ + if (call->callee->params[i].param_type == nir_parameter_in) + continue; + + nir_copy_deref_var(b, call->params[i], + nir_deref_var_create(b->shader, + callee_copy->params[i])); + } + if (!glsl_type_is_void(call->callee->return_type)) { + nir_copy_deref_var(b, call->return_deref, + nir_deref_var_create(b->shader, + callee_copy->return_var)); + } + + nir_instr_remove(&call->instr); + } + + return true; +} + +static bool +inline_function_impl(nir_function_impl *impl, struct set *inlined) +{ + if (_mesa_set_search(inlined, impl)) + return false; /* Already inlined */ + + struct inline_functions_state state; + + state.inlined = inlined; + state.progress = false; + nir_builder_init(&state.builder, impl); + + nir_foreach_block(impl, inline_functions_block, &state); + + if (state.progress) { + /* SSA and register indices are completely messed up now */ + nir_index_ssa_defs(impl); + nir_index_local_regs(impl); + + nir_metadata_preserve(impl, nir_metadata_none); + } + + _mesa_set_add(inlined, impl); + + return state.progress; +} + +bool +nir_inline_functions(nir_shader *shader) +{ + struct set *inlined = _mesa_set_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + bool progress = false; + + nir_foreach_function(shader, function) { + if (function->impl) + progress = inline_function_impl(function->impl, inlined) || progress; + } + + _mesa_set_destroy(inlined, NULL); + + return progress; +} diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c index d3f939fe805..eb021326097 100644 --- a/src/compiler/nir/nir_instr_set.c +++ b/src/compiler/nir/nir_instr_set.c @@ -155,8 +155,9 @@ hash_tex(uint32_t hash, const nir_tex_instr *instr) hash = HASH(hash, instr->const_offset); unsigned component = instr->component; hash = HASH(hash, component); + hash = HASH(hash, instr->texture_index); + hash = HASH(hash, instr->texture_array_size); hash = HASH(hash, instr->sampler_index); - hash = HASH(hash, instr->sampler_array_size); assert(!instr->sampler); @@ -305,13 +306,15 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) memcmp(tex1->const_offset, tex2->const_offset, sizeof(tex1->const_offset)) != 0 || tex1->component != tex2->component || - tex1->sampler_index != tex2->sampler_index || - tex1->sampler_array_size != tex2->sampler_array_size) { + tex1->texture_index != tex2->texture_index || + tex1->texture_array_size != tex2->texture_array_size || + tex1->sampler_index != tex2->sampler_index) { return false; } /* Don't support un-lowered sampler derefs currently. */ - assert(!tex1->sampler && !tex2->sampler); + assert(!tex1->texture && !tex1->sampler && + !tex2->texture && !tex2->sampler); return true; } @@ -422,7 +425,7 @@ instr_can_rewrite(nir_instr *instr) nir_tex_instr *tex = nir_instr_as_tex(instr); /* Don't support un-lowered sampler derefs currently. */ - if (tex->sampler) + if (tex->texture || tex->sampler) return false; return true; diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h index 62eead4878a..3e7cf735a1b 100644 --- a/src/compiler/nir/nir_intrinsics.h +++ b/src/compiler/nir/nir_intrinsics.h @@ -176,6 +176,52 @@ INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* + * Vulkan descriptor set intrinsic + * + * The Vulkan API uses a different binding model from GL. In the Vulkan + * API, all external resources are represented by a tripple: + * + * (descriptor set, binding, array index) + * + * where the array index is the only thing allowed to be indirect. The + * vulkan_surface_index intrinsic takes the descriptor set and binding as + * its first two indices and the array index as its source. The third + * index is a nir_variable_mode in case that's useful to the backend. + * + * The intended usage is that the shader will call vulkan_surface_index to + * get an index and then pass that as the buffer index ubo/ssbo calls. + */ +INTRINSIC(vulkan_resource_index, 1, ARR(1), true, 1, 0, 3, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + +/* + * variable atomic intrinsics + * + * All of these variable atomic memory operations read a value from memory, + * compute a new value using one of the operations below, write the new value + * to memory, and return the original value read. + * + * All operations take 1 source except CompSwap that takes 2. These sources + * represent: + * + * 0: The data parameter to the atomic function (i.e. the value to add + * in shared_atomic_add, etc). + * 1: For CompSwap only: the second data parameter. + * + * All operations take 1 variable deref. + */ +INTRINSIC(var_atomic_add, 1, ARR(1), true, 1, 1, 0, 0) +INTRINSIC(var_atomic_imin, 1, ARR(1), true, 1, 1, 0, 0) +INTRINSIC(var_atomic_umin, 1, ARR(1), true, 1, 1, 0, 0) +INTRINSIC(var_atomic_imax, 1, ARR(1), true, 1, 1, 0, 0) +INTRINSIC(var_atomic_umax, 1, ARR(1), true, 1, 1, 0, 0) +INTRINSIC(var_atomic_and, 1, ARR(1), true, 1, 1, 0, 0) +INTRINSIC(var_atomic_or, 1, ARR(1), true, 1, 1, 0, 0) +INTRINSIC(var_atomic_xor, 1, ARR(1), true, 1, 1, 0, 0) +INTRINSIC(var_atomic_exchange, 1, ARR(1), true, 1, 1, 0, 0) +INTRINSIC(var_atomic_comp_swap, 2, ARR(1, 1), true, 1, 1, 0, 0) + +/* * SSBO atomic intrinsics * * All of the SSBO atomic memory operations read a value from memory, @@ -265,6 +311,9 @@ SYSTEM_VALUE(helper_invocation, 1, 0) * of the start of the variable being loaded and and the offset source is a * offset into that variable. * + * Uniform load operations have a second index that specifies the size of the + * variable being loaded. If const_index[1] == 0, then the size is unknown. + * * Some load operations such as UBO/SSBO load and per_vertex loads take an * additional source to specify which UBO/SSBO/vertex to load from. * @@ -277,8 +326,8 @@ SYSTEM_VALUE(helper_invocation, 1, 0) #define LOAD(name, srcs, indices, flags) \ INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices, flags) -/* src[] = { offset }. const_index[] = { base } */ -LOAD(uniform, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { offset }. const_index[] = { base, size } */ +LOAD(uniform, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* src[] = { buffer_index, offset }. No const_index */ LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* src[] = { offset }. const_index[] = { base } */ @@ -293,6 +342,8 @@ LOAD(output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) LOAD(per_vertex_output, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE) /* src[] = { offset }. const_index[] = { base } */ LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) +/* src[] = { offset }. const_index[] = { base, size } */ +LOAD(push_constant, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* * Stores work the same way as loads, except now the first source is the value diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c index 0a27e66cf0f..37cb0221e0b 100644 --- a/src/compiler/nir/nir_lower_alu_to_scalar.c +++ b/src/compiler/nir/nir_lower_alu_to_scalar.c @@ -97,6 +97,20 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) */ return; + case nir_op_pack_half_2x16: + if (!b->shader->options->lower_pack_half_2x16) + return; + + nir_ssa_def *val = + nir_pack_half_2x16_split(b, nir_channel(b, instr->src[0].src.ssa, + instr->src[0].swizzle[0]), + nir_channel(b, instr->src[0].src.ssa, + instr->src[0].swizzle[1])); + + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val)); + nir_instr_remove(&instr->instr); + return; + case nir_op_unpack_unorm_4x8: case nir_op_unpack_snorm_4x8: case nir_op_unpack_unorm_2x16: @@ -106,11 +120,51 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) */ return; - case nir_op_unpack_half_2x16: - /* We could split this into unpack_half_2x16_split_[xy], but should - * we? - */ + case nir_op_unpack_half_2x16: { + if (!b->shader->options->lower_unpack_half_2x16) + return; + + nir_ssa_def *comps[2]; + comps[0] = nir_unpack_half_2x16_split_x(b, instr->src[0].src.ssa); + comps[1] = nir_unpack_half_2x16_split_y(b, instr->src[0].src.ssa); + nir_ssa_def *vec = nir_vec(b, comps, 2); + + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec)); + nir_instr_remove(&instr->instr); return; + } + + case nir_op_pack_uvec2_to_uint: { + assert(b->shader->options->lower_pack_snorm_2x16 || + b->shader->options->lower_pack_unorm_2x16); + + nir_ssa_def *word = + nir_extract_uword(b, instr->src[0].src.ssa, nir_imm_int(b, 0)); + nir_ssa_def *val = + nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)), + nir_channel(b, word, 0)); + + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val)); + nir_instr_remove(&instr->instr); + break; + } + + case nir_op_pack_uvec4_to_uint: { + assert(b->shader->options->lower_pack_snorm_4x8 || + b->shader->options->lower_pack_unorm_4x8); + + nir_ssa_def *byte = + nir_extract_ubyte(b, instr->src[0].src.ssa, nir_imm_int(b, 0)); + nir_ssa_def *val = + nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), nir_imm_int(b, 24)), + nir_ishl(b, nir_channel(b, byte, 2), nir_imm_int(b, 16))), + nir_ior(b, nir_ishl(b, nir_channel(b, byte, 1), nir_imm_int(b, 8)), + nir_channel(b, byte, 0))); + + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val)); + nir_instr_remove(&instr->instr); + break; + } case nir_op_fdph: { nir_ssa_def *sum[4]; diff --git a/src/compiler/nir/nir_lower_atomics.c b/src/compiler/nir/nir_lower_atomics.c index 1a4458d4f84..b07e199d71b 100644 --- a/src/compiler/nir/nir_lower_atomics.c +++ b/src/compiler/nir/nir_lower_atomics.c @@ -63,7 +63,8 @@ lower_instr(nir_intrinsic_instr *instr, } if (instr->variables[0]->var->data.mode != nir_var_uniform && - instr->variables[0]->var->data.mode != nir_var_shader_storage) + instr->variables[0]->var->data.mode != nir_var_shader_storage && + instr->variables[0]->var->data.mode != nir_var_shared) return; /* atomics passed as function arguments can't be lowered */ void *mem_ctx = ralloc_parent(instr); diff --git a/src/compiler/nir/nir_lower_indirect_derefs.c b/src/compiler/nir/nir_lower_indirect_derefs.c new file mode 100644 index 00000000000..69f2df4ba6d --- /dev/null +++ b/src/compiler/nir/nir_lower_indirect_derefs.c @@ -0,0 +1,239 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_builder.h" + +static void +emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr, + nir_deref_var *deref, nir_deref *tail, + nir_ssa_def **dest, nir_ssa_def *src); + +static void +emit_indirect_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr, + nir_deref_var *deref, nir_deref *arr_parent, + int start, int end, + nir_ssa_def **dest, nir_ssa_def *src) +{ + assert(arr_parent->child && + arr_parent->child->deref_type == nir_deref_type_array); + nir_deref_array *arr = nir_deref_as_array(arr_parent->child); + assert(arr->deref_array_type == nir_deref_array_type_indirect); + assert(arr->indirect.is_ssa); + + assert(start < end); + if (start == end - 1) { + /* Base case. Just emit the load/store op */ + nir_deref_array direct = *arr; + direct.deref_array_type = nir_deref_array_type_direct; + direct.base_offset += start; + direct.indirect = NIR_SRC_INIT; + + arr_parent->child = &direct.deref; + emit_load_store(b, orig_instr, deref, &arr->deref, dest, src); + arr_parent->child = &arr->deref; + } else { + int mid = start + (end - start) / 2; + + nir_ssa_def *then_dest, *else_dest; + + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(nir_ilt(b, arr->indirect.ssa, + nir_imm_int(b, mid))); + nir_cf_node_insert(b->cursor, &if_stmt->cf_node); + + b->cursor = nir_after_cf_list(&if_stmt->then_list); + emit_indirect_load_store(b, orig_instr, deref, arr_parent, + start, mid, &then_dest, src); + + b->cursor = nir_after_cf_list(&if_stmt->else_list); + emit_indirect_load_store(b, orig_instr, deref, arr_parent, + mid, end, &else_dest, src); + + b->cursor = nir_after_cf_node(&if_stmt->cf_node); + + if (src == NULL) { + /* We're a load. We need to insert a phi node */ + nir_phi_instr *phi = nir_phi_instr_create(b->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, + then_dest->num_components, NULL); + + nir_phi_src *src0 = ralloc(phi, nir_phi_src); + src0->pred = nir_cf_node_as_block(nir_if_last_then_node(if_stmt)); + src0->src = nir_src_for_ssa(then_dest); + exec_list_push_tail(&phi->srcs, &src0->node); + + nir_phi_src *src1 = ralloc(phi, nir_phi_src); + src1->pred = nir_cf_node_as_block(nir_if_last_else_node(if_stmt)); + src1->src = nir_src_for_ssa(else_dest); + exec_list_push_tail(&phi->srcs, &src1->node); + + nir_builder_instr_insert(b, &phi->instr); + *dest = &phi->dest.ssa; + } + } +} + +static void +emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr, + nir_deref_var *deref, nir_deref *tail, + nir_ssa_def **dest, nir_ssa_def *src) +{ + for (; tail->child; tail = tail->child) { + if (tail->child->deref_type != nir_deref_type_array) + continue; + + nir_deref_array *arr = nir_deref_as_array(tail->child); + if (arr->deref_array_type != nir_deref_array_type_indirect) + continue; + + int length = glsl_get_length(tail->type); + + emit_indirect_load_store(b, orig_instr, deref, tail, -arr->base_offset, + length - arr->base_offset, dest, src); + return; + } + + assert(tail && tail->child == NULL); + + /* We reached the end of the deref chain. Emit the instruction */ + + if (src == NULL) { + /* This is a load instruction */ + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); + load->num_components = orig_instr->num_components; + load->variables[0] = + nir_deref_as_var(nir_copy_deref(load, &deref->deref)); + nir_ssa_dest_init(&load->instr, &load->dest, + load->num_components, NULL); + nir_builder_instr_insert(b, &load->instr); + *dest = &load->dest.ssa; + } else { + /* This is a store instruction */ + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); + store->num_components = orig_instr->num_components; + store->const_index[0] = orig_instr->const_index[0]; /* writemask */ + store->variables[0] = + nir_deref_as_var(nir_copy_deref(store, &deref->deref)); + store->src[0] = nir_src_for_ssa(src); + nir_builder_instr_insert(b, &store->instr); + } +} + +static bool +deref_has_indirect(nir_deref_var *deref) +{ + for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) { + if (tail->deref_type != nir_deref_type_array) + continue; + + nir_deref_array *arr = nir_deref_as_array(tail); + if (arr->deref_array_type == nir_deref_array_type_indirect) + return true; + } + + return false; +} + +struct lower_indirect_state { + nir_builder builder; + uint32_t mode_mask; + bool progress; +}; + +static bool +lower_indirect_block(nir_block *block, void *void_state) +{ + struct lower_indirect_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != nir_intrinsic_load_var && + intrin->intrinsic != nir_intrinsic_store_var) + continue; + + if (!deref_has_indirect(intrin->variables[0])) + continue; + + /* Only lower variables whose mode is in the mask */ + if (!(state->mode_mask & (1 << intrin->variables[0]->var->data.mode))) + continue; + + state->builder.cursor = nir_before_instr(&intrin->instr); + + if (intrin->intrinsic == nir_intrinsic_load_var) { + nir_ssa_def *result; + emit_load_store(&state->builder, intrin, intrin->variables[0], + &intrin->variables[0]->deref, &result, NULL); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(result)); + } else { + assert(intrin->src[0].is_ssa); + emit_load_store(&state->builder, intrin, intrin->variables[0], + &intrin->variables[0]->deref, NULL, intrin->src[0].ssa); + } + nir_instr_remove(&intrin->instr); + state->progress = true; + } + + return true; +} + +static bool +lower_indirects_impl(nir_function_impl *impl, uint32_t mode_mask) +{ + struct lower_indirect_state state; + + state.progress = false; + state.mode_mask = mode_mask; + nir_builder_init(&state.builder, impl); + + nir_foreach_block(impl, lower_indirect_block, &state); + + if (state.progress) + nir_metadata_preserve(impl, nir_metadata_none); + + return state.progress; +} + +/** Lowers indirect variable loads/stores to direct loads/stores. + * + * The pass works by replacing any indirect load or store with an if-ladder + * that does a binary search on the array index. + */ +bool +nir_lower_indirect_derefs(nir_shader *shader, uint32_t mode_mask) +{ + bool progress = false; + + nir_foreach_function(shader, function) { + if (function->impl) + progress = lower_indirects_impl(function->impl, mode_mask) || progress; + } + + return progress; +} diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index 80c5151f0ea..2c5fa16af5e 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -160,12 +160,56 @@ load_op(struct lower_io_state *state, case nir_var_uniform: op = nir_intrinsic_load_uniform; break; + case nir_var_shared: + op = nir_intrinsic_load_shared; + break; default: unreachable("Unknown variable mode"); } return op; } +static nir_intrinsic_op +store_op(struct lower_io_state *state, + nir_variable_mode mode, bool per_vertex) +{ + nir_intrinsic_op op; + switch (mode) { + case nir_var_shader_in: + case nir_var_shader_out: + op = per_vertex ? nir_intrinsic_store_per_vertex_output : + nir_intrinsic_store_output; + break; + case nir_var_shared: + op = nir_intrinsic_store_shared; + break; + default: + unreachable("Unknown variable mode"); + } + return op; +} + +static nir_intrinsic_op +atomic_op(nir_intrinsic_op opcode) +{ + switch (opcode) { +#define OP(O) case nir_intrinsic_var_##O: return nir_intrinsic_shared_##O; + OP(atomic_exchange) + OP(atomic_comp_swap) + OP(atomic_add) + OP(atomic_imin) + OP(atomic_umin) + OP(atomic_imax) + OP(atomic_umax) + OP(atomic_and) + OP(atomic_or) + OP(atomic_xor) +#undef OP + default: + unreachable("Invalid atomic"); + } +} + static bool nir_lower_io_block(nir_block *block, void *void_state) { @@ -179,9 +223,25 @@ nir_lower_io_block(nir_block *block, void *void_state) nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - if (intrin->intrinsic != nir_intrinsic_load_var && - intrin->intrinsic != nir_intrinsic_store_var) + switch (intrin->intrinsic) { + case nir_intrinsic_load_var: + case nir_intrinsic_store_var: + case nir_intrinsic_var_atomic_add: + case nir_intrinsic_var_atomic_imin: + case nir_intrinsic_var_atomic_umin: + case nir_intrinsic_var_atomic_imax: + case nir_intrinsic_var_atomic_umax: + case nir_intrinsic_var_atomic_and: + case nir_intrinsic_var_atomic_or: + case nir_intrinsic_var_atomic_xor: + case nir_intrinsic_var_atomic_exchange: + case nir_intrinsic_var_atomic_comp_swap: + /* We can lower the io for this nir instrinsic */ + break; + default: + /* We can't lower the io for this nir instrinsic, so skip it */ continue; + } nir_variable_mode mode = intrin->variables[0]->var->data.mode; @@ -190,6 +250,7 @@ nir_lower_io_block(nir_block *block, void *void_state) if (mode != nir_var_shader_in && mode != nir_var_shader_out && + mode != nir_var_shared && mode != nir_var_uniform) continue; @@ -216,6 +277,11 @@ nir_lower_io_block(nir_block *block, void *void_state) load->const_index[0] = intrin->variables[0]->var->data.driver_location; + if (load->intrinsic == nir_intrinsic_load_uniform) { + load->const_index[1] = + state->type_size(intrin->variables[0]->var->type); + } + if (per_vertex) load->src[0] = nir_src_for_ssa(vertex_index); @@ -236,7 +302,7 @@ nir_lower_io_block(nir_block *block, void *void_state) } case nir_intrinsic_store_var: { - assert(mode == nir_var_shader_out); + assert(mode == nir_var_shader_out || mode == nir_var_shared); nir_ssa_def *offset; nir_ssa_def *vertex_index; @@ -248,12 +314,9 @@ nir_lower_io_block(nir_block *block, void *void_state) per_vertex ? &vertex_index : NULL, state->type_size); - nir_intrinsic_op store_op = - per_vertex ? nir_intrinsic_store_per_vertex_output : - nir_intrinsic_store_output; - - nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx, - store_op); + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(state->mem_ctx, + store_op(state, mode, per_vertex)); store->num_components = intrin->num_components; nir_src_copy(&store->src[0], &intrin->src[0], store); @@ -274,6 +337,51 @@ nir_lower_io_block(nir_block *block, void *void_state) break; } + case nir_intrinsic_var_atomic_add: + case nir_intrinsic_var_atomic_imin: + case nir_intrinsic_var_atomic_umin: + case nir_intrinsic_var_atomic_imax: + case nir_intrinsic_var_atomic_umax: + case nir_intrinsic_var_atomic_and: + case nir_intrinsic_var_atomic_or: + case nir_intrinsic_var_atomic_xor: + case nir_intrinsic_var_atomic_exchange: + case nir_intrinsic_var_atomic_comp_swap: { + assert(mode == nir_var_shared); + + nir_ssa_def *offset; + + offset = get_io_offset(b, intrin->variables[0], + NULL, state->type_size); + + nir_intrinsic_instr *atomic = + nir_intrinsic_instr_create(state->mem_ctx, + atomic_op(intrin->intrinsic)); + + atomic->src[0] = nir_src_for_ssa(offset); + + atomic->const_index[0] = + intrin->variables[0]->var->data.driver_location; + + nir_src_copy(&atomic->src[1], &intrin->src[0], atomic); + + if (intrin->intrinsic == nir_intrinsic_var_atomic_comp_swap) + nir_src_copy(&atomic->src[2], &intrin->src[1], atomic); + + if (intrin->dest.is_ssa) { + nir_ssa_dest_init(&atomic->instr, &atomic->dest, + intrin->dest.ssa.num_components, NULL); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&atomic->dest.ssa)); + } else { + nir_dest_copy(&atomic->dest, &intrin->dest, state->mem_ctx); + } + + nir_instr_insert_before(&intrin->instr, &atomic->instr); + nir_instr_remove(&intrin->instr); + break; + } + default: break; } @@ -321,10 +429,13 @@ nir_get_io_offset_src(nir_intrinsic_instr *instr) case nir_intrinsic_load_output: case nir_intrinsic_load_uniform: return &instr->src[0]; + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ssbo: case nir_intrinsic_load_per_vertex_input: case nir_intrinsic_load_per_vertex_output: case nir_intrinsic_store_output: return &instr->src[1]; + case nir_intrinsic_store_ssbo: case nir_intrinsic_store_per_vertex_output: return &instr->src[2]; default: diff --git a/src/compiler/nir/nir_lower_outputs_to_temporaries.c b/src/compiler/nir/nir_lower_outputs_to_temporaries.c index 71b06b81fcc..00ac09114cf 100644 --- a/src/compiler/nir/nir_lower_outputs_to_temporaries.c +++ b/src/compiler/nir/nir_lower_outputs_to_temporaries.c @@ -74,7 +74,7 @@ emit_output_copies_block(nir_block *block, void *state) } void -nir_lower_outputs_to_temporaries(nir_shader *shader) +nir_lower_outputs_to_temporaries(nir_shader *shader, nir_function *entrypoint) { struct lower_outputs_state state; @@ -97,6 +97,9 @@ nir_lower_outputs_to_temporaries(nir_shader *shader) /* Reparent the name to the new variable */ ralloc_steal(output, output->name); + /* Reparent the constant initializer (if any) */ + ralloc_steal(output, output->constant_initializer); + /* Give the output a new name with @out-temp appended */ temp->name = ralloc_asprintf(var, "%s@out-temp", output->name); temp->data.mode = nir_var_global; @@ -114,7 +117,7 @@ nir_lower_outputs_to_temporaries(nir_shader *shader) * before each EmitVertex call. */ nir_foreach_block(function->impl, emit_output_copies_block, &state); - } else if (strcmp(function->name, "main") == 0) { + } else if (function == entrypoint) { /* For all other shader types, we need to do the copies right before * the jumps to the end block. */ diff --git a/src/compiler/nir/nir_lower_returns.c b/src/compiler/nir/nir_lower_returns.c new file mode 100644 index 00000000000..91bb2f7dfeb --- /dev/null +++ b/src/compiler/nir/nir_lower_returns.c @@ -0,0 +1,246 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_builder.h" +#include "nir_control_flow.h" + +struct lower_returns_state { + nir_builder builder; + struct exec_list *cf_list; + nir_loop *loop; + nir_variable *return_flag; +}; + +static bool lower_returns_in_cf_list(struct exec_list *cf_list, + struct lower_returns_state *state); + +static void +predicate_following(nir_cf_node *node, struct lower_returns_state *state) +{ + nir_builder *b = &state->builder; + b->cursor = nir_after_cf_node_and_phis(node); + + if (nir_cursors_equal(b->cursor, nir_after_cf_list(state->cf_list))) + return; /* Nothing to predicate */ + + assert(state->return_flag); + + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(nir_load_var(b, state->return_flag)); + nir_cf_node_insert(b->cursor, &if_stmt->cf_node); + + if (state->loop) { + /* If we're inside of a loop, then all we need to do is insert a + * conditional break. + */ + nir_jump_instr *brk = + nir_jump_instr_create(state->builder.shader, nir_jump_break); + nir_instr_insert(nir_before_cf_list(&if_stmt->then_list), &brk->instr); + } else { + /* Otherwise, we need to actually move everything into the else case + * of the if statement. + */ + nir_cf_list list; + nir_cf_extract(&list, nir_after_cf_node(&if_stmt->cf_node), + nir_after_cf_list(state->cf_list)); + assert(!exec_list_is_empty(&list.list)); + nir_cf_reinsert(&list, nir_before_cf_list(&if_stmt->else_list)); + } +} + +static bool +lower_returns_in_loop(nir_loop *loop, struct lower_returns_state *state) +{ + nir_loop *parent = state->loop; + state->loop = loop; + bool progress = lower_returns_in_cf_list(&loop->body, state); + state->loop = parent; + + /* If the recursive call made progress, then there were returns inside + * of the loop. These would have been lowered to breaks with the return + * flag set to true. We need to predicate everything following the loop + * on the return flag. + */ + if (progress) + predicate_following(&loop->cf_node, state); + + return progress; +} + +static bool +lower_returns_in_if(nir_if *if_stmt, struct lower_returns_state *state) +{ + bool progress; + + progress = lower_returns_in_cf_list(&if_stmt->then_list, state); + progress = lower_returns_in_cf_list(&if_stmt->else_list, state) || progress; + + /* If either of the recursive calls made progress, then there were + * returns inside of the body of the if. If we're in a loop, then these + * were lowered to breaks which automatically skip to the end of the + * loop so we don't have to do anything. If we're not in a loop, then + * all we know is that the return flag is set appropreately and that the + * recursive calls ensured that nothing gets executed *inside* the if + * after a return. In order to ensure nothing outside gets executed + * after a return, we need to predicate everything following on the + * return flag. + */ + if (progress && !state->loop) + predicate_following(&if_stmt->cf_node, state); + + return progress; +} + +static bool +lower_returns_in_block(nir_block *block, struct lower_returns_state *state) +{ + if (block->predecessors->entries == 0 && + block != nir_start_block(state->builder.impl)) { + /* This block is unreachable. Delete it and everything after it. */ + nir_cf_list list; + nir_cf_extract(&list, nir_before_cf_node(&block->cf_node), + nir_after_cf_list(state->cf_list)); + + if (exec_list_is_empty(&list.list)) { + /* There's nothing here, which also means there's nothing in this + * block so we have nothing to do. + */ + return false; + } else { + nir_cf_delete(&list); + return true; + } + } + + nir_instr *last_instr = nir_block_last_instr(block); + if (last_instr == NULL) + return false; + + if (last_instr->type != nir_instr_type_jump) + return false; + + nir_jump_instr *jump = nir_instr_as_jump(last_instr); + if (jump->type != nir_jump_return) + return false; + + nir_instr_remove(&jump->instr); + + nir_builder *b = &state->builder; + b->cursor = nir_after_block(block); + + /* Set the return flag */ + if (state->return_flag == NULL) { + state->return_flag = + nir_local_variable_create(b->impl, glsl_bool_type(), "return"); + + /* Set a default value of false */ + state->return_flag->constant_initializer = + rzalloc(state->return_flag, nir_constant); + } + nir_store_var(b, state->return_flag, nir_imm_int(b, NIR_TRUE), 1); + + if (state->loop) { + /* We're in a loop; we need to break out of it. */ + nir_jump(b, nir_jump_break); + } else { + /* Not in a loop; we'll deal with predicating later*/ + assert(nir_cf_node_next(&block->cf_node) == NULL); + } + + return true; +} + +static bool +lower_returns_in_cf_list(struct exec_list *cf_list, + struct lower_returns_state *state) +{ + bool progress = false; + + struct exec_list *parent_list = state->cf_list; + state->cf_list = cf_list; + + /* We iterate over the list backwards because any given lower call may + * take everything following the given CF node and predicate it. In + * order to avoid recursion/iteration problems, we want everything after + * a given node to already be lowered before this happens. + */ + foreach_list_typed_reverse_safe(nir_cf_node, node, node, cf_list) { + switch (node->type) { + case nir_cf_node_block: + if (lower_returns_in_block(nir_cf_node_as_block(node), state)) + progress = true; + break; + + case nir_cf_node_if: + if (lower_returns_in_if(nir_cf_node_as_if(node), state)) + progress = true; + break; + + case nir_cf_node_loop: + if (lower_returns_in_loop(nir_cf_node_as_loop(node), state)) + progress = true; + break; + + default: + unreachable("Invalid inner CF node type"); + } + } + + state->cf_list = parent_list; + + return progress; +} + +bool +nir_lower_returns_impl(nir_function_impl *impl) +{ + struct lower_returns_state state; + + state.cf_list = &impl->body; + state.loop = NULL; + state.return_flag = NULL; + nir_builder_init(&state.builder, impl); + + bool progress = lower_returns_in_cf_list(&impl->body, &state); + + if (progress) { + nir_metadata_preserve(impl, nir_metadata_none); + nir_repair_ssa_impl(impl); + } + + return progress; +} + +bool +nir_lower_returns(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(shader, function) { + if (function->impl) + progress = nir_lower_returns_impl(function->impl) || progress; + } + + return progress; +} diff --git a/src/compiler/nir/nir_lower_samplers.c b/src/compiler/nir/nir_lower_samplers.c index 96e82914014..29654136aee 100644 --- a/src/compiler/nir/nir_lower_samplers.c +++ b/src/compiler/nir/nir_lower_samplers.c @@ -94,6 +94,9 @@ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_progr if (instr->sampler == NULL) return; + /* GLSL only has combined textures/samplers */ + assert(instr->texture == NULL); + instr->sampler_index = 0; unsigned location = instr->sampler->var->data.location; unsigned array_elements = 1; @@ -106,7 +109,7 @@ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_progr if (indirect) { /* First, we have to resize the array of texture sources */ nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, - instr->num_srcs + 1); + instr->num_srcs + 2); for (unsigned i = 0; i < instr->num_srcs; i++) { new_srcs[i].src_type = instr->src[i].src_type; @@ -120,13 +123,19 @@ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_progr /* Now we can go ahead and move the source over to being a * first-class texture source. */ + instr->src[instr->num_srcs].src_type = nir_tex_src_texture_offset; + instr->num_srcs++; + nir_instr_rewrite_src(&instr->instr, + &instr->src[instr->num_srcs - 1].src, + nir_src_for_ssa(indirect)); + instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; instr->num_srcs++; nir_instr_rewrite_src(&instr->instr, &instr->src[instr->num_srcs - 1].src, nir_src_for_ssa(indirect)); - instr->sampler_array_size = array_elements; + instr->texture_array_size = array_elements; } if (location > shader_program->NumUniformStorage - 1 || @@ -139,6 +148,8 @@ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_progr shader_program->UniformStorage[location].opaque[stage].index; instr->sampler = NULL; + + instr->texture_index = instr->sampler_index; } typedef struct { diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c index 2bd787d3574..79f6bedc990 100644 --- a/src/compiler/nir/nir_lower_system_values.c +++ b/src/compiler/nir/nir_lower_system_values.c @@ -55,9 +55,77 @@ convert_block(nir_block *block, void *void_state) b->cursor = nir_after_instr(&load_var->instr); - nir_intrinsic_op sysval_op = - nir_intrinsic_from_system_value(var->data.location); - nir_ssa_def *sysval = nir_load_system_value(b, sysval_op, 0); + nir_ssa_def *sysval; + switch (var->data.location) { + case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: { + /* From the GLSL man page for gl_GlobalInvocationID: + * + * "The value of gl_GlobalInvocationID is equal to + * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID" + */ + + nir_const_value local_size; + local_size.u[0] = b->shader->info.cs.local_size[0]; + local_size.u[1] = b->shader->info.cs.local_size[1]; + local_size.u[2] = b->shader->info.cs.local_size[2]; + + nir_ssa_def *group_id = + nir_load_system_value(b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *local_id = + nir_load_system_value(b, nir_intrinsic_load_local_invocation_id, 0); + + sysval = nir_iadd(b, nir_imul(b, group_id, + nir_build_imm(b, 3, local_size)), + local_id); + break; + } + + case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: { + /* From the GLSL man page for gl_LocalInvocationIndex: + * + * ?The value of gl_LocalInvocationIndex is equal to + * gl_LocalInvocationID.z * gl_WorkGroupSize.x * + * gl_WorkGroupSize.y + gl_LocalInvocationID.y * + * gl_WorkGroupSize.x + gl_LocalInvocationID.x" + */ + nir_ssa_def *local_id = + nir_load_system_value(b, nir_intrinsic_load_local_invocation_id, 0); + + unsigned stride_y = b->shader->info.cs.local_size[0]; + unsigned stride_z = b->shader->info.cs.local_size[0] * + b->shader->info.cs.local_size[1]; + + sysval = nir_iadd(b, nir_imul(b, nir_channel(b, local_id, 2), + nir_imm_int(b, stride_z)), + nir_iadd(b, nir_imul(b, nir_channel(b, local_id, 1), + nir_imm_int(b, stride_y)), + nir_channel(b, local_id, 0))); + break; + } + + case SYSTEM_VALUE_VERTEX_ID: + if (b->shader->options->vertex_id_zero_based) { + sysval = nir_iadd(b, + nir_load_system_value(b, nir_intrinsic_load_vertex_id_zero_base, 0), + nir_load_system_value(b, nir_intrinsic_load_base_vertex, 0)); + } else { + sysval = nir_load_system_value(b, nir_intrinsic_load_vertex_id, 0); + } + break; + + case SYSTEM_VALUE_INSTANCE_INDEX: + sysval = nir_iadd(b, + nir_load_system_value(b, nir_intrinsic_load_instance_id, 0), + nir_load_system_value(b, nir_intrinsic_load_base_instance, 0)); + break; + + default: { + nir_intrinsic_op sysval_op = + nir_intrinsic_from_system_value(var->data.location); + sysval = nir_load_system_value(b, sysval_op, 0); + break; + } /* default */ + } nir_ssa_def_rewrite_uses(&load_var->dest.ssa, nir_src_for_ssa(sysval)); nir_instr_remove(&load_var->instr); diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c b/src/compiler/nir/nir_lower_vars_to_ssa.c index 75d31ff60af..e1f368d2f2b 100644 --- a/src/compiler/nir/nir_lower_vars_to_ssa.c +++ b/src/compiler/nir/nir_lower_vars_to_ssa.c @@ -27,6 +27,7 @@ #include "nir.h" #include "nir_builder.h" +#include "nir_phi_builder.h" #include "nir_vla.h" @@ -47,8 +48,7 @@ struct deref_node { struct set *stores; struct set *copies; - nir_ssa_def **def_stack; - nir_ssa_def **def_stack_tail; + struct nir_phi_builder_value *pb_value; struct deref_node *wildcard; struct deref_node *indirect; @@ -87,8 +87,7 @@ struct lower_variables_state { */ bool add_to_direct_deref_nodes; - /* A hash table mapping phi nodes to deref_state data */ - struct hash_table *phi_table; + struct nir_phi_builder *phi_builder; }; static struct deref_node * @@ -473,114 +472,6 @@ lower_copies_to_load_store(struct deref_node *node, return true; } -/** Pushes an SSA def onto the def stack for the given node - * - * Each node is potentially associated with a stack of SSA definitions. - * This stack is used for determining what SSA definition reaches a given - * point in the program for variable renaming. The stack is always kept in - * dominance-order with at most one SSA def per block. If the SSA - * definition on the top of the stack is in the same block as the one being - * pushed, the top element is replaced. - */ -static void -def_stack_push(struct deref_node *node, nir_ssa_def *def, - struct lower_variables_state *state) -{ - if (node->def_stack == NULL) { - node->def_stack = ralloc_array(state->dead_ctx, nir_ssa_def *, - state->impl->num_blocks); - node->def_stack_tail = node->def_stack - 1; - } - - if (node->def_stack_tail >= node->def_stack) { - nir_ssa_def *top_def = *node->def_stack_tail; - - if (def->parent_instr->block == top_def->parent_instr->block) { - /* They're in the same block, just replace the top */ - *node->def_stack_tail = def; - return; - } - } - - *(++node->def_stack_tail) = def; -} - -/* Pop the top of the def stack if it's in the given block */ -static void -def_stack_pop_if_in_block(struct deref_node *node, nir_block *block) -{ - /* If we're popping, then we have presumably pushed at some time in the - * past so this should exist. - */ - assert(node->def_stack != NULL); - - /* The stack is already empty. Do nothing. */ - if (node->def_stack_tail < node->def_stack) - return; - - nir_ssa_def *def = *node->def_stack_tail; - if (def->parent_instr->block == block) - node->def_stack_tail--; -} - -/** Retrieves the SSA definition on the top of the stack for the given - * node, if one exists. If the stack is empty, then we return the constant - * initializer (if it exists) or an SSA undef. - */ -static nir_ssa_def * -get_ssa_def_for_block(struct deref_node *node, nir_block *block, - struct lower_variables_state *state) -{ - /* If we have something on the stack, go ahead and return it. We're - * assuming that the top of the stack dominates the given block. - */ - if (node->def_stack && node->def_stack_tail >= node->def_stack) - return *node->def_stack_tail; - - /* If we got here then we don't have a definition that dominates the - * given block. This means that we need to add an undef and use that. - */ - nir_ssa_undef_instr *undef = - nir_ssa_undef_instr_create(state->shader, - glsl_get_vector_elements(node->type)); - nir_instr_insert_before_cf_list(&state->impl->body, &undef->instr); - def_stack_push(node, &undef->def, state); - return &undef->def; -} - -/* Given a block and one of its predecessors, this function fills in the - * souces of the phi nodes to take SSA defs from the given predecessor. - * This function must be called exactly once per block/predecessor pair. - */ -static void -add_phi_sources(nir_block *block, nir_block *pred, - struct lower_variables_state *state) -{ - nir_foreach_instr(block, instr) { - if (instr->type != nir_instr_type_phi) - break; - - nir_phi_instr *phi = nir_instr_as_phi(instr); - - struct hash_entry *entry = - _mesa_hash_table_search(state->phi_table, phi); - if (!entry) - continue; - - struct deref_node *node = entry->data; - - nir_phi_src *src = ralloc(phi, nir_phi_src); - src->pred = pred; - src->src.parent_instr = &phi->instr; - src->src.is_ssa = true; - src->src.ssa = get_ssa_def_for_block(node, pred, state); - - list_addtail(&src->src.use_link, &src->src.ssa->uses); - - exec_list_push_tail(&phi->srcs, &src->node); - } -} - /* Performs variable renaming by doing a DFS of the dominance tree * * This algorithm is very similar to the one outlined in "Efficiently @@ -595,265 +486,126 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state) nir_builder_init(&b, state->impl); nir_foreach_instr_safe(block, instr) { - if (instr->type == nir_instr_type_phi) { - nir_phi_instr *phi = nir_instr_as_phi(instr); - - struct hash_entry *entry = - _mesa_hash_table_search(state->phi_table, phi); - - /* This can happen if we already have phi nodes in the program - * that were not created in this pass. - */ - if (!entry) - continue; - - struct deref_node *node = entry->data; - - def_stack_push(node, &phi->dest.ssa, state); - } else if (instr->type == nir_instr_type_intrinsic) { - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - - switch (intrin->intrinsic) { - case nir_intrinsic_load_var: { - struct deref_node *node = - get_deref_node(intrin->variables[0], state); - - if (node == NULL) { - /* If we hit this path then we are referencing an invalid - * value. Most likely, we unrolled something and are - * reading past the end of some array. In any case, this - * should result in an undefined value. - */ - nir_ssa_undef_instr *undef = - nir_ssa_undef_instr_create(state->shader, - intrin->num_components); - - nir_instr_insert_before(&intrin->instr, &undef->instr); - nir_instr_remove(&intrin->instr); - - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&undef->def)); - continue; - } - - if (!node->lower_to_ssa) - continue; - - nir_alu_instr *mov = nir_alu_instr_create(state->shader, - nir_op_imov); - mov->src[0].src.is_ssa = true; - mov->src[0].src.ssa = get_ssa_def_for_block(node, block, state); - for (unsigned i = intrin->num_components; i < 4; i++) - mov->src[0].swizzle[i] = 0; + if (instr->type != nir_instr_type_intrinsic) + continue; - assert(intrin->dest.is_ssa); + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - mov->dest.write_mask = (1 << intrin->num_components) - 1; - nir_ssa_dest_init(&mov->instr, &mov->dest.dest, - intrin->num_components, NULL); + switch (intrin->intrinsic) { + case nir_intrinsic_load_var: { + struct deref_node *node = + get_deref_node(intrin->variables[0], state); + + if (node == NULL) { + /* If we hit this path then we are referencing an invalid + * value. Most likely, we unrolled something and are + * reading past the end of some array. In any case, this + * should result in an undefined value. + */ + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(state->shader, + intrin->num_components); - nir_instr_insert_before(&intrin->instr, &mov->instr); + nir_instr_insert_before(&intrin->instr, &undef->instr); nir_instr_remove(&intrin->instr); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&mov->dest.dest.ssa)); - break; + nir_src_for_ssa(&undef->def)); + continue; } - case nir_intrinsic_store_var: { - struct deref_node *node = - get_deref_node(intrin->variables[0], state); + if (!node->lower_to_ssa) + continue; - if (node == NULL) { - /* Probably an out-of-bounds array store. That should be a - * no-op. */ - nir_instr_remove(&intrin->instr); - continue; - } + nir_alu_instr *mov = nir_alu_instr_create(state->shader, + nir_op_imov); + mov->src[0].src = nir_src_for_ssa( + nir_phi_builder_value_get_block_def(node->pb_value, block)); + for (unsigned i = intrin->num_components; i < 4; i++) + mov->src[0].swizzle[i] = 0; - if (!node->lower_to_ssa) - continue; - - assert(intrin->num_components == - glsl_get_vector_elements(node->type)); - - assert(intrin->src[0].is_ssa); - - nir_ssa_def *new_def; - b.cursor = nir_before_instr(&intrin->instr); - - if (intrin->const_index[0] == (1 << intrin->num_components) - 1) { - /* Whole variable store - just copy the source. Note that - * intrin->num_components and intrin->src[0].ssa->num_components - * may differ. - */ - unsigned swiz[4]; - for (unsigned i = 0; i < 4; i++) - swiz[i] = i < intrin->num_components ? i : 0; - - new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz, - intrin->num_components, false); - } else { - nir_ssa_def *old_def = get_ssa_def_for_block(node, block, state); - /* For writemasked store_var intrinsics, we combine the newly - * written values with the existing contents of unwritten - * channels, creating a new SSA value for the whole vector. - */ - nir_ssa_def *srcs[4]; - for (unsigned i = 0; i < intrin->num_components; i++) { - if (intrin->const_index[0] & (1 << i)) { - srcs[i] = nir_channel(&b, intrin->src[0].ssa, i); - } else { - srcs[i] = nir_channel(&b, old_def, i); - } - } - new_def = nir_vec(&b, srcs, intrin->num_components); - } - - assert(new_def->num_components == intrin->num_components); + assert(intrin->dest.is_ssa); - def_stack_push(node, new_def, state); + mov->dest.write_mask = (1 << intrin->num_components) - 1; + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, + intrin->num_components, NULL); - /* We'll wait to remove the instruction until the next pass - * where we pop the node we just pushed back off the stack. - */ - break; - } + nir_instr_insert_before(&intrin->instr, &mov->instr); + nir_instr_remove(&intrin->instr); - default: - break; - } + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&mov->dest.dest.ssa)); + break; } - } - - if (block->successors[0]) - add_phi_sources(block->successors[0], block, state); - if (block->successors[1]) - add_phi_sources(block->successors[1], block, state); - - for (unsigned i = 0; i < block->num_dom_children; ++i) - rename_variables_block(block->dom_children[i], state); - - /* Now we iterate over the instructions and pop off any SSA defs that we - * pushed in the first loop. - */ - nir_foreach_instr_safe(block, instr) { - if (instr->type == nir_instr_type_phi) { - nir_phi_instr *phi = nir_instr_as_phi(instr); - - struct hash_entry *entry = - _mesa_hash_table_search(state->phi_table, phi); - - /* This can happen if we already have phi nodes in the program - * that were not created in this pass. - */ - if (!entry) - continue; - - struct deref_node *node = entry->data; - def_stack_pop_if_in_block(node, block); - } else if (instr->type == nir_instr_type_intrinsic) { - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + case nir_intrinsic_store_var: { + struct deref_node *node = + get_deref_node(intrin->variables[0], state); - if (intrin->intrinsic != nir_intrinsic_store_var) - continue; - - struct deref_node *node = get_deref_node(intrin->variables[0], state); - if (!node) + if (node == NULL) { + /* Probably an out-of-bounds array store. That should be a + * no-op. */ + nir_instr_remove(&intrin->instr); continue; + } if (!node->lower_to_ssa) continue; - def_stack_pop_if_in_block(node, block); - nir_instr_remove(&intrin->instr); - } - } - - return true; -} - -/* Inserts phi nodes for all variables marked lower_to_ssa - * - * This is the same algorithm as presented in "Efficiently Computing Static - * Single Assignment Form and the Control Dependence Graph" by Cytron et. - * al. - */ -static void -insert_phi_nodes(struct lower_variables_state *state) -{ - NIR_VLA_ZERO(unsigned, work, state->impl->num_blocks); - NIR_VLA_ZERO(unsigned, has_already, state->impl->num_blocks); - - /* - * Since the work flags already prevent us from inserting a node that has - * ever been inserted into W, we don't need to use a set to represent W. - * Also, since no block can ever be inserted into W more than once, we know - * that the maximum size of W is the number of basic blocks in the - * function. So all we need to handle W is an array and a pointer to the - * next element to be inserted and the next element to be removed. - */ - NIR_VLA(nir_block *, W, state->impl->num_blocks); - - unsigned w_start, w_end; - unsigned iter_count = 0; - - foreach_list_typed(struct deref_node, node, direct_derefs_link, - &state->direct_deref_nodes) { - if (node->stores == NULL) - continue; + assert(intrin->num_components == + glsl_get_vector_elements(node->type)); - if (!node->lower_to_ssa) - continue; + assert(intrin->src[0].is_ssa); - w_start = w_end = 0; - iter_count++; + nir_ssa_def *new_def; + b.cursor = nir_before_instr(&intrin->instr); - struct set_entry *store_entry; - set_foreach(node->stores, store_entry) { - nir_intrinsic_instr *store = (nir_intrinsic_instr *)store_entry->key; - if (work[store->instr.block->index] < iter_count) - W[w_end++] = store->instr.block; - work[store->instr.block->index] = iter_count; - } - - while (w_start != w_end) { - nir_block *cur = W[w_start++]; - struct set_entry *dom_entry; - set_foreach(cur->dom_frontier, dom_entry) { - nir_block *next = (nir_block *) dom_entry->key; - - /* - * If there's more than one return statement, then the end block - * can be a join point for some definitions. However, there are - * no instructions in the end block, so nothing would use those - * phi nodes. Of course, we couldn't place those phi nodes - * anyways due to the restriction of having no instructions in the - * end block... + if (intrin->const_index[0] == (1 << intrin->num_components) - 1) { + /* Whole variable store - just copy the source. Note that + * intrin->num_components and intrin->src[0].ssa->num_components + * may differ. */ - if (next == state->impl->end_block) - continue; - - if (has_already[next->index] < iter_count) { - nir_phi_instr *phi = nir_phi_instr_create(state->shader); - nir_ssa_dest_init(&phi->instr, &phi->dest, - glsl_get_vector_elements(node->type), NULL); - nir_instr_insert_before_block(next, &phi->instr); + unsigned swiz[4]; + for (unsigned i = 0; i < 4; i++) + swiz[i] = i < intrin->num_components ? i : 0; - _mesa_hash_table_insert(state->phi_table, phi, node); - - has_already[next->index] = iter_count; - if (work[next->index] < iter_count) { - work[next->index] = iter_count; - W[w_end++] = next; + new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz, + intrin->num_components, false); + } else { + nir_ssa_def *old_def = + nir_phi_builder_value_get_block_def(node->pb_value, block); + /* For writemasked store_var intrinsics, we combine the newly + * written values with the existing contents of unwritten + * channels, creating a new SSA value for the whole vector. + */ + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < intrin->num_components; i++) { + if (intrin->const_index[0] & (1 << i)) { + srcs[i] = nir_channel(&b, intrin->src[0].ssa, i); + } else { + srcs[i] = nir_channel(&b, old_def, i); } } + new_def = nir_vec(&b, srcs, intrin->num_components); } + + assert(new_def->num_components == intrin->num_components); + + nir_phi_builder_value_set_block_def(node->pb_value, block, new_def); + nir_instr_remove(&intrin->instr); + break; + } + + default: + break; } } -} + for (unsigned i = 0; i < block->num_dom_children; ++i) + rename_variables_block(block->dom_children[i], state); + + return true; +} /** Implements a pass to lower variable uses to SSA values * @@ -895,9 +647,6 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl) _mesa_hash_pointer, _mesa_key_pointer_equal); exec_list_make_empty(&state.direct_deref_nodes); - state.phi_table = _mesa_hash_table_create(state.dead_ctx, - _mesa_hash_pointer, - _mesa_key_pointer_equal); /* Build the initial deref structures and direct_deref_nodes table */ state.add_to_direct_deref_nodes = true; @@ -927,15 +676,6 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl) node->lower_to_ssa = true; progress = true; - if (deref->var->constant_initializer) { - nir_load_const_instr *load = - nir_deref_get_const_initializer_load(state.shader, deref); - nir_ssa_def_init(&load->instr, &load->def, - glsl_get_vector_elements(node->type), NULL); - nir_instr_insert_before_cf_list(&impl->body, &load->instr); - def_stack_push(node, &load->def, &state); - } - foreach_deref_node_match(deref, lower_copies_to_load_store, &state); } @@ -952,9 +692,47 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl) */ nir_foreach_block(impl, register_variable_uses_block, &state); - insert_phi_nodes(&state); + state.phi_builder = nir_phi_builder_create(state.impl); + + NIR_VLA(BITSET_WORD, store_blocks, BITSET_WORDS(state.impl->num_blocks)); + foreach_list_typed(struct deref_node, node, direct_derefs_link, + &state.direct_deref_nodes) { + if (!node->lower_to_ssa) + continue; + + memset(store_blocks, 0, + BITSET_WORDS(state.impl->num_blocks) * sizeof(*store_blocks)); + + if (node->stores) { + struct set_entry *store_entry; + set_foreach(node->stores, store_entry) { + nir_intrinsic_instr *store = + (nir_intrinsic_instr *)store_entry->key; + BITSET_SET(store_blocks, store->instr.block->index); + } + } + + if (node->deref->var->constant_initializer) + BITSET_SET(store_blocks, 0); + + node->pb_value = + nir_phi_builder_add_value(state.phi_builder, + glsl_get_vector_elements(node->type), + store_blocks); + + if (node->deref->var->constant_initializer) { + nir_load_const_instr *load = + nir_deref_get_const_initializer_load(state.shader, node->deref); + nir_instr_insert_before_cf_list(&impl->body, &load->instr); + nir_phi_builder_value_set_block_def(node->pb_value, + nir_start_block(impl), &load->def); + } + } + rename_variables_block(nir_start_block(impl), &state); + nir_phi_builder_finish(state.phi_builder); + nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index e79810c1991..0eff89783dd 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -105,7 +105,7 @@ def opcode(name, output_size, output_type, input_sizes, input_types, opcodes[name] = Opcode(name, output_size, output_type, input_sizes, input_types, algebraic_properties, const_expr) -def unop_convert(name, in_type, out_type, const_expr): +def unop_convert(name, out_type, in_type, const_expr): opcode(name, 0, out_type, [0], [in_type], "", const_expr) def unop(name, ty, const_expr): @@ -155,17 +155,17 @@ unop("frsq", tfloat, "1.0f / sqrtf(src0)") unop("fsqrt", tfloat, "sqrtf(src0)") unop("fexp2", tfloat, "exp2f(src0)") unop("flog2", tfloat, "log2f(src0)") -unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion. -unop_convert("f2u", tfloat, tuint, "src0") # Float-to-unsigned conversion -unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion. +unop_convert("f2i", tint, tfloat, "src0") # Float-to-integer conversion. +unop_convert("f2u", tuint, tfloat, "src0") # Float-to-unsigned conversion +unop_convert("i2f", tfloat, tint, "src0") # Integer-to-float conversion. # Float-to-boolean conversion -unop_convert("f2b", tfloat, tbool, "src0 != 0.0f") +unop_convert("f2b", tbool, tfloat, "src0 != 0.0f") # Boolean-to-float conversion -unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f") +unop_convert("b2f", tfloat, tbool, "src0 ? 1.0f : 0.0f") # Int-to-boolean conversion -unop_convert("i2b", tint, tbool, "src0 != 0") -unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion -unop_convert("u2f", tuint, tfloat, "src0") # Unsigned-to-float conversion. +unop_convert("i2b", tbool, tint, "src0 != 0") +unop_convert("b2i", tint, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion +unop_convert("u2f", tfloat, tuint, "src0") # Unsigned-to-float conversion. # Unary floating-point rounding operations. @@ -176,6 +176,7 @@ unop("ffloor", tfloat, "floorf(src0)") unop("ffract", tfloat, "src0 - floorf(src0)") unop("fround_even", tfloat, "_mesa_roundevenf(src0)") +unop("fquantize2f16", tfloat, "(fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0))") # Trigonometric operations. @@ -237,6 +238,16 @@ unpack_2x16("unorm") unpack_4x8("unorm") unpack_2x16("half") +unop_horiz("pack_uvec2_to_uint", 0, tuint, 2, tuint, """ +dst = (src0.x & 0xffff) | (src0.y >> 16); +""") + +unop_horiz("pack_uvec4_to_uint", 0, tuint, 4, tuint, """ +dst = (src0.x << 0) | + (src0.y << 8) | + (src0.z << 16) | + (src0.w << 24); +""") # Lowered floating point unpacking operations. @@ -264,7 +275,7 @@ for (unsigned bit = 0; bit < 32; bit++) { } """) -unop_convert("ufind_msb", tuint, tint, """ +unop_convert("ufind_msb", tint, tuint, """ dst = -1; for (int bit = 31; bit > 0; bit--) { if ((src0 >> bit) & 1) { @@ -368,9 +379,23 @@ binop_convert("uadd_carry", tuint, tuint, commutative, "src0 + src1 < src0") binop_convert("usub_borrow", tuint, tuint, "", "src0 < src1") -binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)") binop("umod", tuint, "", "src1 == 0 ? 0 : src0 % src1") +# For signed integers, there are several different possible definitions of +# "modulus" or "remainder". We follow the conventions used by LLVM and +# SPIR-V. The irem opcode implements the standard C/C++ signed "%" +# operation while the imod opcode implements the more mathematical +# "modulus" operation. For details on the difference, see +# +# http://mathforum.org/library/drmath/view/52343.html + +binop("irem", tint, "", "src1 == 0 ? 0 : src0 % src1") +binop("imod", tint, "", + "src1 == 0 ? 0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ?" + " src0 % src1 : src0 % src1 + src1)") +binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)") +binop("frem", tfloat, "", "src0 - src1 * truncf(src0 / src1)") + # # Comparisons # @@ -536,6 +561,15 @@ dst.x = src0.x; dst.y = src1.x; """) +# Byte extraction +binop("extract_ubyte", tuint, "", "(uint8_t)(src0 >> (src1 * 8))") +binop("extract_ibyte", tint, "", "(int8_t)(src0 >> (src1 * 8))") + +# Word extraction +binop("extract_uword", tuint, "", "(uint16_t)(src0 >> (src1 * 16))") +binop("extract_iword", tint, "", "(int16_t)(src0 >> (src1 * 16))") + + def triop(name, ty, const_expr): opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], "", const_expr) def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr): diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 7745b76f7ce..f4bfd3a921a 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -1,4 +1,5 @@ #! /usr/bin/env python +# -*- encoding: utf-8 -*- # # Copyright (C) 2014 Intel Corporation # @@ -74,6 +75,7 @@ optimizations = [ (('imul', a, 1), a), (('fmul', a, -1.0), ('fneg', a)), (('imul', a, -1), ('ineg', a)), + (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'), (('ffma', 0.0, a, b), b), (('ffma', a, 0.0, b), b), (('ffma', a, b, 0.0), ('fmul', a, b)), @@ -225,8 +227,11 @@ optimizations = [ # Misc. lowering (('fmod', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod'), + (('frem', a, b), ('fsub', a, ('fmul', b, ('ftrunc', ('fdiv', a, b)))), 'options->lower_fmod'), (('uadd_carry', a, b), ('b2i', ('ult', ('iadd', a, b), a)), 'options->lower_uadd_carry'), (('usub_borrow', a, b), ('b2i', ('ult', a, b)), 'options->lower_usub_borrow'), + (('ldexp', 'x', 'exp'), + ('fmul', 'x', ('ishl', ('imin', ('imax', ('iadd', 'exp', 0x7f), 0), 0xff), 23))), (('bitfield_insert', 'base', 'insert', 'offset', 'bits'), ('bcsel', ('ilt', 31, 'bits'), 'insert', @@ -242,6 +247,70 @@ optimizations = [ ('bcsel', ('ult', 31, 'bits'), 'value', ('ubfe', 'value', 'offset', 'bits')), 'options->lower_bitfield_extract'), + + (('extract_ibyte', a, b), + ('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 8), + 'options->lower_extract_byte'), + + (('extract_ubyte', a, b), + ('iand', ('ushr', a, ('imul', b, 8)), 0xff), + 'options->lower_extract_byte'), + + (('extract_iword', a, b), + ('ishr', ('ishl', a, ('imul', ('isub', 1, b), 16)), 16), + 'options->lower_extract_word'), + + (('extract_uword', a, b), + ('iand', ('ushr', a, ('imul', b, 16)), 0xffff), + 'options->lower_extract_word'), + + (('pack_unorm_2x16', 'v'), + ('pack_uvec2_to_uint', + ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 65535.0)))), + 'options->lower_pack_unorm_2x16'), + + (('pack_unorm_4x8', 'v'), + ('pack_uvec4_to_uint', + ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))), + 'options->lower_pack_unorm_4x8'), + + (('pack_snorm_2x16', 'v'), + ('pack_uvec2_to_uint', + ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 32767.0)))), + 'options->lower_pack_snorm_2x16'), + + (('pack_snorm_4x8', 'v'), + ('pack_uvec4_to_uint', + ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))), + 'options->lower_pack_snorm_4x8'), + + (('unpack_unorm_2x16', 'v'), + ('fdiv', ('u2f', ('vec4', ('extract_uword', 'v', 0), + ('extract_uword', 'v', 1), 0, 0)), + 65535.0), + 'options->lower_unpack_unorm_2x16'), + + (('unpack_unorm_4x8', 'v'), + ('fdiv', ('u2f', ('vec4', ('extract_ubyte', 'v', 0), + ('extract_ubyte', 'v', 1), + ('extract_ubyte', 'v', 2), + ('extract_ubyte', 'v', 3))), + 255.0), + 'options->lower_unpack_unorm_4x8'), + + (('unpack_snorm_2x16', 'v'), + ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_iword', 'v', 0), + ('extract_iword', 'v', 1), 0, 0)), + 32767.0))), + 'options->lower_unpack_snorm_2x16'), + + (('unpack_snorm_4x8', 'v'), + ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_ibyte', 'v', 0), + ('extract_ibyte', 'v', 1), + ('extract_ibyte', 'v', 2), + ('extract_ibyte', 'v', 3))), + 127.0))), + 'options->lower_unpack_snorm_4x8'), ] # Add optimizations to handle the case where the result of a ternary is diff --git a/src/compiler/nir/nir_phi_builder.c b/src/compiler/nir/nir_phi_builder.c new file mode 100644 index 00000000000..5429083e5c8 --- /dev/null +++ b/src/compiler/nir/nir_phi_builder.c @@ -0,0 +1,254 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir_phi_builder.h" +#include "nir/nir_vla.h" + +struct nir_phi_builder { + nir_shader *shader; + nir_function_impl *impl; + + /* Copied from the impl for easy access */ + unsigned num_blocks; + + /* Array of all blocks indexed by block->index. */ + nir_block **blocks; + + /* Hold on to the values so we can easily iterate over them. */ + struct exec_list values; + + /* Worklist for phi adding */ + unsigned iter_count; + unsigned *work; + nir_block **W; +}; + +#define NEEDS_PHI ((nir_ssa_def *)(intptr_t)-1) + +struct nir_phi_builder_value { + struct exec_node node; + + struct nir_phi_builder *builder; + + /* Needed so we can create phis and undefs */ + unsigned num_components; + + /* The list of phi nodes associated with this value. Phi nodes are not + * added directly. Instead, they are created, the instr->block pointer + * set, and then added to this list. Later, in phi_builder_finish, we + * set up their sources and add them to the top of their respective + * blocks. + */ + struct exec_list phis; + + /* Array of SSA defs, indexed by block. If a phi needs to be inserted + * in a given block, it will have the magic value NEEDS_PHI. + */ + nir_ssa_def *defs[0]; +}; + +static bool +fill_block_array(nir_block *block, void *void_data) +{ + nir_block **blocks = void_data; + blocks[block->index] = block; + return true; +} + +struct nir_phi_builder * +nir_phi_builder_create(nir_function_impl *impl) +{ + struct nir_phi_builder *pb = ralloc(NULL, struct nir_phi_builder); + + pb->shader = impl->function->shader; + pb->impl = impl; + + assert(impl->valid_metadata & (nir_metadata_block_index | + nir_metadata_dominance)); + + pb->num_blocks = impl->num_blocks; + pb->blocks = ralloc_array(pb, nir_block *, pb->num_blocks); + nir_foreach_block(impl, fill_block_array, pb->blocks); + + exec_list_make_empty(&pb->values); + + pb->iter_count = 0; + pb->work = rzalloc_array(pb, unsigned, pb->num_blocks); + pb->W = ralloc_array(pb, nir_block *, pb->num_blocks); + + return pb; +} + +struct nir_phi_builder_value * +nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components, + const BITSET_WORD *defs) +{ + struct nir_phi_builder_value *val; + unsigned i, w_start = 0, w_end = 0; + + val = rzalloc_size(pb, sizeof(*val) + sizeof(val->defs[0]) * pb->num_blocks); + val->builder = pb; + val->num_components = num_components; + exec_list_make_empty(&val->phis); + exec_list_push_tail(&pb->values, &val->node); + + pb->iter_count++; + + BITSET_WORD tmp; + BITSET_FOREACH_SET(i, tmp, defs, pb->num_blocks) { + if (pb->work[i] < pb->iter_count) + pb->W[w_end++] = pb->blocks[i]; + pb->work[i] = pb->iter_count; + } + + while (w_start != w_end) { + nir_block *cur = pb->W[w_start++]; + struct set_entry *dom_entry; + set_foreach(cur->dom_frontier, dom_entry) { + nir_block *next = (nir_block *) dom_entry->key; + + /* + * If there's more than one return statement, then the end block + * can be a join point for some definitions. However, there are + * no instructions in the end block, so nothing would use those + * phi nodes. Of course, we couldn't place those phi nodes + * anyways due to the restriction of having no instructions in the + * end block... + */ + if (next == pb->impl->end_block) + continue; + + if (val->defs[next->index] == NULL) { + val->defs[next->index] = NEEDS_PHI; + + if (pb->work[next->index] < pb->iter_count) { + pb->work[next->index] = pb->iter_count; + pb->W[w_end++] = next; + } + } + } + } + + return val; +} + +void +nir_phi_builder_value_set_block_def(struct nir_phi_builder_value *val, + nir_block *block, nir_ssa_def *def) +{ + val->defs[block->index] = def; +} + +nir_ssa_def * +nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val, + nir_block *block) +{ + if (val->defs[block->index] == NULL) { + if (block->imm_dom) { + /* Grab it from our immediate dominator. We'll stash it here for + * easy access later. + */ + val->defs[block->index] = + nir_phi_builder_value_get_block_def(val, block->imm_dom); + return val->defs[block->index]; + } else { + /* No immediate dominator means that this block is either the + * start block or unreachable. In either case, the value is + * undefined so we need an SSA undef. + */ + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(val->builder->shader, + val->num_components); + nir_instr_insert(nir_before_cf_list(&val->builder->impl->body), + &undef->instr); + val->defs[block->index] = &undef->def; + return &undef->def; + } + } else if (val->defs[block->index] == NEEDS_PHI) { + /* If we need a phi instruction, go ahead and create one but don't + * add it to the program yet. Later, we'll go through and set up phi + * sources and add the instructions will be added at that time. + */ + nir_phi_instr *phi = nir_phi_instr_create(val->builder->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, val->num_components, NULL); + phi->instr.block = block; + exec_list_push_tail(&val->phis, &phi->instr.node); + val->defs[block->index] = &phi->dest.ssa; + return &phi->dest.ssa; + } else { + return val->defs[block->index]; + } +} + +static int +compare_blocks(const void *_a, const void *_b) +{ + nir_block * const * a = _a; + nir_block * const * b = _b; + + return (*a)->index - (*b)->index; +} + +void +nir_phi_builder_finish(struct nir_phi_builder *pb) +{ + const unsigned num_blocks = pb->num_blocks; + NIR_VLA(nir_block *, preds, num_blocks); + + foreach_list_typed(struct nir_phi_builder_value, val, node, &pb->values) { + /* We can't iterate over the list of phis normally because we are + * removing them as we go and, in some cases, adding new phis as we + * build the source lists of others. + */ + while (!exec_list_is_empty(&val->phis)) { + struct exec_node *head = exec_list_get_head(&val->phis); + nir_phi_instr *phi = exec_node_data(nir_phi_instr, head, instr.node); + assert(phi->instr.type == nir_instr_type_phi); + + exec_node_remove(&phi->instr.node); + + /* Construct an array of predecessors. We sort it to ensure + * determinism in the phi insertion algorithm. + * + * XXX: Calling qsort this many times seems expensive. + */ + int num_preds = 0; + struct set_entry *entry; + set_foreach(phi->instr.block->predecessors, entry) + preds[num_preds++] = (nir_block *)entry->key; + qsort(preds, num_preds, sizeof(*preds), compare_blocks); + + for (unsigned i = 0; i < num_preds; i++) { + nir_phi_src *src = ralloc(phi, nir_phi_src); + src->pred = preds[i]; + src->src = nir_src_for_ssa( + nir_phi_builder_value_get_block_def(val, preds[i])); + exec_list_push_tail(&phi->srcs, &src->node); + } + + nir_instr_insert(nir_before_block(phi->instr.block), &phi->instr); + } + } + + ralloc_free(pb); +} diff --git a/src/compiler/nir/nir_phi_builder.h b/src/compiler/nir/nir_phi_builder.h new file mode 100644 index 00000000000..50251bf1ba3 --- /dev/null +++ b/src/compiler/nir/nir_phi_builder.h @@ -0,0 +1,84 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include "nir.h" + +struct nir_phi_builder; +struct nir_phi_builder_value; + +/* Create a new phi builder. + * + * While this is fairly cheap, it does allocate some memory and walk the list + * of blocks so it's recommended that you only call it once and use it to + * build phis for several values. + */ +struct nir_phi_builder *nir_phi_builder_create(nir_function_impl *impl); + +/* Register a value with the builder. + * + * The 'defs' parameter specifies a bitset of blocks in which the given value + * is defined. This is used to determine where to place the phi nodes. + */ +struct nir_phi_builder_value * +nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components, + const BITSET_WORD *defs); + +/* Register a definition for the given value and block. + * + * It is safe to call this function as many times as you wish for any given + * block/value pair. However, it always replaces whatever was there + * previously even if that definition is from a phi node. The phi builder + * always uses the latest information it has, so you must be careful about the + * order in which you register definitions. The final value at the end of the + * block must be the last value registered. + */ +void +nir_phi_builder_value_set_block_def(struct nir_phi_builder_value *val, + nir_block *block, nir_ssa_def *def); + +/* Get the definition for the given value in the given block. + * + * This definition will always be the latest definition known for the given + * block. If no definition is immediately available, it will crawl up the + * dominance tree and insert phi nodes as needed until it finds one. In the + * case that no suitable definition is found, it will return the result of a + * nir_ssa_undef_instr with the correct number of components. + * + * Because this function only uses the latest available information for any + * given block, you must have already finished registering definitions for any + * blocks that dominate the current block in order to get the correct result. + */ +nir_ssa_def * +nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val, + nir_block *block); + +/* Finish building phi nodes and free the builder. + * + * This function does far more than just free memory. Prior to calling + * nir_phi_builder_finish, no phi nodes have actually been inserted in the + * program. This function is what finishes setting up phi node sources and + * adds the phi nodes to the program. + */ +void nir_phi_builder_finish(struct nir_phi_builder *pb); diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 48ecb48a620..f36b91de6e0 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -219,6 +219,40 @@ print_alu_instr(nir_alu_instr *instr, print_state *state) } } +static const char * +get_var_name(nir_variable *var, print_state *state) +{ + if (state->ht == NULL) + return var->name; + + assert(state->syms); + + struct hash_entry *entry = _mesa_hash_table_search(state->ht, var); + if (entry) + return entry->data; + + char *name; + if (var->name == NULL) { + name = ralloc_asprintf(state->syms, "@%u", state->index++); + } else { + struct set_entry *set_entry = _mesa_set_search(state->syms, var->name); + if (set_entry != NULL) { + /* we have a collision with another name, append an @ + a unique + * index */ + name = ralloc_asprintf(state->syms, "%s@%u", var->name, + state->index++); + } else { + /* Mark this one as seen */ + _mesa_set_add(state->syms, var->name); + name = var->name; + } + } + + _mesa_hash_table_insert(state->ht, var, name); + + return name; +} + static void print_constant(nir_constant *c, const struct glsl_type *type, print_state *state) { @@ -278,7 +312,8 @@ print_var_decl(nir_variable *var, print_state *state) const char *const patch = (var->data.patch) ? "patch " : ""; const char *const inv = (var->data.invariant) ? "invariant " : ""; const char *const mode[] = { "shader_in ", "shader_out ", "", "", - "uniform ", "shader_storage", "system " }; + "uniform ", "shader_storage ", "shared ", + "system "}; fprintf(fp, "%s%s%s%s%s%s ", cent, samp, patch, inv, mode[var->data.mode], @@ -286,20 +321,7 @@ print_var_decl(nir_variable *var, print_state *state) glsl_print_type(var->type, fp); - struct set_entry *entry = NULL; - if (state->syms) - entry = _mesa_set_search(state->syms, var->name); - - char *name; - - if (entry != NULL) { - /* we have a collision with another name, append an @ + a unique index */ - name = ralloc_asprintf(state->syms, "%s@%u", var->name, state->index++); - } else { - name = var->name; - } - - fprintf(fp, " %s", name); + fprintf(fp, " %s", get_var_name(var, state)); if (var->data.mode == nir_var_shader_in || var->data.mode == nir_var_shader_out || @@ -349,28 +371,13 @@ print_var_decl(nir_variable *var, print_state *state) } fprintf(fp, "\n"); - - if (state->syms) { - _mesa_set_add(state->syms, name); - _mesa_hash_table_insert(state->ht, var, name); - } } static void print_var(nir_variable *var, print_state *state) { FILE *fp = state->fp; - const char *name; - if (state->ht) { - struct hash_entry *entry = _mesa_hash_table_search(state->ht, var); - - assert(entry != NULL); - name = entry->data; - } else { - name = var->name; - } - - fprintf(fp, "%s", name); + fprintf(fp, "%s", get_var_name(var, state)); } static void @@ -600,6 +607,9 @@ print_tex_instr(nir_tex_instr *instr, print_state *state) case nir_tex_src_ddy: fprintf(fp, "(ddy)"); break; + case nir_tex_src_texture_offset: + fprintf(fp, "(texture_offset)"); + break; case nir_tex_src_sampler_offset: fprintf(fp, "(sampler_offset)"); break; @@ -630,13 +640,18 @@ print_tex_instr(nir_tex_instr *instr, print_state *state) fprintf(fp, "%u (gather_component), ", instr->component); } + if (instr->texture) { + assert(instr->sampler); + fprintf(fp, " (texture)"); + } if (instr->sampler) { print_deref(instr->sampler, state); + fprintf(fp, " (sampler)"); } else { - fprintf(fp, "%u", instr->sampler_index); + assert(instr->texture == NULL); + fprintf(fp, "%u (texture) %u (sampler)", + instr->texture_index, instr->sampler_index); } - - fprintf(fp, " (sampler)"); } static void @@ -1026,6 +1041,7 @@ nir_print_shader(nir_shader *shader, FILE *fp) fprintf(fp, "inputs: %u\n", shader->num_inputs); fprintf(fp, "outputs: %u\n", shader->num_outputs); fprintf(fp, "uniforms: %u\n", shader->num_uniforms); + fprintf(fp, "shared: %u\n", shader->num_shared); nir_foreach_variable(var, &shader->uniforms) { print_var_decl(var, &state); @@ -1039,6 +1055,10 @@ nir_print_shader(nir_shader *shader, FILE *fp) print_var_decl(var, &state); } + nir_foreach_variable(var, &shader->shared) { + print_var_decl(var, &state); + } + nir_foreach_variable(var, &shader->globals) { print_var_decl(var, &state); } diff --git a/src/compiler/nir/nir_remove_dead_variables.c b/src/compiler/nir/nir_remove_dead_variables.c index db754e56b1c..792c5d4aae6 100644 --- a/src/compiler/nir/nir_remove_dead_variables.c +++ b/src/compiler/nir/nir_remove_dead_variables.c @@ -115,7 +115,7 @@ remove_dead_vars(struct exec_list *var_list, struct set *live) } bool -nir_remove_dead_variables(nir_shader *shader) +nir_remove_dead_variables(nir_shader *shader, nir_variable_mode mode) { bool progress = false; struct set *live = @@ -123,15 +123,30 @@ nir_remove_dead_variables(nir_shader *shader) add_var_use_shader(shader, live); - progress = remove_dead_vars(&shader->globals, live) || progress; + if (mode == nir_var_uniform || mode == nir_var_all) + progress = remove_dead_vars(&shader->uniforms, live) || progress; - nir_foreach_function(shader, function) { - if (function->impl) { - if (remove_dead_vars(&function->impl->locals, live)) { - nir_metadata_preserve(function->impl, nir_metadata_block_index | - nir_metadata_dominance | - nir_metadata_live_ssa_defs); - progress = true; + if (mode == nir_var_shader_in || mode == nir_var_all) + progress = remove_dead_vars(&shader->inputs, live) || progress; + + if (mode == nir_var_shader_out || mode == nir_var_all) + progress = remove_dead_vars(&shader->outputs, live) || progress; + + if (mode == nir_var_global || mode == nir_var_all) + progress = remove_dead_vars(&shader->globals, live) || progress; + + if (mode == nir_var_system_value || mode == nir_var_all) + progress = remove_dead_vars(&shader->system_values, live) || progress; + + if (mode == nir_var_local || mode == nir_var_all) { + nir_foreach_function(shader, function) { + if (function->impl) { + if (remove_dead_vars(&function->impl->locals, live)) { + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance | + nir_metadata_live_ssa_defs); + progress = true; + } } } } diff --git a/src/compiler/nir/nir_repair_ssa.c b/src/compiler/nir/nir_repair_ssa.c new file mode 100644 index 00000000000..3ab4f0f6db7 --- /dev/null +++ b/src/compiler/nir/nir_repair_ssa.c @@ -0,0 +1,157 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_phi_builder.h" + +struct repair_ssa_state { + nir_function_impl *impl; + + BITSET_WORD *def_set; + struct nir_phi_builder *phi_builder; + + bool progress; +}; + +/* Get ready to build a phi and return the builder */ +static struct nir_phi_builder * +prep_build_phi(struct repair_ssa_state *state) +{ + const unsigned num_words = BITSET_WORDS(state->impl->num_blocks); + + /* We create the phi builder on-demand. */ + if (state->phi_builder == NULL) { + state->phi_builder = nir_phi_builder_create(state->impl); + state->def_set = ralloc_array(NULL, BITSET_WORD, num_words); + } + + /* We're going to build a phi. That's progress. */ + state->progress = true; + + /* Set the defs set to empty */ + memset(state->def_set, 0, num_words * sizeof(*state->def_set)); + + return state->phi_builder; +} + +static nir_block * +get_src_block(nir_src *src) +{ + if (src->parent_instr->type == nir_instr_type_phi) { + return exec_node_data(nir_phi_src, src, src)->pred; + } else { + return src->parent_instr->block; + } +} + +static bool +repair_ssa_def(nir_ssa_def *def, void *void_state) +{ + struct repair_ssa_state *state = void_state; + + bool is_valid = true; + nir_foreach_use(def, src) { + if (!nir_block_dominates(def->parent_instr->block, get_src_block(src))) { + is_valid = false; + break; + } + } + + if (is_valid) + return true; + + struct nir_phi_builder *pb = prep_build_phi(state); + + BITSET_SET(state->def_set, def->parent_instr->block->index); + + struct nir_phi_builder_value *val = + nir_phi_builder_add_value(pb, def->num_components, state->def_set); + + nir_phi_builder_value_set_block_def(val, def->parent_instr->block, def); + + nir_foreach_use_safe(def, src) { + nir_block *src_block = get_src_block(src); + if (!nir_block_dominates(def->parent_instr->block, src_block)) { + nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa( + nir_phi_builder_value_get_block_def(val, src_block))); + } + } + + return true; +} + +static bool +repair_ssa_block(nir_block *block, void *state) +{ + nir_foreach_instr_safe(block, instr) { + nir_foreach_ssa_def(instr, repair_ssa_def, state); + } + + return true; +} + +bool +nir_repair_ssa_impl(nir_function_impl *impl) +{ + struct repair_ssa_state state; + + state.impl = impl; + state.phi_builder = NULL; + state.progress = false; + + nir_metadata_require(impl, nir_metadata_block_index | + nir_metadata_dominance); + + nir_foreach_block(impl, repair_ssa_block, &state); + + if (state.progress) + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + if (state.phi_builder) { + nir_phi_builder_finish(state.phi_builder); + ralloc_free(state.def_set); + } + + return state.progress; +} + +/** This pass can be used to repair SSA form in a shader. + * + * Sometimes a transformation (such as return lowering) will have to make + * changes to a shader which, while still correct, break some of NIR's SSA + * invariants. This pass will insert ssa_undefs and phi nodes as needed to + * get the shader back into SSA that the validator will like. + */ +bool +nir_repair_ssa(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(shader, function) { + if (function->impl) + progress = nir_repair_ssa_impl(function->impl) || progress; + } + + return progress; +} diff --git a/src/compiler/nir/nir_sweep.c b/src/compiler/nir/nir_sweep.c index 0710bdba7c7..5c62154ec7f 100644 --- a/src/compiler/nir/nir_sweep.c +++ b/src/compiler/nir/nir_sweep.c @@ -159,6 +159,7 @@ nir_sweep(nir_shader *nir) steal_list(nir, nir_variable, &nir->uniforms); steal_list(nir, nir_variable, &nir->inputs); steal_list(nir, nir_variable, &nir->outputs); + steal_list(nir, nir_variable, &nir->shared); steal_list(nir, nir_variable, &nir->globals); steal_list(nir, nir_variable, &nir->system_values); steal_list(nir, nir_register, &nir->registers); diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c index e4db68db3c0..1a943d76314 100644 --- a/src/compiler/nir/nir_validate.c +++ b/src/compiler/nir/nir_validate.c @@ -454,10 +454,12 @@ validate_tex_instr(nir_tex_instr *instr, validate_state *state) static void validate_call_instr(nir_call_instr *instr, validate_state *state) { - if (instr->return_deref == NULL) + if (instr->return_deref == NULL) { assert(glsl_type_is_void(instr->callee->return_type)); - else + } else { assert(instr->return_deref->deref.type == instr->callee->return_type); + validate_deref_var(instr, instr->return_deref, state); + } assert(instr->num_params == instr->callee->num_params); @@ -465,8 +467,6 @@ validate_call_instr(nir_call_instr *instr, validate_state *state) assert(instr->callee->params[i].type == instr->params[i]->deref.type); validate_deref_var(instr, instr->params[i], state); } - - validate_deref_var(instr, instr->return_deref, state); } static void @@ -1036,6 +1036,11 @@ nir_validate_shader(nir_shader *shader) validate_var_decl(var, true, &state); } + exec_list_validate(&shader->shared); + nir_foreach_variable(var, &shader->shared) { + validate_var_decl(var, true, &state); + } + exec_list_validate(&shader->globals); nir_foreach_variable(var, &shader->globals) { validate_var_decl(var, true, &state); diff --git a/src/compiler/nir/spirv/GLSL.std.450.h b/src/compiler/nir/spirv/GLSL.std.450.h new file mode 100644 index 00000000000..d1c9b5c1d44 --- /dev/null +++ b/src/compiler/nir/spirv/GLSL.std.450.h @@ -0,0 +1,127 @@ +/* +** Copyright (c) 2014-2015 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +#ifndef GLSLstd450_H +#define GLSLstd450_H + +const int GLSLstd450Version = 99; +const int GLSLstd450Revision = 3; + +enum GLSLstd450 { + GLSLstd450Bad = 0, // Don't use + + GLSLstd450Round = 1, + GLSLstd450RoundEven = 2, + GLSLstd450Trunc = 3, + GLSLstd450FAbs = 4, + GLSLstd450SAbs = 5, + GLSLstd450FSign = 6, + GLSLstd450SSign = 7, + GLSLstd450Floor = 8, + GLSLstd450Ceil = 9, + GLSLstd450Fract = 10, + + GLSLstd450Radians = 11, + GLSLstd450Degrees = 12, + GLSLstd450Sin = 13, + GLSLstd450Cos = 14, + GLSLstd450Tan = 15, + GLSLstd450Asin = 16, + GLSLstd450Acos = 17, + GLSLstd450Atan = 18, + GLSLstd450Sinh = 19, + GLSLstd450Cosh = 20, + GLSLstd450Tanh = 21, + GLSLstd450Asinh = 22, + GLSLstd450Acosh = 23, + GLSLstd450Atanh = 24, + GLSLstd450Atan2 = 25, + + GLSLstd450Pow = 26, + GLSLstd450Exp = 27, + GLSLstd450Log = 28, + GLSLstd450Exp2 = 29, + GLSLstd450Log2 = 30, + GLSLstd450Sqrt = 31, + GLSLstd450InverseSqrt = 32, + + GLSLstd450Determinant = 33, + GLSLstd450MatrixInverse = 34, + + GLSLstd450Modf = 35, // second operand needs an OpVariable to write to + GLSLstd450ModfStruct = 36, // no OpVariable operand + GLSLstd450FMin = 37, + GLSLstd450UMin = 38, + GLSLstd450SMin = 39, + GLSLstd450FMax = 40, + GLSLstd450UMax = 41, + GLSLstd450SMax = 42, + GLSLstd450FClamp = 43, + GLSLstd450UClamp = 44, + GLSLstd450SClamp = 45, + GLSLstd450FMix = 46, + GLSLstd450IMix = 47, + GLSLstd450Step = 48, + GLSLstd450SmoothStep = 49, + + GLSLstd450Fma = 50, + GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 52, // no OpVariable operand + GLSLstd450Ldexp = 53, + + GLSLstd450PackSnorm4x8 = 54, + GLSLstd450PackUnorm4x8 = 55, + GLSLstd450PackSnorm2x16 = 56, + GLSLstd450PackUnorm2x16 = 57, + GLSLstd450PackHalf2x16 = 58, + GLSLstd450PackDouble2x32 = 59, + GLSLstd450UnpackSnorm2x16 = 60, + GLSLstd450UnpackUnorm2x16 = 61, + GLSLstd450UnpackHalf2x16 = 62, + GLSLstd450UnpackSnorm4x8 = 63, + GLSLstd450UnpackUnorm4x8 = 64, + GLSLstd450UnpackDouble2x32 = 65, + + GLSLstd450Length = 66, + GLSLstd450Distance = 67, + GLSLstd450Cross = 68, + GLSLstd450Normalize = 69, + GLSLstd450FaceForward = 70, + GLSLstd450Reflect = 71, + GLSLstd450Refract = 72, + + GLSLstd450FindILsb = 73, + GLSLstd450FindSMsb = 74, + GLSLstd450FindUMsb = 75, + + GLSLstd450InterpolateAtCentroid = 76, + GLSLstd450InterpolateAtSample = 77, + GLSLstd450InterpolateAtOffset = 78, + + GLSLstd450Count +}; + +#endif // #ifndef GLSLstd450_H diff --git a/src/compiler/nir/spirv/nir_spirv.h b/src/compiler/nir/spirv/nir_spirv.h new file mode 100644 index 00000000000..500f2cb94df --- /dev/null +++ b/src/compiler/nir/spirv/nir_spirv.h @@ -0,0 +1,54 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand ([email protected]) + * + */ + +#pragma once + +#ifndef _NIR_SPIRV_H_ +#define _NIR_SPIRV_H_ + +#include "nir/nir.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct nir_spirv_specialization { + uint32_t id; + uint32_t data; +}; + +nir_function *spirv_to_nir(const uint32_t *words, size_t word_count, + struct nir_spirv_specialization *specializations, + unsigned num_specializations, + gl_shader_stage stage, const char *entry_point_name, + const nir_shader_compiler_options *options); + +#ifdef __cplusplus +} +#endif + +#endif /* _NIR_SPIRV_H_ */ diff --git a/src/compiler/nir/spirv/spirv.h b/src/compiler/nir/spirv/spirv.h new file mode 100644 index 00000000000..63bcb2f88dd --- /dev/null +++ b/src/compiler/nir/spirv/spirv.h @@ -0,0 +1,870 @@ +/* +** Copyright (c) 2014-2015 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +/* +** This header is automatically generated by the same tool that creates +** the Binary Section of the SPIR-V specification. +*/ + +/* +** Enumeration tokens for SPIR-V, in various styles: +** C, C++, C++11, JSON, Lua, Python +** +** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +** +** Some tokens act like mask values, which can be OR'd together, +** while others are mutually exclusive. The mask-like ones have +** "Mask" in their name, and a parallel enum that has the shift +** amount (1 << x) for each corresponding enumerant. +*/ + +#ifndef spirv_H +#define spirv_H + +typedef unsigned int SpvId; + +#define SPV_VERSION 0x10000 +#define SPV_REVISION 2 + +static const unsigned int SpvMagicNumber = 0x07230203; +static const unsigned int SpvVersion = 0x00010000; +static const unsigned int SpvRevision = 2; +static const unsigned int SpvOpCodeMask = 0xffff; +static const unsigned int SpvWordCountShift = 16; + +typedef enum SpvSourceLanguage_ { + SpvSourceLanguageUnknown = 0, + SpvSourceLanguageESSL = 1, + SpvSourceLanguageGLSL = 2, + SpvSourceLanguageOpenCL_C = 3, + SpvSourceLanguageOpenCL_CPP = 4, +} SpvSourceLanguage; + +typedef enum SpvExecutionModel_ { + SpvExecutionModelVertex = 0, + SpvExecutionModelTessellationControl = 1, + SpvExecutionModelTessellationEvaluation = 2, + SpvExecutionModelGeometry = 3, + SpvExecutionModelFragment = 4, + SpvExecutionModelGLCompute = 5, + SpvExecutionModelKernel = 6, +} SpvExecutionModel; + +typedef enum SpvAddressingModel_ { + SpvAddressingModelLogical = 0, + SpvAddressingModelPhysical32 = 1, + SpvAddressingModelPhysical64 = 2, +} SpvAddressingModel; + +typedef enum SpvMemoryModel_ { + SpvMemoryModelSimple = 0, + SpvMemoryModelGLSL450 = 1, + SpvMemoryModelOpenCL = 2, +} SpvMemoryModel; + +typedef enum SpvExecutionMode_ { + SpvExecutionModeInvocations = 0, + SpvExecutionModeSpacingEqual = 1, + SpvExecutionModeSpacingFractionalEven = 2, + SpvExecutionModeSpacingFractionalOdd = 3, + SpvExecutionModeVertexOrderCw = 4, + SpvExecutionModeVertexOrderCcw = 5, + SpvExecutionModePixelCenterInteger = 6, + SpvExecutionModeOriginUpperLeft = 7, + SpvExecutionModeOriginLowerLeft = 8, + SpvExecutionModeEarlyFragmentTests = 9, + SpvExecutionModePointMode = 10, + SpvExecutionModeXfb = 11, + SpvExecutionModeDepthReplacing = 12, + SpvExecutionModeDepthGreater = 14, + SpvExecutionModeDepthLess = 15, + SpvExecutionModeDepthUnchanged = 16, + SpvExecutionModeLocalSize = 17, + SpvExecutionModeLocalSizeHint = 18, + SpvExecutionModeInputPoints = 19, + SpvExecutionModeInputLines = 20, + SpvExecutionModeInputLinesAdjacency = 21, + SpvExecutionModeTriangles = 22, + SpvExecutionModeInputTrianglesAdjacency = 23, + SpvExecutionModeQuads = 24, + SpvExecutionModeIsolines = 25, + SpvExecutionModeOutputVertices = 26, + SpvExecutionModeOutputPoints = 27, + SpvExecutionModeOutputLineStrip = 28, + SpvExecutionModeOutputTriangleStrip = 29, + SpvExecutionModeVecTypeHint = 30, + SpvExecutionModeContractionOff = 31, +} SpvExecutionMode; + +typedef enum SpvStorageClass_ { + SpvStorageClassUniformConstant = 0, + SpvStorageClassInput = 1, + SpvStorageClassUniform = 2, + SpvStorageClassOutput = 3, + SpvStorageClassWorkgroup = 4, + SpvStorageClassCrossWorkgroup = 5, + SpvStorageClassPrivate = 6, + SpvStorageClassFunction = 7, + SpvStorageClassGeneric = 8, + SpvStorageClassPushConstant = 9, + SpvStorageClassAtomicCounter = 10, + SpvStorageClassImage = 11, +} SpvStorageClass; + +typedef enum SpvDim_ { + SpvDim1D = 0, + SpvDim2D = 1, + SpvDim3D = 2, + SpvDimCube = 3, + SpvDimRect = 4, + SpvDimBuffer = 5, + SpvDimSubpassData = 6, +} SpvDim; + +typedef enum SpvSamplerAddressingMode_ { + SpvSamplerAddressingModeNone = 0, + SpvSamplerAddressingModeClampToEdge = 1, + SpvSamplerAddressingModeClamp = 2, + SpvSamplerAddressingModeRepeat = 3, + SpvSamplerAddressingModeRepeatMirrored = 4, +} SpvSamplerAddressingMode; + +typedef enum SpvSamplerFilterMode_ { + SpvSamplerFilterModeNearest = 0, + SpvSamplerFilterModeLinear = 1, +} SpvSamplerFilterMode; + +typedef enum SpvImageFormat_ { + SpvImageFormatUnknown = 0, + SpvImageFormatRgba32f = 1, + SpvImageFormatRgba16f = 2, + SpvImageFormatR32f = 3, + SpvImageFormatRgba8 = 4, + SpvImageFormatRgba8Snorm = 5, + SpvImageFormatRg32f = 6, + SpvImageFormatRg16f = 7, + SpvImageFormatR11fG11fB10f = 8, + SpvImageFormatR16f = 9, + SpvImageFormatRgba16 = 10, + SpvImageFormatRgb10A2 = 11, + SpvImageFormatRg16 = 12, + SpvImageFormatRg8 = 13, + SpvImageFormatR16 = 14, + SpvImageFormatR8 = 15, + SpvImageFormatRgba16Snorm = 16, + SpvImageFormatRg16Snorm = 17, + SpvImageFormatRg8Snorm = 18, + SpvImageFormatR16Snorm = 19, + SpvImageFormatR8Snorm = 20, + SpvImageFormatRgba32i = 21, + SpvImageFormatRgba16i = 22, + SpvImageFormatRgba8i = 23, + SpvImageFormatR32i = 24, + SpvImageFormatRg32i = 25, + SpvImageFormatRg16i = 26, + SpvImageFormatRg8i = 27, + SpvImageFormatR16i = 28, + SpvImageFormatR8i = 29, + SpvImageFormatRgba32ui = 30, + SpvImageFormatRgba16ui = 31, + SpvImageFormatRgba8ui = 32, + SpvImageFormatR32ui = 33, + SpvImageFormatRgb10a2ui = 34, + SpvImageFormatRg32ui = 35, + SpvImageFormatRg16ui = 36, + SpvImageFormatRg8ui = 37, + SpvImageFormatR16ui = 38, + SpvImageFormatR8ui = 39, +} SpvImageFormat; + +typedef enum SpvImageChannelOrder_ { + SpvImageChannelOrderR = 0, + SpvImageChannelOrderA = 1, + SpvImageChannelOrderRG = 2, + SpvImageChannelOrderRA = 3, + SpvImageChannelOrderRGB = 4, + SpvImageChannelOrderRGBA = 5, + SpvImageChannelOrderBGRA = 6, + SpvImageChannelOrderARGB = 7, + SpvImageChannelOrderIntensity = 8, + SpvImageChannelOrderLuminance = 9, + SpvImageChannelOrderRx = 10, + SpvImageChannelOrderRGx = 11, + SpvImageChannelOrderRGBx = 12, + SpvImageChannelOrderDepth = 13, + SpvImageChannelOrderDepthStencil = 14, + SpvImageChannelOrdersRGB = 15, + SpvImageChannelOrdersRGBx = 16, + SpvImageChannelOrdersRGBA = 17, + SpvImageChannelOrdersBGRA = 18, +} SpvImageChannelOrder; + +typedef enum SpvImageChannelDataType_ { + SpvImageChannelDataTypeSnormInt8 = 0, + SpvImageChannelDataTypeSnormInt16 = 1, + SpvImageChannelDataTypeUnormInt8 = 2, + SpvImageChannelDataTypeUnormInt16 = 3, + SpvImageChannelDataTypeUnormShort565 = 4, + SpvImageChannelDataTypeUnormShort555 = 5, + SpvImageChannelDataTypeUnormInt101010 = 6, + SpvImageChannelDataTypeSignedInt8 = 7, + SpvImageChannelDataTypeSignedInt16 = 8, + SpvImageChannelDataTypeSignedInt32 = 9, + SpvImageChannelDataTypeUnsignedInt8 = 10, + SpvImageChannelDataTypeUnsignedInt16 = 11, + SpvImageChannelDataTypeUnsignedInt32 = 12, + SpvImageChannelDataTypeHalfFloat = 13, + SpvImageChannelDataTypeFloat = 14, + SpvImageChannelDataTypeUnormInt24 = 15, + SpvImageChannelDataTypeUnormInt101010_2 = 16, +} SpvImageChannelDataType; + +typedef enum SpvImageOperandsShift_ { + SpvImageOperandsBiasShift = 0, + SpvImageOperandsLodShift = 1, + SpvImageOperandsGradShift = 2, + SpvImageOperandsConstOffsetShift = 3, + SpvImageOperandsOffsetShift = 4, + SpvImageOperandsConstOffsetsShift = 5, + SpvImageOperandsSampleShift = 6, + SpvImageOperandsMinLodShift = 7, +} SpvImageOperandsShift; + +typedef enum SpvImageOperandsMask_ { + SpvImageOperandsMaskNone = 0, + SpvImageOperandsBiasMask = 0x00000001, + SpvImageOperandsLodMask = 0x00000002, + SpvImageOperandsGradMask = 0x00000004, + SpvImageOperandsConstOffsetMask = 0x00000008, + SpvImageOperandsOffsetMask = 0x00000010, + SpvImageOperandsConstOffsetsMask = 0x00000020, + SpvImageOperandsSampleMask = 0x00000040, + SpvImageOperandsMinLodMask = 0x00000080, +} SpvImageOperandsMask; + +typedef enum SpvFPFastMathModeShift_ { + SpvFPFastMathModeNotNaNShift = 0, + SpvFPFastMathModeNotInfShift = 1, + SpvFPFastMathModeNSZShift = 2, + SpvFPFastMathModeAllowRecipShift = 3, + SpvFPFastMathModeFastShift = 4, +} SpvFPFastMathModeShift; + +typedef enum SpvFPFastMathModeMask_ { + SpvFPFastMathModeMaskNone = 0, + SpvFPFastMathModeNotNaNMask = 0x00000001, + SpvFPFastMathModeNotInfMask = 0x00000002, + SpvFPFastMathModeNSZMask = 0x00000004, + SpvFPFastMathModeAllowRecipMask = 0x00000008, + SpvFPFastMathModeFastMask = 0x00000010, +} SpvFPFastMathModeMask; + +typedef enum SpvFPRoundingMode_ { + SpvFPRoundingModeRTE = 0, + SpvFPRoundingModeRTZ = 1, + SpvFPRoundingModeRTP = 2, + SpvFPRoundingModeRTN = 3, +} SpvFPRoundingMode; + +typedef enum SpvLinkageType_ { + SpvLinkageTypeExport = 0, + SpvLinkageTypeImport = 1, +} SpvLinkageType; + +typedef enum SpvAccessQualifier_ { + SpvAccessQualifierReadOnly = 0, + SpvAccessQualifierWriteOnly = 1, + SpvAccessQualifierReadWrite = 2, +} SpvAccessQualifier; + +typedef enum SpvFunctionParameterAttribute_ { + SpvFunctionParameterAttributeZext = 0, + SpvFunctionParameterAttributeSext = 1, + SpvFunctionParameterAttributeByVal = 2, + SpvFunctionParameterAttributeSret = 3, + SpvFunctionParameterAttributeNoAlias = 4, + SpvFunctionParameterAttributeNoCapture = 5, + SpvFunctionParameterAttributeNoWrite = 6, + SpvFunctionParameterAttributeNoReadWrite = 7, +} SpvFunctionParameterAttribute; + +typedef enum SpvDecoration_ { + SpvDecorationRelaxedPrecision = 0, + SpvDecorationSpecId = 1, + SpvDecorationBlock = 2, + SpvDecorationBufferBlock = 3, + SpvDecorationRowMajor = 4, + SpvDecorationColMajor = 5, + SpvDecorationArrayStride = 6, + SpvDecorationMatrixStride = 7, + SpvDecorationGLSLShared = 8, + SpvDecorationGLSLPacked = 9, + SpvDecorationCPacked = 10, + SpvDecorationBuiltIn = 11, + SpvDecorationNoPerspective = 13, + SpvDecorationFlat = 14, + SpvDecorationPatch = 15, + SpvDecorationCentroid = 16, + SpvDecorationSample = 17, + SpvDecorationInvariant = 18, + SpvDecorationRestrict = 19, + SpvDecorationAliased = 20, + SpvDecorationVolatile = 21, + SpvDecorationConstant = 22, + SpvDecorationCoherent = 23, + SpvDecorationNonWritable = 24, + SpvDecorationNonReadable = 25, + SpvDecorationUniform = 26, + SpvDecorationSaturatedConversion = 28, + SpvDecorationStream = 29, + SpvDecorationLocation = 30, + SpvDecorationComponent = 31, + SpvDecorationIndex = 32, + SpvDecorationBinding = 33, + SpvDecorationDescriptorSet = 34, + SpvDecorationOffset = 35, + SpvDecorationXfbBuffer = 36, + SpvDecorationXfbStride = 37, + SpvDecorationFuncParamAttr = 38, + SpvDecorationFPRoundingMode = 39, + SpvDecorationFPFastMathMode = 40, + SpvDecorationLinkageAttributes = 41, + SpvDecorationNoContraction = 42, + SpvDecorationInputAttachmentIndex = 43, + SpvDecorationAlignment = 44, +} SpvDecoration; + +typedef enum SpvBuiltIn_ { + SpvBuiltInPosition = 0, + SpvBuiltInPointSize = 1, + SpvBuiltInClipDistance = 3, + SpvBuiltInCullDistance = 4, + SpvBuiltInVertexId = 5, + SpvBuiltInInstanceId = 6, + SpvBuiltInPrimitiveId = 7, + SpvBuiltInInvocationId = 8, + SpvBuiltInLayer = 9, + SpvBuiltInViewportIndex = 10, + SpvBuiltInTessLevelOuter = 11, + SpvBuiltInTessLevelInner = 12, + SpvBuiltInTessCoord = 13, + SpvBuiltInPatchVertices = 14, + SpvBuiltInFragCoord = 15, + SpvBuiltInPointCoord = 16, + SpvBuiltInFrontFacing = 17, + SpvBuiltInSampleId = 18, + SpvBuiltInSamplePosition = 19, + SpvBuiltInSampleMask = 20, + SpvBuiltInFragDepth = 22, + SpvBuiltInHelperInvocation = 23, + SpvBuiltInNumWorkgroups = 24, + SpvBuiltInWorkgroupSize = 25, + SpvBuiltInWorkgroupId = 26, + SpvBuiltInLocalInvocationId = 27, + SpvBuiltInGlobalInvocationId = 28, + SpvBuiltInLocalInvocationIndex = 29, + SpvBuiltInWorkDim = 30, + SpvBuiltInGlobalSize = 31, + SpvBuiltInEnqueuedWorkgroupSize = 32, + SpvBuiltInGlobalOffset = 33, + SpvBuiltInGlobalLinearId = 34, + SpvBuiltInSubgroupSize = 36, + SpvBuiltInSubgroupMaxSize = 37, + SpvBuiltInNumSubgroups = 38, + SpvBuiltInNumEnqueuedSubgroups = 39, + SpvBuiltInSubgroupId = 40, + SpvBuiltInSubgroupLocalInvocationId = 41, + SpvBuiltInVertexIndex = 42, + SpvBuiltInInstanceIndex = 43, +} SpvBuiltIn; + +typedef enum SpvSelectionControlShift_ { + SpvSelectionControlFlattenShift = 0, + SpvSelectionControlDontFlattenShift = 1, +} SpvSelectionControlShift; + +typedef enum SpvSelectionControlMask_ { + SpvSelectionControlMaskNone = 0, + SpvSelectionControlFlattenMask = 0x00000001, + SpvSelectionControlDontFlattenMask = 0x00000002, +} SpvSelectionControlMask; + +typedef enum SpvLoopControlShift_ { + SpvLoopControlUnrollShift = 0, + SpvLoopControlDontUnrollShift = 1, +} SpvLoopControlShift; + +typedef enum SpvLoopControlMask_ { + SpvLoopControlMaskNone = 0, + SpvLoopControlUnrollMask = 0x00000001, + SpvLoopControlDontUnrollMask = 0x00000002, +} SpvLoopControlMask; + +typedef enum SpvFunctionControlShift_ { + SpvFunctionControlInlineShift = 0, + SpvFunctionControlDontInlineShift = 1, + SpvFunctionControlPureShift = 2, + SpvFunctionControlConstShift = 3, +} SpvFunctionControlShift; + +typedef enum SpvFunctionControlMask_ { + SpvFunctionControlMaskNone = 0, + SpvFunctionControlInlineMask = 0x00000001, + SpvFunctionControlDontInlineMask = 0x00000002, + SpvFunctionControlPureMask = 0x00000004, + SpvFunctionControlConstMask = 0x00000008, +} SpvFunctionControlMask; + +typedef enum SpvMemorySemanticsShift_ { + SpvMemorySemanticsAcquireShift = 1, + SpvMemorySemanticsReleaseShift = 2, + SpvMemorySemanticsAcquireReleaseShift = 3, + SpvMemorySemanticsSequentiallyConsistentShift = 4, + SpvMemorySemanticsUniformMemoryShift = 6, + SpvMemorySemanticsSubgroupMemoryShift = 7, + SpvMemorySemanticsWorkgroupMemoryShift = 8, + SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, + SpvMemorySemanticsAtomicCounterMemoryShift = 10, + SpvMemorySemanticsImageMemoryShift = 11, +} SpvMemorySemanticsShift; + +typedef enum SpvMemorySemanticsMask_ { + SpvMemorySemanticsMaskNone = 0, + SpvMemorySemanticsAcquireMask = 0x00000002, + SpvMemorySemanticsReleaseMask = 0x00000004, + SpvMemorySemanticsAcquireReleaseMask = 0x00000008, + SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010, + SpvMemorySemanticsUniformMemoryMask = 0x00000040, + SpvMemorySemanticsSubgroupMemoryMask = 0x00000080, + SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100, + SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, + SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, + SpvMemorySemanticsImageMemoryMask = 0x00000800, +} SpvMemorySemanticsMask; + +typedef enum SpvMemoryAccessShift_ { + SpvMemoryAccessVolatileShift = 0, + SpvMemoryAccessAlignedShift = 1, + SpvMemoryAccessNontemporalShift = 2, +} SpvMemoryAccessShift; + +typedef enum SpvMemoryAccessMask_ { + SpvMemoryAccessMaskNone = 0, + SpvMemoryAccessVolatileMask = 0x00000001, + SpvMemoryAccessAlignedMask = 0x00000002, + SpvMemoryAccessNontemporalMask = 0x00000004, +} SpvMemoryAccessMask; + +typedef enum SpvScope_ { + SpvScopeCrossDevice = 0, + SpvScopeDevice = 1, + SpvScopeWorkgroup = 2, + SpvScopeSubgroup = 3, + SpvScopeInvocation = 4, +} SpvScope; + +typedef enum SpvGroupOperation_ { + SpvGroupOperationReduce = 0, + SpvGroupOperationInclusiveScan = 1, + SpvGroupOperationExclusiveScan = 2, +} SpvGroupOperation; + +typedef enum SpvKernelEnqueueFlags_ { + SpvKernelEnqueueFlagsNoWait = 0, + SpvKernelEnqueueFlagsWaitKernel = 1, + SpvKernelEnqueueFlagsWaitWorkGroup = 2, +} SpvKernelEnqueueFlags; + +typedef enum SpvKernelProfilingInfoShift_ { + SpvKernelProfilingInfoCmdExecTimeShift = 0, +} SpvKernelProfilingInfoShift; + +typedef enum SpvKernelProfilingInfoMask_ { + SpvKernelProfilingInfoMaskNone = 0, + SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, +} SpvKernelProfilingInfoMask; + +typedef enum SpvCapability_ { + SpvCapabilityMatrix = 0, + SpvCapabilityShader = 1, + SpvCapabilityGeometry = 2, + SpvCapabilityTessellation = 3, + SpvCapabilityAddresses = 4, + SpvCapabilityLinkage = 5, + SpvCapabilityKernel = 6, + SpvCapabilityVector16 = 7, + SpvCapabilityFloat16Buffer = 8, + SpvCapabilityFloat16 = 9, + SpvCapabilityFloat64 = 10, + SpvCapabilityInt64 = 11, + SpvCapabilityInt64Atomics = 12, + SpvCapabilityImageBasic = 13, + SpvCapabilityImageReadWrite = 14, + SpvCapabilityImageMipmap = 15, + SpvCapabilityPipes = 17, + SpvCapabilityGroups = 18, + SpvCapabilityDeviceEnqueue = 19, + SpvCapabilityLiteralSampler = 20, + SpvCapabilityAtomicStorage = 21, + SpvCapabilityInt16 = 22, + SpvCapabilityTessellationPointSize = 23, + SpvCapabilityGeometryPointSize = 24, + SpvCapabilityImageGatherExtended = 25, + SpvCapabilityStorageImageMultisample = 27, + SpvCapabilityUniformBufferArrayDynamicIndexing = 28, + SpvCapabilitySampledImageArrayDynamicIndexing = 29, + SpvCapabilityStorageBufferArrayDynamicIndexing = 30, + SpvCapabilityStorageImageArrayDynamicIndexing = 31, + SpvCapabilityClipDistance = 32, + SpvCapabilityCullDistance = 33, + SpvCapabilityImageCubeArray = 34, + SpvCapabilitySampleRateShading = 35, + SpvCapabilityImageRect = 36, + SpvCapabilitySampledRect = 37, + SpvCapabilityGenericPointer = 38, + SpvCapabilityInt8 = 39, + SpvCapabilityInputAttachment = 40, + SpvCapabilitySparseResidency = 41, + SpvCapabilityMinLod = 42, + SpvCapabilitySampled1D = 43, + SpvCapabilityImage1D = 44, + SpvCapabilitySampledCubeArray = 45, + SpvCapabilitySampledBuffer = 46, + SpvCapabilityImageBuffer = 47, + SpvCapabilityImageMSArray = 48, + SpvCapabilityStorageImageExtendedFormats = 49, + SpvCapabilityImageQuery = 50, + SpvCapabilityDerivativeControl = 51, + SpvCapabilityInterpolationFunction = 52, + SpvCapabilityTransformFeedback = 53, + SpvCapabilityGeometryStreams = 54, + SpvCapabilityStorageImageReadWithoutFormat = 55, + SpvCapabilityStorageImageWriteWithoutFormat = 56, + SpvCapabilityMultiViewport = 57, +} SpvCapability; + +typedef enum SpvOp_ { + SpvOpNop = 0, + SpvOpUndef = 1, + SpvOpSourceContinued = 2, + SpvOpSource = 3, + SpvOpSourceExtension = 4, + SpvOpName = 5, + SpvOpMemberName = 6, + SpvOpString = 7, + SpvOpLine = 8, + SpvOpExtension = 10, + SpvOpExtInstImport = 11, + SpvOpExtInst = 12, + SpvOpMemoryModel = 14, + SpvOpEntryPoint = 15, + SpvOpExecutionMode = 16, + SpvOpCapability = 17, + SpvOpTypeVoid = 19, + SpvOpTypeBool = 20, + SpvOpTypeInt = 21, + SpvOpTypeFloat = 22, + SpvOpTypeVector = 23, + SpvOpTypeMatrix = 24, + SpvOpTypeImage = 25, + SpvOpTypeSampler = 26, + SpvOpTypeSampledImage = 27, + SpvOpTypeArray = 28, + SpvOpTypeRuntimeArray = 29, + SpvOpTypeStruct = 30, + SpvOpTypeOpaque = 31, + SpvOpTypePointer = 32, + SpvOpTypeFunction = 33, + SpvOpTypeEvent = 34, + SpvOpTypeDeviceEvent = 35, + SpvOpTypeReserveId = 36, + SpvOpTypeQueue = 37, + SpvOpTypePipe = 38, + SpvOpTypeForwardPointer = 39, + SpvOpConstantTrue = 41, + SpvOpConstantFalse = 42, + SpvOpConstant = 43, + SpvOpConstantComposite = 44, + SpvOpConstantSampler = 45, + SpvOpConstantNull = 46, + SpvOpSpecConstantTrue = 48, + SpvOpSpecConstantFalse = 49, + SpvOpSpecConstant = 50, + SpvOpSpecConstantComposite = 51, + SpvOpSpecConstantOp = 52, + SpvOpFunction = 54, + SpvOpFunctionParameter = 55, + SpvOpFunctionEnd = 56, + SpvOpFunctionCall = 57, + SpvOpVariable = 59, + SpvOpImageTexelPointer = 60, + SpvOpLoad = 61, + SpvOpStore = 62, + SpvOpCopyMemory = 63, + SpvOpCopyMemorySized = 64, + SpvOpAccessChain = 65, + SpvOpInBoundsAccessChain = 66, + SpvOpPtrAccessChain = 67, + SpvOpArrayLength = 68, + SpvOpGenericPtrMemSemantics = 69, + SpvOpInBoundsPtrAccessChain = 70, + SpvOpDecorate = 71, + SpvOpMemberDecorate = 72, + SpvOpDecorationGroup = 73, + SpvOpGroupDecorate = 74, + SpvOpGroupMemberDecorate = 75, + SpvOpVectorExtractDynamic = 77, + SpvOpVectorInsertDynamic = 78, + SpvOpVectorShuffle = 79, + SpvOpCompositeConstruct = 80, + SpvOpCompositeExtract = 81, + SpvOpCompositeInsert = 82, + SpvOpCopyObject = 83, + SpvOpTranspose = 84, + SpvOpSampledImage = 86, + SpvOpImageSampleImplicitLod = 87, + SpvOpImageSampleExplicitLod = 88, + SpvOpImageSampleDrefImplicitLod = 89, + SpvOpImageSampleDrefExplicitLod = 90, + SpvOpImageSampleProjImplicitLod = 91, + SpvOpImageSampleProjExplicitLod = 92, + SpvOpImageSampleProjDrefImplicitLod = 93, + SpvOpImageSampleProjDrefExplicitLod = 94, + SpvOpImageFetch = 95, + SpvOpImageGather = 96, + SpvOpImageDrefGather = 97, + SpvOpImageRead = 98, + SpvOpImageWrite = 99, + SpvOpImage = 100, + SpvOpImageQueryFormat = 101, + SpvOpImageQueryOrder = 102, + SpvOpImageQuerySizeLod = 103, + SpvOpImageQuerySize = 104, + SpvOpImageQueryLod = 105, + SpvOpImageQueryLevels = 106, + SpvOpImageQuerySamples = 107, + SpvOpConvertFToU = 109, + SpvOpConvertFToS = 110, + SpvOpConvertSToF = 111, + SpvOpConvertUToF = 112, + SpvOpUConvert = 113, + SpvOpSConvert = 114, + SpvOpFConvert = 115, + SpvOpQuantizeToF16 = 116, + SpvOpConvertPtrToU = 117, + SpvOpSatConvertSToU = 118, + SpvOpSatConvertUToS = 119, + SpvOpConvertUToPtr = 120, + SpvOpPtrCastToGeneric = 121, + SpvOpGenericCastToPtr = 122, + SpvOpGenericCastToPtrExplicit = 123, + SpvOpBitcast = 124, + SpvOpSNegate = 126, + SpvOpFNegate = 127, + SpvOpIAdd = 128, + SpvOpFAdd = 129, + SpvOpISub = 130, + SpvOpFSub = 131, + SpvOpIMul = 132, + SpvOpFMul = 133, + SpvOpUDiv = 134, + SpvOpSDiv = 135, + SpvOpFDiv = 136, + SpvOpUMod = 137, + SpvOpSRem = 138, + SpvOpSMod = 139, + SpvOpFRem = 140, + SpvOpFMod = 141, + SpvOpVectorTimesScalar = 142, + SpvOpMatrixTimesScalar = 143, + SpvOpVectorTimesMatrix = 144, + SpvOpMatrixTimesVector = 145, + SpvOpMatrixTimesMatrix = 146, + SpvOpOuterProduct = 147, + SpvOpDot = 148, + SpvOpIAddCarry = 149, + SpvOpISubBorrow = 150, + SpvOpUMulExtended = 151, + SpvOpSMulExtended = 152, + SpvOpAny = 154, + SpvOpAll = 155, + SpvOpIsNan = 156, + SpvOpIsInf = 157, + SpvOpIsFinite = 158, + SpvOpIsNormal = 159, + SpvOpSignBitSet = 160, + SpvOpLessOrGreater = 161, + SpvOpOrdered = 162, + SpvOpUnordered = 163, + SpvOpLogicalEqual = 164, + SpvOpLogicalNotEqual = 165, + SpvOpLogicalOr = 166, + SpvOpLogicalAnd = 167, + SpvOpLogicalNot = 168, + SpvOpSelect = 169, + SpvOpIEqual = 170, + SpvOpINotEqual = 171, + SpvOpUGreaterThan = 172, + SpvOpSGreaterThan = 173, + SpvOpUGreaterThanEqual = 174, + SpvOpSGreaterThanEqual = 175, + SpvOpULessThan = 176, + SpvOpSLessThan = 177, + SpvOpULessThanEqual = 178, + SpvOpSLessThanEqual = 179, + SpvOpFOrdEqual = 180, + SpvOpFUnordEqual = 181, + SpvOpFOrdNotEqual = 182, + SpvOpFUnordNotEqual = 183, + SpvOpFOrdLessThan = 184, + SpvOpFUnordLessThan = 185, + SpvOpFOrdGreaterThan = 186, + SpvOpFUnordGreaterThan = 187, + SpvOpFOrdLessThanEqual = 188, + SpvOpFUnordLessThanEqual = 189, + SpvOpFOrdGreaterThanEqual = 190, + SpvOpFUnordGreaterThanEqual = 191, + SpvOpShiftRightLogical = 194, + SpvOpShiftRightArithmetic = 195, + SpvOpShiftLeftLogical = 196, + SpvOpBitwiseOr = 197, + SpvOpBitwiseXor = 198, + SpvOpBitwiseAnd = 199, + SpvOpNot = 200, + SpvOpBitFieldInsert = 201, + SpvOpBitFieldSExtract = 202, + SpvOpBitFieldUExtract = 203, + SpvOpBitReverse = 204, + SpvOpBitCount = 205, + SpvOpDPdx = 207, + SpvOpDPdy = 208, + SpvOpFwidth = 209, + SpvOpDPdxFine = 210, + SpvOpDPdyFine = 211, + SpvOpFwidthFine = 212, + SpvOpDPdxCoarse = 213, + SpvOpDPdyCoarse = 214, + SpvOpFwidthCoarse = 215, + SpvOpEmitVertex = 218, + SpvOpEndPrimitive = 219, + SpvOpEmitStreamVertex = 220, + SpvOpEndStreamPrimitive = 221, + SpvOpControlBarrier = 224, + SpvOpMemoryBarrier = 225, + SpvOpAtomicLoad = 227, + SpvOpAtomicStore = 228, + SpvOpAtomicExchange = 229, + SpvOpAtomicCompareExchange = 230, + SpvOpAtomicCompareExchangeWeak = 231, + SpvOpAtomicIIncrement = 232, + SpvOpAtomicIDecrement = 233, + SpvOpAtomicIAdd = 234, + SpvOpAtomicISub = 235, + SpvOpAtomicSMin = 236, + SpvOpAtomicUMin = 237, + SpvOpAtomicSMax = 238, + SpvOpAtomicUMax = 239, + SpvOpAtomicAnd = 240, + SpvOpAtomicOr = 241, + SpvOpAtomicXor = 242, + SpvOpPhi = 245, + SpvOpLoopMerge = 246, + SpvOpSelectionMerge = 247, + SpvOpLabel = 248, + SpvOpBranch = 249, + SpvOpBranchConditional = 250, + SpvOpSwitch = 251, + SpvOpKill = 252, + SpvOpReturn = 253, + SpvOpReturnValue = 254, + SpvOpUnreachable = 255, + SpvOpLifetimeStart = 256, + SpvOpLifetimeStop = 257, + SpvOpGroupAsyncCopy = 259, + SpvOpGroupWaitEvents = 260, + SpvOpGroupAll = 261, + SpvOpGroupAny = 262, + SpvOpGroupBroadcast = 263, + SpvOpGroupIAdd = 264, + SpvOpGroupFAdd = 265, + SpvOpGroupFMin = 266, + SpvOpGroupUMin = 267, + SpvOpGroupSMin = 268, + SpvOpGroupFMax = 269, + SpvOpGroupUMax = 270, + SpvOpGroupSMax = 271, + SpvOpReadPipe = 274, + SpvOpWritePipe = 275, + SpvOpReservedReadPipe = 276, + SpvOpReservedWritePipe = 277, + SpvOpReserveReadPipePackets = 278, + SpvOpReserveWritePipePackets = 279, + SpvOpCommitReadPipe = 280, + SpvOpCommitWritePipe = 281, + SpvOpIsValidReserveId = 282, + SpvOpGetNumPipePackets = 283, + SpvOpGetMaxPipePackets = 284, + SpvOpGroupReserveReadPipePackets = 285, + SpvOpGroupReserveWritePipePackets = 286, + SpvOpGroupCommitReadPipe = 287, + SpvOpGroupCommitWritePipe = 288, + SpvOpEnqueueMarker = 291, + SpvOpEnqueueKernel = 292, + SpvOpGetKernelNDrangeSubGroupCount = 293, + SpvOpGetKernelNDrangeMaxSubGroupSize = 294, + SpvOpGetKernelWorkGroupSize = 295, + SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, + SpvOpRetainEvent = 297, + SpvOpReleaseEvent = 298, + SpvOpCreateUserEvent = 299, + SpvOpIsValidEvent = 300, + SpvOpSetUserEventStatus = 301, + SpvOpCaptureEventProfilingInfo = 302, + SpvOpGetDefaultQueue = 303, + SpvOpBuildNDRange = 304, + SpvOpImageSparseSampleImplicitLod = 305, + SpvOpImageSparseSampleExplicitLod = 306, + SpvOpImageSparseSampleDrefImplicitLod = 307, + SpvOpImageSparseSampleDrefExplicitLod = 308, + SpvOpImageSparseSampleProjImplicitLod = 309, + SpvOpImageSparseSampleProjExplicitLod = 310, + SpvOpImageSparseSampleProjDrefImplicitLod = 311, + SpvOpImageSparseSampleProjDrefExplicitLod = 312, + SpvOpImageSparseFetch = 313, + SpvOpImageSparseGather = 314, + SpvOpImageSparseDrefGather = 315, + SpvOpImageSparseTexelsResident = 316, + SpvOpNoLine = 317, + SpvOpAtomicFlagTestAndSet = 318, + SpvOpAtomicFlagClear = 319, +} SpvOp; + +#endif // #ifndef spirv_H + diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c new file mode 100644 index 00000000000..c002457ce12 --- /dev/null +++ b/src/compiler/nir/spirv/spirv_to_nir.c @@ -0,0 +1,2654 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand ([email protected]) + * + */ + +#include "vtn_private.h" +#include "nir/nir_vla.h" +#include "nir/nir_control_flow.h" +#include "nir/nir_constant_expressions.h" + +static struct vtn_ssa_value * +vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + if (glsl_type_is_vector_or_scalar(type)) { + unsigned num_components = glsl_get_vector_elements(val->type); + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(b->shader, num_components); + + nir_instr_insert_before_cf_list(&b->impl->body, &undef->instr); + val->def = &undef->def; + } else { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + if (glsl_type_is_matrix(type)) { + const struct glsl_type *elem_type = + glsl_vector_type(glsl_get_base_type(type), + glsl_get_vector_elements(type)); + + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_undef_ssa_value(b, elem_type); + } else if (glsl_type_is_array(type)) { + const struct glsl_type *elem_type = glsl_get_array_element(type); + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_undef_ssa_value(b, elem_type); + } else { + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *elem_type = glsl_get_struct_field(type, i); + val->elems[i] = vtn_undef_ssa_value(b, elem_type); + } + } + } + + return val; +} + +static struct vtn_ssa_value * +vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, + const struct glsl_type *type) +{ + struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant); + + if (entry) + return entry->data; + + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + if (glsl_type_is_vector_or_scalar(type)) { + unsigned num_components = glsl_get_vector_elements(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, num_components); + + for (unsigned i = 0; i < num_components; i++) + load->value.u[i] = constant->value.u[i]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + val->def = &load->def; + } else { + assert(glsl_type_is_matrix(type)); + unsigned rows = glsl_get_vector_elements(val->type); + unsigned columns = glsl_get_matrix_columns(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, columns); + + for (unsigned i = 0; i < columns; i++) { + struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value); + col_val->type = glsl_get_column_type(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, rows); + + for (unsigned j = 0; j < rows; j++) + load->value.u[j] = constant->value.u[rows * i + j]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + col_val->def = &load->def; + + val->elems[i] = col_val; + } + } + break; + + case GLSL_TYPE_ARRAY: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + const struct glsl_type *elem_type = glsl_get_array_element(val->type); + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + break; + } + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *elem_type = + glsl_get_struct_field(val->type, i); + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + } + break; + } + + default: + unreachable("bad constant type"); + } + + return val; +} + +struct vtn_ssa_value * +vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) +{ + struct vtn_value *val = vtn_untyped_value(b, value_id); + switch (val->value_type) { + case vtn_value_type_undef: + return vtn_undef_ssa_value(b, val->type->type); + + case vtn_value_type_constant: + return vtn_const_ssa_value(b, val->constant, val->const_type); + + case vtn_value_type_ssa: + return val->ssa; + + case vtn_value_type_access_chain: + /* This is needed for function parameters */ + return vtn_variable_load(b, val->access_chain); + + default: + unreachable("Invalid type for an SSA value"); + } +} + +static char * +vtn_string_literal(struct vtn_builder *b, const uint32_t *words, + unsigned word_count, unsigned *words_used) +{ + char *dup = ralloc_strndup(b, (char *)words, word_count * sizeof(*words)); + if (words_used) { + /* Ammount of space taken by the string (including the null) */ + unsigned len = strlen(dup) + 1; + *words_used = DIV_ROUND_UP(len, sizeof(*words)); + } + return dup; +} + +const uint32_t * +vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, + const uint32_t *end, vtn_instruction_handler handler) +{ + b->file = NULL; + b->line = -1; + b->col = -1; + + const uint32_t *w = start; + while (w < end) { + SpvOp opcode = w[0] & SpvOpCodeMask; + unsigned count = w[0] >> SpvWordCountShift; + assert(count >= 1 && w + count <= end); + + switch (opcode) { + case SpvOpNop: + break; /* Do nothing */ + + case SpvOpLine: + b->file = vtn_value(b, w[1], vtn_value_type_string)->str; + b->line = w[2]; + b->col = w[3]; + break; + + case SpvOpNoLine: + b->file = NULL; + b->line = -1; + b->col = -1; + break; + + default: + if (!handler(b, opcode, w, count)) + return w; + break; + } + + w += count; + } + assert(w == end); + return w; +} + +static void +vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpExtInstImport: { + struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension); + if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) { + val->ext_handler = vtn_handle_glsl450_instruction; + } else { + assert(!"Unsupported extension"); + } + break; + } + + case SpvOpExtInst: { + struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension); + bool handled = val->ext_handler(b, w[4], w, count); + (void)handled; + assert(handled); + break; + } + + default: + unreachable("Unhandled opcode"); + } +} + +static void +_foreach_decoration_helper(struct vtn_builder *b, + struct vtn_value *base_value, + int parent_member, + struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data) +{ + for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { + int member; + if (dec->scope == VTN_DEC_DECORATION) { + member = parent_member; + } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) { + assert(parent_member == -1); + member = dec->scope - VTN_DEC_STRUCT_MEMBER0; + } else { + /* Not a decoration */ + continue; + } + + if (dec->group) { + assert(dec->group->value_type == vtn_value_type_decoration_group); + _foreach_decoration_helper(b, base_value, member, dec->group, + cb, data); + } else { + cb(b, base_value, member, dec, data); + } + } +} + +/** Iterates (recursively if needed) over all of the decorations on a value + * + * This function iterates over all of the decorations applied to a given + * value. If it encounters a decoration group, it recurses into the group + * and iterates over all of those decorations as well. + */ +void +vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data) +{ + _foreach_decoration_helper(b, value, -1, value, cb, data); +} + +void +vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value, + vtn_execution_mode_foreach_cb cb, void *data) +{ + for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { + if (dec->scope != VTN_DEC_EXECUTION_MODE) + continue; + + assert(dec->group == NULL); + cb(b, value, dec, data); + } +} + +static void +vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + const uint32_t *w_end = w + count; + const uint32_t target = w[1]; + w += 2; + + switch (opcode) { + case SpvOpDecorationGroup: + vtn_push_value(b, target, vtn_value_type_decoration_group); + break; + + case SpvOpDecorate: + case SpvOpMemberDecorate: + case SpvOpExecutionMode: { + struct vtn_value *val = &b->values[target]; + + struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); + switch (opcode) { + case SpvOpDecorate: + dec->scope = VTN_DEC_DECORATION; + break; + case SpvOpMemberDecorate: + dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++); + break; + case SpvOpExecutionMode: + dec->scope = VTN_DEC_EXECUTION_MODE; + break; + default: + unreachable("Invalid decoration opcode"); + } + dec->decoration = *(w++); + dec->literals = w; + + /* Link into the list */ + dec->next = val->decoration; + val->decoration = dec; + break; + } + + case SpvOpGroupMemberDecorate: + case SpvOpGroupDecorate: { + struct vtn_value *group = + vtn_value(b, target, vtn_value_type_decoration_group); + + for (; w < w_end; w++) { + struct vtn_value *val = vtn_untyped_value(b, *w); + struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); + + dec->group = group; + if (opcode == SpvOpGroupDecorate) { + dec->scope = VTN_DEC_DECORATION; + } else { + dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++); + } + + /* Link into the list */ + dec->next = val->decoration; + val->decoration = dec; + } + break; + } + + default: + unreachable("Unhandled opcode"); + } +} + +struct member_decoration_ctx { + struct glsl_struct_field *fields; + struct vtn_type *type; +}; + +/* does a shallow copy of a vtn_type */ + +static struct vtn_type * +vtn_type_copy(struct vtn_builder *b, struct vtn_type *src) +{ + struct vtn_type *dest = ralloc(b, struct vtn_type); + dest->type = src->type; + dest->is_builtin = src->is_builtin; + if (src->is_builtin) + dest->builtin = src->builtin; + + if (!glsl_type_is_scalar(src->type)) { + switch (glsl_get_base_type(src->type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_ARRAY: + dest->row_major = src->row_major; + dest->stride = src->stride; + dest->array_element = src->array_element; + break; + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(src->type); + + dest->members = ralloc_array(b, struct vtn_type *, elems); + memcpy(dest->members, src->members, elems * sizeof(struct vtn_type *)); + + dest->offsets = ralloc_array(b, unsigned, elems); + memcpy(dest->offsets, src->offsets, elems * sizeof(unsigned)); + break; + } + + default: + unreachable("unhandled type"); + } + } + + return dest; +} + +static struct vtn_type * +mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member) +{ + type->members[member] = vtn_type_copy(b, type->members[member]); + type = type->members[member]; + + /* We may have an array of matrices.... Oh, joy! */ + while (glsl_type_is_array(type->type)) { + type->array_element = vtn_type_copy(b, type->array_element); + type = type->array_element; + } + + assert(glsl_type_is_matrix(type->type)); + + return type; +} + +static void +struct_member_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_ctx) +{ + struct member_decoration_ctx *ctx = void_ctx; + + if (member < 0) + return; + + switch (dec->decoration) { + case SpvDecorationRelaxedPrecision: + break; /* FIXME: Do nothing with this for now. */ + case SpvDecorationNoPerspective: + ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case SpvDecorationFlat: + ctx->fields[member].interpolation = INTERP_QUALIFIER_FLAT; + break; + case SpvDecorationCentroid: + ctx->fields[member].centroid = true; + break; + case SpvDecorationSample: + ctx->fields[member].sample = true; + break; + case SpvDecorationLocation: + ctx->fields[member].location = dec->literals[0]; + break; + case SpvDecorationBuiltIn: + ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]); + ctx->type->members[member]->is_builtin = true; + ctx->type->members[member]->builtin = dec->literals[0]; + ctx->type->builtin_block = true; + break; + case SpvDecorationOffset: + ctx->type->offsets[member] = dec->literals[0]; + break; + case SpvDecorationMatrixStride: + mutable_matrix_member(b, ctx->type, member)->stride = dec->literals[0]; + break; + case SpvDecorationColMajor: + break; /* Nothing to do here. Column-major is the default. */ + case SpvDecorationRowMajor: + mutable_matrix_member(b, ctx->type, member)->row_major = true; + break; + default: + unreachable("Unhandled member decoration"); + } +} + +static void +type_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *ctx) +{ + struct vtn_type *type = val->type; + + if (member != -1) + return; + + switch (dec->decoration) { + case SpvDecorationArrayStride: + type->stride = dec->literals[0]; + break; + case SpvDecorationBlock: + type->block = true; + break; + case SpvDecorationBufferBlock: + type->buffer_block = true; + break; + case SpvDecorationGLSLShared: + case SpvDecorationGLSLPacked: + /* Ignore these, since we get explicit offsets anyways */ + break; + + case SpvDecorationStream: + assert(dec->literals[0] == 0); + break; + + default: + unreachable("Unhandled type decoration"); + } +} + +static unsigned +translate_image_format(SpvImageFormat format) +{ + switch (format) { + case SpvImageFormatUnknown: return 0; /* GL_NONE */ + case SpvImageFormatRgba32f: return 0x8814; /* GL_RGBA32F */ + case SpvImageFormatRgba16f: return 0x881A; /* GL_RGBA16F */ + case SpvImageFormatR32f: return 0x822E; /* GL_R32F */ + case SpvImageFormatRgba8: return 0x8058; /* GL_RGBA8 */ + case SpvImageFormatRgba8Snorm: return 0x8F97; /* GL_RGBA8_SNORM */ + case SpvImageFormatRg32f: return 0x8230; /* GL_RG32F */ + case SpvImageFormatRg16f: return 0x822F; /* GL_RG16F */ + case SpvImageFormatR11fG11fB10f: return 0x8C3A; /* GL_R11F_G11F_B10F */ + case SpvImageFormatR16f: return 0x822D; /* GL_R16F */ + case SpvImageFormatRgba16: return 0x805B; /* GL_RGBA16 */ + case SpvImageFormatRgb10A2: return 0x8059; /* GL_RGB10_A2 */ + case SpvImageFormatRg16: return 0x822C; /* GL_RG16 */ + case SpvImageFormatRg8: return 0x822B; /* GL_RG8 */ + case SpvImageFormatR16: return 0x822A; /* GL_R16 */ + case SpvImageFormatR8: return 0x8229; /* GL_R8 */ + case SpvImageFormatRgba16Snorm: return 0x8F9B; /* GL_RGBA16_SNORM */ + case SpvImageFormatRg16Snorm: return 0x8F99; /* GL_RG16_SNORM */ + case SpvImageFormatRg8Snorm: return 0x8F95; /* GL_RG8_SNORM */ + case SpvImageFormatR16Snorm: return 0x8F98; /* GL_R16_SNORM */ + case SpvImageFormatR8Snorm: return 0x8F94; /* GL_R8_SNORM */ + case SpvImageFormatRgba32i: return 0x8D82; /* GL_RGBA32I */ + case SpvImageFormatRgba16i: return 0x8D88; /* GL_RGBA16I */ + case SpvImageFormatRgba8i: return 0x8D8E; /* GL_RGBA8I */ + case SpvImageFormatR32i: return 0x8235; /* GL_R32I */ + case SpvImageFormatRg32i: return 0x823B; /* GL_RG32I */ + case SpvImageFormatRg16i: return 0x8239; /* GL_RG16I */ + case SpvImageFormatRg8i: return 0x8237; /* GL_RG8I */ + case SpvImageFormatR16i: return 0x8233; /* GL_R16I */ + case SpvImageFormatR8i: return 0x8231; /* GL_R8I */ + case SpvImageFormatRgba32ui: return 0x8D70; /* GL_RGBA32UI */ + case SpvImageFormatRgba16ui: return 0x8D76; /* GL_RGBA16UI */ + case SpvImageFormatRgba8ui: return 0x8D7C; /* GL_RGBA8UI */ + case SpvImageFormatR32ui: return 0x8236; /* GL_R32UI */ + case SpvImageFormatRgb10a2ui: return 0x906F; /* GL_RGB10_A2UI */ + case SpvImageFormatRg32ui: return 0x823C; /* GL_RG32UI */ + case SpvImageFormatRg16ui: return 0x823A; /* GL_RG16UI */ + case SpvImageFormatRg8ui: return 0x8238; /* GL_RG8UI */ + case SpvImageFormatR16ui: return 0x823A; /* GL_RG16UI */ + case SpvImageFormatR8ui: return 0x8232; /* GL_R8UI */ + default: + assert(!"Invalid image format"); + return 0; + } +} + +static void +vtn_handle_type(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type); + + val->type = rzalloc(b, struct vtn_type); + val->type->is_builtin = false; + val->type->val = val; + + switch (opcode) { + case SpvOpTypeVoid: + val->type->type = glsl_void_type(); + break; + case SpvOpTypeBool: + val->type->type = glsl_bool_type(); + break; + case SpvOpTypeInt: + val->type->type = glsl_int_type(); + break; + case SpvOpTypeFloat: + val->type->type = glsl_float_type(); + break; + + case SpvOpTypeVector: { + struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type; + unsigned elems = w[3]; + + assert(glsl_type_is_scalar(base->type)); + val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems); + + /* Vectors implicitly have sizeof(base_type) stride. For now, this + * is always 4 bytes. This will have to change if we want to start + * supporting doubles or half-floats. + */ + val->type->stride = 4; + val->type->array_element = base; + break; + } + + case SpvOpTypeMatrix: { + struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type; + unsigned columns = w[3]; + + assert(glsl_type_is_vector(base->type)); + val->type->type = glsl_matrix_type(glsl_get_base_type(base->type), + glsl_get_vector_elements(base->type), + columns); + assert(!glsl_type_is_error(val->type->type)); + val->type->array_element = base; + val->type->row_major = false; + val->type->stride = 0; + break; + } + + case SpvOpTypeRuntimeArray: + case SpvOpTypeArray: { + struct vtn_type *array_element = + vtn_value(b, w[2], vtn_value_type_type)->type; + + unsigned length; + if (opcode == SpvOpTypeRuntimeArray) { + /* A length of 0 is used to denote unsized arrays */ + length = 0; + } else { + length = + vtn_value(b, w[3], vtn_value_type_constant)->constant->value.u[0]; + } + + val->type->type = glsl_array_type(array_element->type, length); + val->type->array_element = array_element; + val->type->stride = 0; + break; + } + + case SpvOpTypeStruct: { + unsigned num_fields = count - 2; + val->type->members = ralloc_array(b, struct vtn_type *, num_fields); + val->type->offsets = ralloc_array(b, unsigned, num_fields); + + NIR_VLA(struct glsl_struct_field, fields, count); + for (unsigned i = 0; i < num_fields; i++) { + val->type->members[i] = + vtn_value(b, w[i + 2], vtn_value_type_type)->type; + fields[i] = (struct glsl_struct_field) { + .type = val->type->members[i]->type, + .name = ralloc_asprintf(b, "field%d", i), + .location = -1, + }; + } + + struct member_decoration_ctx ctx = { + .fields = fields, + .type = val->type + }; + + vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx); + + const char *name = val->name ? val->name : "struct"; + + val->type->type = glsl_struct_type(fields, num_fields, name); + break; + } + + case SpvOpTypeFunction: { + const struct glsl_type *return_type = + vtn_value(b, w[2], vtn_value_type_type)->type->type; + NIR_VLA(struct glsl_function_param, params, count - 3); + for (unsigned i = 0; i < count - 3; i++) { + params[i].type = vtn_value(b, w[i + 3], vtn_value_type_type)->type->type; + + /* FIXME: */ + params[i].in = true; + params[i].out = true; + } + val->type->type = glsl_function_type(return_type, params, count - 3); + break; + } + + case SpvOpTypePointer: + /* FIXME: For now, we'll just do the really lame thing and return + * the same type. The validator should ensure that the proper number + * of dereferences happen + */ + val->type = vtn_value(b, w[3], vtn_value_type_type)->type; + break; + + case SpvOpTypeImage: { + const struct glsl_type *sampled_type = + vtn_value(b, w[2], vtn_value_type_type)->type->type; + + assert(glsl_type_is_vector_or_scalar(sampled_type)); + + enum glsl_sampler_dim dim; + switch ((SpvDim)w[3]) { + case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break; + case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break; + case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break; + case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break; + case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break; + case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break; + default: + unreachable("Invalid SPIR-V Sampler dimension"); + } + + bool is_shadow = w[4]; + bool is_array = w[5]; + bool multisampled = w[6]; + unsigned sampled = w[7]; + SpvImageFormat format = w[8]; + + if (count > 9) + val->type->access_qualifier = w[9]; + else + val->type->access_qualifier = SpvAccessQualifierReadWrite; + + assert(!multisampled && "FIXME: Handl multi-sampled textures"); + + val->type->image_format = translate_image_format(format); + + if (sampled == 1) { + val->type->type = glsl_sampler_type(dim, is_shadow, is_array, + glsl_get_base_type(sampled_type)); + } else if (sampled == 2) { + assert(format); + assert(!is_shadow); + val->type->type = glsl_image_type(dim, is_array, + glsl_get_base_type(sampled_type)); + } else { + assert(!"We need to know if the image will be sampled"); + } + break; + } + + case SpvOpTypeSampledImage: + val->type = vtn_value(b, w[2], vtn_value_type_type)->type; + break; + + case SpvOpTypeSampler: + /* The actual sampler type here doesn't really matter. It gets + * thrown away the moment you combine it with an image. What really + * matters is that it's a sampler type as opposed to an integer type + * so the backend knows what to do. + * + * TODO: Eventually we should consider adding a "bare sampler" type + * to glsl_types. + */ + val->type->type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, + GLSL_TYPE_FLOAT); + break; + + case SpvOpTypeOpaque: + case SpvOpTypeEvent: + case SpvOpTypeDeviceEvent: + case SpvOpTypeReserveId: + case SpvOpTypeQueue: + case SpvOpTypePipe: + default: + unreachable("Unhandled opcode"); + } + + vtn_foreach_decoration(b, val, type_decoration_cb, NULL); +} + +static nir_constant * +vtn_null_constant(struct vtn_builder *b, const struct glsl_type *type) +{ + nir_constant *c = rzalloc(b, nir_constant); + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + /* Nothing to do here. It's already initialized to zero */ + break; + + case GLSL_TYPE_ARRAY: + assert(glsl_get_length(type) > 0); + c->num_elements = glsl_get_length(type); + c->elements = ralloc_array(b, nir_constant *, c->num_elements); + + c->elements[0] = vtn_null_constant(b, glsl_get_array_element(type)); + for (unsigned i = 1; i < c->num_elements; i++) + c->elements[i] = c->elements[0]; + break; + + case GLSL_TYPE_STRUCT: + c->num_elements = glsl_get_length(type); + c->elements = ralloc_array(b, nir_constant *, c->num_elements); + + for (unsigned i = 0; i < c->num_elements; i++) { + c->elements[i] = vtn_null_constant(b, glsl_get_struct_field(type, i)); + } + break; + + default: + unreachable("Invalid type for null constant"); + } + + return c; +} + +static void +spec_constant_deocoration_cb(struct vtn_builder *b, struct vtn_value *v, + int member, const struct vtn_decoration *dec, + void *data) +{ + assert(member == -1); + if (dec->decoration != SpvDecorationSpecId) + return; + + uint32_t *const_value = data; + + for (unsigned i = 0; i < b->num_specializations; i++) { + if (b->specializations[i].id == dec->literals[0]) { + *const_value = b->specializations[i].data; + return; + } + } +} + +static uint32_t +get_specialization(struct vtn_builder *b, struct vtn_value *val, + uint32_t const_value) +{ + vtn_foreach_decoration(b, val, spec_constant_deocoration_cb, &const_value); + return const_value; +} + +static void +vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant); + val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->constant = rzalloc(b, nir_constant); + switch (opcode) { + case SpvOpConstantTrue: + assert(val->const_type == glsl_bool_type()); + val->constant->value.u[0] = NIR_TRUE; + break; + case SpvOpConstantFalse: + assert(val->const_type == glsl_bool_type()); + val->constant->value.u[0] = NIR_FALSE; + break; + + case SpvOpSpecConstantTrue: + case SpvOpSpecConstantFalse: { + assert(val->const_type == glsl_bool_type()); + uint32_t int_val = + get_specialization(b, val, (opcode == SpvOpSpecConstantTrue)); + val->constant->value.u[0] = int_val ? NIR_TRUE : NIR_FALSE; + break; + } + + case SpvOpConstant: + assert(glsl_type_is_scalar(val->const_type)); + val->constant->value.u[0] = w[3]; + break; + case SpvOpSpecConstant: + assert(glsl_type_is_scalar(val->const_type)); + val->constant->value.u[0] = get_specialization(b, val, w[3]); + break; + case SpvOpSpecConstantComposite: + case SpvOpConstantComposite: { + unsigned elem_count = count - 3; + nir_constant **elems = ralloc_array(b, nir_constant *, elem_count); + for (unsigned i = 0; i < elem_count; i++) + elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant; + + switch (glsl_get_base_type(val->const_type)) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (glsl_type_is_matrix(val->const_type)) { + unsigned rows = glsl_get_vector_elements(val->const_type); + assert(glsl_get_matrix_columns(val->const_type) == elem_count); + for (unsigned i = 0; i < elem_count; i++) + for (unsigned j = 0; j < rows; j++) + val->constant->value.u[rows * i + j] = elems[i]->value.u[j]; + } else { + assert(glsl_type_is_vector(val->const_type)); + assert(glsl_get_vector_elements(val->const_type) == elem_count); + for (unsigned i = 0; i < elem_count; i++) + val->constant->value.u[i] = elems[i]->value.u[0]; + } + ralloc_free(elems); + break; + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_ARRAY: + ralloc_steal(val->constant, elems); + val->constant->num_elements = elem_count; + val->constant->elements = elems; + break; + + default: + unreachable("Unsupported type for constants"); + } + break; + } + + case SpvOpSpecConstantOp: { + SpvOp opcode = get_specialization(b, val, w[3]); + switch (opcode) { + case SpvOpVectorShuffle: { + struct vtn_value *v0 = vtn_value(b, w[4], vtn_value_type_constant); + struct vtn_value *v1 = vtn_value(b, w[5], vtn_value_type_constant); + unsigned len0 = glsl_get_vector_elements(v0->const_type); + unsigned len1 = glsl_get_vector_elements(v1->const_type); + + uint32_t u[8]; + for (unsigned i = 0; i < len0; i++) + u[i] = v0->constant->value.u[i]; + for (unsigned i = 0; i < len1; i++) + u[len0 + i] = v1->constant->value.u[i]; + + for (unsigned i = 0; i < count - 6; i++) { + uint32_t comp = w[i + 6]; + if (comp == (uint32_t)-1) { + val->constant->value.u[i] = 0xdeadbeef; + } else { + val->constant->value.u[i] = u[comp]; + } + } + return; + } + + case SpvOpCompositeExtract: + case SpvOpCompositeInsert: { + struct vtn_value *comp; + unsigned deref_start; + struct nir_constant **c; + if (opcode == SpvOpCompositeExtract) { + comp = vtn_value(b, w[4], vtn_value_type_constant); + deref_start = 5; + c = &comp->constant; + } else { + comp = vtn_value(b, w[5], vtn_value_type_constant); + deref_start = 6; + val->constant = nir_constant_clone(comp->constant, + (nir_variable *)b); + c = &val->constant; + } + + int elem = -1; + const struct glsl_type *type = comp->const_type; + for (unsigned i = deref_start; i < count; i++) { + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + /* If we hit this granularity, we're picking off an element */ + if (elem < 0) + elem = 0; + + if (glsl_type_is_matrix(type)) { + elem += w[i] * glsl_get_vector_elements(type); + type = glsl_get_column_type(type); + } else { + assert(glsl_type_is_vector(type)); + elem += w[i]; + type = glsl_scalar_type(glsl_get_base_type(type)); + } + continue; + + case GLSL_TYPE_ARRAY: + c = &(*c)->elements[w[i]]; + type = glsl_get_array_element(type); + continue; + + case GLSL_TYPE_STRUCT: + c = &(*c)->elements[w[i]]; + type = glsl_get_struct_field(type, w[i]); + continue; + + default: + unreachable("Invalid constant type"); + } + } + + if (opcode == SpvOpCompositeExtract) { + if (elem == -1) { + val->constant = *c; + } else { + unsigned num_components = glsl_get_vector_elements(type); + for (unsigned i = 0; i < num_components; i++) + val->constant->value.u[i] = (*c)->value.u[elem + i]; + } + } else { + struct vtn_value *insert = + vtn_value(b, w[4], vtn_value_type_constant); + assert(insert->const_type == type); + if (elem == -1) { + *c = insert->constant; + } else { + unsigned num_components = glsl_get_vector_elements(type); + for (unsigned i = 0; i < num_components; i++) + (*c)->value.u[elem + i] = insert->constant->value.u[i]; + } + } + return; + } + + default: { + bool swap; + nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap); + + unsigned num_components = glsl_get_vector_elements(val->const_type); + + nir_const_value src[3]; + assert(count <= 7); + for (unsigned i = 0; i < count - 4; i++) { + nir_constant *c = + vtn_value(b, w[4 + i], vtn_value_type_constant)->constant; + + unsigned j = swap ? 1 - i : i; + for (unsigned k = 0; k < num_components; k++) + src[j].u[k] = c->value.u[k]; + } + + nir_const_value res = nir_eval_const_opcode(op, num_components, src); + + for (unsigned k = 0; k < num_components; k++) + val->constant->value.u[k] = res.u[k]; + + return; + } /* default */ + } + } + + case SpvOpConstantNull: + val->constant = vtn_null_constant(b, val->const_type); + break; + + case SpvOpConstantSampler: + assert(!"OpConstantSampler requires Kernel Capability"); + break; + + default: + unreachable("Unhandled opcode"); + } +} + +static void +vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct nir_function *callee = + vtn_value(b, w[3], vtn_value_type_function)->func->impl->function; + + nir_call_instr *call = nir_call_instr_create(b->nb.shader, callee); + for (unsigned i = 0; i < call->num_params; i++) { + unsigned arg_id = w[4 + i]; + struct vtn_value *arg = vtn_untyped_value(b, arg_id); + if (arg->value_type == vtn_value_type_access_chain) { + nir_deref_var *d = vtn_access_chain_to_deref(b, arg->access_chain); + call->params[i] = nir_deref_as_var(nir_copy_deref(call, &d->deref)); + } else { + struct vtn_ssa_value *arg_ssa = vtn_ssa_value(b, arg_id); + + /* Make a temporary to store the argument in */ + nir_variable *tmp = + nir_local_variable_create(b->impl, arg_ssa->type, "arg_tmp"); + call->params[i] = nir_deref_var_create(call, tmp); + + vtn_local_store(b, arg_ssa, call->params[i]); + } + } + + nir_variable *out_tmp = NULL; + if (!glsl_type_is_void(callee->return_type)) { + out_tmp = nir_local_variable_create(b->impl, callee->return_type, + "out_tmp"); + call->return_deref = nir_deref_var_create(call, out_tmp); + } + + nir_builder_instr_insert(&b->nb, &call->instr); + + if (glsl_type_is_void(callee->return_type)) { + vtn_push_value(b, w[2], vtn_value_type_undef); + } else { + struct vtn_value *retval = vtn_push_value(b, w[2], vtn_value_type_ssa); + retval->ssa = vtn_local_load(b, call->return_deref); + } +} + +struct vtn_ssa_value * +vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + if (!glsl_type_is_vector_or_scalar(type)) { + unsigned elems = glsl_get_length(type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *child_type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + child_type = glsl_get_column_type(type); + break; + case GLSL_TYPE_ARRAY: + child_type = glsl_get_array_element(type); + break; + case GLSL_TYPE_STRUCT: + child_type = glsl_get_struct_field(type, i); + break; + default: + unreachable("unkown base type"); + } + + val->elems[i] = vtn_create_ssa_value(b, child_type); + } + } + + return val; +} + +static nir_tex_src +vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) +{ + nir_tex_src src; + src.src = nir_src_for_ssa(vtn_ssa_value(b, index)->def); + src.src_type = type; + return src; +} + +static void +vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode == SpvOpSampledImage) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_sampled_image); + val->sampled_image = ralloc(b, struct vtn_sampled_image); + val->sampled_image->image = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + val->sampled_image->sampler = + vtn_value(b, w[4], vtn_value_type_access_chain)->access_chain; + return; + } else if (opcode == SpvOpImage) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_access_chain); + struct vtn_value *src_val = vtn_untyped_value(b, w[3]); + if (src_val->value_type == vtn_value_type_sampled_image) { + val->access_chain = src_val->sampled_image->image; + } else { + assert(src_val->value_type == vtn_value_type_access_chain); + val->access_chain = src_val->access_chain; + } + return; + } + + struct vtn_type *ret_type = vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + + struct vtn_sampled_image sampled; + struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]); + if (sampled_val->value_type == vtn_value_type_sampled_image) { + sampled = *sampled_val->sampled_image; + } else { + assert(sampled_val->value_type == vtn_value_type_access_chain); + sampled.image = NULL; + sampled.sampler = sampled_val->access_chain; + } + + nir_tex_src srcs[8]; /* 8 should be enough */ + nir_tex_src *p = srcs; + + unsigned idx = 4; + + bool has_coord = false; + switch (opcode) { + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQueryLod: { + /* All these types have the coordinate as their first real argument */ + struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]); + has_coord = true; + p->src = nir_src_for_ssa(coord->def); + p->src_type = nir_tex_src_coord; + p++; + break; + } + + default: + break; + } + + /* These all have an explicit depth value as their next source */ + switch (opcode) { + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor); + break; + default: + break; + } + + /* For OpImageQuerySizeLod, we always have an LOD */ + if (opcode == SpvOpImageQuerySizeLod) + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); + + /* Figure out the base texture operation */ + nir_texop texop; + switch (opcode) { + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + texop = nir_texop_tex; + break; + + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + texop = nir_texop_txl; + break; + + case SpvOpImageFetch: + texop = nir_texop_txf; + break; + + case SpvOpImageGather: + case SpvOpImageDrefGather: + texop = nir_texop_tg4; + break; + + case SpvOpImageQuerySizeLod: + case SpvOpImageQuerySize: + texop = nir_texop_txs; + break; + + case SpvOpImageQueryLod: + texop = nir_texop_lod; + break; + + case SpvOpImageQueryLevels: + texop = nir_texop_query_levels; + break; + + case SpvOpImageQuerySamples: + default: + unreachable("Unhandled opcode"); + } + + /* Now we need to handle some number of optional arguments */ + if (idx < count) { + uint32_t operands = w[idx++]; + + if (operands & SpvImageOperandsBiasMask) { + assert(texop == nir_texop_tex); + texop = nir_texop_txb; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_bias); + } + + if (operands & SpvImageOperandsLodMask) { + assert(texop == nir_texop_txl || texop == nir_texop_txf || + texop == nir_texop_txs); + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); + } + + if (operands & SpvImageOperandsGradMask) { + assert(texop == nir_texop_tex); + texop = nir_texop_txd; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddx); + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddy); + } + + if (operands & SpvImageOperandsOffsetMask || + operands & SpvImageOperandsConstOffsetMask) + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_offset); + + if (operands & SpvImageOperandsConstOffsetsMask) + assert(!"Constant offsets to texture gather not yet implemented"); + + if (operands & SpvImageOperandsSampleMask) { + assert(texop == nir_texop_txf); + texop = nir_texop_txf_ms; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index); + } + } + /* We should have now consumed exactly all of the arguments */ + assert(idx == count); + + nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); + instr->op = texop; + + memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src)); + + const struct glsl_type *image_type; + if (sampled.image) { + image_type = sampled.image->var->var->interface_type; + } else { + image_type = sampled.sampler->var->var->interface_type; + } + + instr->sampler_dim = glsl_get_sampler_dim(image_type); + instr->is_array = glsl_sampler_type_is_array(image_type); + instr->is_shadow = glsl_sampler_type_is_shadow(image_type); + instr->is_new_style_shadow = instr->is_shadow; + + if (has_coord) { + switch (instr->sampler_dim) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + instr->coord_components = 1; + break; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_RECT: + instr->coord_components = 2; + break; + case GLSL_SAMPLER_DIM_3D: + case GLSL_SAMPLER_DIM_CUBE: + case GLSL_SAMPLER_DIM_MS: + instr->coord_components = 3; + break; + default: + assert("Invalid sampler type"); + } + + if (instr->is_array) + instr->coord_components++; + } else { + instr->coord_components = 0; + } + + switch (glsl_get_sampler_result_type(image_type)) { + case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break; + case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; + case GLSL_TYPE_UINT: instr->dest_type = nir_type_uint; break; + case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break; + default: + unreachable("Invalid base type for sampler result"); + } + + nir_deref_var *sampler = vtn_access_chain_to_deref(b, sampled.sampler); + instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); + if (sampled.image) { + nir_deref_var *image = vtn_access_chain_to_deref(b, sampled.image); + instr->texture = nir_deref_as_var(nir_copy_deref(instr, &image->deref)); + } else { + instr->texture = NULL; + } + + nir_ssa_dest_init(&instr->instr, &instr->dest, + nir_tex_instr_dest_size(instr), NULL); + + assert(glsl_get_vector_elements(ret_type->type) == + nir_tex_instr_dest_size(instr)); + + val->ssa = vtn_create_ssa_value(b, ret_type->type); + val->ssa->def = &instr->dest.ssa; + + nir_builder_instr_insert(&b->nb, &instr->instr); +} + +static nir_ssa_def * +get_image_coord(struct vtn_builder *b, uint32_t value) +{ + struct vtn_ssa_value *coord = vtn_ssa_value(b, value); + + /* The image_load_store intrinsics assume a 4-dim coordinate */ + unsigned dim = glsl_get_vector_elements(coord->type); + unsigned swizzle[4]; + for (unsigned i = 0; i < 4; i++) + swizzle[i] = MIN2(i, dim - 1); + + return nir_swizzle(&b->nb, coord->def, swizzle, 4, false); +} + +static void +vtn_handle_image(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + /* Just get this one out of the way */ + if (opcode == SpvOpImageTexelPointer) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_image_pointer); + val->image = ralloc(b, struct vtn_image_pointer); + + val->image->image = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + val->image->coord = get_image_coord(b, w[4]); + val->image->sample = vtn_ssa_value(b, w[5])->def; + return; + } + + struct vtn_image_pointer image; + + switch (opcode) { + case SpvOpAtomicExchange: + case SpvOpAtomicCompareExchange: + case SpvOpAtomicCompareExchangeWeak: + case SpvOpAtomicIIncrement: + case SpvOpAtomicIDecrement: + case SpvOpAtomicIAdd: + case SpvOpAtomicISub: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: + image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image; + break; + + case SpvOpImageQuerySize: + image.image = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + image.coord = NULL; + image.sample = NULL; + break; + + case SpvOpImageRead: + image.image = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + image.coord = get_image_coord(b, w[4]); + + if (count > 5 && (w[5] & SpvImageOperandsSampleMask)) { + assert(w[5] == SpvImageOperandsSampleMask); + image.sample = vtn_ssa_value(b, w[6])->def; + } else { + image.sample = nir_ssa_undef(&b->nb, 1); + } + break; + + case SpvOpImageWrite: + image.image = + vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain; + image.coord = get_image_coord(b, w[2]); + + /* texel = w[3] */ + + if (count > 4 && (w[4] & SpvImageOperandsSampleMask)) { + assert(w[4] == SpvImageOperandsSampleMask); + image.sample = vtn_ssa_value(b, w[5])->def; + } else { + image.sample = nir_ssa_undef(&b->nb, 1); + } + break; + + default: + unreachable("Invalid image opcode"); + } + + nir_intrinsic_op op; + switch (opcode) { +#define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_##N; break; + OP(ImageQuerySize, size) + OP(ImageRead, load) + OP(ImageWrite, store) + OP(AtomicExchange, atomic_exchange) + OP(AtomicCompareExchange, atomic_comp_swap) + OP(AtomicIIncrement, atomic_add) + OP(AtomicIDecrement, atomic_add) + OP(AtomicIAdd, atomic_add) + OP(AtomicISub, atomic_add) + OP(AtomicSMin, atomic_min) + OP(AtomicUMin, atomic_min) + OP(AtomicSMax, atomic_max) + OP(AtomicUMax, atomic_max) + OP(AtomicAnd, atomic_and) + OP(AtomicOr, atomic_or) + OP(AtomicXor, atomic_xor) +#undef OP + default: + unreachable("Invalid image opcode"); + } + + nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); + + nir_deref_var *image_deref = vtn_access_chain_to_deref(b, image.image); + intrin->variables[0] = + nir_deref_as_var(nir_copy_deref(&intrin->instr, &image_deref->deref)); + + /* ImageQuerySize doesn't take any extra parameters */ + if (opcode != SpvOpImageQuerySize) { + /* The image coordinate is always 4 components but we may not have that + * many. Swizzle to compensate. + */ + unsigned swiz[4]; + for (unsigned i = 0; i < 4; i++) + swiz[i] = i < image.coord->num_components ? i : 0; + intrin->src[0] = nir_src_for_ssa(nir_swizzle(&b->nb, image.coord, + swiz, 4, false)); + intrin->src[1] = nir_src_for_ssa(image.sample); + } + + switch (opcode) { + case SpvOpImageQuerySize: + case SpvOpImageRead: + break; + case SpvOpImageWrite: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def); + break; + case SpvOpAtomicIIncrement: + intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, 1)); + break; + case SpvOpAtomicIDecrement: + intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); + break; + + case SpvOpAtomicExchange: + case SpvOpAtomicIAdd: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + break; + + case SpvOpAtomicCompareExchange: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); + intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + break; + + case SpvOpAtomicISub: + intrin->src[2] = nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def)); + break; + + default: + unreachable("Invalid image opcode"); + } + + if (opcode != SpvOpImageWrite) { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, NULL); + + nir_builder_instr_insert(&b->nb, &intrin->instr); + + /* The image intrinsics always return 4 channels but we may not want + * that many. Emit a mov to trim it down. + */ + unsigned swiz[4] = {0, 1, 2, 3}; + val->ssa = vtn_create_ssa_value(b, type->type); + val->ssa->def = nir_swizzle(&b->nb, &intrin->dest.ssa, swiz, + glsl_get_vector_elements(type->type), false); + } else { + nir_builder_instr_insert(&b->nb, &intrin->instr); + } +} + +static nir_intrinsic_op +get_ssbo_nir_atomic_op(SpvOp opcode) +{ + switch (opcode) { +#define OP(S, N) case SpvOp##S: return nir_intrinsic_ssbo_##N; + OP(AtomicExchange, atomic_exchange) + OP(AtomicCompareExchange, atomic_comp_swap) + OP(AtomicIIncrement, atomic_add) + OP(AtomicIDecrement, atomic_add) + OP(AtomicIAdd, atomic_add) + OP(AtomicISub, atomic_add) + OP(AtomicSMin, atomic_imin) + OP(AtomicUMin, atomic_umin) + OP(AtomicSMax, atomic_imax) + OP(AtomicUMax, atomic_umax) + OP(AtomicAnd, atomic_and) + OP(AtomicOr, atomic_or) + OP(AtomicXor, atomic_xor) +#undef OP + default: + unreachable("Invalid SSBO atomic"); + } +} + +static nir_intrinsic_op +get_shared_nir_atomic_op(SpvOp opcode) +{ + switch (opcode) { +#define OP(S, N) case SpvOp##S: return nir_intrinsic_var_##N; + OP(AtomicExchange, atomic_exchange) + OP(AtomicCompareExchange, atomic_comp_swap) + OP(AtomicIIncrement, atomic_add) + OP(AtomicIDecrement, atomic_add) + OP(AtomicIAdd, atomic_add) + OP(AtomicISub, atomic_add) + OP(AtomicSMin, atomic_imin) + OP(AtomicUMin, atomic_umin) + OP(AtomicSMax, atomic_imax) + OP(AtomicUMax, atomic_umax) + OP(AtomicAnd, atomic_and) + OP(AtomicOr, atomic_or) + OP(AtomicXor, atomic_xor) +#undef OP + default: + unreachable("Invalid shared atomic"); + } +} + +static void +fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, nir_src *src) +{ + switch (opcode) { + case SpvOpAtomicIIncrement: + src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, 1)); + break; + + case SpvOpAtomicIDecrement: + src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); + break; + + case SpvOpAtomicISub: + src[0] = + nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def)); + break; + + case SpvOpAtomicCompareExchange: + src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); + src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def); + break; + /* Fall through */ + + case SpvOpAtomicExchange: + case SpvOpAtomicIAdd: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: + src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + break; + + default: + unreachable("Invalid SPIR-V atomic"); + } +} + +static void +vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_access_chain *chain = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + nir_intrinsic_instr *atomic; + + /* + SpvScope scope = w[4]; + SpvMemorySemanticsMask semantics = w[5]; + */ + + if (chain->var->mode == vtn_variable_mode_workgroup) { + nir_deref *deref = &vtn_access_chain_to_deref(b, chain)->deref; + nir_intrinsic_op op = get_shared_nir_atomic_op(opcode); + atomic = nir_intrinsic_instr_create(b->nb.shader, op); + atomic->variables[0] = nir_deref_as_var(nir_copy_deref(atomic, deref)); + fill_common_atomic_sources(b, opcode, w, &atomic->src[0]); + } else { + assert(chain->var->mode == vtn_variable_mode_ssbo); + struct vtn_type *type; + nir_ssa_def *offset, *index; + offset = vtn_access_chain_to_offset(b, chain, &index, &type, NULL, false); + + nir_intrinsic_op op = get_ssbo_nir_atomic_op(opcode); + + atomic = nir_intrinsic_instr_create(b->nb.shader, op); + atomic->src[0] = nir_src_for_ssa(index); + atomic->src[1] = nir_src_for_ssa(offset); + fill_common_atomic_sources(b, opcode, w, &atomic->src[2]); + } + + nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, NULL); + + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = rzalloc(b, struct vtn_ssa_value); + val->ssa->def = &atomic->dest.ssa; + val->ssa->type = type->type; + + nir_builder_instr_insert(&b->nb, &atomic->instr); +} + +static nir_alu_instr * +create_vec(nir_shader *shader, unsigned num_components) +{ + nir_op op; + switch (num_components) { + case 1: op = nir_op_fmov; break; + case 2: op = nir_op_vec2; break; + case 3: op = nir_op_vec3; break; + case 4: op = nir_op_vec4; break; + default: unreachable("bad vector size"); + } + + nir_alu_instr *vec = nir_alu_instr_create(shader, op); + nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL); + vec->dest.write_mask = (1 << num_components) - 1; + + return vec; +} + +struct vtn_ssa_value * +vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) +{ + if (src->transposed) + return src->transposed; + + struct vtn_ssa_value *dest = + vtn_create_ssa_value(b, glsl_transposed_type(src->type)); + + for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) { + nir_alu_instr *vec = create_vec(b->shader, + glsl_get_matrix_columns(src->type)); + if (glsl_type_is_vector_or_scalar(src->type)) { + vec->src[0].src = nir_src_for_ssa(src->def); + vec->src[0].swizzle[0] = i; + } else { + for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) { + vec->src[j].src = nir_src_for_ssa(src->elems[j]->def); + vec->src[j].swizzle[0] = i; + } + } + nir_builder_instr_insert(&b->nb, &vec->instr); + dest->elems[i]->def = &vec->dest.dest.ssa; + } + + dest->transposed = src; + + return dest; +} + +nir_ssa_def * +vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index) +{ + unsigned swiz[4] = { index }; + return nir_swizzle(&b->nb, src, swiz, 1, true); +} + +nir_ssa_def * +vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, + unsigned index) +{ + nir_alu_instr *vec = create_vec(b->shader, src->num_components); + + for (unsigned i = 0; i < src->num_components; i++) { + if (i == index) { + vec->src[i].src = nir_src_for_ssa(insert); + } else { + vec->src[i].src = nir_src_for_ssa(src); + vec->src[i].swizzle[0] = i; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +nir_ssa_def * +vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_extract(b, src, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_extract(b, src, i), dest); + + return dest; +} + +nir_ssa_def * +vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *insert, nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_insert(b, src, insert, i), dest); + + return dest; +} + +static nir_ssa_def * +vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components, + nir_ssa_def *src0, nir_ssa_def *src1, + const uint32_t *indices) +{ + nir_alu_instr *vec = create_vec(b->shader, num_components); + + nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1); + nir_builder_instr_insert(&b->nb, &undef->instr); + + for (unsigned i = 0; i < num_components; i++) { + uint32_t index = indices[i]; + if (index == 0xffffffff) { + vec->src[i].src = nir_src_for_ssa(&undef->def); + } else if (index < src0->num_components) { + vec->src[i].src = nir_src_for_ssa(src0); + vec->src[i].swizzle[0] = index; + } else { + vec->src[i].src = nir_src_for_ssa(src1); + vec->src[i].swizzle[0] = index - src0->num_components; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +/* + * Concatentates a number of vectors/scalars together to produce a vector + */ +static nir_ssa_def * +vtn_vector_construct(struct vtn_builder *b, unsigned num_components, + unsigned num_srcs, nir_ssa_def **srcs) +{ + nir_alu_instr *vec = create_vec(b->shader, num_components); + + unsigned dest_idx = 0; + for (unsigned i = 0; i < num_srcs; i++) { + nir_ssa_def *src = srcs[i]; + for (unsigned j = 0; j < src->num_components; j++) { + vec->src[dest_idx].src = nir_src_for_ssa(src); + vec->src[dest_idx].swizzle[0] = j; + dest_idx++; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +static struct vtn_ssa_value * +vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src) +{ + struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value); + dest->type = src->type; + + if (glsl_type_is_vector_or_scalar(src->type)) { + dest->def = src->def; + } else { + unsigned elems = glsl_get_length(src->type); + + dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]); + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_ssa_value *insert, const uint32_t *indices, + unsigned num_indices) +{ + struct vtn_ssa_value *dest = vtn_composite_copy(b, src); + + struct vtn_ssa_value *cur = dest; + unsigned i; + for (i = 0; i < num_indices - 1; i++) { + cur = cur->elems[indices[i]]; + } + + if (glsl_type_is_vector_or_scalar(cur->type)) { + /* According to the SPIR-V spec, OpCompositeInsert may work down to + * the component granularity. In that case, the last index will be + * the index to insert the scalar into the vector. + */ + + cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]); + } else { + cur->elems[indices[i]] = insert; + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src, + const uint32_t *indices, unsigned num_indices) +{ + struct vtn_ssa_value *cur = src; + for (unsigned i = 0; i < num_indices; i++) { + if (glsl_type_is_vector_or_scalar(cur->type)) { + assert(i == num_indices - 1); + /* According to the SPIR-V spec, OpCompositeExtract may work down to + * the component granularity. The last index will be the index of the + * vector to extract. + */ + + struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value); + ret->type = glsl_scalar_type(glsl_get_base_type(cur->type)); + ret->def = vtn_vector_extract(b, cur->def, indices[i]); + return ret; + } else { + cur = cur->elems[indices[i]]; + } + } + + return cur; +} + +static void +vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa = vtn_create_ssa_value(b, type); + + switch (opcode) { + case SpvOpVectorExtractDynamic: + val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def); + break; + + case SpvOpVectorInsertDynamic: + val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + vtn_ssa_value(b, w[5])->def); + break; + + case SpvOpVectorShuffle: + val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type), + vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + w + 5); + break; + + case SpvOpCompositeConstruct: { + unsigned elems = count - 3; + if (glsl_type_is_vector_or_scalar(type)) { + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < elems; i++) + srcs[i] = vtn_ssa_value(b, w[3 + i])->def; + val->ssa->def = + vtn_vector_construct(b, glsl_get_vector_elements(type), + elems, srcs); + } else { + val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); + } + break; + } + case SpvOpCompositeExtract: + val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), + w + 4, count - 4); + break; + + case SpvOpCompositeInsert: + val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), + vtn_ssa_value(b, w[3]), + w + 5, count - 5); + break; + + case SpvOpCopyObject: + val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); + break; + + default: + unreachable("unknown composite operation"); + } +} + +static void +vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + nir_intrinsic_op intrinsic_op; + switch (opcode) { + case SpvOpEmitVertex: + case SpvOpEmitStreamVertex: + intrinsic_op = nir_intrinsic_emit_vertex; + break; + case SpvOpEndPrimitive: + case SpvOpEndStreamPrimitive: + intrinsic_op = nir_intrinsic_end_primitive; + break; + case SpvOpMemoryBarrier: + intrinsic_op = nir_intrinsic_memory_barrier; + break; + case SpvOpControlBarrier: + intrinsic_op = nir_intrinsic_barrier; + break; + default: + unreachable("unknown barrier instruction"); + } + + nir_intrinsic_instr *intrin = + nir_intrinsic_instr_create(b->shader, intrinsic_op); + + if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive) + intrin->const_index[0] = w[1]; + + nir_builder_instr_insert(&b->nb, &intrin->instr); +} + +static unsigned +gl_primitive_from_spv_execution_mode(SpvExecutionMode mode) +{ + switch (mode) { + case SpvExecutionModeInputPoints: + case SpvExecutionModeOutputPoints: + return 0; /* GL_POINTS */ + case SpvExecutionModeInputLines: + return 1; /* GL_LINES */ + case SpvExecutionModeInputLinesAdjacency: + return 0x000A; /* GL_LINE_STRIP_ADJACENCY_ARB */ + case SpvExecutionModeTriangles: + return 4; /* GL_TRIANGLES */ + case SpvExecutionModeInputTrianglesAdjacency: + return 0x000C; /* GL_TRIANGLES_ADJACENCY_ARB */ + case SpvExecutionModeQuads: + return 7; /* GL_QUADS */ + case SpvExecutionModeIsolines: + return 0x8E7A; /* GL_ISOLINES */ + case SpvExecutionModeOutputLineStrip: + return 3; /* GL_LINE_STRIP */ + case SpvExecutionModeOutputTriangleStrip: + return 5; /* GL_TRIANGLE_STRIP */ + default: + assert(!"Invalid primitive type"); + return 4; + } +} + +static unsigned +vertices_in_from_spv_execution_mode(SpvExecutionMode mode) +{ + switch (mode) { + case SpvExecutionModeInputPoints: + return 1; + case SpvExecutionModeInputLines: + return 2; + case SpvExecutionModeInputLinesAdjacency: + return 4; + case SpvExecutionModeTriangles: + return 3; + case SpvExecutionModeInputTrianglesAdjacency: + return 6; + default: + assert(!"Invalid GS input mode"); + return 0; + } +} + +static gl_shader_stage +stage_for_execution_model(SpvExecutionModel model) +{ + switch (model) { + case SpvExecutionModelVertex: + return MESA_SHADER_VERTEX; + case SpvExecutionModelTessellationControl: + return MESA_SHADER_TESS_CTRL; + case SpvExecutionModelTessellationEvaluation: + return MESA_SHADER_TESS_EVAL; + case SpvExecutionModelGeometry: + return MESA_SHADER_GEOMETRY; + case SpvExecutionModelFragment: + return MESA_SHADER_FRAGMENT; + case SpvExecutionModelGLCompute: + return MESA_SHADER_COMPUTE; + default: + unreachable("Unsupported execution model"); + } +} + +static bool +vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpSource: + case SpvOpSourceExtension: + case SpvOpSourceContinued: + case SpvOpExtension: + /* Unhandled, but these are for debug so that's ok. */ + break; + + case SpvOpCapability: + switch ((SpvCapability)w[1]) { + case SpvCapabilityMatrix: + case SpvCapabilityShader: + case SpvCapabilityGeometry: + break; + default: + assert(!"Unsupported capability"); + } + break; + + case SpvOpExtInstImport: + vtn_handle_extension(b, opcode, w, count); + break; + + case SpvOpMemoryModel: + assert(w[1] == SpvAddressingModelLogical); + assert(w[2] == SpvMemoryModelGLSL450); + break; + + case SpvOpEntryPoint: { + struct vtn_value *entry_point = &b->values[w[2]]; + /* Let this be a name label regardless */ + unsigned name_words; + entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words); + + if (strcmp(entry_point->name, b->entry_point_name) != 0 || + stage_for_execution_model(w[1]) != b->entry_point_stage) + break; + + assert(b->entry_point == NULL); + b->entry_point = entry_point; + break; + } + + case SpvOpString: + vtn_push_value(b, w[1], vtn_value_type_string)->str = + vtn_string_literal(b, &w[2], count - 2, NULL); + break; + + case SpvOpName: + b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL); + break; + + case SpvOpMemberName: + /* TODO */ + break; + + case SpvOpExecutionMode: + case SpvOpDecorationGroup: + case SpvOpDecorate: + case SpvOpMemberDecorate: + case SpvOpGroupDecorate: + case SpvOpGroupMemberDecorate: + vtn_handle_decoration(b, opcode, w, count); + break; + + default: + return false; /* End of preamble */ + } + + return true; +} + +static void +vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point, + const struct vtn_decoration *mode, void *data) +{ + assert(b->entry_point == entry_point); + + switch(mode->exec_mode) { + case SpvExecutionModeOriginUpperLeft: + case SpvExecutionModeOriginLowerLeft: + b->origin_upper_left = + (mode->exec_mode == SpvExecutionModeOriginUpperLeft); + break; + + case SpvExecutionModeEarlyFragmentTests: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.early_fragment_tests = true; + break; + + case SpvExecutionModeInvocations: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.invocations = MAX2(1, mode->literals[0]); + break; + + case SpvExecutionModeDepthReplacing: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY; + break; + case SpvExecutionModeDepthGreater: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER; + break; + case SpvExecutionModeDepthLess: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS; + break; + case SpvExecutionModeDepthUnchanged: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED; + break; + + case SpvExecutionModeLocalSize: + assert(b->shader->stage == MESA_SHADER_COMPUTE); + b->shader->info.cs.local_size[0] = mode->literals[0]; + b->shader->info.cs.local_size[1] = mode->literals[1]; + b->shader->info.cs.local_size[2] = mode->literals[2]; + break; + case SpvExecutionModeLocalSizeHint: + break; /* Nothing do do with this */ + + case SpvExecutionModeOutputVertices: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.vertices_out = mode->literals[0]; + break; + + case SpvExecutionModeInputPoints: + case SpvExecutionModeInputLines: + case SpvExecutionModeInputLinesAdjacency: + case SpvExecutionModeTriangles: + case SpvExecutionModeInputTrianglesAdjacency: + case SpvExecutionModeQuads: + case SpvExecutionModeIsolines: + if (b->shader->stage == MESA_SHADER_GEOMETRY) { + b->shader->info.gs.vertices_in = + vertices_in_from_spv_execution_mode(mode->exec_mode); + } else { + assert(!"Tesselation shaders not yet supported"); + } + break; + + case SpvExecutionModeOutputPoints: + case SpvExecutionModeOutputLineStrip: + case SpvExecutionModeOutputTriangleStrip: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.output_primitive = + gl_primitive_from_spv_execution_mode(mode->exec_mode); + break; + + case SpvExecutionModeSpacingEqual: + case SpvExecutionModeSpacingFractionalEven: + case SpvExecutionModeSpacingFractionalOdd: + case SpvExecutionModeVertexOrderCw: + case SpvExecutionModeVertexOrderCcw: + case SpvExecutionModePointMode: + assert(!"TODO: Add tessellation metadata"); + break; + + case SpvExecutionModePixelCenterInteger: + case SpvExecutionModeXfb: + assert(!"Unhandled execution mode"); + break; + + case SpvExecutionModeVecTypeHint: + case SpvExecutionModeContractionOff: + break; /* OpenCL */ + } +} + +static bool +vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpSource: + case SpvOpSourceContinued: + case SpvOpSourceExtension: + case SpvOpExtension: + case SpvOpCapability: + case SpvOpExtInstImport: + case SpvOpMemoryModel: + case SpvOpEntryPoint: + case SpvOpExecutionMode: + case SpvOpString: + case SpvOpName: + case SpvOpMemberName: + case SpvOpDecorationGroup: + case SpvOpDecorate: + case SpvOpMemberDecorate: + case SpvOpGroupDecorate: + case SpvOpGroupMemberDecorate: + assert(!"Invalid opcode types and variables section"); + break; + + case SpvOpTypeVoid: + case SpvOpTypeBool: + case SpvOpTypeInt: + case SpvOpTypeFloat: + case SpvOpTypeVector: + case SpvOpTypeMatrix: + case SpvOpTypeImage: + case SpvOpTypeSampler: + case SpvOpTypeSampledImage: + case SpvOpTypeArray: + case SpvOpTypeRuntimeArray: + case SpvOpTypeStruct: + case SpvOpTypeOpaque: + case SpvOpTypePointer: + case SpvOpTypeFunction: + case SpvOpTypeEvent: + case SpvOpTypeDeviceEvent: + case SpvOpTypeReserveId: + case SpvOpTypeQueue: + case SpvOpTypePipe: + vtn_handle_type(b, opcode, w, count); + break; + + case SpvOpConstantTrue: + case SpvOpConstantFalse: + case SpvOpConstant: + case SpvOpConstantComposite: + case SpvOpConstantSampler: + case SpvOpConstantNull: + case SpvOpSpecConstantTrue: + case SpvOpSpecConstantFalse: + case SpvOpSpecConstant: + case SpvOpSpecConstantComposite: + case SpvOpSpecConstantOp: + vtn_handle_constant(b, opcode, w, count); + break; + + case SpvOpVariable: + vtn_handle_variables(b, opcode, w, count); + break; + + default: + return false; /* End of preamble */ + } + + return true; +} + +static bool +vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpLabel: + break; + + case SpvOpLoopMerge: + case SpvOpSelectionMerge: + /* This is handled by cfg pre-pass and walk_blocks */ + break; + + case SpvOpUndef: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef); + val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + break; + } + + case SpvOpExtInst: + vtn_handle_extension(b, opcode, w, count); + break; + + case SpvOpVariable: + case SpvOpLoad: + case SpvOpStore: + case SpvOpCopyMemory: + case SpvOpCopyMemorySized: + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: + case SpvOpArrayLength: + vtn_handle_variables(b, opcode, w, count); + break; + + case SpvOpFunctionCall: + vtn_handle_function_call(b, opcode, w, count); + break; + + case SpvOpSampledImage: + case SpvOpImage: + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQuerySizeLod: + case SpvOpImageQueryLod: + case SpvOpImageQueryLevels: + case SpvOpImageQuerySamples: + vtn_handle_texture(b, opcode, w, count); + break; + + case SpvOpImageRead: + case SpvOpImageWrite: + case SpvOpImageTexelPointer: + vtn_handle_image(b, opcode, w, count); + break; + + case SpvOpImageQuerySize: { + struct vtn_access_chain *image = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + if (glsl_type_is_image(image->var->var->interface_type)) { + vtn_handle_image(b, opcode, w, count); + } else { + vtn_handle_texture(b, opcode, w, count); + } + break; + } + + case SpvOpAtomicExchange: + case SpvOpAtomicCompareExchange: + case SpvOpAtomicCompareExchangeWeak: + case SpvOpAtomicIIncrement: + case SpvOpAtomicIDecrement: + case SpvOpAtomicIAdd: + case SpvOpAtomicISub: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: { + struct vtn_value *pointer = vtn_untyped_value(b, w[3]); + if (pointer->value_type == vtn_value_type_image_pointer) { + vtn_handle_image(b, opcode, w, count); + } else { + assert(pointer->value_type == vtn_value_type_access_chain); + vtn_handle_ssbo_or_shared_atomic(b, opcode, w, count); + } + break; + } + + case SpvOpSNegate: + case SpvOpFNegate: + case SpvOpNot: + case SpvOpAny: + case SpvOpAll: + case SpvOpConvertFToU: + case SpvOpConvertFToS: + case SpvOpConvertSToF: + case SpvOpConvertUToF: + case SpvOpUConvert: + case SpvOpSConvert: + case SpvOpFConvert: + case SpvOpQuantizeToF16: + case SpvOpConvertPtrToU: + case SpvOpConvertUToPtr: + case SpvOpPtrCastToGeneric: + case SpvOpGenericCastToPtr: + case SpvOpBitcast: + case SpvOpIsNan: + case SpvOpIsInf: + case SpvOpIsFinite: + case SpvOpIsNormal: + case SpvOpSignBitSet: + case SpvOpLessOrGreater: + case SpvOpOrdered: + case SpvOpUnordered: + case SpvOpIAdd: + case SpvOpFAdd: + case SpvOpISub: + case SpvOpFSub: + case SpvOpIMul: + case SpvOpFMul: + case SpvOpUDiv: + case SpvOpSDiv: + case SpvOpFDiv: + case SpvOpUMod: + case SpvOpSRem: + case SpvOpSMod: + case SpvOpFRem: + case SpvOpFMod: + case SpvOpVectorTimesScalar: + case SpvOpDot: + case SpvOpIAddCarry: + case SpvOpISubBorrow: + case SpvOpUMulExtended: + case SpvOpSMulExtended: + case SpvOpShiftRightLogical: + case SpvOpShiftRightArithmetic: + case SpvOpShiftLeftLogical: + case SpvOpLogicalEqual: + case SpvOpLogicalNotEqual: + case SpvOpLogicalOr: + case SpvOpLogicalAnd: + case SpvOpLogicalNot: + case SpvOpBitwiseOr: + case SpvOpBitwiseXor: + case SpvOpBitwiseAnd: + case SpvOpSelect: + case SpvOpIEqual: + case SpvOpFOrdEqual: + case SpvOpFUnordEqual: + case SpvOpINotEqual: + case SpvOpFOrdNotEqual: + case SpvOpFUnordNotEqual: + case SpvOpULessThan: + case SpvOpSLessThan: + case SpvOpFOrdLessThan: + case SpvOpFUnordLessThan: + case SpvOpUGreaterThan: + case SpvOpSGreaterThan: + case SpvOpFOrdGreaterThan: + case SpvOpFUnordGreaterThan: + case SpvOpULessThanEqual: + case SpvOpSLessThanEqual: + case SpvOpFOrdLessThanEqual: + case SpvOpFUnordLessThanEqual: + case SpvOpUGreaterThanEqual: + case SpvOpSGreaterThanEqual: + case SpvOpFOrdGreaterThanEqual: + case SpvOpFUnordGreaterThanEqual: + case SpvOpDPdx: + case SpvOpDPdy: + case SpvOpFwidth: + case SpvOpDPdxFine: + case SpvOpDPdyFine: + case SpvOpFwidthFine: + case SpvOpDPdxCoarse: + case SpvOpDPdyCoarse: + case SpvOpFwidthCoarse: + case SpvOpBitFieldInsert: + case SpvOpBitFieldSExtract: + case SpvOpBitFieldUExtract: + case SpvOpBitReverse: + case SpvOpBitCount: + case SpvOpTranspose: + case SpvOpOuterProduct: + case SpvOpMatrixTimesScalar: + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: + vtn_handle_alu(b, opcode, w, count); + break; + + case SpvOpVectorExtractDynamic: + case SpvOpVectorInsertDynamic: + case SpvOpVectorShuffle: + case SpvOpCompositeConstruct: + case SpvOpCompositeExtract: + case SpvOpCompositeInsert: + case SpvOpCopyObject: + vtn_handle_composite(b, opcode, w, count); + break; + + case SpvOpEmitVertex: + case SpvOpEndPrimitive: + case SpvOpEmitStreamVertex: + case SpvOpEndStreamPrimitive: + case SpvOpControlBarrier: + case SpvOpMemoryBarrier: + vtn_handle_barrier(b, opcode, w, count); + break; + + default: + unreachable("Unhandled opcode"); + } + + return true; +} + +nir_function * +spirv_to_nir(const uint32_t *words, size_t word_count, + struct nir_spirv_specialization *spec, unsigned num_spec, + gl_shader_stage stage, const char *entry_point_name, + const nir_shader_compiler_options *options) +{ + const uint32_t *word_end = words + word_count; + + /* Handle the SPIR-V header (first 4 dwords) */ + assert(word_count > 5); + + assert(words[0] == SpvMagicNumber); + assert(words[1] >= 0x10000); + /* words[2] == generator magic */ + unsigned value_id_bound = words[3]; + assert(words[4] == 0); + + words+= 5; + + /* Initialize the stn_builder object */ + struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); + b->value_id_bound = value_id_bound; + b->values = rzalloc_array(b, struct vtn_value, value_id_bound); + exec_list_make_empty(&b->functions); + b->entry_point_stage = stage; + b->entry_point_name = entry_point_name; + + /* Handle all the preamble instructions */ + words = vtn_foreach_instruction(b, words, word_end, + vtn_handle_preamble_instruction); + + if (b->entry_point == NULL) { + assert(!"Entry point not found"); + ralloc_free(b); + return NULL; + } + + b->shader = nir_shader_create(NULL, stage, options); + + /* Parse execution modes */ + vtn_foreach_execution_mode(b, b->entry_point, + vtn_handle_execution_mode, NULL); + + b->specializations = spec; + b->num_specializations = num_spec; + + /* Handle all variable, type, and constant instructions */ + words = vtn_foreach_instruction(b, words, word_end, + vtn_handle_variable_or_type_instruction); + + vtn_build_cfg(b, words, word_end); + + foreach_list_typed(struct vtn_function, func, node, &b->functions) { + b->impl = func->impl; + b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + vtn_function_emit(b, func, vtn_handle_body_instruction); + } + + assert(b->entry_point->value_type == vtn_value_type_function); + nir_function *entry_point = b->entry_point->func->impl->function; + assert(entry_point); + + ralloc_free(b); + + return entry_point; +} diff --git a/src/compiler/nir/spirv/vtn_alu.c b/src/compiler/nir/spirv/vtn_alu.c new file mode 100644 index 00000000000..d866da7445e --- /dev/null +++ b/src/compiler/nir/spirv/vtn_alu.c @@ -0,0 +1,448 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vtn_private.h" + +/* + * Normally, column vectors in SPIR-V correspond to a single NIR SSA + * definition. But for matrix multiplies, we want to do one routine for + * multiplying a matrix by a matrix and then pretend that vectors are matrices + * with one column. So we "wrap" these things, and unwrap the result before we + * send it off. + */ + +static struct vtn_ssa_value * +wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (val == NULL) + return NULL; + + if (glsl_type_is_matrix(val->type)) + return val; + + struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); + dest->type = val->type; + dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1); + dest->elems[0] = val; + + return dest; +} + +static struct vtn_ssa_value * +unwrap_matrix(struct vtn_ssa_value *val) +{ + if (glsl_type_is_matrix(val->type)) + return val; + + return val->elems[0]; +} + +static struct vtn_ssa_value * +matrix_multiply(struct vtn_builder *b, + struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1) +{ + + struct vtn_ssa_value *src0 = wrap_matrix(b, _src0); + struct vtn_ssa_value *src1 = wrap_matrix(b, _src1); + struct vtn_ssa_value *src0_transpose = wrap_matrix(b, _src0->transposed); + struct vtn_ssa_value *src1_transpose = wrap_matrix(b, _src1->transposed); + + unsigned src0_rows = glsl_get_vector_elements(src0->type); + unsigned src0_columns = glsl_get_matrix_columns(src0->type); + unsigned src1_columns = glsl_get_matrix_columns(src1->type); + + const struct glsl_type *dest_type; + if (src1_columns > 1) { + dest_type = glsl_matrix_type(glsl_get_base_type(src0->type), + src0_rows, src1_columns); + } else { + dest_type = glsl_vector_type(glsl_get_base_type(src0->type), src0_rows); + } + struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type); + + dest = wrap_matrix(b, dest); + + bool transpose_result = false; + if (src0_transpose && src1_transpose) { + /* transpose(A) * transpose(B) = transpose(B * A) */ + src1 = src0_transpose; + src0 = src1_transpose; + src0_transpose = NULL; + src1_transpose = NULL; + transpose_result = true; + } + + if (src0_transpose && !src1_transpose && + glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { + /* We already have the rows of src0 and the columns of src1 available, + * so we can just take the dot product of each row with each column to + * get the result. + */ + + for (unsigned i = 0; i < src1_columns; i++) { + nir_ssa_def *vec_src[4]; + for (unsigned j = 0; j < src0_rows; j++) { + vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def, + src1->elems[i]->def); + } + dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows); + } + } else { + /* We don't handle the case where src1 is transposed but not src0, since + * the general case only uses individual components of src1 so the + * optimizer should chew through the transpose we emitted for src1. + */ + + for (unsigned i = 0; i < src1_columns; i++) { + /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ + dest->elems[i]->def = + nir_fmul(&b->nb, src0->elems[0]->def, + nir_channel(&b->nb, src1->elems[i]->def, 0)); + for (unsigned j = 1; j < src0_columns; j++) { + dest->elems[i]->def = + nir_fadd(&b->nb, dest->elems[i]->def, + nir_fmul(&b->nb, src0->elems[j]->def, + nir_channel(&b->nb, src1->elems[i]->def, j))); + } + } + } + + dest = unwrap_matrix(dest); + + if (transpose_result) + dest = vtn_ssa_transpose(b, dest); + + return dest; +} + +static struct vtn_ssa_value * +mat_times_scalar(struct vtn_builder *b, + struct vtn_ssa_value *mat, + nir_ssa_def *scalar) +{ + struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type); + for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) { + if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT) + dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar); + else + dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar); + } + + return dest; +} + +static void +vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, + struct vtn_value *dest, + struct vtn_ssa_value *src0, struct vtn_ssa_value *src1) +{ + switch (opcode) { + case SpvOpFNegate: { + dest->ssa = vtn_create_ssa_value(b, src0->type); + unsigned cols = glsl_get_matrix_columns(src0->type); + for (unsigned i = 0; i < cols; i++) + dest->ssa->elems[i]->def = nir_fneg(&b->nb, src0->elems[i]->def); + break; + } + + case SpvOpFAdd: { + dest->ssa = vtn_create_ssa_value(b, src0->type); + unsigned cols = glsl_get_matrix_columns(src0->type); + for (unsigned i = 0; i < cols; i++) + dest->ssa->elems[i]->def = + nir_fadd(&b->nb, src0->elems[i]->def, src1->elems[i]->def); + break; + } + + case SpvOpFSub: { + dest->ssa = vtn_create_ssa_value(b, src0->type); + unsigned cols = glsl_get_matrix_columns(src0->type); + for (unsigned i = 0; i < cols; i++) + dest->ssa->elems[i]->def = + nir_fsub(&b->nb, src0->elems[i]->def, src1->elems[i]->def); + break; + } + + case SpvOpTranspose: + dest->ssa = vtn_ssa_transpose(b, src0); + break; + + case SpvOpMatrixTimesScalar: + if (src0->transposed) { + dest->ssa = vtn_ssa_transpose(b, mat_times_scalar(b, src0->transposed, + src1->def)); + } else { + dest->ssa = mat_times_scalar(b, src0, src1->def); + } + break; + + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: + if (opcode == SpvOpVectorTimesMatrix) { + dest->ssa = matrix_multiply(b, vtn_ssa_transpose(b, src1), src0); + } else { + dest->ssa = matrix_multiply(b, src0, src1); + } + break; + + default: unreachable("unknown matrix opcode"); + } +} + +nir_op +vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap) +{ + /* Indicates that the first two arguments should be swapped. This is + * used for implementing greater-than and less-than-or-equal. + */ + *swap = false; + + switch (opcode) { + case SpvOpSNegate: return nir_op_ineg; + case SpvOpFNegate: return nir_op_fneg; + case SpvOpNot: return nir_op_inot; + case SpvOpIAdd: return nir_op_iadd; + case SpvOpFAdd: return nir_op_fadd; + case SpvOpISub: return nir_op_isub; + case SpvOpFSub: return nir_op_fsub; + case SpvOpIMul: return nir_op_imul; + case SpvOpFMul: return nir_op_fmul; + case SpvOpUDiv: return nir_op_udiv; + case SpvOpSDiv: return nir_op_idiv; + case SpvOpFDiv: return nir_op_fdiv; + case SpvOpUMod: return nir_op_umod; + case SpvOpSMod: return nir_op_imod; + case SpvOpFMod: return nir_op_fmod; + case SpvOpSRem: return nir_op_irem; + case SpvOpFRem: return nir_op_frem; + + case SpvOpShiftRightLogical: return nir_op_ushr; + case SpvOpShiftRightArithmetic: return nir_op_ishr; + case SpvOpShiftLeftLogical: return nir_op_ishl; + case SpvOpLogicalOr: return nir_op_ior; + case SpvOpLogicalEqual: return nir_op_ieq; + case SpvOpLogicalNotEqual: return nir_op_ine; + case SpvOpLogicalAnd: return nir_op_iand; + case SpvOpLogicalNot: return nir_op_inot; + case SpvOpBitwiseOr: return nir_op_ior; + case SpvOpBitwiseXor: return nir_op_ixor; + case SpvOpBitwiseAnd: return nir_op_iand; + case SpvOpSelect: return nir_op_bcsel; + case SpvOpIEqual: return nir_op_ieq; + + case SpvOpBitFieldInsert: return nir_op_bitfield_insert; + case SpvOpBitFieldSExtract: return nir_op_ibitfield_extract; + case SpvOpBitFieldUExtract: return nir_op_ubitfield_extract; + case SpvOpBitReverse: return nir_op_bitfield_reverse; + case SpvOpBitCount: return nir_op_bit_count; + + /* Comparisons: (TODO: How do we want to handled ordered/unordered?) */ + case SpvOpFOrdEqual: return nir_op_feq; + case SpvOpFUnordEqual: return nir_op_feq; + case SpvOpINotEqual: return nir_op_ine; + case SpvOpFOrdNotEqual: return nir_op_fne; + case SpvOpFUnordNotEqual: return nir_op_fne; + case SpvOpULessThan: return nir_op_ult; + case SpvOpSLessThan: return nir_op_ilt; + case SpvOpFOrdLessThan: return nir_op_flt; + case SpvOpFUnordLessThan: return nir_op_flt; + case SpvOpUGreaterThan: *swap = true; return nir_op_ult; + case SpvOpSGreaterThan: *swap = true; return nir_op_ilt; + case SpvOpFOrdGreaterThan: *swap = true; return nir_op_flt; + case SpvOpFUnordGreaterThan: *swap = true; return nir_op_flt; + case SpvOpULessThanEqual: *swap = true; return nir_op_uge; + case SpvOpSLessThanEqual: *swap = true; return nir_op_ige; + case SpvOpFOrdLessThanEqual: *swap = true; return nir_op_fge; + case SpvOpFUnordLessThanEqual: *swap = true; return nir_op_fge; + case SpvOpUGreaterThanEqual: return nir_op_uge; + case SpvOpSGreaterThanEqual: return nir_op_ige; + case SpvOpFOrdGreaterThanEqual: return nir_op_fge; + case SpvOpFUnordGreaterThanEqual: return nir_op_fge; + + /* Conversions: */ + case SpvOpConvertFToU: return nir_op_f2u; + case SpvOpConvertFToS: return nir_op_f2i; + case SpvOpConvertSToF: return nir_op_i2f; + case SpvOpConvertUToF: return nir_op_u2f; + case SpvOpBitcast: return nir_op_imov; + case SpvOpUConvert: + case SpvOpQuantizeToF16: return nir_op_fquantize2f16; + /* TODO: NIR is 32-bit only; these are no-ops. */ + case SpvOpSConvert: return nir_op_imov; + case SpvOpFConvert: return nir_op_fmov; + + /* Derivatives: */ + case SpvOpDPdx: return nir_op_fddx; + case SpvOpDPdy: return nir_op_fddy; + case SpvOpDPdxFine: return nir_op_fddx_fine; + case SpvOpDPdyFine: return nir_op_fddy_fine; + case SpvOpDPdxCoarse: return nir_op_fddx_coarse; + case SpvOpDPdyCoarse: return nir_op_fddy_coarse; + + default: + unreachable("No NIR equivalent"); + } +} + +void +vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + + /* Collect the various SSA sources */ + const unsigned num_inputs = count - 3; + struct vtn_ssa_value *vtn_src[4] = { NULL, }; + for (unsigned i = 0; i < num_inputs; i++) + vtn_src[i] = vtn_ssa_value(b, w[i + 3]); + + if (glsl_type_is_matrix(vtn_src[0]->type) || + (num_inputs >= 2 && glsl_type_is_matrix(vtn_src[1]->type))) { + vtn_handle_matrix_alu(b, opcode, val, vtn_src[0], vtn_src[1]); + return; + } + + val->ssa = vtn_create_ssa_value(b, type); + nir_ssa_def *src[4] = { NULL, }; + for (unsigned i = 0; i < num_inputs; i++) { + assert(glsl_type_is_vector_or_scalar(vtn_src[i]->type)); + src[i] = vtn_src[i]->def; + } + + switch (opcode) { + case SpvOpAny: + if (src[0]->num_components == 1) { + val->ssa->def = nir_imov(&b->nb, src[0]); + } else { + nir_op op; + switch (src[0]->num_components) { + case 2: op = nir_op_bany_inequal2; break; + case 3: op = nir_op_bany_inequal3; break; + case 4: op = nir_op_bany_inequal4; break; + } + val->ssa->def = nir_build_alu(&b->nb, op, src[0], + nir_imm_int(&b->nb, NIR_FALSE), + NULL, NULL); + } + return; + + case SpvOpAll: + if (src[0]->num_components == 1) { + val->ssa->def = nir_imov(&b->nb, src[0]); + } else { + nir_op op; + switch (src[0]->num_components) { + case 2: op = nir_op_ball_iequal2; break; + case 3: op = nir_op_ball_iequal3; break; + case 4: op = nir_op_ball_iequal4; break; + } + val->ssa->def = nir_build_alu(&b->nb, op, src[0], + nir_imm_int(&b->nb, NIR_TRUE), + NULL, NULL); + } + return; + + case SpvOpOuterProduct: { + for (unsigned i = 0; i < src[1]->num_components; i++) { + val->ssa->elems[i]->def = + nir_fmul(&b->nb, src[0], nir_channel(&b->nb, src[1], i)); + } + return; + } + + case SpvOpDot: + val->ssa->def = nir_fdot(&b->nb, src[0], src[1]); + return; + + case SpvOpIAddCarry: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = nir_uadd_carry(&b->nb, src[0], src[1]); + return; + + case SpvOpISubBorrow: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_isub(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = nir_usub_borrow(&b->nb, src[0], src[1]); + return; + + case SpvOpUMulExtended: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = nir_umul_high(&b->nb, src[0], src[1]); + return; + + case SpvOpSMulExtended: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = nir_imul_high(&b->nb, src[0], src[1]); + return; + + case SpvOpFwidth: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx(&b->nb, src[1]))); + return; + case SpvOpFwidthFine: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1]))); + return; + case SpvOpFwidthCoarse: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1]))); + return; + + case SpvOpVectorTimesScalar: + /* The builder will take care of splatting for us. */ + val->ssa->def = nir_fmul(&b->nb, src[0], src[1]); + return; + + case SpvOpIsNan: + val->ssa->def = nir_fne(&b->nb, src[0], src[0]); + return; + + case SpvOpIsInf: + val->ssa->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]), + nir_imm_float(&b->nb, INFINITY)); + return; + + default: { + bool swap; + nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap); + + if (swap) { + nir_ssa_def *tmp = src[0]; + src[0] = src[1]; + src[1] = tmp; + } + + val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); + return; + } /* default */ + } +} diff --git a/src/compiler/nir/spirv/vtn_cfg.c b/src/compiler/nir/spirv/vtn_cfg.c new file mode 100644 index 00000000000..041408b1cfb --- /dev/null +++ b/src/compiler/nir/spirv/vtn_cfg.c @@ -0,0 +1,768 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vtn_private.h" +#include "nir/nir_vla.h" + +static bool +vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpFunction: { + assert(b->func == NULL); + b->func = rzalloc(b, struct vtn_function); + + list_inithead(&b->func->body); + b->func->control = w[3]; + + const struct glsl_type *result_type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); + val->func = b->func; + + const struct glsl_type *func_type = + vtn_value(b, w[4], vtn_value_type_type)->type->type; + + assert(glsl_get_function_return_type(func_type) == result_type); + + nir_function *func = + nir_function_create(b->shader, ralloc_strdup(b->shader, val->name)); + + func->num_params = glsl_get_length(func_type); + func->params = ralloc_array(b->shader, nir_parameter, func->num_params); + for (unsigned i = 0; i < func->num_params; i++) { + const struct glsl_function_param *param = + glsl_get_function_param(func_type, i); + func->params[i].type = param->type; + if (param->in) { + if (param->out) { + func->params[i].param_type = nir_parameter_inout; + } else { + func->params[i].param_type = nir_parameter_in; + } + } else { + if (param->out) { + func->params[i].param_type = nir_parameter_out; + } else { + assert(!"Parameter is neither in nor out"); + } + } + } + + func->return_type = glsl_get_function_return_type(func_type); + + b->func->impl = nir_function_impl_create(func); + if (!glsl_type_is_void(func->return_type)) { + b->func->impl->return_var = + nir_local_variable_create(b->func->impl, func->return_type, "ret"); + } + + b->func_param_idx = 0; + break; + } + + case SpvOpFunctionEnd: + b->func->end = w; + b->func = NULL; + break; + + case SpvOpFunctionParameter: { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_access_chain); + + assert(b->func_param_idx < b->func->impl->num_params); + unsigned idx = b->func_param_idx++; + + nir_variable *param = + nir_local_variable_create(b->func->impl, + b->func->impl->function->params[idx].type, + val->name); + b->func->impl->params[idx] = param; + + struct vtn_variable *vtn_var = rzalloc(b, struct vtn_variable); + vtn_var->mode = vtn_variable_mode_param; + vtn_var->type = vtn_value(b, w[1], vtn_value_type_type)->type; + vtn_var->var = param; + vtn_var->chain.var = vtn_var; + vtn_var->chain.length = 0; + + val->access_chain = &vtn_var->chain; + break; + } + + case SpvOpLabel: { + assert(b->block == NULL); + b->block = rzalloc(b, struct vtn_block); + b->block->node.type = vtn_cf_node_type_block; + b->block->label = w; + vtn_push_value(b, w[1], vtn_value_type_block)->block = b->block; + + if (b->func->start_block == NULL) { + /* This is the first block encountered for this function. In this + * case, we set the start block and add it to the list of + * implemented functions that we'll walk later. + */ + b->func->start_block = b->block; + exec_list_push_tail(&b->functions, &b->func->node); + } + break; + } + + case SpvOpSelectionMerge: + case SpvOpLoopMerge: + assert(b->block && b->block->merge == NULL); + b->block->merge = w; + break; + + case SpvOpBranch: + case SpvOpBranchConditional: + case SpvOpSwitch: + case SpvOpKill: + case SpvOpReturn: + case SpvOpReturnValue: + case SpvOpUnreachable: + assert(b->block && b->block->branch == NULL); + b->block->branch = w; + b->block = NULL; + break; + + default: + /* Continue on as per normal */ + return true; + } + + return true; +} + +static void +vtn_add_case(struct vtn_builder *b, struct vtn_switch *swtch, + struct vtn_block *break_block, + uint32_t block_id, uint32_t val, bool is_default) +{ + struct vtn_block *case_block = + vtn_value(b, block_id, vtn_value_type_block)->block; + + /* Don't create dummy cases that just break */ + if (case_block == break_block) + return; + + if (case_block->switch_case == NULL) { + struct vtn_case *c = ralloc(b, struct vtn_case); + + list_inithead(&c->body); + c->start_block = case_block; + c->fallthrough = NULL; + nir_array_init(&c->values, b); + c->is_default = false; + c->visited = false; + + list_addtail(&c->link, &swtch->cases); + + case_block->switch_case = c; + } + + if (is_default) { + case_block->switch_case->is_default = true; + } else { + nir_array_add(&case_block->switch_case->values, uint32_t, val); + } +} + +/* This function performs a depth-first search of the cases and puts them + * in fall-through order. + */ +static void +vtn_order_case(struct vtn_switch *swtch, struct vtn_case *cse) +{ + if (cse->visited) + return; + + cse->visited = true; + + list_del(&cse->link); + + if (cse->fallthrough) { + vtn_order_case(swtch, cse->fallthrough); + + /* If we have a fall-through, place this case right before the case it + * falls through to. This ensures that fallthroughs come one after + * the other. These two can never get separated because that would + * imply something else falling through to the same case. Also, this + * can't break ordering because the DFS ensures that this case is + * visited before anything that falls through to it. + */ + list_addtail(&cse->link, &cse->fallthrough->link); + } else { + list_add(&cse->link, &swtch->cases); + } +} + +static enum vtn_branch_type +vtn_get_branch_type(struct vtn_block *block, + struct vtn_case *swcase, struct vtn_block *switch_break, + struct vtn_block *loop_break, struct vtn_block *loop_cont) +{ + if (block->switch_case) { + /* This branch is actually a fallthrough */ + assert(swcase->fallthrough == NULL || + swcase->fallthrough == block->switch_case); + swcase->fallthrough = block->switch_case; + return vtn_branch_type_switch_fallthrough; + } else if (block == switch_break) { + return vtn_branch_type_switch_break; + } else if (block == loop_break) { + return vtn_branch_type_loop_break; + } else if (block == loop_cont) { + return vtn_branch_type_loop_continue; + } else { + return vtn_branch_type_none; + } +} + +static void +vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, + struct vtn_block *start, struct vtn_case *switch_case, + struct vtn_block *switch_break, + struct vtn_block *loop_break, struct vtn_block *loop_cont, + struct vtn_block *end) +{ + struct vtn_block *block = start; + while (block != end) { + if (block->merge && (*block->merge & SpvOpCodeMask) == SpvOpLoopMerge && + !block->loop) { + struct vtn_loop *loop = ralloc(b, struct vtn_loop); + + loop->node.type = vtn_cf_node_type_loop; + list_inithead(&loop->body); + list_inithead(&loop->cont_body); + loop->control = block->merge[3]; + + list_addtail(&loop->node.link, cf_list); + block->loop = loop; + + struct vtn_block *new_loop_break = + vtn_value(b, block->merge[1], vtn_value_type_block)->block; + struct vtn_block *new_loop_cont = + vtn_value(b, block->merge[2], vtn_value_type_block)->block; + + /* Note: This recursive call will start with the current block as + * its start block. If we weren't careful, we would get here + * again and end up in infinite recursion. This is why we set + * block->loop above and check for it before creating one. This + * way, we only create the loop once and the second call that + * tries to handle this loop goes to the cases below and gets + * handled as a regular block. + * + * Note: When we make the recursive walk calls, we pass NULL for + * the switch break since you have to break out of the loop first. + * We do, however, still pass the current switch case because it's + * possible that the merge block for the loop is the start of + * another case. + */ + vtn_cfg_walk_blocks(b, &loop->body, block, switch_case, NULL, + new_loop_break, new_loop_cont, NULL ); + vtn_cfg_walk_blocks(b, &loop->cont_body, new_loop_cont, NULL, NULL, + new_loop_break, NULL, block); + + block = new_loop_break; + continue; + } + + assert(block->node.link.next == NULL); + list_addtail(&block->node.link, cf_list); + + switch (*block->branch & SpvOpCodeMask) { + case SpvOpBranch: { + struct vtn_block *branch_block = + vtn_value(b, block->branch[1], vtn_value_type_block)->block; + + block->branch_type = vtn_get_branch_type(branch_block, + switch_case, switch_break, + loop_break, loop_cont); + + if (block->branch_type != vtn_branch_type_none) + return; + + block = branch_block; + continue; + } + + case SpvOpReturn: + case SpvOpReturnValue: + block->branch_type = vtn_branch_type_return; + return; + + case SpvOpKill: + block->branch_type = vtn_branch_type_discard; + return; + + case SpvOpBranchConditional: { + struct vtn_block *then_block = + vtn_value(b, block->branch[2], vtn_value_type_block)->block; + struct vtn_block *else_block = + vtn_value(b, block->branch[3], vtn_value_type_block)->block; + + struct vtn_if *if_stmt = ralloc(b, struct vtn_if); + + if_stmt->node.type = vtn_cf_node_type_if; + if_stmt->condition = block->branch[1]; + list_inithead(&if_stmt->then_body); + list_inithead(&if_stmt->else_body); + + list_addtail(&if_stmt->node.link, cf_list); + + if (block->merge && + (*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge) { + if_stmt->control = block->merge[2]; + } + + if_stmt->then_type = vtn_get_branch_type(then_block, + switch_case, switch_break, + loop_break, loop_cont); + if_stmt->else_type = vtn_get_branch_type(else_block, + switch_case, switch_break, + loop_break, loop_cont); + + if (if_stmt->then_type == vtn_branch_type_none && + if_stmt->else_type == vtn_branch_type_none) { + /* Neither side of the if is something we can short-circuit. */ + assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge); + struct vtn_block *merge_block = + vtn_value(b, block->merge[1], vtn_value_type_block)->block; + + vtn_cfg_walk_blocks(b, &if_stmt->then_body, then_block, + switch_case, switch_break, + loop_break, loop_cont, merge_block); + vtn_cfg_walk_blocks(b, &if_stmt->else_body, else_block, + switch_case, switch_break, + loop_break, loop_cont, merge_block); + + enum vtn_branch_type merge_type = + vtn_get_branch_type(merge_block, switch_case, switch_break, + loop_break, loop_cont); + if (merge_type == vtn_branch_type_none) { + block = merge_block; + continue; + } else { + return; + } + } else if (if_stmt->then_type != vtn_branch_type_none && + if_stmt->else_type != vtn_branch_type_none) { + /* Both sides were short-circuited. We're done here. */ + return; + } else { + /* Exeactly one side of the branch could be short-circuited. + * We set the branch up as a predicated break/continue and we + * continue on with the other side as if it were what comes + * after the if. + */ + if (if_stmt->then_type == vtn_branch_type_none) { + block = then_block; + } else { + block = else_block; + } + continue; + } + unreachable("Should have returned or continued"); + } + + case SpvOpSwitch: { + assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge); + struct vtn_block *break_block = + vtn_value(b, block->merge[1], vtn_value_type_block)->block; + + struct vtn_switch *swtch = ralloc(b, struct vtn_switch); + + swtch->node.type = vtn_cf_node_type_switch; + swtch->selector = block->branch[1]; + list_inithead(&swtch->cases); + + list_addtail(&swtch->node.link, cf_list); + + /* First, we go through and record all of the cases. */ + const uint32_t *branch_end = + block->branch + (block->branch[0] >> SpvWordCountShift); + + vtn_add_case(b, swtch, break_block, block->branch[2], 0, true); + for (const uint32_t *w = block->branch + 3; w < branch_end; w += 2) + vtn_add_case(b, swtch, break_block, w[1], w[0], false); + + /* Now, we go through and walk the blocks. While we walk through + * the blocks, we also gather the much-needed fall-through + * information. + */ + list_for_each_entry(struct vtn_case, cse, &swtch->cases, link) { + assert(cse->start_block != break_block); + vtn_cfg_walk_blocks(b, &cse->body, cse->start_block, cse, + break_block, NULL, loop_cont, NULL); + } + + /* Finally, we walk over all of the cases one more time and put + * them in fall-through order. + */ + for (const uint32_t *w = block->branch + 2; w < branch_end; w += 2) { + struct vtn_block *case_block = + vtn_value(b, *w, vtn_value_type_block)->block; + + if (case_block == break_block) + continue; + + assert(case_block->switch_case); + + vtn_order_case(swtch, case_block->switch_case); + } + + block = break_block; + continue; + } + + case SpvOpUnreachable: + return; + + default: + unreachable("Unhandled opcode"); + } + } +} + +void +vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, const uint32_t *end) +{ + vtn_foreach_instruction(b, words, end, + vtn_cfg_handle_prepass_instruction); + + foreach_list_typed(struct vtn_function, func, node, &b->functions) { + vtn_cfg_walk_blocks(b, &func->body, func->start_block, + NULL, NULL, NULL, NULL, NULL); + } +} + +static bool +vtn_handle_phis_first_pass(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode == SpvOpLabel) + return true; /* Nothing to do */ + + /* If this isn't a phi node, stop. */ + if (opcode != SpvOpPhi) + return false; + + /* For handling phi nodes, we do a poor-man's out-of-ssa on the spot. + * For each phi, we create a variable with the appropreate type and + * do a load from that variable. Then, in a second pass, we add + * stores to that variable to each of the predecessor blocks. + * + * We could do something more intelligent here. However, in order to + * handle loops and things properly, we really need dominance + * information. It would end up basically being the into-SSA + * algorithm all over again. It's easier if we just let + * lower_vars_to_ssa do that for us instead of repeating it here. + */ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + nir_variable *phi_var = + nir_local_variable_create(b->nb.impl, type->type, "phi"); + _mesa_hash_table_insert(b->phi_table, w, phi_var); + + val->ssa = vtn_local_load(b, nir_deref_var_create(b, phi_var)); + + return true; +} + +static bool +vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode != SpvOpPhi) + return true; + + struct hash_entry *phi_entry = _mesa_hash_table_search(b->phi_table, w); + assert(phi_entry); + nir_variable *phi_var = phi_entry->data; + + for (unsigned i = 3; i < count; i += 2) { + struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]); + struct vtn_block *pred = + vtn_value(b, w[i + 1], vtn_value_type_block)->block; + + b->nb.cursor = nir_after_block_before_jump(pred->end_block); + + vtn_local_store(b, src, nir_deref_var_create(b, phi_var)); + } + + return true; +} + +static void +vtn_emit_branch(struct vtn_builder *b, enum vtn_branch_type branch_type, + nir_variable *switch_fall_var, bool *has_switch_break) +{ + switch (branch_type) { + case vtn_branch_type_switch_break: + nir_store_var(&b->nb, switch_fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1); + *has_switch_break = true; + break; + case vtn_branch_type_switch_fallthrough: + break; /* Nothing to do */ + case vtn_branch_type_loop_break: + nir_jump(&b->nb, nir_jump_break); + break; + case vtn_branch_type_loop_continue: + nir_jump(&b->nb, nir_jump_continue); + break; + case vtn_branch_type_return: + nir_jump(&b->nb, nir_jump_return); + break; + case vtn_branch_type_discard: { + nir_intrinsic_instr *discard = + nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_discard); + nir_builder_instr_insert(&b->nb, &discard->instr); + break; + } + default: + unreachable("Invalid branch type"); + } +} + +static void +vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list, + nir_variable *switch_fall_var, bool *has_switch_break, + vtn_instruction_handler handler) +{ + list_for_each_entry(struct vtn_cf_node, node, cf_list, link) { + switch (node->type) { + case vtn_cf_node_type_block: { + struct vtn_block *block = (struct vtn_block *)node; + + const uint32_t *block_start = block->label; + const uint32_t *block_end = block->merge ? block->merge : + block->branch; + + block_start = vtn_foreach_instruction(b, block_start, block_end, + vtn_handle_phis_first_pass); + + vtn_foreach_instruction(b, block_start, block_end, handler); + + block->end_block = nir_cursor_current_block(b->nb.cursor); + + if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) { + struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]); + vtn_local_store(b, src, + nir_deref_var_create(b, b->impl->return_var)); + } + + if (block->branch_type != vtn_branch_type_none) { + vtn_emit_branch(b, block->branch_type, + switch_fall_var, has_switch_break); + } + + break; + } + + case vtn_cf_node_type_if: { + struct vtn_if *vtn_if = (struct vtn_if *)node; + + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = + nir_src_for_ssa(vtn_ssa_value(b, vtn_if->condition)->def); + nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node); + + bool sw_break = false; + + b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); + if (vtn_if->then_type == vtn_branch_type_none) { + vtn_emit_cf_list(b, &vtn_if->then_body, + switch_fall_var, &sw_break, handler); + } else { + vtn_emit_branch(b, vtn_if->then_type, switch_fall_var, &sw_break); + } + + b->nb.cursor = nir_after_cf_list(&if_stmt->else_list); + if (vtn_if->else_type == vtn_branch_type_none) { + vtn_emit_cf_list(b, &vtn_if->else_body, + switch_fall_var, &sw_break, handler); + } else { + vtn_emit_branch(b, vtn_if->else_type, switch_fall_var, &sw_break); + } + + b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node); + + /* If we encountered a switch break somewhere inside of the if, + * then it would have been handled correctly by calling + * emit_cf_list or emit_branch for the interrior. However, we + * need to predicate everything following on wether or not we're + * still going. + */ + if (sw_break) { + *has_switch_break = true; + + nir_if *switch_if = nir_if_create(b->shader); + switch_if->condition = + nir_src_for_ssa(nir_load_var(&b->nb, switch_fall_var)); + nir_cf_node_insert(b->nb.cursor, &switch_if->cf_node); + + b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); + } + break; + } + + case vtn_cf_node_type_loop: { + struct vtn_loop *vtn_loop = (struct vtn_loop *)node; + + nir_loop *loop = nir_loop_create(b->shader); + nir_cf_node_insert(b->nb.cursor, &loop->cf_node); + + b->nb.cursor = nir_after_cf_list(&loop->body); + vtn_emit_cf_list(b, &vtn_loop->body, NULL, NULL, handler); + + if (!list_empty(&vtn_loop->cont_body)) { + /* If we have a non-trivial continue body then we need to put + * it at the beginning of the loop with a flag to ensure that + * it doesn't get executed in the first iteration. + */ + nir_variable *do_cont = + nir_local_variable_create(b->nb.impl, glsl_bool_type(), "cont"); + + b->nb.cursor = nir_before_cf_node(&loop->cf_node); + nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_FALSE), 1); + + b->nb.cursor = nir_before_cf_list(&loop->body); + nir_if *cont_if = nir_if_create(b->shader); + cont_if->condition = nir_src_for_ssa(nir_load_var(&b->nb, do_cont)); + nir_cf_node_insert(b->nb.cursor, &cont_if->cf_node); + + b->nb.cursor = nir_after_cf_list(&cont_if->then_list); + vtn_emit_cf_list(b, &vtn_loop->cont_body, NULL, NULL, handler); + + b->nb.cursor = nir_after_cf_node(&cont_if->cf_node); + nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_TRUE), 1); + + b->has_loop_continue = true; + } + + b->nb.cursor = nir_after_cf_node(&loop->cf_node); + break; + } + + case vtn_cf_node_type_switch: { + struct vtn_switch *vtn_switch = (struct vtn_switch *)node; + + /* First, we create a variable to keep track of whether or not the + * switch is still going at any given point. Any switch breaks + * will set this variable to false. + */ + nir_variable *fall_var = + nir_local_variable_create(b->nb.impl, glsl_bool_type(), "fall"); + nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1); + + /* Next, we gather up all of the conditions. We have to do this + * up-front because we also need to build an "any" condition so + * that we can use !any for default. + */ + const int num_cases = list_length(&vtn_switch->cases); + NIR_VLA(nir_ssa_def *, conditions, num_cases); + + nir_ssa_def *sel = vtn_ssa_value(b, vtn_switch->selector)->def; + /* An accumulation of all conditions. Used for the default */ + nir_ssa_def *any = NULL; + + int i = 0; + list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) { + if (cse->is_default) { + conditions[i++] = NULL; + continue; + } + + nir_ssa_def *cond = NULL; + nir_array_foreach(&cse->values, uint32_t, val) { + nir_ssa_def *is_val = + nir_ieq(&b->nb, sel, nir_imm_int(&b->nb, *val)); + + cond = cond ? nir_ior(&b->nb, cond, is_val) : is_val; + } + + any = any ? nir_ior(&b->nb, any, cond) : cond; + conditions[i++] = cond; + } + assert(i == num_cases); + + /* Now we can walk the list of cases and actually emit code */ + i = 0; + list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) { + /* Figure out the condition */ + nir_ssa_def *cond = conditions[i++]; + if (cse->is_default) { + assert(cond == NULL); + cond = nir_inot(&b->nb, any); + } + /* Take fallthrough into account */ + cond = nir_ior(&b->nb, cond, nir_load_var(&b->nb, fall_var)); + + nir_if *case_if = nir_if_create(b->nb.shader); + case_if->condition = nir_src_for_ssa(cond); + nir_cf_node_insert(b->nb.cursor, &case_if->cf_node); + + bool has_break = false; + b->nb.cursor = nir_after_cf_list(&case_if->then_list); + nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_TRUE), 1); + vtn_emit_cf_list(b, &cse->body, fall_var, &has_break, handler); + (void)has_break; /* We don't care */ + + b->nb.cursor = nir_after_cf_node(&case_if->cf_node); + } + assert(i == num_cases); + + break; + } + + default: + unreachable("Invalid CF node type"); + } + } +} + +void +vtn_function_emit(struct vtn_builder *b, struct vtn_function *func, + vtn_instruction_handler instruction_handler) +{ + nir_builder_init(&b->nb, func->impl); + b->nb.cursor = nir_after_cf_list(&func->impl->body); + b->has_loop_continue = false; + b->phi_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + vtn_emit_cf_list(b, &func->body, NULL, NULL, instruction_handler); + + vtn_foreach_instruction(b, func->start_block->label, func->end, + vtn_handle_phi_second_pass); + + /* Continue blocks for loops get inserted before the body of the loop + * but instructions in the continue may use SSA defs in the loop body. + * Therefore, we need to repair SSA to insert the needed phi nodes. + */ + if (b->has_loop_continue) + nir_repair_ssa_impl(func->impl); +} diff --git a/src/compiler/nir/spirv/vtn_glsl450.c b/src/compiler/nir/spirv/vtn_glsl450.c new file mode 100644 index 00000000000..bc38aa4b1be --- /dev/null +++ b/src/compiler/nir/spirv/vtn_glsl450.c @@ -0,0 +1,684 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand ([email protected]) + * + */ + +#include "vtn_private.h" +#include "GLSL.std.450.h" + +#define M_PIf ((float) M_PI) +#define M_PI_2f ((float) M_PI_2) +#define M_PI_4f ((float) M_PI_4) + +static nir_ssa_def * +build_mat2_det(nir_builder *b, nir_ssa_def *col[2]) +{ + unsigned swiz[4] = {1, 0, 0, 0}; + nir_ssa_def *p = nir_fmul(b, col[0], nir_swizzle(b, col[1], swiz, 2, true)); + return nir_fsub(b, nir_channel(b, p, 0), nir_channel(b, p, 1)); +} + +static nir_ssa_def * +build_mat3_det(nir_builder *b, nir_ssa_def *col[3]) +{ + unsigned yzx[4] = {1, 2, 0, 0}; + unsigned zxy[4] = {2, 0, 1, 0}; + + nir_ssa_def *prod0 = + nir_fmul(b, col[0], + nir_fmul(b, nir_swizzle(b, col[1], yzx, 3, true), + nir_swizzle(b, col[2], zxy, 3, true))); + nir_ssa_def *prod1 = + nir_fmul(b, col[0], + nir_fmul(b, nir_swizzle(b, col[1], zxy, 3, true), + nir_swizzle(b, col[2], yzx, 3, true))); + + nir_ssa_def *diff = nir_fsub(b, prod0, prod1); + + return nir_fadd(b, nir_channel(b, diff, 0), + nir_fadd(b, nir_channel(b, diff, 1), + nir_channel(b, diff, 2))); +} + +static nir_ssa_def * +build_mat4_det(nir_builder *b, nir_ssa_def **col) +{ + nir_ssa_def *subdet[4]; + for (unsigned i = 0; i < 4; i++) { + unsigned swiz[3]; + for (unsigned j = 0, k = 0; j < 3; j++, k++) { + if (k == i) + k++; /* skip column */ + swiz[j] = k; + } + + nir_ssa_def *subcol[3]; + subcol[0] = nir_swizzle(b, col[1], swiz, 3, true); + subcol[1] = nir_swizzle(b, col[2], swiz, 3, true); + subcol[2] = nir_swizzle(b, col[3], swiz, 3, true); + + subdet[i] = build_mat3_det(b, subcol); + } + + nir_ssa_def *prod = nir_fmul(b, col[0], nir_vec(b, subdet, 4)); + + return nir_fadd(b, nir_fsub(b, nir_channel(b, prod, 0), + nir_channel(b, prod, 1)), + nir_fsub(b, nir_channel(b, prod, 2), + nir_channel(b, prod, 3))); +} + +static nir_ssa_def * +build_mat_det(struct vtn_builder *b, struct vtn_ssa_value *src) +{ + unsigned size = glsl_get_vector_elements(src->type); + + nir_ssa_def *cols[4]; + for (unsigned i = 0; i < size; i++) + cols[i] = src->elems[i]->def; + + switch(size) { + case 2: return build_mat2_det(&b->nb, cols); + case 3: return build_mat3_det(&b->nb, cols); + case 4: return build_mat4_det(&b->nb, cols); + default: + unreachable("Invalid matrix size"); + } +} + +/* Computes the determinate of the submatrix given by taking src and + * removing the specified row and column. + */ +static nir_ssa_def * +build_mat_subdet(struct nir_builder *b, struct vtn_ssa_value *src, + unsigned size, unsigned row, unsigned col) +{ + assert(row < size && col < size); + if (size == 2) { + return nir_channel(b, src->elems[1 - col]->def, 1 - row); + } else { + /* Swizzle to get all but the specified row */ + unsigned swiz[3]; + for (unsigned j = 0; j < 4; j++) + swiz[j - (j > row)] = j; + + /* Grab all but the specified column */ + nir_ssa_def *subcol[3]; + for (unsigned j = 0; j < size; j++) { + if (j != col) { + subcol[j - (j > col)] = nir_swizzle(b, src->elems[j]->def, + swiz, size - 1, true); + } + } + + if (size == 3) { + return build_mat2_det(b, subcol); + } else { + assert(size == 4); + return build_mat3_det(b, subcol); + } + } +} + +static struct vtn_ssa_value * +matrix_inverse(struct vtn_builder *b, struct vtn_ssa_value *src) +{ + nir_ssa_def *adj_col[4]; + unsigned size = glsl_get_vector_elements(src->type); + + /* Build up an adjugate matrix */ + for (unsigned c = 0; c < size; c++) { + nir_ssa_def *elem[4]; + for (unsigned r = 0; r < size; r++) { + elem[r] = build_mat_subdet(&b->nb, src, size, c, r); + + if ((r + c) % 2) + elem[r] = nir_fneg(&b->nb, elem[r]); + } + + adj_col[c] = nir_vec(&b->nb, elem, size); + } + + nir_ssa_def *det_inv = nir_frcp(&b->nb, build_mat_det(b, src)); + + struct vtn_ssa_value *val = vtn_create_ssa_value(b, src->type); + for (unsigned i = 0; i < size; i++) + val->elems[i]->def = nir_fmul(&b->nb, adj_col[i], det_inv); + + return val; +} + +static nir_ssa_def* +build_length(nir_builder *b, nir_ssa_def *vec) +{ + switch (vec->num_components) { + case 1: return nir_fsqrt(b, nir_fmul(b, vec, vec)); + case 2: return nir_fsqrt(b, nir_fdot2(b, vec, vec)); + case 3: return nir_fsqrt(b, nir_fdot3(b, vec, vec)); + case 4: return nir_fsqrt(b, nir_fdot4(b, vec, vec)); + default: + unreachable("Invalid number of components"); + } +} + +static inline nir_ssa_def * +build_fclamp(nir_builder *b, + nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val) +{ + return nir_fmin(b, nir_fmax(b, x, min_val), max_val); +} + +/** + * Return e^x. + */ +static nir_ssa_def * +build_exp(nir_builder *b, nir_ssa_def *x) +{ + return nir_fexp2(b, nir_fmul(b, x, nir_imm_float(b, M_LOG2E))); +} + +/** + * Return ln(x) - the natural logarithm of x. + */ +static nir_ssa_def * +build_log(nir_builder *b, nir_ssa_def *x) +{ + return nir_fmul(b, nir_flog2(b, x), nir_imm_float(b, 1.0 / M_LOG2E)); +} + +static nir_ssa_def * +build_asin(nir_builder *b, nir_ssa_def *x) +{ + /* + * asin(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi / 4 - 1 + |x| * (0.086566724 + |x| * -0.03102955))) + */ + nir_ssa_def *abs_x = nir_fabs(b, x); + return nir_fmul(b, nir_fsign(b, x), + nir_fsub(b, nir_imm_float(b, M_PI_2f), + nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)), + nir_fadd(b, nir_imm_float(b, M_PI_2f), + nir_fmul(b, abs_x, + nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f), + nir_fmul(b, abs_x, + nir_fadd(b, nir_imm_float(b, 0.086566724f), + nir_fmul(b, abs_x, + nir_imm_float(b, -0.03102955f)))))))))); +} + +static nir_ssa_def * +build_acos(nir_builder *b, nir_ssa_def *x) +{ + /* + * poly(x) = sign(x) * sqrt(1 - |x|) * (pi / 2 + |x| * (pi / 4 - 1 + |x| * (0.08132463 + |x| * -0.02363318))) + */ + nir_ssa_def *abs_x = nir_fabs(b, x); + nir_ssa_def *poly = nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)), + nir_fadd(b, nir_imm_float(b, M_PI_2f), + nir_fmul(b, abs_x, + nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f), + nir_fmul(b, abs_x, + nir_fadd(b, nir_imm_float(b, 0.08132463f), + nir_fmul(b, abs_x, + nir_imm_float(b, -0.02363318f)))))))); + return nir_bcsel(b, nir_flt(b, x, nir_imm_float(b, 0)), + nir_fsub(b, nir_imm_float(b, M_PI), poly), + poly); +} + +/** + * Compute xs[0] + xs[1] + xs[2] + ... using fadd. + */ +static nir_ssa_def * +build_fsum(nir_builder *b, nir_ssa_def **xs, int terms) +{ + nir_ssa_def *accum = xs[0]; + + for (int i = 1; i < terms; i++) + accum = nir_fadd(b, accum, xs[i]); + + return accum; +} + +static nir_ssa_def * +build_atan(nir_builder *b, nir_ssa_def *y_over_x) +{ + nir_ssa_def *abs_y_over_x = nir_fabs(b, y_over_x); + nir_ssa_def *one = nir_imm_float(b, 1.0f); + + /* + * range-reduction, first step: + * + * / y_over_x if |y_over_x| <= 1.0; + * x = < + * \ 1.0 / y_over_x otherwise + */ + nir_ssa_def *x = nir_fdiv(b, nir_fmin(b, abs_y_over_x, one), + nir_fmax(b, abs_y_over_x, one)); + + /* + * approximate atan by evaluating polynomial: + * + * x * 0.9999793128310355 - x^3 * 0.3326756418091246 + + * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 + + * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444 + */ + nir_ssa_def *x_2 = nir_fmul(b, x, x); + nir_ssa_def *x_3 = nir_fmul(b, x_2, x); + nir_ssa_def *x_5 = nir_fmul(b, x_3, x_2); + nir_ssa_def *x_7 = nir_fmul(b, x_5, x_2); + nir_ssa_def *x_9 = nir_fmul(b, x_7, x_2); + nir_ssa_def *x_11 = nir_fmul(b, x_9, x_2); + + nir_ssa_def *polynomial_terms[] = { + nir_fmul(b, x, nir_imm_float(b, 0.9999793128310355f)), + nir_fmul(b, x_3, nir_imm_float(b, -0.3326756418091246f)), + nir_fmul(b, x_5, nir_imm_float(b, 0.1938924977115610f)), + nir_fmul(b, x_7, nir_imm_float(b, -0.1173503194786851f)), + nir_fmul(b, x_9, nir_imm_float(b, 0.0536813784310406f)), + nir_fmul(b, x_11, nir_imm_float(b, -0.0121323213173444f)), + }; + + nir_ssa_def *tmp = + build_fsum(b, polynomial_terms, ARRAY_SIZE(polynomial_terms)); + + /* range-reduction fixup */ + tmp = nir_fadd(b, tmp, + nir_fmul(b, + nir_b2f(b, nir_flt(b, one, abs_y_over_x)), + nir_fadd(b, nir_fmul(b, tmp, + nir_imm_float(b, -2.0f)), + nir_imm_float(b, M_PI_2f)))); + + /* sign fixup */ + return nir_fmul(b, tmp, nir_fsign(b, y_over_x)); +} + +static nir_ssa_def * +build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x) +{ + nir_ssa_def *zero = nir_imm_float(b, 0.0f); + + /* If |x| >= 1.0e-8 * |y|: */ + nir_ssa_def *condition = + nir_fge(b, nir_fabs(b, x), + nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y))); + + /* Then...call atan(y/x) and fix it up: */ + nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x)); + nir_ssa_def *r_then = + nir_bcsel(b, nir_flt(b, x, zero), + nir_fadd(b, atan1, + nir_bcsel(b, nir_fge(b, y, zero), + nir_imm_float(b, M_PIf), + nir_imm_float(b, -M_PIf))), + atan1); + + /* Else... */ + nir_ssa_def *r_else = + nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f)); + + return nir_bcsel(b, condition, r_then, r_else); +} + +static nir_ssa_def * +build_frexp(nir_builder *b, nir_ssa_def *x, nir_ssa_def **exponent) +{ + nir_ssa_def *abs_x = nir_fabs(b, x); + nir_ssa_def *zero = nir_imm_float(b, 0.0f); + + /* Single-precision floating-point values are stored as + * 1 sign bit; + * 8 exponent bits; + * 23 mantissa bits. + * + * An exponent shift of 23 will shift the mantissa out, leaving only the + * exponent and sign bit (which itself may be zero, if the absolute value + * was taken before the bitcast and shift. + */ + nir_ssa_def *exponent_shift = nir_imm_int(b, 23); + nir_ssa_def *exponent_bias = nir_imm_int(b, -126); + + nir_ssa_def *sign_mantissa_mask = nir_imm_int(b, 0x807fffffu); + + /* Exponent of floating-point values in the range [0.5, 1.0). */ + nir_ssa_def *exponent_value = nir_imm_int(b, 0x3f000000u); + + nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero); + + *exponent = + nir_iadd(b, nir_ushr(b, abs_x, exponent_shift), + nir_bcsel(b, is_not_zero, exponent_bias, zero)); + + return nir_ior(b, nir_iand(b, x, sign_mantissa_mask), + nir_bcsel(b, is_not_zero, exponent_value, zero)); +} + +static void +handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, + const uint32_t *w, unsigned count) +{ + struct nir_builder *nb = &b->nb; + const struct glsl_type *dest_type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = vtn_create_ssa_value(b, dest_type); + + /* Collect the various SSA sources */ + unsigned num_inputs = count - 5; + nir_ssa_def *src[3]; + for (unsigned i = 0; i < num_inputs; i++) + src[i] = vtn_ssa_value(b, w[i + 5])->def; + + nir_op op; + switch (entrypoint) { + case GLSLstd450Round: op = nir_op_fround_even; break; /* TODO */ + case GLSLstd450RoundEven: op = nir_op_fround_even; break; + case GLSLstd450Trunc: op = nir_op_ftrunc; break; + case GLSLstd450FAbs: op = nir_op_fabs; break; + case GLSLstd450SAbs: op = nir_op_iabs; break; + case GLSLstd450FSign: op = nir_op_fsign; break; + case GLSLstd450SSign: op = nir_op_isign; break; + case GLSLstd450Floor: op = nir_op_ffloor; break; + case GLSLstd450Ceil: op = nir_op_fceil; break; + case GLSLstd450Fract: op = nir_op_ffract; break; + case GLSLstd450Radians: + val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 0.01745329251)); + return; + case GLSLstd450Degrees: + val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 57.2957795131)); + return; + case GLSLstd450Sin: op = nir_op_fsin; break; + case GLSLstd450Cos: op = nir_op_fcos; break; + case GLSLstd450Tan: + val->ssa->def = nir_fdiv(nb, nir_fsin(nb, src[0]), + nir_fcos(nb, src[0])); + return; + case GLSLstd450Pow: op = nir_op_fpow; break; + case GLSLstd450Exp2: op = nir_op_fexp2; break; + case GLSLstd450Log2: op = nir_op_flog2; break; + case GLSLstd450Sqrt: op = nir_op_fsqrt; break; + case GLSLstd450InverseSqrt: op = nir_op_frsq; break; + + case GLSLstd450Modf: { + nir_ssa_def *sign = nir_fsign(nb, src[0]); + nir_ssa_def *abs = nir_fabs(nb, src[0]); + val->ssa->def = nir_fmul(nb, sign, nir_ffract(nb, abs)); + nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), + nir_fmul(nb, sign, nir_ffloor(nb, abs)), 0xf); + return; + } + + case GLSLstd450ModfStruct: { + nir_ssa_def *sign = nir_fsign(nb, src[0]); + nir_ssa_def *abs = nir_fabs(nb, src[0]); + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_fmul(nb, sign, nir_ffract(nb, abs)); + val->ssa->elems[1]->def = nir_fmul(nb, sign, nir_ffloor(nb, abs)); + return; + } + + case GLSLstd450FMin: op = nir_op_fmin; break; + case GLSLstd450UMin: op = nir_op_umin; break; + case GLSLstd450SMin: op = nir_op_imin; break; + case GLSLstd450FMax: op = nir_op_fmax; break; + case GLSLstd450UMax: op = nir_op_umax; break; + case GLSLstd450SMax: op = nir_op_imax; break; + case GLSLstd450FMix: op = nir_op_flrp; break; + case GLSLstd450Step: + val->ssa->def = nir_sge(nb, src[1], src[0]); + return; + + case GLSLstd450Fma: op = nir_op_ffma; break; + case GLSLstd450Ldexp: op = nir_op_ldexp; break; + + /* Packing/Unpacking functions */ + case GLSLstd450PackSnorm4x8: op = nir_op_pack_snorm_4x8; break; + case GLSLstd450PackUnorm4x8: op = nir_op_pack_unorm_4x8; break; + case GLSLstd450PackSnorm2x16: op = nir_op_pack_snorm_2x16; break; + case GLSLstd450PackUnorm2x16: op = nir_op_pack_unorm_2x16; break; + case GLSLstd450PackHalf2x16: op = nir_op_pack_half_2x16; break; + case GLSLstd450UnpackSnorm4x8: op = nir_op_unpack_snorm_4x8; break; + case GLSLstd450UnpackUnorm4x8: op = nir_op_unpack_unorm_4x8; break; + case GLSLstd450UnpackSnorm2x16: op = nir_op_unpack_snorm_2x16; break; + case GLSLstd450UnpackUnorm2x16: op = nir_op_unpack_unorm_2x16; break; + case GLSLstd450UnpackHalf2x16: op = nir_op_unpack_half_2x16; break; + + case GLSLstd450Length: + val->ssa->def = build_length(nb, src[0]); + return; + case GLSLstd450Distance: + val->ssa->def = build_length(nb, nir_fsub(nb, src[0], src[1])); + return; + case GLSLstd450Normalize: + val->ssa->def = nir_fdiv(nb, src[0], build_length(nb, src[0])); + return; + + case GLSLstd450Exp: + val->ssa->def = build_exp(nb, src[0]); + return; + + case GLSLstd450Log: + val->ssa->def = build_log(nb, src[0]); + return; + + case GLSLstd450FClamp: + val->ssa->def = build_fclamp(nb, src[0], src[1], src[2]); + return; + case GLSLstd450UClamp: + val->ssa->def = nir_umin(nb, nir_umax(nb, src[0], src[1]), src[2]); + return; + case GLSLstd450SClamp: + val->ssa->def = nir_imin(nb, nir_imax(nb, src[0], src[1]), src[2]); + return; + + case GLSLstd450Cross: { + unsigned yzx[4] = { 1, 2, 0, 0 }; + unsigned zxy[4] = { 2, 0, 1, 0 }; + val->ssa->def = + nir_fsub(nb, nir_fmul(nb, nir_swizzle(nb, src[0], yzx, 3, true), + nir_swizzle(nb, src[1], zxy, 3, true)), + nir_fmul(nb, nir_swizzle(nb, src[0], zxy, 3, true), + nir_swizzle(nb, src[1], yzx, 3, true))); + return; + } + + case GLSLstd450SmoothStep: { + /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */ + nir_ssa_def *t = + build_fclamp(nb, nir_fdiv(nb, nir_fsub(nb, src[2], src[0]), + nir_fsub(nb, src[1], src[0])), + nir_imm_float(nb, 0.0), nir_imm_float(nb, 1.0)); + /* result = t * t * (3 - 2 * t) */ + val->ssa->def = + nir_fmul(nb, t, nir_fmul(nb, t, + nir_fsub(nb, nir_imm_float(nb, 3.0), + nir_fmul(nb, nir_imm_float(nb, 2.0), t)))); + return; + } + + case GLSLstd450FaceForward: + val->ssa->def = + nir_bcsel(nb, nir_flt(nb, nir_fdot(nb, src[2], src[1]), + nir_imm_float(nb, 0.0)), + src[0], nir_fneg(nb, src[0])); + return; + + case GLSLstd450Reflect: + /* I - 2 * dot(N, I) * N */ + val->ssa->def = + nir_fsub(nb, src[0], nir_fmul(nb, nir_imm_float(nb, 2.0), + nir_fmul(nb, nir_fdot(nb, src[0], src[1]), + src[1]))); + return; + + case GLSLstd450Refract: { + nir_ssa_def *I = src[0]; + nir_ssa_def *N = src[1]; + nir_ssa_def *eta = src[2]; + nir_ssa_def *n_dot_i = nir_fdot(nb, N, I); + nir_ssa_def *one = nir_imm_float(nb, 1.0); + nir_ssa_def *zero = nir_imm_float(nb, 0.0); + /* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */ + nir_ssa_def *k = + nir_fsub(nb, one, nir_fmul(nb, eta, nir_fmul(nb, eta, + nir_fsub(nb, one, nir_fmul(nb, n_dot_i, n_dot_i))))); + nir_ssa_def *result = + nir_fsub(nb, nir_fmul(nb, eta, I), + nir_fmul(nb, nir_fadd(nb, nir_fmul(nb, eta, n_dot_i), + nir_fsqrt(nb, k)), N)); + /* XXX: bcsel, or if statement? */ + val->ssa->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result); + return; + } + + case GLSLstd450Sinh: + /* 0.5 * (e^x - e^(-x)) */ + val->ssa->def = + nir_fmul(nb, nir_imm_float(nb, 0.5f), + nir_fsub(nb, build_exp(nb, src[0]), + build_exp(nb, nir_fneg(nb, src[0])))); + return; + + case GLSLstd450Cosh: + /* 0.5 * (e^x + e^(-x)) */ + val->ssa->def = + nir_fmul(nb, nir_imm_float(nb, 0.5f), + nir_fadd(nb, build_exp(nb, src[0]), + build_exp(nb, nir_fneg(nb, src[0])))); + return; + + case GLSLstd450Tanh: + /* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */ + val->ssa->def = + nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f), + nir_fsub(nb, build_exp(nb, src[0]), + build_exp(nb, nir_fneg(nb, src[0])))), + nir_fmul(nb, nir_imm_float(nb, 0.5f), + nir_fadd(nb, build_exp(nb, src[0]), + build_exp(nb, nir_fneg(nb, src[0]))))); + return; + + case GLSLstd450Asinh: + val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]), + build_log(nb, nir_fadd(nb, nir_fabs(nb, src[0]), + nir_fsqrt(nb, nir_fadd(nb, nir_fmul(nb, src[0], src[0]), + nir_imm_float(nb, 1.0f)))))); + return; + case GLSLstd450Acosh: + val->ssa->def = build_log(nb, nir_fadd(nb, src[0], + nir_fsqrt(nb, nir_fsub(nb, nir_fmul(nb, src[0], src[0]), + nir_imm_float(nb, 1.0f))))); + return; + case GLSLstd450Atanh: { + nir_ssa_def *one = nir_imm_float(nb, 1.0); + val->ssa->def = nir_fmul(nb, nir_imm_float(nb, 0.5f), + build_log(nb, nir_fdiv(nb, nir_fadd(nb, one, src[0]), + nir_fsub(nb, one, src[0])))); + return; + } + + case GLSLstd450FindILsb: op = nir_op_find_lsb; break; + case GLSLstd450FindSMsb: op = nir_op_ifind_msb; break; + case GLSLstd450FindUMsb: op = nir_op_ufind_msb; break; + + case GLSLstd450Asin: + val->ssa->def = build_asin(nb, src[0]); + return; + + case GLSLstd450Acos: + val->ssa->def = build_acos(nb, src[0]); + return; + + case GLSLstd450Atan: + val->ssa->def = build_atan(nb, src[0]); + return; + + case GLSLstd450Atan2: + val->ssa->def = build_atan2(nb, src[0], src[1]); + return; + + case GLSLstd450Frexp: { + nir_ssa_def *exponent; + val->ssa->def = build_frexp(nb, src[0], &exponent); + nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), exponent, 0xf); + return; + } + + case GLSLstd450FrexpStruct: { + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = build_frexp(nb, src[0], + &val->ssa->elems[1]->def); + return; + } + + case GLSLstd450PackDouble2x32: + case GLSLstd450UnpackDouble2x32: + default: + unreachable("Unhandled opcode"); + } + + nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, + glsl_get_vector_elements(val->ssa->type), val->name); + instr->dest.write_mask = (1 << instr->dest.dest.ssa.num_components) - 1; + val->ssa->def = &instr->dest.dest.ssa; + + for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) + instr->src[i].src = nir_src_for_ssa(src[i]); + + nir_builder_instr_insert(nb, &instr->instr); +} + +bool +vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, + const uint32_t *w, unsigned count) +{ + switch ((enum GLSLstd450)ext_opcode) { + case GLSLstd450Determinant: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = rzalloc(b, struct vtn_ssa_value); + val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa->def = build_mat_det(b, vtn_ssa_value(b, w[5])); + break; + } + + case GLSLstd450MatrixInverse: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = matrix_inverse(b, vtn_ssa_value(b, w[5])); + break; + } + + case GLSLstd450InterpolateAtCentroid: + case GLSLstd450InterpolateAtSample: + case GLSLstd450InterpolateAtOffset: + unreachable("Unhandled opcode"); + + default: + handle_glsl450_alu(b, (enum GLSLstd450)ext_opcode, w, count); + } + + return true; +} diff --git a/src/compiler/nir/spirv/vtn_private.h b/src/compiler/nir/spirv/vtn_private.h new file mode 100644 index 00000000000..3840d8c4b65 --- /dev/null +++ b/src/compiler/nir/spirv/vtn_private.h @@ -0,0 +1,484 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand ([email protected]) + * + */ + +#include "nir/nir.h" +#include "nir/nir_builder.h" +#include "nir/nir_array.h" +#include "nir_spirv.h" +#include "spirv.h" + +struct vtn_builder; +struct vtn_decoration; + +enum vtn_value_type { + vtn_value_type_invalid = 0, + vtn_value_type_undef, + vtn_value_type_string, + vtn_value_type_decoration_group, + vtn_value_type_type, + vtn_value_type_constant, + vtn_value_type_access_chain, + vtn_value_type_function, + vtn_value_type_block, + vtn_value_type_ssa, + vtn_value_type_extension, + vtn_value_type_image_pointer, + vtn_value_type_sampled_image, +}; + +enum vtn_branch_type { + vtn_branch_type_none, + vtn_branch_type_switch_break, + vtn_branch_type_switch_fallthrough, + vtn_branch_type_loop_break, + vtn_branch_type_loop_continue, + vtn_branch_type_discard, + vtn_branch_type_return, +}; + +enum vtn_cf_node_type { + vtn_cf_node_type_block, + vtn_cf_node_type_if, + vtn_cf_node_type_loop, + vtn_cf_node_type_switch, +}; + +struct vtn_cf_node { + struct list_head link; + enum vtn_cf_node_type type; +}; + +struct vtn_loop { + struct vtn_cf_node node; + + /* The main body of the loop */ + struct list_head body; + + /* The "continue" part of the loop. This gets executed after the body + * and is where you go when you hit a continue. + */ + struct list_head cont_body; + + SpvLoopControlMask control; +}; + +struct vtn_if { + struct vtn_cf_node node; + + uint32_t condition; + + enum vtn_branch_type then_type; + struct list_head then_body; + + enum vtn_branch_type else_type; + struct list_head else_body; + + SpvSelectionControlMask control; +}; + +struct vtn_case { + struct list_head link; + + struct list_head body; + + /* The block that starts this case */ + struct vtn_block *start_block; + + /* The fallthrough case, if any */ + struct vtn_case *fallthrough; + + /* The uint32_t values that map to this case */ + nir_array values; + + /* True if this is the default case */ + bool is_default; + + /* Initialized to false; used when sorting the list of cases */ + bool visited; +}; + +struct vtn_switch { + struct vtn_cf_node node; + + uint32_t selector; + + struct list_head cases; +}; + +struct vtn_block { + struct vtn_cf_node node; + + /** A pointer to the label instruction */ + const uint32_t *label; + + /** A pointer to the merge instruction (or NULL if non exists) */ + const uint32_t *merge; + + /** A pointer to the branch instruction that ends this block */ + const uint32_t *branch; + + enum vtn_branch_type branch_type; + + /** Points to the loop that this block starts (if it starts a loop) */ + struct vtn_loop *loop; + + /** Points to the switch case started by this block (if any) */ + struct vtn_case *switch_case; + + /** The last block in this SPIR-V block. */ + nir_block *end_block; +}; + +struct vtn_function { + struct exec_node node; + + nir_function_impl *impl; + struct vtn_block *start_block; + + struct list_head body; + + const uint32_t *end; + + SpvFunctionControlMask control; +}; + +typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t, + const uint32_t *, unsigned); + +void vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, + const uint32_t *end); +void vtn_function_emit(struct vtn_builder *b, struct vtn_function *func, + vtn_instruction_handler instruction_handler); + +const uint32_t * +vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, + const uint32_t *end, vtn_instruction_handler handler); + +struct vtn_ssa_value { + union { + nir_ssa_def *def; + struct vtn_ssa_value **elems; + }; + + /* For matrices, if this is non-NULL, then this value is actually the + * transpose of some other value. The value that `transposed` points to + * always dominates this value. + */ + struct vtn_ssa_value *transposed; + + const struct glsl_type *type; +}; + +struct vtn_type { + const struct glsl_type *type; + + /* The value that declares this type. Used for finding decorations */ + struct vtn_value *val; + + /* for matrices, whether the matrix is stored row-major */ + bool row_major; + + /* for structs, the offset of each member */ + unsigned *offsets; + + /* for structs, whether it was decorated as a "non-SSBO-like" block */ + bool block; + + /* for structs, whether it was decorated as an "SSBO-like" block */ + bool buffer_block; + + /* for structs with block == true, whether this is a builtin block (i.e. a + * block that contains only builtins). + */ + bool builtin_block; + + /* Image format for image_load_store type images */ + unsigned image_format; + + /* Access qualifier for storage images */ + SpvAccessQualifier access_qualifier; + + /* for arrays and matrices, the array stride */ + unsigned stride; + + /* for arrays, the vtn_type for the elements of the array */ + struct vtn_type *array_element; + + /* for structures, the vtn_type for each member */ + struct vtn_type **members; + + /* Whether this type, or a parent type, has been decorated as a builtin */ + bool is_builtin; + + SpvBuiltIn builtin; +}; + +struct vtn_variable; + +enum vtn_access_mode { + vtn_access_mode_id, + vtn_access_mode_literal, +}; + +struct vtn_access_link { + enum vtn_access_mode mode; + uint32_t id; +}; + +struct vtn_access_chain { + struct vtn_variable *var; + + uint32_t length; + + /* Struct elements and array offsets */ + struct vtn_access_link link[0]; +}; + +enum vtn_variable_mode { + vtn_variable_mode_local, + vtn_variable_mode_global, + vtn_variable_mode_param, + vtn_variable_mode_ubo, + vtn_variable_mode_ssbo, + vtn_variable_mode_push_constant, + vtn_variable_mode_image, + vtn_variable_mode_sampler, + vtn_variable_mode_workgroup, + vtn_variable_mode_input, + vtn_variable_mode_output, +}; + +struct vtn_variable { + enum vtn_variable_mode mode; + + struct vtn_type *type; + + unsigned descriptor_set; + unsigned binding; + + nir_variable *var; + nir_variable **members; + + struct vtn_access_chain chain; +}; + +struct vtn_image_pointer { + struct vtn_access_chain *image; + nir_ssa_def *coord; + nir_ssa_def *sample; +}; + +struct vtn_sampled_image { + struct vtn_access_chain *image; /* Image or array of images */ + struct vtn_access_chain *sampler; /* Sampler */ +}; + +struct vtn_value { + enum vtn_value_type value_type; + const char *name; + struct vtn_decoration *decoration; + union { + void *ptr; + char *str; + struct vtn_type *type; + struct { + nir_constant *constant; + const struct glsl_type *const_type; + }; + struct vtn_access_chain *access_chain; + struct vtn_image_pointer *image; + struct vtn_sampled_image *sampled_image; + struct vtn_function *func; + struct vtn_block *block; + struct vtn_ssa_value *ssa; + vtn_instruction_handler ext_handler; + }; +}; + +#define VTN_DEC_DECORATION -1 +#define VTN_DEC_EXECUTION_MODE -2 +#define VTN_DEC_STRUCT_MEMBER0 0 + +struct vtn_decoration { + struct vtn_decoration *next; + + /* Specifies how to apply this decoration. Negative values represent a + * decoration or execution mode. (See the VTN_DEC_ #defines above.) + * Non-negative values specify that it applies to a structure member. + */ + int scope; + + const uint32_t *literals; + struct vtn_value *group; + + union { + SpvDecoration decoration; + SpvExecutionMode exec_mode; + }; +}; + +struct vtn_builder { + nir_builder nb; + + nir_shader *shader; + nir_function_impl *impl; + struct vtn_block *block; + + /* Current file, line, and column. Useful for debugging. Set + * automatically by vtn_foreach_instruction. + */ + char *file; + int line, col; + + /* + * In SPIR-V, constants are global, whereas in NIR, the load_const + * instruction we use is per-function. So while we parse each function, we + * keep a hash table of constants we've resolved to nir_ssa_value's so + * far, and we lazily resolve them when we see them used in a function. + */ + struct hash_table *const_table; + + /* + * Map from phi instructions (pointer to the start of the instruction) + * to the variable corresponding to it. + */ + struct hash_table *phi_table; + + unsigned num_specializations; + struct nir_spirv_specialization *specializations; + + unsigned value_id_bound; + struct vtn_value *values; + + gl_shader_stage entry_point_stage; + const char *entry_point_name; + struct vtn_value *entry_point; + bool origin_upper_left; + + struct vtn_function *func; + struct exec_list functions; + + /* Current function parameter index */ + unsigned func_param_idx; + + bool has_loop_continue; +}; + +static inline struct vtn_value * +vtn_push_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + assert(value_id < b->value_id_bound); + assert(b->values[value_id].value_type == vtn_value_type_invalid); + + b->values[value_id].value_type = value_type; + + return &b->values[value_id]; +} + +static inline struct vtn_value * +vtn_untyped_value(struct vtn_builder *b, uint32_t value_id) +{ + assert(value_id < b->value_id_bound); + return &b->values[value_id]; +} + +static inline struct vtn_value * +vtn_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + struct vtn_value *val = vtn_untyped_value(b, value_id); + assert(val->value_type == value_type); + return val; +} + +struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); + +struct vtn_ssa_value *vtn_create_ssa_value(struct vtn_builder *b, + const struct glsl_type *type); + +struct vtn_ssa_value *vtn_ssa_transpose(struct vtn_builder *b, + struct vtn_ssa_value *src); + +nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, + unsigned index); +nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *index); +nir_ssa_def *vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *insert, unsigned index); +nir_ssa_def *vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *insert, nir_ssa_def *index); + +nir_deref_var *vtn_nir_deref(struct vtn_builder *b, uint32_t id); + +nir_deref_var *vtn_access_chain_to_deref(struct vtn_builder *b, + struct vtn_access_chain *chain); +nir_ssa_def * +vtn_access_chain_to_offset(struct vtn_builder *b, + struct vtn_access_chain *chain, + nir_ssa_def **index_out, struct vtn_type **type_out, + unsigned *end_idx_out, bool stop_at_matrix); + +struct vtn_ssa_value *vtn_local_load(struct vtn_builder *b, nir_deref_var *src); + +void vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest); + +struct vtn_ssa_value * +vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src); + +void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_access_chain *dest); + +void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count); + + +typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, + struct vtn_value *, + int member, + const struct vtn_decoration *, + void *); + +void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data); + +typedef void (*vtn_execution_mode_foreach_cb)(struct vtn_builder *, + struct vtn_value *, + const struct vtn_decoration *, + void *); + +void vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value, + vtn_execution_mode_foreach_cb cb, void *data); + +nir_op vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap); + +void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count); + +bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, + const uint32_t *words, unsigned count); diff --git a/src/compiler/nir/spirv/vtn_variables.c b/src/compiler/nir/spirv/vtn_variables.c new file mode 100644 index 00000000000..3ad98aa5310 --- /dev/null +++ b/src/compiler/nir/spirv/vtn_variables.c @@ -0,0 +1,1412 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand ([email protected]) + * + */ + +#include "vtn_private.h" + +static struct vtn_access_chain * +vtn_access_chain_extend(struct vtn_builder *b, struct vtn_access_chain *old, + unsigned new_ids) +{ + struct vtn_access_chain *chain; + + unsigned new_len = old->length + new_ids; + chain = ralloc_size(b, sizeof(*chain) + new_len * sizeof(chain->link[0])); + + chain->var = old->var; + chain->length = new_len; + + for (unsigned i = 0; i < old->length; i++) + chain->link[i] = old->link[i]; + + return chain; +} + +static nir_ssa_def * +vtn_access_link_as_ssa(struct vtn_builder *b, struct vtn_access_link link, + unsigned stride) +{ + assert(stride > 0); + if (link.mode == vtn_access_mode_literal) { + return nir_imm_int(&b->nb, link.id * stride); + } else if (stride == 1) { + return vtn_ssa_value(b, link.id)->def; + } else { + return nir_imul(&b->nb, vtn_ssa_value(b, link.id)->def, + nir_imm_int(&b->nb, stride)); + } +} + +static struct vtn_type * +vtn_access_chain_tail_type(struct vtn_builder *b, + struct vtn_access_chain *chain) +{ + struct vtn_type *type = chain->var->type; + for (unsigned i = 0; i < chain->length; i++) { + if (glsl_type_is_struct(type->type)) { + assert(chain->link[i].mode == vtn_access_mode_literal); + type = type->members[chain->link[i].id]; + } else { + type = type->array_element; + } + } + return type; +} + +/* Crawls a chain of array derefs and rewrites the types so that the + * lengths stay the same but the terminal type is the one given by + * tail_type. This is useful for split structures. + */ +static void +rewrite_deref_types(nir_deref *deref, const struct glsl_type *type) +{ + deref->type = type; + if (deref->child) { + assert(deref->child->deref_type == nir_deref_type_array); + assert(glsl_type_is_array(deref->type)); + rewrite_deref_types(deref->child, glsl_get_array_element(type)); + } +} + +nir_deref_var * +vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain) +{ + nir_deref_var *deref_var; + if (chain->var->var) { + deref_var = nir_deref_var_create(b, chain->var->var); + } else { + assert(chain->var->members); + /* Create the deref_var manually. It will get filled out later. */ + deref_var = rzalloc(b, nir_deref_var); + deref_var->deref.deref_type = nir_deref_type_var; + } + + struct vtn_type *deref_type = chain->var->type; + nir_deref *tail = &deref_var->deref; + nir_variable **members = chain->var->members; + + for (unsigned i = 0; i < chain->length; i++) { + enum glsl_base_type base_type = glsl_get_base_type(deref_type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_ARRAY: { + deref_type = deref_type->array_element; + + nir_deref_array *deref_arr = nir_deref_array_create(b); + deref_arr->deref.type = deref_type->type; + + if (chain->link[i].mode == vtn_access_mode_literal) { + deref_arr->deref_array_type = nir_deref_array_type_direct; + deref_arr->base_offset = chain->link[i].id; + } else { + assert(chain->link[i].mode == vtn_access_mode_id); + deref_arr->deref_array_type = nir_deref_array_type_indirect; + deref_arr->base_offset = 0; + deref_arr->indirect = + nir_src_for_ssa(vtn_ssa_value(b, chain->link[i].id)->def); + } + tail->child = &deref_arr->deref; + tail = tail->child; + break; + } + + case GLSL_TYPE_STRUCT: { + assert(chain->link[i].mode == vtn_access_mode_literal); + unsigned idx = chain->link[i].id; + deref_type = deref_type->members[idx]; + if (members) { + /* This is a pre-split structure. */ + deref_var->var = members[idx]; + rewrite_deref_types(&deref_var->deref, members[idx]->type); + assert(tail->type == deref_type->type); + members = NULL; + } else { + nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); + deref_struct->deref.type = deref_type->type; + tail->child = &deref_struct->deref; + tail = tail->child; + } + break; + } + default: + unreachable("Invalid type for deref"); + } + } + + assert(members == NULL); + return deref_var; +} + +static void +_vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref, + nir_deref *tail, struct vtn_ssa_value *inout) +{ + /* The deref tail may contain a deref to select a component of a vector (in + * other words, it might not be an actual tail) so we have to save it away + * here since we overwrite it later. + */ + nir_deref *old_child = tail->child; + + if (glsl_type_is_vector_or_scalar(tail->type)) { + /* Terminate the deref chain in case there is one more link to pick + * off a component of the vector. + */ + tail->child = NULL; + + nir_intrinsic_op op = load ? nir_intrinsic_load_var : + nir_intrinsic_store_var; + + nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); + intrin->variables[0] = + nir_deref_as_var(nir_copy_deref(intrin, &deref->deref)); + intrin->num_components = glsl_get_vector_elements(tail->type); + + if (load) { + nir_ssa_dest_init(&intrin->instr, &intrin->dest, + intrin->num_components, NULL); + inout->def = &intrin->dest.ssa; + } else { + intrin->const_index[0] = (1 << intrin->num_components) - 1; + intrin->src[0] = nir_src_for_ssa(inout->def); + } + + nir_builder_instr_insert(&b->nb, &intrin->instr); + } else if (glsl_get_base_type(tail->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(tail->type)) { + unsigned elems = glsl_get_length(tail->type); + nir_deref_array *deref_arr = nir_deref_array_create(b); + deref_arr->deref_array_type = nir_deref_array_type_direct; + deref_arr->deref.type = glsl_get_array_element(tail->type); + tail->child = &deref_arr->deref; + for (unsigned i = 0; i < elems; i++) { + deref_arr->base_offset = i; + _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]); + } + } else { + assert(glsl_get_base_type(tail->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(tail->type); + nir_deref_struct *deref_struct = nir_deref_struct_create(b, 0); + tail->child = &deref_struct->deref; + for (unsigned i = 0; i < elems; i++) { + deref_struct->index = i; + deref_struct->deref.type = glsl_get_struct_field(tail->type, i); + _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]); + } + } + + tail->child = old_child; +} + +nir_deref_var * +vtn_nir_deref(struct vtn_builder *b, uint32_t id) +{ + struct vtn_access_chain *chain = + vtn_value(b, id, vtn_value_type_access_chain)->access_chain; + + return vtn_access_chain_to_deref(b, chain); +} + +/* + * Gets the NIR-level deref tail, which may have as a child an array deref + * selecting which component due to OpAccessChain supporting per-component + * indexing in SPIR-V. + */ +static nir_deref * +get_deref_tail(nir_deref_var *deref) +{ + nir_deref *cur = &deref->deref; + while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child) + cur = cur->child; + + return cur; +} + +struct vtn_ssa_value * +vtn_local_load(struct vtn_builder *b, nir_deref_var *src) +{ + nir_deref *src_tail = get_deref_tail(src); + struct vtn_ssa_value *val = vtn_create_ssa_value(b, src_tail->type); + _vtn_local_load_store(b, true, src, src_tail, val); + + if (src_tail->child) { + nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); + assert(vec_deref->deref.child == NULL); + val->type = vec_deref->deref.type; + if (vec_deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset); + else + val->def = vtn_vector_extract_dynamic(b, val->def, + vec_deref->indirect.ssa); + } + + return val; +} + +void +vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest) +{ + nir_deref *dest_tail = get_deref_tail(dest); + + if (dest_tail->child) { + struct vtn_ssa_value *val = vtn_create_ssa_value(b, dest_tail->type); + _vtn_local_load_store(b, true, dest, dest_tail, val); + nir_deref_array *deref = nir_deref_as_array(dest_tail->child); + assert(deref->deref.child == NULL); + if (deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_insert(b, val->def, src->def, + deref->base_offset); + else + val->def = vtn_vector_insert_dynamic(b, val->def, src->def, + deref->indirect.ssa); + _vtn_local_load_store(b, false, dest, dest_tail, val); + } else { + _vtn_local_load_store(b, false, dest, dest_tail, src); + } +} + +static nir_ssa_def * +get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain, + struct vtn_type **type, unsigned *chain_idx) +{ + /* Push constants have no explicit binding */ + if (chain->var->mode == vtn_variable_mode_push_constant) { + *chain_idx = 0; + *type = chain->var->type; + return NULL; + } + + nir_ssa_def *array_index; + if (glsl_type_is_array(chain->var->type->type)) { + assert(chain->length > 0); + array_index = vtn_access_link_as_ssa(b, chain->link[0], 1); + *chain_idx = 1; + *type = chain->var->type->array_element; + } else { + array_index = nir_imm_int(&b->nb, 0); + *chain_idx = 0; + *type = chain->var->type; + } + + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(b->nb.shader, + nir_intrinsic_vulkan_resource_index); + instr->src[0] = nir_src_for_ssa(array_index); + instr->const_index[0] = chain->var->descriptor_set; + instr->const_index[1] = chain->var->binding; + + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + nir_builder_instr_insert(&b->nb, &instr->instr); + + return &instr->dest.ssa; +} + +nir_ssa_def * +vtn_access_chain_to_offset(struct vtn_builder *b, + struct vtn_access_chain *chain, + nir_ssa_def **index_out, struct vtn_type **type_out, + unsigned *end_idx_out, bool stop_at_matrix) +{ + unsigned idx = 0; + struct vtn_type *type; + *index_out = get_vulkan_resource_index(b, chain, &type, &idx); + + nir_ssa_def *offset = nir_imm_int(&b->nb, 0); + for (; idx < chain->length; idx++) { + enum glsl_base_type base_type = glsl_get_base_type(type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + /* Some users may not want matrix or vector derefs */ + if (stop_at_matrix) + goto end; + /* Fall through */ + + case GLSL_TYPE_ARRAY: + offset = nir_iadd(&b->nb, offset, + vtn_access_link_as_ssa(b, chain->link[idx], + type->stride)); + + type = type->array_element; + break; + + case GLSL_TYPE_STRUCT: { + assert(chain->link[idx].mode == vtn_access_mode_literal); + unsigned member = chain->link[idx].id; + offset = nir_iadd(&b->nb, offset, + nir_imm_int(&b->nb, type->offsets[member])); + type = type->members[member]; + break; + } + + default: + unreachable("Invalid type for deref"); + } + } + +end: + *type_out = type; + if (end_idx_out) + *end_idx_out = idx; + + return offset; +} + +static void +_vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load, + nir_ssa_def *index, nir_ssa_def *offset, + struct vtn_ssa_value **inout, const struct glsl_type *type) +{ + nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, op); + instr->num_components = glsl_get_vector_elements(type); + + int src = 0; + if (!load) { + instr->const_index[0] = (1 << instr->num_components) - 1; /* write mask */ + instr->src[src++] = nir_src_for_ssa((*inout)->def); + } + + /* We set the base and size for push constant load to the entire push + * constant block for now. + */ + if (op == nir_intrinsic_load_push_constant) { + instr->const_index[0] = 0; + instr->const_index[1] = 128; + } + + if (index) + instr->src[src++] = nir_src_for_ssa(index); + + instr->src[src++] = nir_src_for_ssa(offset); + + if (load) { + nir_ssa_dest_init(&instr->instr, &instr->dest, + instr->num_components, NULL); + (*inout)->def = &instr->dest.ssa; + } + + nir_builder_instr_insert(&b->nb, &instr->instr); + + if (load && glsl_get_base_type(type) == GLSL_TYPE_BOOL) + (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0)); +} + +static void +_vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, + nir_ssa_def *index, nir_ssa_def *offset, + struct vtn_access_chain *chain, unsigned chain_idx, + struct vtn_type *type, struct vtn_ssa_value **inout) +{ + if (chain && chain_idx >= chain->length) + chain = NULL; + + if (load && chain == NULL && *inout == NULL) + *inout = vtn_create_ssa_value(b, type->type); + + enum glsl_base_type base_type = glsl_get_base_type(type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + /* This is where things get interesting. At this point, we've hit + * a vector, a scalar, or a matrix. + */ + if (glsl_type_is_matrix(type->type)) { + if (chain == NULL) { + /* Loading the whole matrix */ + struct vtn_ssa_value *transpose; + unsigned num_ops, vec_width; + if (type->row_major) { + num_ops = glsl_get_vector_elements(type->type); + vec_width = glsl_get_matrix_columns(type->type); + if (load) { + const struct glsl_type *transpose_type = + glsl_matrix_type(base_type, vec_width, num_ops); + *inout = vtn_create_ssa_value(b, transpose_type); + } else { + transpose = vtn_ssa_transpose(b, *inout); + inout = &transpose; + } + } else { + num_ops = glsl_get_matrix_columns(type->type); + vec_width = glsl_get_vector_elements(type->type); + } + + for (unsigned i = 0; i < num_ops; i++) { + nir_ssa_def *elem_offset = + nir_iadd(&b->nb, offset, + nir_imm_int(&b->nb, i * type->stride)); + _vtn_load_store_tail(b, op, load, index, elem_offset, + &(*inout)->elems[i], + glsl_vector_type(base_type, vec_width)); + } + + if (load && type->row_major) + *inout = vtn_ssa_transpose(b, *inout); + } else if (type->row_major) { + /* Row-major but with an access chiain. */ + nir_ssa_def *col_offset = + vtn_access_link_as_ssa(b, chain->link[chain_idx], + type->array_element->stride); + offset = nir_iadd(&b->nb, offset, col_offset); + + if (chain_idx + 1 < chain->length) { + /* Picking off a single element */ + nir_ssa_def *row_offset = + vtn_access_link_as_ssa(b, chain->link[chain_idx + 1], + type->stride); + offset = nir_iadd(&b->nb, offset, row_offset); + if (load) + *inout = vtn_create_ssa_value(b, glsl_scalar_type(base_type)); + _vtn_load_store_tail(b, op, load, index, offset, inout, + glsl_scalar_type(base_type)); + } else { + /* Grabbing a column; picking one element off each row */ + unsigned num_comps = glsl_get_vector_elements(type->type); + const struct glsl_type *column_type = + glsl_get_column_type(type->type); + + nir_ssa_def *comps[4]; + for (unsigned i = 0; i < num_comps; i++) { + nir_ssa_def *elem_offset = + nir_iadd(&b->nb, offset, + nir_imm_int(&b->nb, i * type->stride)); + + struct vtn_ssa_value *comp, temp_val; + if (!load) { + temp_val.def = nir_channel(&b->nb, (*inout)->def, i); + temp_val.type = glsl_scalar_type(base_type); + } + comp = &temp_val; + _vtn_load_store_tail(b, op, load, index, elem_offset, + &comp, glsl_scalar_type(base_type)); + comps[i] = comp->def; + } + + if (load) { + if (*inout == NULL) + *inout = vtn_create_ssa_value(b, column_type); + + (*inout)->def = nir_vec(&b->nb, comps, num_comps); + } + } + } else { + /* Column-major with a deref. Fall through to array case. */ + nir_ssa_def *col_offset = + vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride); + offset = nir_iadd(&b->nb, offset, col_offset); + + _vtn_block_load_store(b, op, load, index, offset, + chain, chain_idx + 1, + type->array_element, inout); + } + } else if (chain == NULL) { + /* Single whole vector */ + assert(glsl_type_is_vector_or_scalar(type->type)); + _vtn_load_store_tail(b, op, load, index, offset, inout, type->type); + } else { + /* Single component of a vector. Fall through to array case. */ + nir_ssa_def *elem_offset = + vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride); + offset = nir_iadd(&b->nb, offset, elem_offset); + + _vtn_block_load_store(b, op, load, index, offset, NULL, 0, + type->array_element, inout); + } + return; + + case GLSL_TYPE_ARRAY: { + unsigned elems = glsl_get_length(type->type); + for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *elem_off = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); + _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0, + type->array_element, &(*inout)->elems[i]); + } + return; + } + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(type->type); + for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *elem_off = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); + _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0, + type->members[i], &(*inout)->elems[i]); + } + return; + } + + default: + unreachable("Invalid block member type"); + } +} + +static struct vtn_ssa_value * +vtn_block_load(struct vtn_builder *b, struct vtn_access_chain *src) +{ + nir_intrinsic_op op; + switch (src->var->mode) { + case vtn_variable_mode_ubo: + op = nir_intrinsic_load_ubo; + break; + case vtn_variable_mode_ssbo: + op = nir_intrinsic_load_ssbo; + break; + case vtn_variable_mode_push_constant: + op = nir_intrinsic_load_push_constant; + break; + default: + assert(!"Invalid block variable mode"); + } + + nir_ssa_def *offset, *index = NULL; + struct vtn_type *type; + unsigned chain_idx; + offset = vtn_access_chain_to_offset(b, src, &index, &type, &chain_idx, true); + + struct vtn_ssa_value *value = NULL; + _vtn_block_load_store(b, op, true, index, offset, + src, chain_idx, type, &value); + return value; +} + +static void +vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_access_chain *dst) +{ + nir_ssa_def *offset, *index = NULL; + struct vtn_type *type; + unsigned chain_idx; + offset = vtn_access_chain_to_offset(b, dst, &index, &type, &chain_idx, true); + + _vtn_block_load_store(b, nir_intrinsic_store_ssbo, false, index, offset, + dst, chain_idx, type, &src); +} + +static bool +vtn_variable_is_external_block(struct vtn_variable *var) +{ + return var->mode == vtn_variable_mode_ssbo || + var->mode == vtn_variable_mode_ubo || + var->mode == vtn_variable_mode_push_constant; +} + +static void +_vtn_variable_load_store(struct vtn_builder *b, bool load, + struct vtn_access_chain *chain, + struct vtn_type *tail_type, + struct vtn_ssa_value **inout) +{ + enum glsl_base_type base_type = glsl_get_base_type(tail_type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + /* At this point, we have a scalar, vector, or matrix so we know that + * there cannot be any structure splitting still in the way. By + * stopping at the matrix level rather than the vector level, we + * ensure that matrices get loaded in the optimal way even if they + * are storred row-major in a UBO. + */ + if (load) { + *inout = vtn_local_load(b, vtn_access_chain_to_deref(b, chain)); + } else { + vtn_local_store(b, *inout, vtn_access_chain_to_deref(b, chain)); + } + return; + + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_STRUCT: { + struct vtn_access_chain *new_chain = + vtn_access_chain_extend(b, chain, 1); + new_chain->link[chain->length].mode = vtn_access_mode_literal; + unsigned elems = glsl_get_length(tail_type->type); + if (load) { + assert(*inout == NULL); + *inout = rzalloc(b, struct vtn_ssa_value); + (*inout)->type = tail_type->type; + (*inout)->elems = rzalloc_array(b, struct vtn_ssa_value *, elems); + } + for (unsigned i = 0; i < elems; i++) { + new_chain->link[chain->length].id = i; + struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ? + tail_type->array_element : tail_type->members[i]; + _vtn_variable_load_store(b, load, new_chain, elem_type, + &(*inout)->elems[i]); + } + return; + } + + default: + unreachable("Invalid access chain type"); + } +} + +struct vtn_ssa_value * +vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src) +{ + if (vtn_variable_is_external_block(src->var)) { + return vtn_block_load(b, src); + } else { + struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src); + struct vtn_ssa_value *val = NULL; + _vtn_variable_load_store(b, true, src, tail_type, &val); + return val; + } +} + +void +vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_access_chain *dest) +{ + if (vtn_variable_is_external_block(dest->var)) { + assert(dest->var->mode == vtn_variable_mode_ssbo); + vtn_block_store(b, src, dest); + } else { + struct vtn_type *tail_type = vtn_access_chain_tail_type(b, dest); + _vtn_variable_load_store(b, false, dest, tail_type, &src); + } +} + +static void +_vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest, + struct vtn_access_chain *src, struct vtn_type *tail_type) +{ + enum glsl_base_type base_type = glsl_get_base_type(tail_type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + /* At this point, we have a scalar, vector, or matrix so we know that + * there cannot be any structure splitting still in the way. By + * stopping at the matrix level rather than the vector level, we + * ensure that matrices get loaded in the optimal way even if they + * are storred row-major in a UBO. + */ + vtn_variable_store(b, vtn_variable_load(b, src), dest); + return; + + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_STRUCT: { + struct vtn_access_chain *new_src, *new_dest; + new_src = vtn_access_chain_extend(b, src, 1); + new_dest = vtn_access_chain_extend(b, dest, 1); + new_src->link[src->length].mode = vtn_access_mode_literal; + new_dest->link[dest->length].mode = vtn_access_mode_literal; + unsigned elems = glsl_get_length(tail_type->type); + for (unsigned i = 0; i < elems; i++) { + new_src->link[src->length].id = i; + new_dest->link[dest->length].id = i; + struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ? + tail_type->array_element : tail_type->members[i]; + _vtn_variable_copy(b, new_dest, new_src, elem_type); + } + return; + } + + default: + unreachable("Invalid access chain type"); + } +} + +static void +vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest, + struct vtn_access_chain *src) +{ + struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src); + assert(vtn_access_chain_tail_type(b, dest)->type == tail_type->type); + + /* TODO: At some point, we should add a special-case for when we can + * just emit a copy_var intrinsic. + */ + _vtn_variable_copy(b, dest, src, tail_type); +} + +static void +set_mode_system_value(nir_variable_mode *mode) +{ + assert(*mode == nir_var_system_value || *mode == nir_var_shader_in); + *mode = nir_var_system_value; +} + +static void +vtn_get_builtin_location(struct vtn_builder *b, + SpvBuiltIn builtin, int *location, + nir_variable_mode *mode) +{ + switch (builtin) { + case SpvBuiltInPosition: + *location = VARYING_SLOT_POS; + break; + case SpvBuiltInPointSize: + *location = VARYING_SLOT_PSIZ; + break; + case SpvBuiltInClipDistance: + *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? */ + break; + case SpvBuiltInCullDistance: + /* XXX figure this out */ + break; + case SpvBuiltInVertexIndex: + *location = SYSTEM_VALUE_VERTEX_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInVertexId: + /* Vulkan defines VertexID to be zero-based and reserves the new + * builtin keyword VertexIndex to indicate the non-zero-based value. + */ + *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; + set_mode_system_value(mode); + break; + case SpvBuiltInInstanceIndex: + *location = SYSTEM_VALUE_INSTANCE_INDEX; + set_mode_system_value(mode); + break; + case SpvBuiltInInstanceId: + *location = SYSTEM_VALUE_INSTANCE_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInPrimitiveId: + *location = VARYING_SLOT_PRIMITIVE_ID; + *mode = nir_var_shader_out; + break; + case SpvBuiltInInvocationId: + *location = SYSTEM_VALUE_INVOCATION_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInLayer: + *location = VARYING_SLOT_LAYER; + *mode = nir_var_shader_out; + break; + case SpvBuiltInViewportIndex: + *location = VARYING_SLOT_VIEWPORT; + if (b->shader->stage == MESA_SHADER_GEOMETRY) + *mode = nir_var_shader_out; + else if (b->shader->stage == MESA_SHADER_FRAGMENT) + *mode = nir_var_shader_in; + else + unreachable("invalid stage for SpvBuiltInViewportIndex"); + break; + case SpvBuiltInTessLevelOuter: + case SpvBuiltInTessLevelInner: + case SpvBuiltInTessCoord: + case SpvBuiltInPatchVertices: + unreachable("no tessellation support"); + case SpvBuiltInFragCoord: + *location = VARYING_SLOT_POS; + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInPointCoord: + *location = VARYING_SLOT_PNTC; + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInFrontFacing: + *location = VARYING_SLOT_FACE; + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInSampleId: + *location = SYSTEM_VALUE_SAMPLE_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInSamplePosition: + *location = SYSTEM_VALUE_SAMPLE_POS; + set_mode_system_value(mode); + break; + case SpvBuiltInSampleMask: + *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? */ + set_mode_system_value(mode); + break; + case SpvBuiltInFragDepth: + *location = FRAG_RESULT_DEPTH; + assert(*mode == nir_var_shader_out); + break; + case SpvBuiltInNumWorkgroups: + *location = SYSTEM_VALUE_NUM_WORK_GROUPS; + set_mode_system_value(mode); + break; + case SpvBuiltInWorkgroupSize: + /* This should already be handled */ + unreachable("unsupported builtin"); + break; + case SpvBuiltInWorkgroupId: + *location = SYSTEM_VALUE_WORK_GROUP_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInLocalInvocationId: + *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInLocalInvocationIndex: + *location = SYSTEM_VALUE_LOCAL_INVOCATION_INDEX; + set_mode_system_value(mode); + break; + case SpvBuiltInGlobalInvocationId: + *location = SYSTEM_VALUE_GLOBAL_INVOCATION_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInHelperInvocation: + default: + unreachable("unsupported builtin"); + } +} + +static void +var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_var) +{ + struct vtn_variable *vtn_var = void_var; + + /* Handle decorations that apply to a vtn_variable as a whole */ + switch (dec->decoration) { + case SpvDecorationBinding: + vtn_var->binding = dec->literals[0]; + return; + case SpvDecorationDescriptorSet: + vtn_var->descriptor_set = dec->literals[0]; + return; + + case SpvDecorationLocation: { + unsigned location = dec->literals[0]; + bool is_vertex_input; + if (b->shader->stage == MESA_SHADER_FRAGMENT && + vtn_var->mode == vtn_variable_mode_output) { + is_vertex_input = false; + location += FRAG_RESULT_DATA0; + } else if (b->shader->stage == MESA_SHADER_VERTEX && + vtn_var->mode == vtn_variable_mode_input) { + is_vertex_input = true; + location += VERT_ATTRIB_GENERIC0; + } else if (vtn_var->mode == vtn_variable_mode_input || + vtn_var->mode == vtn_variable_mode_output) { + is_vertex_input = false; + location += VARYING_SLOT_VAR0; + } else { + assert(!"Location must be on input or output variable"); + } + + if (vtn_var->var) { + vtn_var->var->data.location = location; + vtn_var->var->data.explicit_location = true; + } else { + assert(vtn_var->members); + unsigned length = glsl_get_length(vtn_var->type->type); + for (unsigned i = 0; i < length; i++) { + vtn_var->members[i]->data.location = location; + vtn_var->members[i]->data.explicit_location = true; + location += + glsl_count_attribute_slots(vtn_var->members[i]->interface_type, + is_vertex_input); + } + } + return; + } + + default: + break; + } + + /* Now we handle decorations that apply to a particular nir_variable */ + nir_variable *nir_var = vtn_var->var; + if (val->value_type == vtn_value_type_access_chain) { + assert(val->access_chain->length == 0); + assert(val->access_chain->var == void_var); + assert(member == -1); + } else { + assert(val->value_type == vtn_value_type_type); + if (member != -1) + nir_var = vtn_var->members[member]; + } + + if (nir_var == NULL) + return; + + switch (dec->decoration) { + case SpvDecorationRelaxedPrecision: + break; /* FIXME: Do nothing with this for now. */ + case SpvDecorationNoPerspective: + nir_var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case SpvDecorationFlat: + nir_var->data.interpolation = INTERP_QUALIFIER_FLAT; + break; + case SpvDecorationCentroid: + nir_var->data.centroid = true; + break; + case SpvDecorationSample: + nir_var->data.sample = true; + break; + case SpvDecorationInvariant: + nir_var->data.invariant = true; + break; + case SpvDecorationConstant: + assert(nir_var->constant_initializer != NULL); + nir_var->data.read_only = true; + break; + case SpvDecorationNonWritable: + nir_var->data.read_only = true; + break; + case SpvDecorationComponent: + nir_var->data.location_frac = dec->literals[0]; + break; + case SpvDecorationIndex: + nir_var->data.explicit_index = true; + nir_var->data.index = dec->literals[0]; + break; + case SpvDecorationBuiltIn: { + SpvBuiltIn builtin = dec->literals[0]; + + if (builtin == SpvBuiltInWorkgroupSize) { + /* This shouldn't be a builtin. It's actually a constant. */ + nir_var->data.mode = nir_var_global; + nir_var->data.read_only = true; + + nir_constant *c = rzalloc(nir_var, nir_constant); + c->value.u[0] = b->shader->info.cs.local_size[0]; + c->value.u[1] = b->shader->info.cs.local_size[1]; + c->value.u[2] = b->shader->info.cs.local_size[2]; + nir_var->constant_initializer = c; + break; + } + + nir_variable_mode mode = nir_var->data.mode; + vtn_get_builtin_location(b, builtin, &nir_var->data.location, &mode); + nir_var->data.explicit_location = true; + nir_var->data.mode = mode; + + if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) + nir_var->data.origin_upper_left = b->origin_upper_left; + break; + } + case SpvDecorationRowMajor: + case SpvDecorationColMajor: + case SpvDecorationGLSLShared: + case SpvDecorationPatch: + case SpvDecorationRestrict: + case SpvDecorationAliased: + case SpvDecorationVolatile: + case SpvDecorationCoherent: + case SpvDecorationNonReadable: + case SpvDecorationUniform: + /* This is really nice but we have no use for it right now. */ + case SpvDecorationCPacked: + case SpvDecorationSaturatedConversion: + case SpvDecorationStream: + case SpvDecorationOffset: + case SpvDecorationXfbBuffer: + case SpvDecorationFuncParamAttr: + case SpvDecorationFPRoundingMode: + case SpvDecorationFPFastMathMode: + case SpvDecorationLinkageAttributes: + case SpvDecorationSpecId: + break; + default: + unreachable("Unhandled variable decoration"); + } +} + +/* Tries to compute the size of an interface block based on the strides and + * offsets that are provided to us in the SPIR-V source. + */ +static unsigned +vtn_type_block_size(struct vtn_type *type) +{ + enum glsl_base_type base_type = glsl_get_base_type(type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_DOUBLE: { + unsigned cols = type->row_major ? glsl_get_vector_elements(type->type) : + glsl_get_matrix_columns(type->type); + if (cols > 1) { + assert(type->stride > 0); + return type->stride * cols; + } else if (base_type == GLSL_TYPE_DOUBLE) { + return glsl_get_vector_elements(type->type) * 8; + } else { + return glsl_get_vector_elements(type->type) * 4; + } + } + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: { + unsigned size = 0; + unsigned num_fields = glsl_get_length(type->type); + for (unsigned f = 0; f < num_fields; f++) { + unsigned field_end = type->offsets[f] + + vtn_type_block_size(type->members[f]); + size = MAX2(size, field_end); + } + return size; + } + + case GLSL_TYPE_ARRAY: + assert(type->stride > 0); + assert(glsl_get_length(type->type) > 0); + return type->stride * glsl_get_length(type->type); + + default: + assert(!"Invalid block type"); + return 0; + } +} + +void +vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpVariable: { + struct vtn_variable *var = rzalloc(b, struct vtn_variable); + var->type = vtn_value(b, w[1], vtn_value_type_type)->type; + + var->chain.var = var; + var->chain.length = 0; + + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_access_chain); + val->access_chain = &var->chain; + + struct vtn_type *without_array = var->type; + while(glsl_type_is_array(without_array->type)) + without_array = without_array->array_element; + + nir_variable_mode nir_mode; + switch ((SpvStorageClass)w[3]) { + case SpvStorageClassUniform: + case SpvStorageClassUniformConstant: + if (without_array->block) { + var->mode = vtn_variable_mode_ubo; + b->shader->info.num_ubos++; + } else if (without_array->buffer_block) { + var->mode = vtn_variable_mode_ssbo; + b->shader->info.num_ssbos++; + } else if (glsl_type_is_image(without_array->type)) { + var->mode = vtn_variable_mode_image; + nir_mode = nir_var_uniform; + b->shader->info.num_images++; + } else if (glsl_type_is_sampler(without_array->type)) { + var->mode = vtn_variable_mode_sampler; + nir_mode = nir_var_uniform; + b->shader->info.num_textures++; + } else { + assert(!"Invalid uniform variable type"); + } + break; + case SpvStorageClassPushConstant: + var->mode = vtn_variable_mode_push_constant; + assert(b->shader->num_uniforms == 0); + b->shader->num_uniforms = vtn_type_block_size(var->type) * 4; + break; + case SpvStorageClassInput: + var->mode = vtn_variable_mode_input; + nir_mode = nir_var_shader_in; + break; + case SpvStorageClassOutput: + var->mode = vtn_variable_mode_output; + nir_mode = nir_var_shader_out; + break; + case SpvStorageClassPrivate: + var->mode = vtn_variable_mode_global; + nir_mode = nir_var_global; + break; + case SpvStorageClassFunction: + var->mode = vtn_variable_mode_local; + nir_mode = nir_var_local; + break; + case SpvStorageClassWorkgroup: + var->mode = vtn_variable_mode_workgroup; + nir_mode = nir_var_shared; + break; + case SpvStorageClassCrossWorkgroup: + case SpvStorageClassGeneric: + case SpvStorageClassAtomicCounter: + default: + unreachable("Unhandled variable storage class"); + } + + switch (var->mode) { + case vtn_variable_mode_local: + case vtn_variable_mode_global: + case vtn_variable_mode_image: + case vtn_variable_mode_sampler: + case vtn_variable_mode_workgroup: + /* For these, we create the variable normally */ + var->var = rzalloc(b->shader, nir_variable); + var->var->name = ralloc_strdup(var->var, val->name); + var->var->type = var->type->type; + var->var->data.mode = nir_mode; + + switch (var->mode) { + case vtn_variable_mode_image: + case vtn_variable_mode_sampler: + var->var->interface_type = without_array->type; + break; + default: + var->var->interface_type = NULL; + break; + } + break; + + case vtn_variable_mode_input: + case vtn_variable_mode_output: { + /* For inputs and outputs, we immediately split structures. This + * is for a couple of reasons. For one, builtins may all come in + * a struct and we really want those split out into separate + * variables. For another, interpolation qualifiers can be + * applied to members of the top-level struct ane we need to be + * able to preserve that information. + */ + + int array_length = -1; + struct vtn_type *interface_type = var->type; + if (b->shader->stage == MESA_SHADER_GEOMETRY && + glsl_type_is_array(var->type->type)) { + /* In Geometry shaders (and some tessellation), inputs come + * in per-vertex arrays. However, some builtins come in + * non-per-vertex, hence the need for the is_array check. In + * any case, there are no non-builtin arrays allowed so this + * check should be sufficient. + */ + interface_type = var->type->array_element; + array_length = glsl_get_length(var->type->type); + } + + if (glsl_type_is_struct(interface_type->type)) { + /* It's a struct. Split it. */ + unsigned num_members = glsl_get_length(interface_type->type); + var->members = ralloc_array(b, nir_variable *, num_members); + + for (unsigned i = 0; i < num_members; i++) { + const struct glsl_type *mtype = interface_type->members[i]->type; + if (array_length >= 0) + mtype = glsl_array_type(mtype, array_length); + + var->members[i] = rzalloc(b->shader, nir_variable); + var->members[i]->name = + ralloc_asprintf(var->members[i], "%s.%d", val->name, i); + var->members[i]->type = mtype; + var->members[i]->interface_type = + interface_type->members[i]->type; + var->members[i]->data.mode = nir_mode; + } + } else { + var->var = rzalloc(b->shader, nir_variable); + var->var->name = ralloc_strdup(var->var, val->name); + var->var->type = var->type->type; + var->var->interface_type = interface_type->type; + var->var->data.mode = nir_mode; + } + + /* For inputs and outputs, we need to grab locations and builtin + * information from the interface type. + */ + vtn_foreach_decoration(b, interface_type->val, var_decoration_cb, var); + break; + + case vtn_variable_mode_param: + unreachable("Not created through OpVariable"); + } + + case vtn_variable_mode_ubo: + case vtn_variable_mode_ssbo: + case vtn_variable_mode_push_constant: + /* These don't need actual variables. */ + break; + } + + if (count > 4) { + assert(count == 5); + nir_constant *constant = + vtn_value(b, w[4], vtn_value_type_constant)->constant; + var->var->constant_initializer = + nir_constant_clone(constant, var->var); + } + + vtn_foreach_decoration(b, val, var_decoration_cb, var); + + if (var->mode == vtn_variable_mode_image || + var->mode == vtn_variable_mode_sampler) { + /* XXX: We still need the binding information in the nir_variable + * for these. We should fix that. + */ + var->var->data.binding = var->binding; + var->var->data.descriptor_set = var->descriptor_set; + + if (var->mode == vtn_variable_mode_image) + var->var->data.image.format = without_array->image_format; + } + + if (var->mode == vtn_variable_mode_local) { + assert(var->members == NULL && var->var != NULL); + nir_function_impl_add_variable(b->impl, var->var); + } else if (var->var) { + nir_shader_add_variable(b->shader, var->var); + } else if (var->members) { + unsigned count = glsl_get_length(without_array->type); + for (unsigned i = 0; i < count; i++) { + assert(var->members[i]->data.mode != nir_var_local); + nir_shader_add_variable(b->shader, var->members[i]); + } + } else { + assert(var->mode == vtn_variable_mode_ubo || + var->mode == vtn_variable_mode_ssbo || + var->mode == vtn_variable_mode_push_constant); + } + break; + } + + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: { + struct vtn_access_chain *base, *chain; + struct vtn_value *base_val = vtn_untyped_value(b, w[3]); + if (base_val->value_type == vtn_value_type_sampled_image) { + /* This is rather insane. SPIR-V allows you to use OpSampledImage + * to combine an array of images with a single sampler to get an + * array of sampled images that all share the same sampler. + * Fortunately, this means that we can more-or-less ignore the + * sampler when crawling the access chain, but it does leave us + * with this rather awkward little special-case. + */ + base = base_val->sampled_image->image; + } else { + assert(base_val->value_type == vtn_value_type_access_chain); + base = base_val->access_chain; + } + + chain = vtn_access_chain_extend(b, base, count - 4); + + unsigned idx = base->length; + for (int i = 4; i < count; i++) { + struct vtn_value *link_val = vtn_untyped_value(b, w[i]); + if (link_val->value_type == vtn_value_type_constant) { + chain->link[idx].mode = vtn_access_mode_literal; + chain->link[idx].id = link_val->constant->value.u[0]; + } else { + chain->link[idx].mode = vtn_access_mode_id; + chain->link[idx].id = w[i]; + } + idx++; + } + + if (base_val->value_type == vtn_value_type_sampled_image) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_sampled_image); + val->sampled_image = ralloc(b, struct vtn_sampled_image); + val->sampled_image->image = chain; + val->sampled_image->sampler = base_val->sampled_image->sampler; + } else { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_access_chain); + val->access_chain = chain; + } + break; + } + + case SpvOpCopyMemory: { + struct vtn_value *dest = vtn_value(b, w[1], vtn_value_type_access_chain); + struct vtn_value *src = vtn_value(b, w[2], vtn_value_type_access_chain); + + vtn_variable_copy(b, dest->access_chain, src->access_chain); + break; + } + + case SpvOpLoad: { + struct vtn_access_chain *src = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + + if (src->var->mode == vtn_variable_mode_image || + src->var->mode == vtn_variable_mode_sampler) { + vtn_push_value(b, w[2], vtn_value_type_access_chain)->access_chain = src; + return; + } + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = vtn_variable_load(b, src); + break; + } + + case SpvOpStore: { + struct vtn_access_chain *dest = + vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain; + struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]); + vtn_variable_store(b, src, dest); + break; + } + + case SpvOpArrayLength: { + struct vtn_access_chain *chain = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + + const uint32_t offset = chain->var->type->offsets[w[4]]; + const uint32_t stride = chain->var->type->members[w[4]]->stride; + + unsigned chain_idx; + struct vtn_type *type; + nir_ssa_def *index = + get_vulkan_resource_index(b, chain, &type, &chain_idx); + + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(b->nb.shader, + nir_intrinsic_get_buffer_size); + instr->src[0] = nir_src_for_ssa(index); + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + nir_builder_instr_insert(&b->nb, &instr->instr); + nir_ssa_def *buf_size = &instr->dest.ssa; + + /* array_length = max(buffer_size - offset, 0) / stride */ + nir_ssa_def *array_length = + nir_idiv(&b->nb, + nir_imax(&b->nb, + nir_isub(&b->nb, + buf_size, + nir_imm_int(&b->nb, offset)), + nir_imm_int(&b->nb, 0u)), + nir_imm_int(&b->nb, stride)); + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = vtn_create_ssa_value(b, glsl_uint_type()); + val->ssa->def = array_length; + break; + } + + case SpvOpCopyMemorySized: + default: + unreachable("Unhandled opcode"); + } +} diff --git a/src/compiler/nir/spirv2nir.c b/src/compiler/nir/spirv2nir.c new file mode 100644 index 00000000000..c837186bdfc --- /dev/null +++ b/src/compiler/nir/spirv2nir.c @@ -0,0 +1,55 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand ([email protected]) + * + */ + +/* + * A simple executable that opens a SPIR-V shader, converts it to NIR, and + * dumps out the result. This should be useful for testing the + * spirv_to_nir code. + */ + +#include "spirv/nir_spirv.h" + +#include <sys/mman.h> +#include <sys/types.h> +#include <fcntl.h> +#include <unistd.h> + +int main(int argc, char **argv) +{ + int fd = open(argv[1], O_RDONLY); + off_t len = lseek(fd, 0, SEEK_END); + + assert(len % 4 == 0); + size_t word_count = len / 4; + + const void *map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); + assert(map != NULL); + + nir_function *func = spirv_to_nir(map, word_count, NULL, 0, + MESA_SHADER_FRAGMENT, "main", NULL); + nir_print_shader(func->shader, stderr); +} diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp index a87dcd8dc6a..00703fe6f52 100644 --- a/src/compiler/nir_types.cpp +++ b/src/compiler/nir_types.cpp @@ -70,6 +70,18 @@ glsl_get_struct_field(const glsl_type *type, unsigned index) return type->fields.structure[index].type; } +const glsl_type * +glsl_get_function_return_type(const glsl_type *type) +{ + return type->fields.parameters[0].type; +} + +const glsl_function_param * +glsl_get_function_param(const glsl_type *type, unsigned index) +{ + return &type->fields.parameters[index + 1]; +} + const struct glsl_type * glsl_get_column_type(const struct glsl_type *type) { @@ -112,12 +124,33 @@ glsl_get_aoa_size(const struct glsl_type *type) return type->arrays_of_arrays_size(); } +unsigned +glsl_count_attribute_slots(const struct glsl_type *type, + bool vertex_input_slots) +{ + return type->count_attribute_slots(vertex_input_slots); +} + const char * glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index) { return type->fields.structure[index].name; } +glsl_sampler_dim +glsl_get_sampler_dim(const struct glsl_type *type) +{ + assert(glsl_type_is_sampler(type) || glsl_type_is_image(type)); + return (glsl_sampler_dim)type->sampler_dimensionality; +} + +glsl_base_type +glsl_get_sampler_result_type(const struct glsl_type *type) +{ + assert(glsl_type_is_sampler(type) || glsl_type_is_image(type)); + return (glsl_base_type)type->sampler_type; +} + unsigned glsl_get_record_location_offset(const struct glsl_type *type, unsigned length) @@ -132,6 +165,12 @@ glsl_type_is_void(const glsl_type *type) } bool +glsl_type_is_error(const glsl_type *type) +{ + return type->is_error(); +} + +bool glsl_type_is_vector(const struct glsl_type *type) { return type->is_vector(); @@ -155,6 +194,44 @@ glsl_type_is_matrix(const struct glsl_type *type) return type->is_matrix(); } +bool +glsl_type_is_array(const struct glsl_type *type) +{ + return type->is_array(); +} + +bool +glsl_type_is_struct(const struct glsl_type *type) +{ + return type->is_record() || type->is_interface(); +} + +bool +glsl_type_is_sampler(const struct glsl_type *type) +{ + return type->is_sampler(); +} + +bool +glsl_type_is_image(const struct glsl_type *type) +{ + return type->is_image(); +} + +bool +glsl_sampler_type_is_shadow(const struct glsl_type *type) +{ + assert(glsl_type_is_sampler(type)); + return type->sampler_shadow; +} + +bool +glsl_sampler_type_is_array(const struct glsl_type *type) +{ + assert(glsl_type_is_sampler(type) || glsl_type_is_image(type)); + return type->sampler_array; +} + const glsl_type * glsl_void_type(void) { @@ -180,13 +257,80 @@ glsl_vec4_type(void) } const glsl_type * +glsl_int_type(void) +{ + return glsl_type::int_type; +} + +const glsl_type * glsl_uint_type(void) { return glsl_type::uint_type; } const glsl_type * +glsl_bool_type(void) +{ + return glsl_type::bool_type; +} + +const glsl_type * +glsl_scalar_type(enum glsl_base_type base_type) +{ + return glsl_type::get_instance(base_type, 1, 1); +} + +const glsl_type * +glsl_vector_type(enum glsl_base_type base_type, unsigned components) +{ + assert(components > 1 && components <= 4); + return glsl_type::get_instance(base_type, components, 1); +} + +const glsl_type * +glsl_matrix_type(enum glsl_base_type base_type, unsigned rows, unsigned columns) +{ + assert(rows > 1 && rows <= 4 && columns >= 1 && columns <= 4); + return glsl_type::get_instance(base_type, rows, columns); +} + +const glsl_type * glsl_array_type(const glsl_type *base, unsigned elements) { return glsl_type::get_array_instance(base, elements); } + +const glsl_type * +glsl_struct_type(const glsl_struct_field *fields, + unsigned num_fields, const char *name) +{ + return glsl_type::get_record_instance(fields, num_fields, name); +} + +const struct glsl_type * +glsl_sampler_type(enum glsl_sampler_dim dim, bool is_shadow, bool is_array, + enum glsl_base_type base_type) +{ + return glsl_type::get_sampler_instance(dim, is_shadow, is_array, base_type); +} + +const struct glsl_type * +glsl_image_type(enum glsl_sampler_dim dim, bool is_array, + enum glsl_base_type base_type) +{ + return glsl_type::get_image_instance(dim, is_array, base_type); +} + +const glsl_type * +glsl_function_type(const glsl_type *return_type, + const glsl_function_param *params, unsigned num_params) +{ + return glsl_type::get_function_instance(return_type, params, num_params); +} + +const glsl_type * +glsl_transposed_type(const struct glsl_type *type) +{ + return glsl_type::get_instance(type->base_type, type->matrix_columns, + type->vector_elements); +} diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h index 32fc7661159..4ef0dcf9a31 100644 --- a/src/compiler/nir_types.h +++ b/src/compiler/nir_types.h @@ -50,6 +50,12 @@ const struct glsl_type *glsl_get_array_element(const struct glsl_type *type); const struct glsl_type *glsl_get_column_type(const struct glsl_type *type); +const struct glsl_type * +glsl_get_function_return_type(const struct glsl_type *type); + +const struct glsl_function_param * +glsl_get_function_param(const struct glsl_type *type, unsigned index); + enum glsl_base_type glsl_get_base_type(const struct glsl_type *type); unsigned glsl_get_vector_elements(const struct glsl_type *type); @@ -62,25 +68,59 @@ unsigned glsl_get_length(const struct glsl_type *type); unsigned glsl_get_aoa_size(const struct glsl_type *type); +unsigned glsl_count_attribute_slots(const struct glsl_type *type, + bool vertex_input_slots); + const char *glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index); +enum glsl_sampler_dim glsl_get_sampler_dim(const struct glsl_type *type); +enum glsl_base_type glsl_get_sampler_result_type(const struct glsl_type *type); + unsigned glsl_get_record_location_offset(const struct glsl_type *type, unsigned length); bool glsl_type_is_void(const struct glsl_type *type); +bool glsl_type_is_error(const struct glsl_type *type); bool glsl_type_is_vector(const struct glsl_type *type); bool glsl_type_is_scalar(const struct glsl_type *type); bool glsl_type_is_vector_or_scalar(const struct glsl_type *type); bool glsl_type_is_matrix(const struct glsl_type *type); +bool glsl_type_is_array(const struct glsl_type *type); +bool glsl_type_is_struct(const struct glsl_type *type); +bool glsl_type_is_sampler(const struct glsl_type *type); +bool glsl_type_is_image(const struct glsl_type *type); +bool glsl_sampler_type_is_shadow(const struct glsl_type *type); +bool glsl_sampler_type_is_array(const struct glsl_type *type); const struct glsl_type *glsl_void_type(void); const struct glsl_type *glsl_float_type(void); const struct glsl_type *glsl_vec_type(unsigned n); const struct glsl_type *glsl_vec4_type(void); +const struct glsl_type *glsl_int_type(void); const struct glsl_type *glsl_uint_type(void); +const struct glsl_type *glsl_bool_type(void); + +const struct glsl_type *glsl_scalar_type(enum glsl_base_type base_type); +const struct glsl_type *glsl_vector_type(enum glsl_base_type base_type, + unsigned components); +const struct glsl_type *glsl_matrix_type(enum glsl_base_type base_type, + unsigned rows, unsigned columns); const struct glsl_type *glsl_array_type(const struct glsl_type *base, unsigned elements); +const struct glsl_type *glsl_struct_type(const struct glsl_struct_field *fields, + unsigned num_fields, const char *name); +const struct glsl_type *glsl_sampler_type(enum glsl_sampler_dim dim, + bool is_shadow, bool is_array, + enum glsl_base_type base_type); +const struct glsl_type *glsl_image_type(enum glsl_sampler_dim dim, + bool is_array, + enum glsl_base_type base_type); +const struct glsl_type * glsl_function_type(const struct glsl_type *return_type, + const struct glsl_function_param *params, + unsigned num_params); + +const struct glsl_type *glsl_transposed_type(const struct glsl_type *type); #ifdef __cplusplus } diff --git a/src/compiler/shader_enums.c b/src/compiler/shader_enums.c index 942d152b129..ff2f564dc98 100644 --- a/src/compiler/shader_enums.c +++ b/src/compiler/shader_enums.c @@ -201,6 +201,7 @@ gl_system_value_name(gl_system_value sysval) static const char *names[] = { ENUM(SYSTEM_VALUE_VERTEX_ID), ENUM(SYSTEM_VALUE_INSTANCE_ID), + ENUM(SYSTEM_VALUE_INSTANCE_INDEX), ENUM(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE), ENUM(SYSTEM_VALUE_BASE_VERTEX), ENUM(SYSTEM_VALUE_INVOCATION_ID), @@ -214,6 +215,8 @@ gl_system_value_name(gl_system_value sysval) ENUM(SYSTEM_VALUE_TESS_LEVEL_OUTER), ENUM(SYSTEM_VALUE_TESS_LEVEL_INNER), ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_ID), + ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX), + ENUM(SYSTEM_VALUE_GLOBAL_INVOCATION_ID), ENUM(SYSTEM_VALUE_WORK_GROUP_ID), ENUM(SYSTEM_VALUE_NUM_WORK_GROUPS), ENUM(SYSTEM_VALUE_VERTEX_CNT), diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h index efc0b0d515e..e3f46e3d739 100644 --- a/src/compiler/shader_enums.h +++ b/src/compiler/shader_enums.h @@ -379,6 +379,13 @@ typedef enum SYSTEM_VALUE_INSTANCE_ID, /** + * Vulkan InstanceIndex. + * + * InstanceIndex = gl_InstanceID + gl_BaseInstance + */ + SYSTEM_VALUE_INSTANCE_INDEX, + + /** * DirectX-style vertex ID. * * Unlike \c SYSTEM_VALUE_VERTEX_ID, this system value does \b not include @@ -452,6 +459,8 @@ typedef enum */ /*@{*/ SYSTEM_VALUE_LOCAL_INVOCATION_ID, + SYSTEM_VALUE_LOCAL_INVOCATION_INDEX, + SYSTEM_VALUE_GLOBAL_INVOCATION_ID, SYSTEM_VALUE_WORK_GROUP_ID, SYSTEM_VALUE_NUM_WORK_GROUPS, /*@}*/ diff --git a/src/glsl/nir/nir_lower_samplers.cpp b/src/glsl/nir/nir_lower_samplers.cpp new file mode 100644 index 00000000000..438caacfd4d --- /dev/null +++ b/src/glsl/nir/nir_lower_samplers.cpp @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. + * Copyright (C) 2008 VMware, Inc. All Rights Reserved. + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "nir.h" +#include "../program.h" +#include "program/hash_table.h" +#include "ir_uniform.h" + +extern "C" { +#include "main/compiler.h" +#include "main/mtypes.h" +#include "program/prog_parameter.h" +#include "program/program.h" +} + +static void +add_indirect_to_tex(nir_tex_instr *instr, nir_src indirect) +{ + /* First, we have to resize the array of texture sources */ + nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, + instr->num_srcs + 1); + + for (unsigned i = 0; i < instr->num_srcs; i++) { + new_srcs[i].src_type = instr->src[i].src_type; + nir_instr_move_src(&instr->instr, &new_srcs[i].src, &instr->src[i].src); + } + + ralloc_free(instr->src); + instr->src = new_srcs; + + /* Now we can go ahead and move the source over to being a + * first-class texture source. + */ + instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; + instr->num_srcs++; + nir_instr_rewrite_src(&instr->instr, &instr->src[instr->num_srcs - 1].src, + indirect); +} + +static unsigned +get_sampler_index(const struct gl_shader_program *shader_program, + gl_shader_stage stage, const char *name) +{ + unsigned location; + if (!shader_program->UniformHash->get(location, name)) { + assert(!"failed to find sampler"); + return 0; + } + + if (!shader_program->UniformStorage[location].sampler[stage].active) { + assert(!"cannot return a sampler"); + return 0; + } + + return shader_program->UniformStorage[location].sampler[stage].index; +} + +static void +lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program, + gl_shader_stage stage, void *mem_ctx) +{ + if (instr->sampler == NULL) + return; + + /* Get the name and the offset */ + instr->sampler_index = 0; + char *name = ralloc_strdup(mem_ctx, instr->sampler->var->name); + + for (nir_deref *deref = &instr->sampler->deref; + deref->child; deref = deref->child) { + switch (deref->child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *deref_array = nir_deref_as_array(deref->child); + + assert(deref_array->deref_array_type != nir_deref_array_type_wildcard); + + if (deref_array->deref.child) { + ralloc_asprintf_append(&name, "[%u]", + deref_array->deref_array_type == nir_deref_array_type_direct ? + deref_array->base_offset : 0); + } else { + assert(deref->child->type->base_type == GLSL_TYPE_SAMPLER); + instr->sampler_index = deref_array->base_offset; + } + + /* XXX: We're assuming here that the indirect is the last array + * thing we have. This should be ok for now as we don't support + * arrays_of_arrays yet. + */ + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + /* First, we have to resize the array of texture sources */ + nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, + instr->num_srcs + 1); + + for (unsigned i = 0; i < instr->num_srcs; i++) { + new_srcs[i].src_type = instr->src[i].src_type; + nir_instr_move_src(&instr->instr, &new_srcs[i].src, + &instr->src[i].src); + } + + ralloc_free(instr->src); + instr->src = new_srcs; + + /* Now we can go ahead and move the source over to being a + * first-class texture source. + */ + instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; + instr->num_srcs++; + nir_instr_move_src(&instr->instr, + &instr->src[instr->num_srcs - 1].src, + &deref_array->indirect); + + instr->sampler_array_size = glsl_get_length(deref->type); + } + break; + } + + case nir_deref_type_struct: { + nir_deref_struct *deref_struct = nir_deref_as_struct(deref->child); + const char *field = glsl_get_struct_elem_name(deref->type, + deref_struct->index); + ralloc_asprintf_append(&name, ".%s", field); + break; + } + + default: + unreachable("Invalid deref type"); + break; + } + } + + instr->sampler_index += get_sampler_index(shader_program, stage, name); + + instr->sampler = NULL; +} + +typedef struct { + void *mem_ctx; + const struct gl_shader_program *shader_program; + gl_shader_stage stage; +} lower_state; + +static bool +lower_block_cb(nir_block *block, void *_state) +{ + lower_state *state = (lower_state *) _state; + + nir_foreach_instr(block, instr) { + if (instr->type == nir_instr_type_tex) { + nir_tex_instr *tex_instr = nir_instr_as_tex(instr); + lower_sampler(tex_instr, state->shader_program, state->stage, + state->mem_ctx); + } + } + + return true; +} + +static void +lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program, + gl_shader_stage stage) +{ + lower_state state; + + state.mem_ctx = ralloc_parent(impl); + state.shader_program = shader_program; + state.stage = stage; + + nir_foreach_block(impl, lower_block_cb, &state); +} + +extern "C" void +nir_lower_samplers(nir_shader *shader, + const struct gl_shader_program *shader_program) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + lower_impl(overload->impl, shader_program, shader->stage); + } +} + +static bool +lower_samplers_for_vk_block(nir_block *block, void *data) +{ + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_tex) + continue; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + + assert(tex->sampler); + + tex->sampler_set = tex->sampler->var->data.descriptor_set; + tex->sampler_index = tex->sampler->var->data.binding; + + if (tex->sampler->deref.child) { + assert(tex->sampler->deref.child->deref_type == nir_deref_type_array); + nir_deref_array *arr = nir_deref_as_array(tex->sampler->deref.child); + + /* Only one-level arrays are allowed in vulkan */ + assert(arr->deref.child == NULL); + + tex->sampler_index += arr->base_offset; + if (arr->deref_array_type == nir_deref_array_type_indirect) { + add_indirect_to_tex(tex, arr->indirect); + nir_instr_rewrite_src(instr, &arr->indirect, NIR_SRC_INIT); + + tex->sampler_array_size = glsl_get_length(tex->sampler->deref.type); + } + } + + tex->sampler = NULL; + } + + return true; +} + +extern "C" void +nir_lower_samplers_for_vk(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) { + nir_foreach_block(overload->impl, lower_samplers_for_vk_block, NULL); + } + } +} diff --git a/src/isl/.gitignore b/src/isl/.gitignore new file mode 100644 index 00000000000..e9cfd67b94e --- /dev/null +++ b/src/isl/.gitignore @@ -0,0 +1 @@ +/isl_format_layout.c diff --git a/src/isl/Makefile.am b/src/isl/Makefile.am new file mode 100644 index 00000000000..72f5460554f --- /dev/null +++ b/src/isl/Makefile.am @@ -0,0 +1,90 @@ +# Copyright 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +SUBDIRS = . + +noinst_LTLIBRARIES = libisl.la + +EXTRA_DIST = tests + +# The gallium includes are for the util/u_math.h include from main/macros.h +AM_CPPFLAGS = \ + $(INTEL_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $(DEFINES) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ + -I$(top_srcdir)/src/mesa/drivers/dri/common \ + -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_builddir)/src + +libisl_la_CFLAGS = $(CFLAGS) -Wno-override-init + +libisl_la_SOURCES = \ + isl.c \ + isl.h \ + isl_format.c \ + isl_format_layout.c \ + isl_gen4.c \ + isl_gen4.h \ + isl_gen6.c \ + isl_gen6.h \ + isl_gen7.c \ + isl_gen7.h \ + isl_gen8.c \ + isl_gen8.h \ + isl_gen9.c \ + isl_gen9.h \ + isl_image.c \ + $(NULL) + +BUILT_SOURCES = \ + isl_format_layout.c + +isl_format_layout.c: isl_format_layout_gen.bash \ + isl_format_layout.csv + $(AM_V_GEN)$(srcdir)/isl_format_layout_gen.bash \ + <$(srcdir)/isl_format_layout.csv >$@ + +# ---------------------------------------------------------------------------- +# Tests +# ---------------------------------------------------------------------------- + +TESTS = tests/isl_surf_get_image_offset_test + +check_PROGRAMS = $(TESTS) + +# Link tests to lib965_compiler.la for brw_get_device_info(). +tests_ldadd = \ + libisl.la \ + $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la + +tests_isl_surf_get_image_offset_test_SOURCES = \ + tests/isl_surf_get_image_offset_test.c +tests_isl_surf_get_image_offset_test_LDADD = $(tests_ldadd) + +# ---------------------------------------------------------------------------- + +include $(top_srcdir)/install-lib-links.mk diff --git a/src/isl/README b/src/isl/README new file mode 100644 index 00000000000..1ab4313fcc5 --- /dev/null +++ b/src/isl/README @@ -0,0 +1,113 @@ +Intel Surface Layout + +Introduction +============ +isl is a small library that calculates the layout of Intel GPU surfaces, queries +those layouts, and queries the properties of surface formats. + + +Independence from User APIs +=========================== +isl's API is independent of any user-facing graphics API, such as OpenGL and +Vulkan. This independence allows isl to be used a shared component by multiple +Intel drivers. + +Rather than mimic the user-facing APIs, the isl API attempts to reflect Intel +hardware: the actual memory layout of Intel GPU surfaces and how one programs +the GPU to use those surfaces. For example: + + - The tokens of `enum isl_format` (such as `ISL_FORMAT_R8G8B8A8_UNORM`) + match those of the hardware enum `SURFACE_FORMAT` rather than the OpenGL + or Vulkan format tokens. And the values of `isl_format` and + `SURFACE_FORMAT` are identical. + + - The OpenGL and Vulkan APIs contain depth and stencil formats. However the + hardware enum `SURFACE_FORMAT` does not, and therefore neither does `enum + isl_format`. Rather than define new pixel formats that have no hardware + counterpart, isl records the intent to use a surface as a depth or stencil + buffer with the usage flags `ISL_SURF_USAGE_DEPTH_BIT` and + `ISL_SURF_USAGE_STENCIL_BIT`. + + - `struct isl_surf` distinguishes between the surface's logical dimension + from the user API's perspective (`enum isl_surf_dim`, which may be 1D, 2D, + or 3D) and the layout of those dimensions in memory (`enum isl_dim_layout`). + + +Surface Units +============= + +Intro +----- +ISL takes care in its equations to correctly handle conversion among surface +units (such as pixels and compression blocks) and to carefully distinguish +between a surface's logical layout in the client API and its physical layout +in memory. + +Symbol names often explicitly declare their unit with a suffix: + + - px: logical pixels + - sa: physical surface samples + - el: physical surface elements + - sa_rows: rows of physical surface samples + - el_rows: rows of physical surface elements + +Logical units are independent of hardware generation and are closely related +to the user-facing API (OpenGL and Vulkan). Physical units are dependent on +hardware generation and reflect the surface's layout in memory. + +Definitions +----------- +- Logical Pixels (px): + + The surface's layout from the perspective of the client API (OpenGL and + Vulkan) is in units of logical pixels. Logical pixels are independent of the + surface's layout in memory. + + A surface's width and height, in units of logical pixels, is not affected by + the surface's sample count. For example, consider a VkImage created with + VkImageCreateInfo{width=w0, height=h0, samples=s0}. The surface's width and + height at level 0 is, in units of logical pixels, w0 and h0 regardless of + the value of s0. + + For example, the logical array length of a 3D surface is always 1, even on + Gen9 where the surface's memory layout is that of an array surface + (ISL_DIM_LAYOUT_GEN4_2D). + +- Physical Surface Samples (sa): + + For a multisampled surface, this unit has the obvious meaning. + A singlesampled surface, from ISL's perspective, is simply a multisampled + surface whose sample count is 1. + + For example, consider a 2D single-level non-array surface with samples=4, + width_px=64, and height_px=64 (note that the suffix 'px' indicates logical + pixels). If the surface's multisample layout is ISL_MSAA_LAYOUT_INTERLEAVED, + then the extent of level 0 is, in units of physical surface samples, + width_sa=128, height_sa=128, depth_sa=1, array_length_sa=1. If + ISL_MSAA_LAYOUT_ARRAY, then width_sa=64, height_sa=64, depth_sa=1, + array_length_sa=4. + +- Physical Surface Elements (el): + + This unit allows ISL to treat compressed and uncompressed formats + identically in many calculations. + + If the surface's pixel format is compressed, such as ETC2, then a surface + element is equivalent to a compression block. If uncompressed, then + a surface element is equivalent to a surface sample. As a corollary, for + a given surface a surface element is at least as large as a surface sample. + +Errata +------ +ISL acquired the term 'surface element' from the Broadwell PRM [1], which +defines it as follows: + + An element is defined as a pixel in uncompresed surface formats, and as + a compression block in compressed surface formats. For MSFMT_DEPTH_STENCIL + type multisampled surfaces, an element is a sample. + + +References +========== +[1]: Broadwell PRM >> Volume 2d: Command Reference: Structures >> + RENDER_SURFACE_STATE Surface Vertical Alignment (p325) diff --git a/src/isl/isl.c b/src/isl/isl.c new file mode 100644 index 00000000000..ec6323741e8 --- /dev/null +++ b/src/isl/isl.c @@ -0,0 +1,1384 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> + +#include "isl.h" +#include "isl_gen4.h" +#include "isl_gen6.h" +#include "isl_gen7.h" +#include "isl_gen8.h" +#include "isl_gen9.h" +#include "isl_priv.h" + +void PRINTFLIKE(3, 4) UNUSED +__isl_finishme(const char *file, int line, const char *fmt, ...) +{ + va_list ap; + char buf[512]; + + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf); +} + +void +isl_device_init(struct isl_device *dev, + const struct brw_device_info *info, + bool has_bit6_swizzling) +{ + dev->info = info; + dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6; + dev->has_bit6_swizzling = has_bit6_swizzling; + + /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some + * device properties at buildtime. Verify that the macros with the device + * properties chosen during runtime. + */ + assert(ISL_DEV_GEN(dev) == dev->info->gen); + assert(ISL_DEV_USE_SEPARATE_STENCIL(dev) == dev->use_separate_stencil); + + /* Did we break hiz or stencil? */ + if (ISL_DEV_USE_SEPARATE_STENCIL(dev)) + assert(info->has_hiz_and_separate_stencil); + if (info->must_use_separate_stencil) + assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); +} + +/** + * @brief Query the set of multisamples supported by the device. + * + * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always + * supported. + */ +isl_sample_count_mask_t ATTRIBUTE_CONST +isl_device_get_sample_counts(struct isl_device *dev) +{ + if (ISL_DEV_GEN(dev) >= 9) { + return ISL_SAMPLE_COUNT_1_BIT | + ISL_SAMPLE_COUNT_2_BIT | + ISL_SAMPLE_COUNT_4_BIT | + ISL_SAMPLE_COUNT_8_BIT | + ISL_SAMPLE_COUNT_16_BIT; + } else if (ISL_DEV_GEN(dev) >= 8) { + return ISL_SAMPLE_COUNT_1_BIT | + ISL_SAMPLE_COUNT_2_BIT | + ISL_SAMPLE_COUNT_4_BIT | + ISL_SAMPLE_COUNT_8_BIT; + } else if (ISL_DEV_GEN(dev) >= 7) { + return ISL_SAMPLE_COUNT_1_BIT | + ISL_SAMPLE_COUNT_4_BIT | + ISL_SAMPLE_COUNT_8_BIT; + } else if (ISL_DEV_GEN(dev) >= 6) { + return ISL_SAMPLE_COUNT_1_BIT | + ISL_SAMPLE_COUNT_4_BIT; + } else { + return ISL_SAMPLE_COUNT_1_BIT; + } +} + +/** + * @param[out] info is written only on success + */ +bool +isl_tiling_get_info(const struct isl_device *dev, + enum isl_tiling tiling, + uint32_t format_block_size, + struct isl_tile_info *tile_info) +{ + const uint32_t bs = format_block_size; + uint32_t width, height; + + assert(bs > 0); + + switch (tiling) { + case ISL_TILING_LINEAR: + width = 1; + height = 1; + break; + + case ISL_TILING_X: + width = 1 << 9; + height = 1 << 3; + break; + + case ISL_TILING_Y0: + width = 1 << 7; + height = 1 << 5; + break; + + case ISL_TILING_W: + /* XXX: Should W tile be same as Y? */ + width = 1 << 6; + height = 1 << 6; + break; + + case ISL_TILING_Yf: + case ISL_TILING_Ys: { + if (ISL_DEV_GEN(dev) < 9) + return false; + + if (!isl_is_pow2(bs)) + return false; + + bool is_Ys = tiling == ISL_TILING_Ys; + + width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys)); + height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys)); + break; + } + } /* end switch */ + + *tile_info = (struct isl_tile_info) { + .tiling = tiling, + .width = width, + .height = height, + .size = width * height, + }; + + return true; +} + +void +isl_tiling_get_extent(const struct isl_device *dev, + enum isl_tiling tiling, + uint32_t format_block_size, + struct isl_extent2d *e) +{ + struct isl_tile_info tile_info; + isl_tiling_get_info(dev, tiling, format_block_size, &tile_info); + *e = isl_extent2d(tile_info.width, tile_info.height); +} + +/** + * @param[out] tiling is set only on success + */ +bool +isl_surf_choose_tiling(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling *tiling) +{ + isl_tiling_flags_t tiling_flags = info->tiling_flags; + + if (ISL_DEV_GEN(dev) >= 7) { + gen7_filter_tiling(dev, info, &tiling_flags); + } else { + isl_finishme("%s: gen%u", __func__, ISL_DEV_GEN(dev)); + gen7_filter_tiling(dev, info, &tiling_flags); + } + + #define CHOOSE(__tiling) \ + do { \ + if (tiling_flags & (1u << (__tiling))) { \ + *tiling = (__tiling); \ + return true; \ + } \ + } while (0) + + /* Of the tiling modes remaining, choose the one that offers the best + * performance. + */ + + if (info->dim == ISL_SURF_DIM_1D) { + /* Prefer linear for 1D surfaces because they do not benefit from + * tiling. To the contrary, tiling leads to wasted memory and poor + * memory locality due to the swizzling and alignment restrictions + * required in tiled surfaces. + */ + CHOOSE(ISL_TILING_LINEAR); + } + + CHOOSE(ISL_TILING_Ys); + CHOOSE(ISL_TILING_Yf); + CHOOSE(ISL_TILING_Y0); + CHOOSE(ISL_TILING_X); + CHOOSE(ISL_TILING_W); + CHOOSE(ISL_TILING_LINEAR); + + #undef CHOOSE + + /* No tiling mode accomodates the inputs. */ + return false; +} + +static bool +isl_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout) +{ + if (ISL_DEV_GEN(dev) >= 8) { + return gen8_choose_msaa_layout(dev, info, tiling, msaa_layout); + } else if (ISL_DEV_GEN(dev) >= 7) { + return gen7_choose_msaa_layout(dev, info, tiling, msaa_layout); + } else if (ISL_DEV_GEN(dev) >= 6) { + return gen6_choose_msaa_layout(dev, info, tiling, msaa_layout); + } else { + return gen4_choose_msaa_layout(dev, info, tiling, msaa_layout); + } +} + +static void +isl_msaa_interleaved_scale_px_to_sa(uint32_t samples, + uint32_t *width, uint32_t *height) +{ + assert(isl_is_pow2(samples)); + + /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level + * Sizes (p133): + * + * If the surface is multisampled and it is a depth or stencil surface + * or Multisampled Surface StorageFormat in SURFACE_STATE is + * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before + * proceeding: [...] + */ + if (width) + *width = isl_align(*width, 2) << ((ffs(samples) - 0) / 2); + if (height) + *height = isl_align(*height, 2) << ((ffs(samples) - 1) / 2); +} + +static enum isl_array_pitch_span +isl_choose_array_pitch_span(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_dim_layout dim_layout, + const struct isl_extent4d *phys_level0_sa) +{ + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN9_1D: + case ISL_DIM_LAYOUT_GEN4_2D: + if (ISL_DEV_GEN(dev) >= 8) { + /* QPitch becomes programmable in Broadwell. So choose the + * most compact QPitch possible in order to conserve memory. + * + * From the Broadwell PRM >> Volume 2d: Command Reference: Structures + * >> RENDER_SURFACE_STATE Surface QPitch (p325): + * + * - Software must ensure that this field is set to a value + * sufficiently large such that the array slices in the surface + * do not overlap. Refer to the Memory Data Formats section for + * information on how surfaces are stored in memory. + * + * - This field specifies the distance in rows between array + * slices. It is used only in the following cases: + * + * - Surface Array is enabled OR + * - Number of Mulitsamples is not NUMSAMPLES_1 and + * Multisampled Surface Storage Format set to MSFMT_MSS OR + * - Surface Type is SURFTYPE_CUBE + */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } else if (ISL_DEV_GEN(dev) >= 7) { + /* Note that Ivybridge introduces + * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the + * driver more control over the QPitch. + */ + + if (phys_level0_sa->array_len == 1) { + /* The hardware will never use the QPitch. So choose the most + * compact QPitch possible in order to conserve memory. + */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + if (isl_surf_usage_is_depth_or_stencil(info->usage)) { + /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >> + * Section 6.18.4.7: Surface Arrays (p112): + * + * If Surface Array Spacing is set to ARYSPC_FULL (note that + * the depth buffer and stencil buffer have an implied value of + * ARYSPC_FULL): + */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + if (info->levels == 1) { + /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing + * to ARYSPC_LOD0. + */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + return ISL_ARRAY_PITCH_SPAN_FULL; + } else if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && + ISL_DEV_USE_SEPARATE_STENCIL(dev) && + isl_surf_usage_is_stencil(info->usage)) { + /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: + * Graphics Core >> Section 7.18.3.7: Surface Arrays: + * + * The separate stencil buffer does not support mip mapping, thus + * the storage for LODs other than LOD 0 is not needed. + */ + assert(info->levels == 1); + assert(phys_level0_sa->array_len == 1); + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } else { + if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) && + ISL_DEV_USE_SEPARATE_STENCIL(dev) && + isl_surf_usage_is_stencil(info->usage)) { + /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: + * Graphics Core >> Section 7.18.3.7: Surface Arrays: + * + * The separate stencil buffer does not support mip mapping, + * thus the storage for LODs other than LOD 0 is not needed. + */ + assert(info->levels == 1); + assert(phys_level0_sa->array_len == 1); + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + if (phys_level0_sa->array_len == 1) { + /* The hardware will never use the QPitch. So choose the most + * compact QPitch possible in order to conserve memory. + */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + return ISL_ARRAY_PITCH_SPAN_FULL; + } + + case ISL_DIM_LAYOUT_GEN4_3D: + /* The hardware will never use the QPitch. So choose the most + * compact QPitch possible in order to conserve memory. + */ + return ISL_ARRAY_PITCH_SPAN_COMPACT; + } + + unreachable("bad isl_dim_layout"); + return ISL_ARRAY_PITCH_SPAN_FULL; +} + +static void +isl_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) +{ + if (ISL_DEV_GEN(dev) >= 9) { + gen9_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); + } else if (ISL_DEV_GEN(dev) >= 8) { + gen8_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); + } else if (ISL_DEV_GEN(dev) >= 7) { + gen7_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); + } else if (ISL_DEV_GEN(dev) >= 6) { + gen6_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); + } else { + gen4_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); + } +} + +static enum isl_dim_layout +isl_surf_choose_dim_layout(const struct isl_device *dev, + enum isl_surf_dim logical_dim) +{ + if (ISL_DEV_GEN(dev) >= 9) { + switch (logical_dim) { + case ISL_SURF_DIM_1D: + return ISL_DIM_LAYOUT_GEN9_1D; + case ISL_SURF_DIM_2D: + case ISL_SURF_DIM_3D: + return ISL_DIM_LAYOUT_GEN4_2D; + } + } else { + switch (logical_dim) { + case ISL_SURF_DIM_1D: + case ISL_SURF_DIM_2D: + return ISL_DIM_LAYOUT_GEN4_2D; + case ISL_SURF_DIM_3D: + return ISL_DIM_LAYOUT_GEN4_3D; + } + } + + unreachable("bad isl_surf_dim"); + return ISL_DIM_LAYOUT_GEN4_2D; +} + +/** + * Calculate the physical extent of the surface's first level, in units of + * surface samples. The result is aligned to the format's compression block. + */ +static void +isl_calc_phys_level0_extent_sa(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_dim_layout dim_layout, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent4d *phys_level0_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + if (isl_format_is_yuv(info->format)) + isl_finishme("%s:%s: YUV format", __FILE__, __func__); + + switch (info->dim) { + case ISL_SURF_DIM_1D: + assert(info->height == 1); + assert(info->depth == 1); + assert(info->samples == 1); + assert(!isl_format_is_compressed(info->format)); + + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN4_3D: + unreachable("bad isl_dim_layout"); + + case ISL_DIM_LAYOUT_GEN9_1D: + case ISL_DIM_LAYOUT_GEN4_2D: + *phys_level0_sa = (struct isl_extent4d) { + .w = info->width, + .h = 1, + .d = 1, + .a = info->array_len, + }; + break; + } + break; + + case ISL_SURF_DIM_2D: + assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D); + + if (tiling == ISL_TILING_Ys && info->samples > 1) + isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__); + + switch (msaa_layout) { + case ISL_MSAA_LAYOUT_NONE: + assert(info->depth == 1); + assert(info->samples == 1); + + *phys_level0_sa = (struct isl_extent4d) { + .w = isl_align(info->width, fmtl->bw), + .h = isl_align(info->height, fmtl->bh), + .d = 1, + .a = info->array_len, + }; + break; + + case ISL_MSAA_LAYOUT_ARRAY: + assert(info->depth == 1); + assert(info->array_len == 1); + assert(!isl_format_is_compressed(info->format)); + + *phys_level0_sa = (struct isl_extent4d) { + .w = info->width, + .h = info->height, + .d = 1, + .a = info->samples, + }; + break; + + case ISL_MSAA_LAYOUT_INTERLEAVED: + assert(info->depth == 1); + assert(info->array_len == 1); + assert(!isl_format_is_compressed(info->format)); + + *phys_level0_sa = (struct isl_extent4d) { + .w = info->width, + .h = info->height, + .d = 1, + .a = 1, + }; + + isl_msaa_interleaved_scale_px_to_sa(info->samples, + &phys_level0_sa->w, + &phys_level0_sa->h); + break; + } + break; + + case ISL_SURF_DIM_3D: + assert(info->array_len == 1); + assert(info->samples == 1); + + if (fmtl->bd > 1) { + isl_finishme("%s:%s: compression block with depth > 1", + __FILE__, __func__); + } + + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN9_1D: + unreachable("bad isl_dim_layout"); + + case ISL_DIM_LAYOUT_GEN4_2D: + assert(ISL_DEV_GEN(dev) >= 9); + + *phys_level0_sa = (struct isl_extent4d) { + .w = isl_align(info->width, fmtl->bw), + .h = isl_align(info->height, fmtl->bh), + .d = 1, + .a = info->depth, + }; + break; + + case ISL_DIM_LAYOUT_GEN4_3D: + assert(ISL_DEV_GEN(dev) < 9); + *phys_level0_sa = (struct isl_extent4d) { + .w = isl_align(info->width, fmtl->bw), + .h = isl_align(info->height, fmtl->bh), + .d = info->depth, + .a = 1, + }; + break; + } + break; + } +} + +/** + * A variant of isl_calc_phys_slice0_extent_sa() specific to + * ISL_DIM_LAYOUT_GEN4_2D. + */ +static void +isl_calc_phys_slice0_extent_sa_gen4_2d( + const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_msaa_layout msaa_layout, + const struct isl_extent3d *image_align_sa, + const struct isl_extent4d *phys_level0_sa, + struct isl_extent2d *phys_slice0_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + assert(phys_level0_sa->depth == 1); + + if (info->levels == 1 && msaa_layout != ISL_MSAA_LAYOUT_INTERLEAVED) { + /* Do not pad the surface to the image alignment. Instead, pad it only + * to the pixel format's block alignment. + * + * For tiled surfaces, using a reduced alignment here avoids wasting CPU + * cycles on the below mipmap layout caluclations. Reducing the + * alignment here is safe because we later align the row pitch and array + * pitch to the tile boundary. It is safe even for + * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled + * to accomodate the interleaved samples. + * + * For linear surfaces, reducing the alignment here permits us to later + * choose an arbitrary, non-aligned row pitch. If the surface backs + * a VkBuffer, then an arbitrary pitch may be needed to accomodate + * VkBufferImageCopy::bufferRowLength. + */ + *phys_slice0_sa = (struct isl_extent2d) { + .w = isl_align_npot(phys_level0_sa->w, fmtl->bw), + .h = isl_align_npot(phys_level0_sa->h, fmtl->bh), + }; + return; + } + + uint32_t slice_top_w = 0; + uint32_t slice_bottom_w = 0; + uint32_t slice_left_h = 0; + uint32_t slice_right_h = 0; + + uint32_t W0 = phys_level0_sa->w; + uint32_t H0 = phys_level0_sa->h; + + for (uint32_t l = 0; l < info->levels; ++l) { + uint32_t W = isl_minify(W0, l); + uint32_t H = isl_minify(H0, l); + + if (msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) { + /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level + * Sizes (p133): + * + * If the surface is multisampled and it is a depth or stencil + * surface or Multisampled Surface StorageFormat in + * SURFACE_STATE is MSFMT_DEPTH_STENCIL, W_L and H_L must be + * adjusted as follows before proceeding: [...] + */ + isl_msaa_interleaved_scale_px_to_sa(info->samples, &W, &H); + } + + uint32_t w = isl_align_npot(W, image_align_sa->w); + uint32_t h = isl_align_npot(H, image_align_sa->h); + + if (l == 0) { + slice_top_w = w; + slice_left_h = h; + slice_right_h = h; + } else if (l == 1) { + slice_bottom_w = w; + slice_left_h += h; + } else if (l == 2) { + slice_bottom_w += w; + slice_right_h += h; + } else { + slice_right_h += h; + } + } + + *phys_slice0_sa = (struct isl_extent2d) { + .w = MAX(slice_top_w, slice_bottom_w), + .h = MAX(slice_left_h, slice_right_h), + }; +} + +/** + * A variant of isl_calc_phys_slice0_extent_sa() specific to + * ISL_DIM_LAYOUT_GEN4_3D. + */ +static void +isl_calc_phys_slice0_extent_sa_gen4_3d( + const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_extent3d *image_align_sa, + const struct isl_extent4d *phys_level0_sa, + struct isl_extent2d *phys_slice0_sa) +{ + assert(info->samples == 1); + assert(phys_level0_sa->array_len == 1); + + uint32_t slice_w = 0; + uint32_t slice_h = 0; + + uint32_t W0 = phys_level0_sa->w; + uint32_t H0 = phys_level0_sa->h; + uint32_t D0 = phys_level0_sa->d; + + for (uint32_t l = 0; l < info->levels; ++l) { + uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w); + uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h); + uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa->d); + + uint32_t max_layers_horiz = MIN(level_d, 1u << l); + uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); + + slice_w = MAX(slice_w, level_w * max_layers_horiz); + slice_h += level_h * max_layers_vert; + } + + *phys_slice0_sa = (struct isl_extent2d) { + .w = slice_w, + .h = slice_h, + }; +} + +/** + * A variant of isl_calc_phys_slice0_extent_sa() specific to + * ISL_DIM_LAYOUT_GEN9_1D. + */ +static void +isl_calc_phys_slice0_extent_sa_gen9_1d( + const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_extent3d *image_align_sa, + const struct isl_extent4d *phys_level0_sa, + struct isl_extent2d *phys_slice0_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + assert(phys_level0_sa->height == 1); + assert(phys_level0_sa->depth == 1); + assert(info->samples == 1); + assert(image_align_sa->w >= fmtl->bw); + + uint32_t slice_w = 0; + const uint32_t W0 = phys_level0_sa->w; + + for (uint32_t l = 0; l < info->levels; ++l) { + uint32_t W = isl_minify(W0, l); + uint32_t w = isl_align_npot(W, image_align_sa->w); + + slice_w += w; + } + + *phys_slice0_sa = isl_extent2d(slice_w, 1); +} + +/** + * Calculate the physical extent of the surface's first array slice, in units + * of surface samples. If the surface is multi-leveled, then the result will + * be aligned to \a image_align_sa. + */ +static void +isl_calc_phys_slice0_extent_sa(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_dim_layout dim_layout, + enum isl_msaa_layout msaa_layout, + const struct isl_extent3d *image_align_sa, + const struct isl_extent4d *phys_level0_sa, + struct isl_extent2d *phys_slice0_sa) +{ + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN9_1D: + isl_calc_phys_slice0_extent_sa_gen9_1d(dev, info, + image_align_sa, phys_level0_sa, + phys_slice0_sa); + return; + case ISL_DIM_LAYOUT_GEN4_2D: + isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout, + image_align_sa, phys_level0_sa, + phys_slice0_sa); + return; + case ISL_DIM_LAYOUT_GEN4_3D: + isl_calc_phys_slice0_extent_sa_gen4_3d(dev, info, image_align_sa, + phys_level0_sa, phys_slice0_sa); + return; + } +} + +/** + * Calculate the pitch between physical array slices, in units of rows of + * surface elements. + */ +static uint32_t +isl_calc_array_pitch_el_rows(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_tile_info *tile_info, + enum isl_dim_layout dim_layout, + enum isl_array_pitch_span array_pitch_span, + const struct isl_extent3d *image_align_sa, + const struct isl_extent4d *phys_level0_sa, + const struct isl_extent2d *phys_slice0_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + uint32_t pitch_sa_rows = 0; + + switch (dim_layout) { + case ISL_DIM_LAYOUT_GEN9_1D: + /* Each row is an array slice */ + pitch_sa_rows = 1; + break; + case ISL_DIM_LAYOUT_GEN4_2D: + switch (array_pitch_span) { + case ISL_ARRAY_PITCH_SPAN_COMPACT: + pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); + break; + case ISL_ARRAY_PITCH_SPAN_FULL: { + /* The QPitch equation is found in the Broadwell PRM >> Volume 5: + * Memory Views >> Common Surface Formats >> Surface Layout >> 2D + * Surfaces >> Surface Arrays. + */ + uint32_t H0_sa = phys_level0_sa->h; + uint32_t H1_sa = isl_minify(H0_sa, 1); + + uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h); + uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h); + + uint32_t m; + if (ISL_DEV_GEN(dev) >= 7) { + /* The QPitch equation changed slightly in Ivybridge. */ + m = 12; + } else { + m = 11; + } + + pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h); + + if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 && + (info->height % 4 == 1)) { + /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1: + * Graphics Core >> Section 7.18.3.7: Surface Arrays: + * + * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than + * the value calculated in the equation above , for every + * other odd Surface Height starting from 1 i.e. 1,5,9,13. + * + * XXX(chadv): Is the errata natural corollary of the physical + * layout of interleaved samples? + */ + pitch_sa_rows += 4; + } + + pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh); + } /* end case */ + break; + } + break; + case ISL_DIM_LAYOUT_GEN4_3D: + assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT); + pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); + break; + default: + unreachable("bad isl_dim_layout"); + break; + } + + assert(pitch_sa_rows % fmtl->bh == 0); + uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh; + + if (ISL_DEV_GEN(dev) >= 9 && + info->dim == ISL_SURF_DIM_3D && + tile_info->tiling != ISL_TILING_LINEAR) { + /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch: + * + * Tile Mode != Linear: This field must be set to an integer multiple + * of the tile height + */ + pitch_el_rows = isl_align(pitch_el_rows, tile_info->height); + } + + return pitch_el_rows; +} + +/** + * Calculate the pitch of each surface row, in bytes. + */ +static uint32_t +isl_calc_row_pitch(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_tile_info *tile_info, + const struct isl_extent3d *image_align_sa, + const struct isl_extent2d *phys_slice0_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + uint32_t row_pitch = info->min_pitch; + + /* First, align the surface to a cache line boundary, as the PRM explains + * below. + * + * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface + * Formats >> Surface Padding Requirements >> Render Target and Media + * Surfaces: + * + * The data port accesses data (pixels) outside of the surface if they + * are contained in the same cache request as pixels that are within the + * surface. These pixels will not be returned by the requesting message, + * however if these pixels lie outside of defined pages in the GTT, + * a GTT error will result when the cache request is processed. In order + * to avoid these GTT errors, “padding” at the bottom of the surface is + * sometimes necessary. + * + * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface + * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces: + * + * The sampling engine accesses texels outside of the surface if they + * are contained in the same cache line as texels that are within the + * surface. These texels will not participate in any calculation + * performed by the sampling engine and will not affect the result of + * any sampling engine operation, however if these texels lie outside of + * defined pages in the GTT, a GTT error will result when the cache line + * is accessed. In order to avoid these GTT errors, “padding” at the + * bottom and right side of a sampling engine surface is sometimes + * necessary. + * + * It is possible that a cache line will straddle a page boundary if the + * base address or pitch is not aligned. All pages included in the cache + * lines that are part of the surface must map to valid GTT entries to + * avoid errors. To determine the necessary padding on the bottom and + * right side of the surface, refer to the table in Alignment Unit Size + * section for the i and j parameters for the surface format in use. The + * surface must then be extended to the next multiple of the alignment + * unit size in each dimension, and all texels contained in this + * extended surface must have valid GTT entries. + * + * For example, suppose the surface size is 15 texels by 10 texels and + * the alignment parameters are i=4 and j=2. In this case, the extended + * surface would be 16 by 10. Note that these calculations are done in + * texels, and must be converted to bytes based on the surface format + * being used to determine whether additional pages need to be defined. + */ + assert(phys_slice0_sa->w % fmtl->bw == 0); + row_pitch = MAX(row_pitch, fmtl->bs * (phys_slice0_sa->w / fmtl->bw)); + + switch (tile_info->tiling) { + case ISL_TILING_LINEAR: + /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >> + * RENDER_SURFACE_STATE Surface Pitch (p349): + * + * - For linear render target surfaces and surfaces accessed with the + * typed data port messages, the pitch must be a multiple of the + * element size for non-YUV surface formats. Pitch must be + * a multiple of 2 * element size for YUV surface formats. + * + * - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we + * ignore because isl doesn't do buffers.] + * + * - For other linear surfaces, the pitch can be any multiple of + * bytes. + */ + if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { + if (isl_format_is_yuv(info->format)) { + row_pitch = isl_align_npot(row_pitch, 2 * fmtl->bs); + } else { + row_pitch = isl_align_npot(row_pitch, fmtl->bs); + } + } + break; + default: + /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >> + * RENDER_SURFACE_STATE Surface Pitch (p349): + * + * - For tiled surfaces, the pitch must be a multiple of the tile + * width. + */ + row_pitch = isl_align(row_pitch, tile_info->width); + break; + } + + return row_pitch; +} + +/** + * Calculate the surface's total height, including padding, in units of + * surface elements. + */ +static uint32_t +isl_calc_total_height_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + const struct isl_tile_info *tile_info, + uint32_t phys_array_len, + uint32_t row_pitch, + uint32_t array_pitch_el_rows) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + uint32_t total_h_el = phys_array_len * array_pitch_el_rows; + uint32_t pad_bytes = 0; + + /* From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface + * Formats >> Surface Padding Requirements >> Render Target and Media + * Surfaces: + * + * The data port accesses data (pixels) outside of the surface if they + * are contained in the same cache request as pixels that are within the + * surface. These pixels will not be returned by the requesting message, + * however if these pixels lie outside of defined pages in the GTT, + * a GTT error will result when the cache request is processed. In + * order to avoid these GTT errors, “padding” at the bottom of the + * surface is sometimes necessary. + * + * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface + * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces: + * + * ... Lots of padding requirements, all listed separately below. + */ + + /* We can safely ignore the first padding requirement, quoted below, + * because isl doesn't do buffers. + * + * - [pre-BDW] For buffers, which have no inherent “height,” padding + * requirements are different. A buffer must be padded to the next + * multiple of 256 array elements, with an additional 16 bytes added + * beyond that to account for the L1 cache line. + */ + + /* + * - For compressed textures [...], padding at the bottom of the surface + * is to an even compressed row. + */ + if (isl_format_is_compressed(info->format)) + total_h_el = isl_align(total_h_el, 2); + + /* + * - For cube surfaces, an additional two rows of padding are required + * at the bottom of the surface. + */ + if (info->usage & ISL_SURF_USAGE_CUBE_BIT) + total_h_el += 2; + + /* + * - For packed YUV, 96 bpt, 48 bpt, and 24 bpt surface formats, + * additional padding is required. These surfaces require an extra row + * plus 16 bytes of padding at the bottom in addition to the general + * padding requirements. + */ + if (isl_format_is_yuv(info->format) && + (fmtl->bs == 96 || fmtl->bs == 48|| fmtl->bs == 24)) { + total_h_el += 1; + pad_bytes += 16; + } + + /* + * - For linear surfaces, additional padding of 64 bytes is required at + * the bottom of the surface. This is in addition to the padding + * required above. + */ + if (tile_info->tiling == ISL_TILING_LINEAR) + pad_bytes += 64; + + /* The below text weakens, not strengthens, the padding requirements for + * linear surfaces. Therefore we can safely ignore it. + * + * - [BDW+] For SURFTYPE_BUFFER, SURFTYPE_1D, and SURFTYPE_2D non-array, + * non-MSAA, non-mip-mapped surfaces in linear memory, the only + * padding requirement is to the next aligned 64-byte boundary beyond + * the end of the surface. The rest of the padding requirements + * documented above do not apply to these surfaces. + */ + + /* + * - [SKL+] For SURFTYPE_2D and SURFTYPE_3D with linear mode and + * height % 4 != 0, the surface must be padded with + * 4-(height % 4)*Surface Pitch # of bytes. + */ + if (ISL_DEV_GEN(dev) >= 9 && + tile_info->tiling == ISL_TILING_LINEAR && + (info->dim == ISL_SURF_DIM_2D || info->dim == ISL_SURF_DIM_3D)) { + total_h_el = isl_align(total_h_el, 4); + } + + /* + * - [SKL+] For SURFTYPE_1D with linear mode, the surface must be padded + * to 4 times the Surface Pitch # of bytes + */ + if (ISL_DEV_GEN(dev) >= 9 && + tile_info->tiling == ISL_TILING_LINEAR && + info->dim == ISL_SURF_DIM_1D) { + total_h_el += 4; + } + + /* Be sloppy. Align any leftover padding to a row boundary. */ + total_h_el += isl_align_div_npot(pad_bytes, row_pitch); + + return total_h_el; +} + +bool +isl_surf_init_s(const struct isl_device *dev, + struct isl_surf *surf, + const struct isl_surf_init_info *restrict info) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + const struct isl_extent4d logical_level0_px = { + .w = info->width, + .h = info->height, + .d = info->depth, + .a = info->array_len, + }; + + enum isl_dim_layout dim_layout = + isl_surf_choose_dim_layout(dev, info->dim); + + enum isl_tiling tiling; + if (!isl_surf_choose_tiling(dev, info, &tiling)) + return false; + + struct isl_tile_info tile_info; + if (!isl_tiling_get_info(dev, tiling, fmtl->bs, &tile_info)) + return false; + + enum isl_msaa_layout msaa_layout; + if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout)) + return false; + + struct isl_extent3d image_align_el; + isl_choose_image_alignment_el(dev, info, tiling, msaa_layout, + &image_align_el); + + struct isl_extent3d image_align_sa = + isl_extent3d_el_to_sa(info->format, image_align_el); + + struct isl_extent4d phys_level0_sa; + isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout, + &phys_level0_sa); + assert(phys_level0_sa.w % fmtl->bw == 0); + assert(phys_level0_sa.h % fmtl->bh == 0); + + enum isl_array_pitch_span array_pitch_span = + isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa); + + struct isl_extent2d phys_slice0_sa; + isl_calc_phys_slice0_extent_sa(dev, info, dim_layout, msaa_layout, + &image_align_sa, &phys_level0_sa, + &phys_slice0_sa); + assert(phys_slice0_sa.w % fmtl->bw == 0); + assert(phys_slice0_sa.h % fmtl->bh == 0); + + const uint32_t row_pitch = isl_calc_row_pitch(dev, info, &tile_info, + &image_align_sa, + &phys_slice0_sa); + + const uint32_t array_pitch_el_rows = + isl_calc_array_pitch_el_rows(dev, info, &tile_info, dim_layout, + array_pitch_span, &image_align_sa, + &phys_level0_sa, &phys_slice0_sa); + + const uint32_t total_h_el = + isl_calc_total_height_el(dev, info, &tile_info, + phys_level0_sa.array_len, row_pitch, + array_pitch_el_rows); + + const uint32_t total_h_sa = total_h_el * fmtl->bh; + const uint32_t size = row_pitch * isl_align(total_h_sa, tile_info.height); + + /* Alignment of surface base address, in bytes */ + uint32_t base_alignment = MAX(1, info->min_alignment); + assert(isl_is_pow2(base_alignment) && isl_is_pow2(tile_info.size)); + base_alignment = MAX(base_alignment, tile_info.size); + + *surf = (struct isl_surf) { + .dim = info->dim, + .dim_layout = dim_layout, + .msaa_layout = msaa_layout, + .tiling = tiling, + .format = info->format, + + .levels = info->levels, + .samples = info->samples, + + .image_alignment_el = image_align_el, + .logical_level0_px = logical_level0_px, + .phys_level0_sa = phys_level0_sa, + + .size = size, + .alignment = base_alignment, + .row_pitch = row_pitch, + .array_pitch_el_rows = array_pitch_el_rows, + .array_pitch_span = array_pitch_span, + + .usage = info->usage, + }; + + return true; +} + +void +isl_surf_get_tile_info(const struct isl_device *dev, + const struct isl_surf *surf, + struct isl_tile_info *tile_info) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + isl_tiling_get_info(dev, surf->tiling, fmtl->bs, tile_info); +} + +/** + * A variant of isl_surf_get_image_offset_sa() specific to + * ISL_DIM_LAYOUT_GEN4_2D. + */ +static void +get_image_offset_sa_gen4_2d(const struct isl_surf *surf, + uint32_t level, uint32_t layer, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa) +{ + assert(level < surf->levels); + assert(layer < surf->phys_level0_sa.array_len); + assert(surf->phys_level0_sa.depth == 1); + + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(surf); + + const uint32_t W0 = surf->phys_level0_sa.width; + const uint32_t H0 = surf->phys_level0_sa.height; + + uint32_t x = 0; + uint32_t y = layer * isl_surf_get_array_pitch_sa_rows(surf); + + for (uint32_t l = 0; l < level; ++l) { + if (l == 1) { + uint32_t W = isl_minify(W0, l); + + if (surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) + isl_msaa_interleaved_scale_px_to_sa(surf->samples, &W, NULL); + + x += isl_align_npot(W, image_align_sa.w); + } else { + uint32_t H = isl_minify(H0, l); + + if (surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) + isl_msaa_interleaved_scale_px_to_sa(surf->samples, NULL, &H); + + y += isl_align_npot(H, image_align_sa.h); + } + } + + *x_offset_sa = x; + *y_offset_sa = y; +} + +/** + * A variant of isl_surf_get_image_offset_sa() specific to + * ISL_DIM_LAYOUT_GEN4_3D. + */ +static void +get_image_offset_sa_gen4_3d(const struct isl_surf *surf, + uint32_t level, uint32_t logical_z_offset_px, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa) +{ + assert(level < surf->levels); + assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level)); + assert(surf->phys_level0_sa.array_len == 1); + + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(surf); + + const uint32_t W0 = surf->phys_level0_sa.width; + const uint32_t H0 = surf->phys_level0_sa.height; + const uint32_t D0 = surf->phys_level0_sa.depth; + + uint32_t x = 0; + uint32_t y = 0; + + for (uint32_t l = 0; l < level; ++l) { + const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h); + const uint32_t level_d = isl_align_npot(isl_minify(D0, l), image_align_sa.d); + const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l); + + y += level_h * max_layers_vert; + } + + const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w); + const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h); + const uint32_t level_d = isl_align_npot(isl_minify(D0, level), image_align_sa.d); + + const uint32_t max_layers_horiz = MIN(level_d, 1u << level); + + x += level_w * (logical_z_offset_px % max_layers_horiz); + y += level_h * (logical_z_offset_px / max_layers_horiz); + + *x_offset_sa = x; + *y_offset_sa = y; +} + +/** + * A variant of isl_surf_get_image_offset_sa() specific to + * ISL_DIM_LAYOUT_GEN9_1D. + */ +static void +get_image_offset_sa_gen9_1d(const struct isl_surf *surf, + uint32_t level, uint32_t layer, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa) +{ + assert(level < surf->levels); + assert(layer < surf->phys_level0_sa.array_len); + assert(surf->phys_level0_sa.height == 1); + assert(surf->phys_level0_sa.depth == 1); + assert(surf->samples == 1); + + const uint32_t W0 = surf->phys_level0_sa.width; + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(surf); + + uint32_t x = 0; + + for (uint32_t l = 0; l < level; ++l) { + uint32_t W = isl_minify(W0, l); + uint32_t w = isl_align_npot(W, image_align_sa.w); + + x += w; + } + + *x_offset_sa = x; + *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf); +} + +/** + * Calculate the offset, in units of surface samples, to a subimage in the + * surface. + * + * @invariant level < surface levels + * @invariant logical_array_layer < logical array length of surface + * @invariant logical_z_offset_px < logical depth of surface at level + */ +static void +get_image_offset_sa(const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa) +{ + assert(level < surf->levels); + assert(logical_array_layer < surf->logical_level0_px.array_len); + assert(logical_z_offset_px + < isl_minify(surf->logical_level0_px.depth, level)); + + switch (surf->dim_layout) { + case ISL_DIM_LAYOUT_GEN9_1D: + get_image_offset_sa_gen9_1d(surf, level, logical_array_layer, + x_offset_sa, y_offset_sa); + break; + case ISL_DIM_LAYOUT_GEN4_2D: + get_image_offset_sa_gen4_2d(surf, level, logical_array_layer + + logical_z_offset_px, + x_offset_sa, y_offset_sa); + break; + case ISL_DIM_LAYOUT_GEN4_3D: + get_image_offset_sa_gen4_3d(surf, level, logical_z_offset_px, + x_offset_sa, y_offset_sa); + break; + } +} + +void +isl_surf_get_image_offset_el(const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t *x_offset_el, + uint32_t *y_offset_el) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + assert(level < surf->levels); + assert(logical_array_layer < surf->logical_level0_px.array_len); + assert(logical_z_offset_px + < isl_minify(surf->logical_level0_px.depth, level)); + + uint32_t x_offset_sa, y_offset_sa; + get_image_offset_sa(surf, level, + logical_array_layer, + logical_z_offset_px, + &x_offset_sa, + &y_offset_sa); + + *x_offset_el = x_offset_sa / fmtl->bw; + *y_offset_el = y_offset_sa / fmtl->bh; +} + +void +isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, + const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset, + uint32_t *base_address_offset, + uint32_t *x_offset_el, + uint32_t *y_offset_el) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + struct isl_tile_info tile_info; + isl_surf_get_tile_info(dev, surf, &tile_info); + + uint32_t total_x_offset_el; + uint32_t total_y_offset_el; + isl_surf_get_image_offset_el(surf, level, + logical_array_layer, + logical_z_offset, + &total_x_offset_el, + &total_y_offset_el); + + uint32_t small_y_offset_el = total_y_offset_el % tile_info.height; + uint32_t big_y_offset_el = total_y_offset_el - small_y_offset_el; + uint32_t big_y_offset_B = big_y_offset_el * surf->row_pitch; + + uint32_t total_x_offset_B = total_x_offset_el * fmtl->bs; + uint32_t small_x_offset_B = total_x_offset_B % tile_info.width; + uint32_t small_x_offset_el = small_x_offset_B / fmtl->bs; + uint32_t big_x_offset_B = total_x_offset_B - small_x_offset_B; + + *base_address_offset = big_y_offset_B + big_x_offset_B; + *x_offset_el = small_x_offset_el; + *y_offset_el = small_y_offset_el; +} diff --git a/src/isl/isl.h b/src/isl/isl.h new file mode 100644 index 00000000000..bc7a315e8ae --- /dev/null +++ b/src/isl/isl.h @@ -0,0 +1,1015 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file + * @brief Intel Surface Layout + * + * Header Layout + * ------------- + * The header is ordered as: + * - forward declarations + * - macros that may be overridden at compile-time for specific gens + * - enums and constants + * - structs and unions + * - functions + */ + +#pragma once + +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> + +#include "util/macros.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct brw_device_info; +struct brw_image_param; + +#ifndef ISL_DEV_GEN +/** + * @brief Get the hardware generation of isl_device. + * + * You can define this as a compile-time constant in the CFLAGS. For example, + * `gcc -DISL_DEV_GEN(dev)=9 ...`. + */ +#define ISL_DEV_GEN(__dev) ((__dev)->info->gen) +#endif + +#ifndef ISL_DEV_USE_SEPARATE_STENCIL +/** + * You can define this as a compile-time constant in the CFLAGS. For example, + * `gcc -DISL_DEV_USE_SEPARATE_STENCIL(dev)=1 ...`. + */ +#define ISL_DEV_USE_SEPARATE_STENCIL(__dev) ((__dev)->use_separate_stencil) +#endif + +/** + * Hardware enumeration SURFACE_FORMAT. + * + * For the official list, see Broadwell PRM: Volume 2b: Command Reference: + * Enumerations: SURFACE_FORMAT. + */ +enum isl_format { + ISL_FORMAT_R32G32B32A32_FLOAT = 0, + ISL_FORMAT_R32G32B32A32_SINT = 1, + ISL_FORMAT_R32G32B32A32_UINT = 2, + ISL_FORMAT_R32G32B32A32_UNORM = 3, + ISL_FORMAT_R32G32B32A32_SNORM = 4, + ISL_FORMAT_R64G64_FLOAT = 5, + ISL_FORMAT_R32G32B32X32_FLOAT = 6, + ISL_FORMAT_R32G32B32A32_SSCALED = 7, + ISL_FORMAT_R32G32B32A32_USCALED = 8, + ISL_FORMAT_R32G32B32A32_SFIXED = 32, + ISL_FORMAT_R64G64_PASSTHRU = 33, + ISL_FORMAT_R32G32B32_FLOAT = 64, + ISL_FORMAT_R32G32B32_SINT = 65, + ISL_FORMAT_R32G32B32_UINT = 66, + ISL_FORMAT_R32G32B32_UNORM = 67, + ISL_FORMAT_R32G32B32_SNORM = 68, + ISL_FORMAT_R32G32B32_SSCALED = 69, + ISL_FORMAT_R32G32B32_USCALED = 70, + ISL_FORMAT_R32G32B32_SFIXED = 80, + ISL_FORMAT_R16G16B16A16_UNORM = 128, + ISL_FORMAT_R16G16B16A16_SNORM = 129, + ISL_FORMAT_R16G16B16A16_SINT = 130, + ISL_FORMAT_R16G16B16A16_UINT = 131, + ISL_FORMAT_R16G16B16A16_FLOAT = 132, + ISL_FORMAT_R32G32_FLOAT = 133, + ISL_FORMAT_R32G32_SINT = 134, + ISL_FORMAT_R32G32_UINT = 135, + ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS = 136, + ISL_FORMAT_X32_TYPELESS_G8X24_UINT = 137, + ISL_FORMAT_L32A32_FLOAT = 138, + ISL_FORMAT_R32G32_UNORM = 139, + ISL_FORMAT_R32G32_SNORM = 140, + ISL_FORMAT_R64_FLOAT = 141, + ISL_FORMAT_R16G16B16X16_UNORM = 142, + ISL_FORMAT_R16G16B16X16_FLOAT = 143, + ISL_FORMAT_A32X32_FLOAT = 144, + ISL_FORMAT_L32X32_FLOAT = 145, + ISL_FORMAT_I32X32_FLOAT = 146, + ISL_FORMAT_R16G16B16A16_SSCALED = 147, + ISL_FORMAT_R16G16B16A16_USCALED = 148, + ISL_FORMAT_R32G32_SSCALED = 149, + ISL_FORMAT_R32G32_USCALED = 150, + ISL_FORMAT_R32G32_SFIXED = 160, + ISL_FORMAT_R64_PASSTHRU = 161, + ISL_FORMAT_B8G8R8A8_UNORM = 192, + ISL_FORMAT_B8G8R8A8_UNORM_SRGB = 193, + ISL_FORMAT_R10G10B10A2_UNORM = 194, + ISL_FORMAT_R10G10B10A2_UNORM_SRGB = 195, + ISL_FORMAT_R10G10B10A2_UINT = 196, + ISL_FORMAT_R10G10B10_SNORM_A2_UNORM = 197, + ISL_FORMAT_R8G8B8A8_UNORM = 199, + ISL_FORMAT_R8G8B8A8_UNORM_SRGB = 200, + ISL_FORMAT_R8G8B8A8_SNORM = 201, + ISL_FORMAT_R8G8B8A8_SINT = 202, + ISL_FORMAT_R8G8B8A8_UINT = 203, + ISL_FORMAT_R16G16_UNORM = 204, + ISL_FORMAT_R16G16_SNORM = 205, + ISL_FORMAT_R16G16_SINT = 206, + ISL_FORMAT_R16G16_UINT = 207, + ISL_FORMAT_R16G16_FLOAT = 208, + ISL_FORMAT_B10G10R10A2_UNORM = 209, + ISL_FORMAT_B10G10R10A2_UNORM_SRGB = 210, + ISL_FORMAT_R11G11B10_FLOAT = 211, + ISL_FORMAT_R32_SINT = 214, + ISL_FORMAT_R32_UINT = 215, + ISL_FORMAT_R32_FLOAT = 216, + ISL_FORMAT_R24_UNORM_X8_TYPELESS = 217, + ISL_FORMAT_X24_TYPELESS_G8_UINT = 218, + ISL_FORMAT_L32_UNORM = 221, + ISL_FORMAT_A32_UNORM = 222, + ISL_FORMAT_L16A16_UNORM = 223, + ISL_FORMAT_I24X8_UNORM = 224, + ISL_FORMAT_L24X8_UNORM = 225, + ISL_FORMAT_A24X8_UNORM = 226, + ISL_FORMAT_I32_FLOAT = 227, + ISL_FORMAT_L32_FLOAT = 228, + ISL_FORMAT_A32_FLOAT = 229, + ISL_FORMAT_X8B8_UNORM_G8R8_SNORM = 230, + ISL_FORMAT_A8X8_UNORM_G8R8_SNORM = 231, + ISL_FORMAT_B8X8_UNORM_G8R8_SNORM = 232, + ISL_FORMAT_B8G8R8X8_UNORM = 233, + ISL_FORMAT_B8G8R8X8_UNORM_SRGB = 234, + ISL_FORMAT_R8G8B8X8_UNORM = 235, + ISL_FORMAT_R8G8B8X8_UNORM_SRGB = 236, + ISL_FORMAT_R9G9B9E5_SHAREDEXP = 237, + ISL_FORMAT_B10G10R10X2_UNORM = 238, + ISL_FORMAT_L16A16_FLOAT = 240, + ISL_FORMAT_R32_UNORM = 241, + ISL_FORMAT_R32_SNORM = 242, + ISL_FORMAT_R10G10B10X2_USCALED = 243, + ISL_FORMAT_R8G8B8A8_SSCALED = 244, + ISL_FORMAT_R8G8B8A8_USCALED = 245, + ISL_FORMAT_R16G16_SSCALED = 246, + ISL_FORMAT_R16G16_USCALED = 247, + ISL_FORMAT_R32_SSCALED = 248, + ISL_FORMAT_R32_USCALED = 249, + ISL_FORMAT_B5G6R5_UNORM = 256, + ISL_FORMAT_B5G6R5_UNORM_SRGB = 257, + ISL_FORMAT_B5G5R5A1_UNORM = 258, + ISL_FORMAT_B5G5R5A1_UNORM_SRGB = 259, + ISL_FORMAT_B4G4R4A4_UNORM = 260, + ISL_FORMAT_B4G4R4A4_UNORM_SRGB = 261, + ISL_FORMAT_R8G8_UNORM = 262, + ISL_FORMAT_R8G8_SNORM = 263, + ISL_FORMAT_R8G8_SINT = 264, + ISL_FORMAT_R8G8_UINT = 265, + ISL_FORMAT_R16_UNORM = 266, + ISL_FORMAT_R16_SNORM = 267, + ISL_FORMAT_R16_SINT = 268, + ISL_FORMAT_R16_UINT = 269, + ISL_FORMAT_R16_FLOAT = 270, + ISL_FORMAT_A8P8_UNORM_PALETTE0 = 271, + ISL_FORMAT_A8P8_UNORM_PALETTE1 = 272, + ISL_FORMAT_I16_UNORM = 273, + ISL_FORMAT_L16_UNORM = 274, + ISL_FORMAT_A16_UNORM = 275, + ISL_FORMAT_L8A8_UNORM = 276, + ISL_FORMAT_I16_FLOAT = 277, + ISL_FORMAT_L16_FLOAT = 278, + ISL_FORMAT_A16_FLOAT = 279, + ISL_FORMAT_L8A8_UNORM_SRGB = 280, + ISL_FORMAT_R5G5_SNORM_B6_UNORM = 281, + ISL_FORMAT_B5G5R5X1_UNORM = 282, + ISL_FORMAT_B5G5R5X1_UNORM_SRGB = 283, + ISL_FORMAT_R8G8_SSCALED = 284, + ISL_FORMAT_R8G8_USCALED = 285, + ISL_FORMAT_R16_SSCALED = 286, + ISL_FORMAT_R16_USCALED = 287, + ISL_FORMAT_P8A8_UNORM_PALETTE0 = 290, + ISL_FORMAT_P8A8_UNORM_PALETTE1 = 291, + ISL_FORMAT_A1B5G5R5_UNORM = 292, + ISL_FORMAT_A4B4G4R4_UNORM = 293, + ISL_FORMAT_L8A8_UINT = 294, + ISL_FORMAT_L8A8_SINT = 295, + ISL_FORMAT_R8_UNORM = 320, + ISL_FORMAT_R8_SNORM = 321, + ISL_FORMAT_R8_SINT = 322, + ISL_FORMAT_R8_UINT = 323, + ISL_FORMAT_A8_UNORM = 324, + ISL_FORMAT_I8_UNORM = 325, + ISL_FORMAT_L8_UNORM = 326, + ISL_FORMAT_P4A4_UNORM_PALETTE0 = 327, + ISL_FORMAT_A4P4_UNORM_PALETTE0 = 328, + ISL_FORMAT_R8_SSCALED = 329, + ISL_FORMAT_R8_USCALED = 330, + ISL_FORMAT_P8_UNORM_PALETTE0 = 331, + ISL_FORMAT_L8_UNORM_SRGB = 332, + ISL_FORMAT_P8_UNORM_PALETTE1 = 333, + ISL_FORMAT_P4A4_UNORM_PALETTE1 = 334, + ISL_FORMAT_A4P4_UNORM_PALETTE1 = 335, + ISL_FORMAT_Y8_UNORM = 336, + ISL_FORMAT_L8_UINT = 338, + ISL_FORMAT_L8_SINT = 339, + ISL_FORMAT_I8_UINT = 340, + ISL_FORMAT_I8_SINT = 341, + ISL_FORMAT_DXT1_RGB_SRGB = 384, + ISL_FORMAT_R1_UNORM = 385, + ISL_FORMAT_YCRCB_NORMAL = 386, + ISL_FORMAT_YCRCB_SWAPUVY = 387, + ISL_FORMAT_P2_UNORM_PALETTE0 = 388, + ISL_FORMAT_P2_UNORM_PALETTE1 = 389, + ISL_FORMAT_BC1_UNORM = 390, + ISL_FORMAT_BC2_UNORM = 391, + ISL_FORMAT_BC3_UNORM = 392, + ISL_FORMAT_BC4_UNORM = 393, + ISL_FORMAT_BC5_UNORM = 394, + ISL_FORMAT_BC1_UNORM_SRGB = 395, + ISL_FORMAT_BC2_UNORM_SRGB = 396, + ISL_FORMAT_BC3_UNORM_SRGB = 397, + ISL_FORMAT_MONO8 = 398, + ISL_FORMAT_YCRCB_SWAPUV = 399, + ISL_FORMAT_YCRCB_SWAPY = 400, + ISL_FORMAT_DXT1_RGB = 401, + ISL_FORMAT_FXT1 = 402, + ISL_FORMAT_R8G8B8_UNORM = 403, + ISL_FORMAT_R8G8B8_SNORM = 404, + ISL_FORMAT_R8G8B8_SSCALED = 405, + ISL_FORMAT_R8G8B8_USCALED = 406, + ISL_FORMAT_R64G64B64A64_FLOAT = 407, + ISL_FORMAT_R64G64B64_FLOAT = 408, + ISL_FORMAT_BC4_SNORM = 409, + ISL_FORMAT_BC5_SNORM = 410, + ISL_FORMAT_R16G16B16_FLOAT = 411, + ISL_FORMAT_R16G16B16_UNORM = 412, + ISL_FORMAT_R16G16B16_SNORM = 413, + ISL_FORMAT_R16G16B16_SSCALED = 414, + ISL_FORMAT_R16G16B16_USCALED = 415, + ISL_FORMAT_BC6H_SF16 = 417, + ISL_FORMAT_BC7_UNORM = 418, + ISL_FORMAT_BC7_UNORM_SRGB = 419, + ISL_FORMAT_BC6H_UF16 = 420, + ISL_FORMAT_PLANAR_420_8 = 421, + ISL_FORMAT_R8G8B8_UNORM_SRGB = 424, + ISL_FORMAT_ETC1_RGB8 = 425, + ISL_FORMAT_ETC2_RGB8 = 426, + ISL_FORMAT_EAC_R11 = 427, + ISL_FORMAT_EAC_RG11 = 428, + ISL_FORMAT_EAC_SIGNED_R11 = 429, + ISL_FORMAT_EAC_SIGNED_RG11 = 430, + ISL_FORMAT_ETC2_SRGB8 = 431, + ISL_FORMAT_R16G16B16_UINT = 432, + ISL_FORMAT_R16G16B16_SINT = 433, + ISL_FORMAT_R32_SFIXED = 434, + ISL_FORMAT_R10G10B10A2_SNORM = 435, + ISL_FORMAT_R10G10B10A2_USCALED = 436, + ISL_FORMAT_R10G10B10A2_SSCALED = 437, + ISL_FORMAT_R10G10B10A2_SINT = 438, + ISL_FORMAT_B10G10R10A2_SNORM = 439, + ISL_FORMAT_B10G10R10A2_USCALED = 440, + ISL_FORMAT_B10G10R10A2_SSCALED = 441, + ISL_FORMAT_B10G10R10A2_UINT = 442, + ISL_FORMAT_B10G10R10A2_SINT = 443, + ISL_FORMAT_R64G64B64A64_PASSTHRU = 444, + ISL_FORMAT_R64G64B64_PASSTHRU = 445, + ISL_FORMAT_ETC2_RGB8_PTA = 448, + ISL_FORMAT_ETC2_SRGB8_PTA = 449, + ISL_FORMAT_ETC2_EAC_RGBA8 = 450, + ISL_FORMAT_ETC2_EAC_SRGB8_A8 = 451, + ISL_FORMAT_R8G8B8_UINT = 456, + ISL_FORMAT_R8G8B8_SINT = 457, + ISL_FORMAT_RAW = 511, + + /* Hardware doesn't understand this out-of-band value */ + ISL_FORMAT_UNSUPPORTED = UINT16_MAX, +}; + +/** + * Numerical base type for channels of isl_format. + */ +enum isl_base_type { + ISL_VOID, + ISL_RAW, + ISL_UNORM, + ISL_SNORM, + ISL_UFLOAT, + ISL_SFLOAT, + ISL_UFIXED, + ISL_SFIXED, + ISL_UINT, + ISL_SINT, + ISL_USCALED, + ISL_SSCALED, +}; + +/** + * Colorspace of isl_format. + */ +enum isl_colorspace { + ISL_COLORSPACE_NONE = 0, + ISL_COLORSPACE_LINEAR, + ISL_COLORSPACE_SRGB, + ISL_COLORSPACE_YUV, +}; + +/** + * Texture compression mode of isl_format. + */ +enum isl_txc { + ISL_TXC_NONE = 0, + ISL_TXC_DXT1, + ISL_TXC_DXT3, + ISL_TXC_DXT5, + ISL_TXC_FXT1, + ISL_TXC_RGTC1, + ISL_TXC_RGTC2, + ISL_TXC_BPTC, + ISL_TXC_ETC1, + ISL_TXC_ETC2, +}; + +/** + * @brief Hardware tile mode + * + * WARNING: These values differ from the hardware enum values, which are + * unstable across hardware generations. + * + * Note that legacy Y tiling is ISL_TILING_Y0 instead of ISL_TILING_Y, to + * clearly distinguish it from Yf and Ys. + */ +enum isl_tiling { + ISL_TILING_LINEAR = 0, + ISL_TILING_W, + ISL_TILING_X, + ISL_TILING_Y0, /**< Legacy Y tiling */ + ISL_TILING_Yf, /**< Standard 4K tiling. The 'f' means "four". */ + ISL_TILING_Ys, /**< Standard 64K tiling. The 's' means "sixty-four". */ +}; + +/** + * @defgroup Tiling Flags + * @{ + */ +typedef uint32_t isl_tiling_flags_t; +#define ISL_TILING_LINEAR_BIT (1u << ISL_TILING_LINEAR) +#define ISL_TILING_W_BIT (1u << ISL_TILING_W) +#define ISL_TILING_X_BIT (1u << ISL_TILING_X) +#define ISL_TILING_Y0_BIT (1u << ISL_TILING_Y0) +#define ISL_TILING_Yf_BIT (1u << ISL_TILING_Yf) +#define ISL_TILING_Ys_BIT (1u << ISL_TILING_Ys) +#define ISL_TILING_ANY_MASK (~0u) +#define ISL_TILING_NON_LINEAR_MASK (~ISL_TILING_LINEAR_BIT) + +/** Any Y tiling, including legacy Y tiling. */ +#define ISL_TILING_ANY_Y_MASK (ISL_TILING_Y0_BIT | \ + ISL_TILING_Yf_BIT | \ + ISL_TILING_Ys_BIT) + +/** The Skylake BSpec refers to Yf and Ys as "standard tiling formats". */ +#define ISL_TILING_STD_Y_MASK (ISL_TILING_Yf_BIT | \ + ISL_TILING_Ys_BIT) +/** @} */ + +/** + * @brief Logical dimension of surface. + * + * Note: There is no dimension for cube map surfaces. ISL interprets cube maps + * as 2D array surfaces. + */ +enum isl_surf_dim { + ISL_SURF_DIM_1D, + ISL_SURF_DIM_2D, + ISL_SURF_DIM_3D, +}; + +/** + * @brief Physical layout of the surface's dimensions. + */ +enum isl_dim_layout { + /** + * For details, see the G35 PRM >> Volume 1: Graphics Core >> Section + * 6.17.3: 2D Surfaces. + * + * On many gens, 1D surfaces share the same layout as 2D surfaces. From + * the G35 PRM >> Volume 1: Graphics Core >> Section 6.17.2: 1D Surfaces: + * + * One-dimensional surfaces are identical to 2D surfaces with height of + * one. + * + * @invariant isl_surf::phys_level0_sa::depth == 1 + */ + ISL_DIM_LAYOUT_GEN4_2D, + + /** + * For details, see the G35 PRM >> Volume 1: Graphics Core >> Section + * 6.17.5: 3D Surfaces. + * + * @invariant isl_surf::phys_level0_sa::array_len == 1 + */ + ISL_DIM_LAYOUT_GEN4_3D, + + /** + * For details, see the Skylake BSpec >> Memory Views >> Common Surface + * Formats >> Surface Layout and Tiling >> » 1D Surfaces. + */ + ISL_DIM_LAYOUT_GEN9_1D, +}; + +/* TODO(chadv): Explain */ +enum isl_array_pitch_span { + ISL_ARRAY_PITCH_SPAN_FULL, + ISL_ARRAY_PITCH_SPAN_COMPACT, +}; + +/** + * @defgroup Surface Usage + * @{ + */ +typedef uint64_t isl_surf_usage_flags_t; +#define ISL_SURF_USAGE_RENDER_TARGET_BIT (1u << 0) +#define ISL_SURF_USAGE_DEPTH_BIT (1u << 1) +#define ISL_SURF_USAGE_STENCIL_BIT (1u << 2) +#define ISL_SURF_USAGE_TEXTURE_BIT (1u << 3) +#define ISL_SURF_USAGE_CUBE_BIT (1u << 4) +#define ISL_SURF_USAGE_DISABLE_AUX_BIT (1u << 5) +#define ISL_SURF_USAGE_DISPLAY_BIT (1u << 6) +#define ISL_SURF_USAGE_DISPLAY_ROTATE_90_BIT (1u << 7) +#define ISL_SURF_USAGE_DISPLAY_ROTATE_180_BIT (1u << 8) +#define ISL_SURF_USAGE_DISPLAY_ROTATE_270_BIT (1u << 9) +#define ISL_SURF_USAGE_DISPLAY_FLIP_X_BIT (1u << 10) +#define ISL_SURF_USAGE_DISPLAY_FLIP_Y_BIT (1u << 11) +/** @} */ + +/** + * Identical to VkSampleCountFlagBits. + */ +enum isl_sample_count { + ISL_SAMPLE_COUNT_1_BIT = 1u, + ISL_SAMPLE_COUNT_2_BIT = 2u, + ISL_SAMPLE_COUNT_4_BIT = 4u, + ISL_SAMPLE_COUNT_8_BIT = 8u, + ISL_SAMPLE_COUNT_16_BIT = 16u, +}; +typedef uint32_t isl_sample_count_mask_t; + +/** + * @brief Multisample Format + */ +enum isl_msaa_layout { + /** + * @brief Suface is single-sampled. + */ + ISL_MSAA_LAYOUT_NONE, + + /** + * @brief [SNB+] Interleaved Multisample Format + * + * In this format, multiple samples are interleaved into each cacheline. + * In other words, the sample index is swizzled into the low 6 bits of the + * surface's virtual address space. + * + * For example, suppose the surface is legacy Y tiled, is 4x multisampled, + * and its pixel format is 32bpp. Then the first cacheline is arranged + * thus: + * + * (0,0,0) (0,1,0) (0,0,1) (1,0,1) + * (1,0,0) (1,1,0) (0,1,1) (1,1,1) + * + * (0,0,2) (1,0,2) (0,0,3) (1,0,3) + * (0,1,2) (1,1,2) (0,1,3) (1,1,3) + * + * The hardware docs refer to this format with multiple terms. In + * Sandybridge, this is the only multisample format; so no term is used. + * The Ivybridge docs refer to surfaces in this format as IMS (Interleaved + * Multisample Surface). Later hardware docs additionally refer to this + * format as MSFMT_DEPTH_STENCIL (because the format is deprecated for + * color surfaces). + * + * See the Sandybridge PRM, Volume 4, Part 1, Section 2.7 "Multisampled + * Surface Behavior". + * + * See the Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.1 "Interleaved + * Multisampled Surfaces". + */ + ISL_MSAA_LAYOUT_INTERLEAVED, + + /** + * @brief [IVB+] Array Multisample Format + * + * In this format, the surface's physical layout resembles that of a + * 2D array surface. + * + * Suppose the multisample surface's logical extent is (w, h) and its + * sample count is N. Then surface's physical extent is the same as + * a singlesample 2D surface whose logical extent is (w, h) and array + * length is N. Array slice `i` contains the pixel values for sample + * index `i`. + * + * The Ivybridge docs refer to surfaces in this format as UMS + * (Uncompressed Multsample Layout) and CMS (Compressed Multisample + * Surface). The Broadwell docs additionally refer to this format as + * MSFMT_MSS (MSS=Multisample Surface Storage). + * + * See the Broadwell PRM, Volume 5 "Memory Views", Section "Uncompressed + * Multisample Surfaces". + * + * See the Broadwell PRM, Volume 5 "Memory Views", Section "Compressed + * Multisample Surfaces". + */ + ISL_MSAA_LAYOUT_ARRAY, +}; + + +struct isl_device { + const struct brw_device_info *info; + bool use_separate_stencil; + bool has_bit6_swizzling; +}; + +struct isl_extent2d { + union { uint32_t w, width; }; + union { uint32_t h, height; }; +}; + +struct isl_extent3d { + union { uint32_t w, width; }; + union { uint32_t h, height; }; + union { uint32_t d, depth; }; +}; + +struct isl_extent4d { + union { uint32_t w, width; }; + union { uint32_t h, height; }; + union { uint32_t d, depth; }; + union { uint32_t a, array_len; }; +}; + +struct isl_channel_layout { + enum isl_base_type type; + uint8_t bits; /**< Size in bits */ +}; + +/** + * Each format has 3D block extent (width, height, depth). The block extent of + * compressed formats is that of the format's compression block. For example, + * the block extent of ISL_FORMAT_ETC2_RGB8 is (w=4, h=4, d=1). The block + * extent of uncompressed pixel formats, such as ISL_FORMAT_R8G8B8A8_UNORM, is + * is (w=1, h=1, d=1). + */ +struct isl_format_layout { + enum isl_format format; + + uint8_t bs; /**< Block size, in bytes, rounded towards 0 */ + uint8_t bw; /**< Block width, in pixels */ + uint8_t bh; /**< Block height, in pixels */ + uint8_t bd; /**< Block depth, in pixels */ + + struct { + struct isl_channel_layout r; /**< Red channel */ + struct isl_channel_layout g; /**< Green channel */ + struct isl_channel_layout b; /**< Blue channel */ + struct isl_channel_layout a; /**< Alpha channel */ + struct isl_channel_layout l; /**< Luminance channel */ + struct isl_channel_layout i; /**< Intensity channel */ + struct isl_channel_layout p; /**< Palette channel */ + } channels; + + enum isl_colorspace colorspace; + enum isl_txc txc; +}; + +struct isl_tile_info { + enum isl_tiling tiling; + uint32_t width; /**< in bytes */ + uint32_t height; /**< in rows of memory */ + uint32_t size; /**< in bytes */ +}; + +/** + * @brief Input to surface initialization + * + * @invariant width >= 1 + * @invariant height >= 1 + * @invariant depth >= 1 + * @invariant levels >= 1 + * @invariant samples >= 1 + * @invariant array_len >= 1 + * + * @invariant if 1D then height == 1 and depth == 1 and samples == 1 + * @invariant if 2D then depth == 1 + * @invariant if 3D then array_len == 1 and samples == 1 + */ +struct isl_surf_init_info { + enum isl_surf_dim dim; + enum isl_format format; + + uint32_t width; + uint32_t height; + uint32_t depth; + uint32_t levels; + uint32_t array_len; + uint32_t samples; + + /** Lower bound for isl_surf::alignment, in bytes. */ + uint32_t min_alignment; + + /** Lower bound for isl_surf::pitch, in bytes. */ + uint32_t min_pitch; + + isl_surf_usage_flags_t usage; + + /** Flags that alter how ISL selects isl_surf::tiling. */ + isl_tiling_flags_t tiling_flags; +}; + +struct isl_surf { + enum isl_surf_dim dim; + enum isl_dim_layout dim_layout; + enum isl_msaa_layout msaa_layout; + enum isl_tiling tiling; + enum isl_format format; + + /** + * Alignment of the upper-left sample of each subimage, in units of surface + * elements. + */ + struct isl_extent3d image_alignment_el; + + /** + * Logical extent of the surface's base level, in units of pixels. This is + * identical to the extent defined in isl_surf_init_info. + */ + struct isl_extent4d logical_level0_px; + + /** + * Physical extent of the surface's base level, in units of physical + * surface samples and aligned to the format's compression block. + * + * Consider isl_dim_layout as an operator that transforms a logical surface + * layout to a physical surface layout. Then + * + * logical_layout := (isl_surf::dim, isl_surf::logical_level0_px) + * isl_surf::phys_level0_sa := isl_surf::dim_layout * logical_layout + */ + struct isl_extent4d phys_level0_sa; + + uint32_t levels; + uint32_t samples; + + /** Total size of the surface, in bytes. */ + uint32_t size; + + /** Required alignment for the surface's base address. */ + uint32_t alignment; + + /** + * Pitch between vertically adjacent surface elements, in bytes. + */ + uint32_t row_pitch; + + /** + * Pitch between physical array slices, in rows of surface elements. + */ + uint32_t array_pitch_el_rows; + + enum isl_array_pitch_span array_pitch_span; + + /** Copy of isl_surf_init_info::usage. */ + isl_surf_usage_flags_t usage; +}; + +extern const struct isl_format_layout isl_format_layouts[]; + +void +isl_device_init(struct isl_device *dev, + const struct brw_device_info *info, + bool has_bit6_swizzling); + +isl_sample_count_mask_t ATTRIBUTE_CONST +isl_device_get_sample_counts(struct isl_device *dev); + +static inline const struct isl_format_layout * ATTRIBUTE_CONST +isl_format_get_layout(enum isl_format fmt) +{ + return &isl_format_layouts[fmt]; +} + +bool +isl_format_has_sint_channel(enum isl_format fmt) ATTRIBUTE_CONST; + +static inline bool +isl_format_is_compressed(enum isl_format fmt) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->txc != ISL_TXC_NONE; +} + +static inline bool +isl_format_has_bc_compression(enum isl_format fmt) +{ + switch (isl_format_get_layout(fmt)->txc) { + case ISL_TXC_DXT1: + case ISL_TXC_DXT3: + case ISL_TXC_DXT5: + return true; + case ISL_TXC_NONE: + case ISL_TXC_FXT1: + case ISL_TXC_RGTC1: + case ISL_TXC_RGTC2: + case ISL_TXC_BPTC: + case ISL_TXC_ETC1: + case ISL_TXC_ETC2: + return false; + } + + unreachable("bad texture compression mode"); + return false; +} + +static inline bool +isl_format_is_yuv(enum isl_format fmt) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->colorspace == ISL_COLORSPACE_YUV; +} + +static inline bool +isl_format_block_is_1x1x1(enum isl_format fmt) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->bw == 1 && fmtl->bh == 1 && fmtl->bd == 1; +} + +static inline bool +isl_format_is_rgb(enum isl_format fmt) +{ + return isl_format_layouts[fmt].channels.r.bits > 0 && + isl_format_layouts[fmt].channels.g.bits > 0 && + isl_format_layouts[fmt].channels.b.bits > 0 && + isl_format_layouts[fmt].channels.a.bits == 0; +} + +enum isl_format isl_format_rgb_to_rgba(enum isl_format rgb) ATTRIBUTE_CONST; +enum isl_format isl_format_rgb_to_rgbx(enum isl_format rgb) ATTRIBUTE_CONST; + +bool isl_is_storage_image_format(enum isl_format fmt); + +enum isl_format +isl_lower_storage_image_format(const struct isl_device *dev, + enum isl_format fmt); + +static inline bool +isl_tiling_is_any_y(enum isl_tiling tiling) +{ + return (1u << tiling) & ISL_TILING_ANY_MASK; +} + +static inline bool +isl_tiling_is_std_y(enum isl_tiling tiling) +{ + return (1u << tiling) & ISL_TILING_STD_Y_MASK; +} + +bool +isl_tiling_get_info(const struct isl_device *dev, + enum isl_tiling tiling, + uint32_t format_block_size, + struct isl_tile_info *info); + +void +isl_tiling_get_extent(const struct isl_device *dev, + enum isl_tiling tiling, + uint32_t format_block_size, + struct isl_extent2d *e); +bool +isl_surf_choose_tiling(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling *tiling); + +static inline bool +isl_surf_usage_is_display(isl_surf_usage_flags_t usage) +{ + return usage & ISL_SURF_USAGE_DISPLAY_BIT; +} + +static inline bool +isl_surf_usage_is_depth(isl_surf_usage_flags_t usage) +{ + return usage & ISL_SURF_USAGE_DEPTH_BIT; +} + +static inline bool +isl_surf_usage_is_stencil(isl_surf_usage_flags_t usage) +{ + return usage & ISL_SURF_USAGE_STENCIL_BIT; +} + +static inline bool +isl_surf_usage_is_depth_and_stencil(isl_surf_usage_flags_t usage) +{ + return (usage & ISL_SURF_USAGE_DEPTH_BIT) && + (usage & ISL_SURF_USAGE_STENCIL_BIT); +} + +static inline bool +isl_surf_usage_is_depth_or_stencil(isl_surf_usage_flags_t usage) +{ + return usage & (ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT); +} + +static inline bool +isl_surf_info_is_z16(const struct isl_surf_init_info *info) +{ + return (info->usage & ISL_SURF_USAGE_DEPTH_BIT) && + (info->format == ISL_FORMAT_R16_UNORM); +} + +static inline bool +isl_surf_info_is_z32_float(const struct isl_surf_init_info *info) +{ + return (info->usage & ISL_SURF_USAGE_DEPTH_BIT) && + (info->format == ISL_FORMAT_R32_FLOAT); +} + +static inline struct isl_extent2d +isl_extent2d(uint32_t width, uint32_t height) +{ + return (struct isl_extent2d) { .w = width, .h = height }; +} + +static inline struct isl_extent3d +isl_extent3d(uint32_t width, uint32_t height, uint32_t depth) +{ + return (struct isl_extent3d) { .w = width, .h = height, .d = depth }; +} + +static inline struct isl_extent4d +isl_extent4d(uint32_t width, uint32_t height, uint32_t depth, + uint32_t array_len) +{ + return (struct isl_extent4d) { + .w = width, + .h = height, + .d = depth, + .a = array_len, + }; +} + +#define isl_surf_init(dev, surf, ...) \ + isl_surf_init_s((dev), (surf), \ + &(struct isl_surf_init_info) { __VA_ARGS__ }); + +bool +isl_surf_init_s(const struct isl_device *dev, + struct isl_surf *surf, + const struct isl_surf_init_info *restrict info); + +void +isl_surf_get_tile_info(const struct isl_device *dev, + const struct isl_surf *surf, + struct isl_tile_info *tile_info); + +/** + * Alignment of the upper-left sample of each subimage, in units of surface + * elements. + */ +static inline struct isl_extent3d +isl_surf_get_image_alignment_el(const struct isl_surf *surf) +{ + return surf->image_alignment_el; +} + +/** + * Alignment of the upper-left sample of each subimage, in units of surface + * samples. + */ +static inline struct isl_extent3d +isl_surf_get_image_alignment_sa(const struct isl_surf *surf) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + return (struct isl_extent3d) { + .w = fmtl->bw * surf->image_alignment_el.w, + .h = fmtl->bh * surf->image_alignment_el.h, + .d = fmtl->bd * surf->image_alignment_el.d, + }; +} + +/** + * Pitch between vertically adjacent surface elements, in bytes. + */ +static inline uint32_t +isl_surf_get_row_pitch(const struct isl_surf *surf) +{ + return surf->row_pitch; +} + +/** + * Pitch between vertically adjacent surface elements, in units of surface elements. + */ +static inline uint32_t +isl_surf_get_row_pitch_el(const struct isl_surf *surf) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + assert(surf->row_pitch % fmtl->bs == 0); + return surf->row_pitch / fmtl->bs; +} + +/** + * Pitch between physical array slices, in rows of surface elements. + */ +static inline uint32_t +isl_surf_get_array_pitch_el_rows(const struct isl_surf *surf) +{ + return surf->array_pitch_el_rows; +} + +/** + * Pitch between physical array slices, in units of surface elements. + */ +static inline uint32_t +isl_surf_get_array_pitch_el(const struct isl_surf *surf) +{ + return isl_surf_get_array_pitch_el_rows(surf) * + isl_surf_get_row_pitch_el(surf); +} + +/** + * Pitch between physical array slices, in rows of surface samples. + */ +static inline uint32_t +isl_surf_get_array_pitch_sa_rows(const struct isl_surf *surf) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + return fmtl->bh * isl_surf_get_array_pitch_el_rows(surf); +} + +/** + * Pitch between physical array slices, in bytes. + */ +static inline uint32_t +isl_surf_get_array_pitch(const struct isl_surf *surf) +{ + return isl_surf_get_array_pitch_sa_rows(surf) * surf->row_pitch; +} + +/** + * Calculate the offset, in units of surface elements, to a subimage in the + * surface. + * + * @invariant level < surface levels + * @invariant logical_array_layer < logical array length of surface + * @invariant logical_z_offset_px < logical depth of surface at level + */ +void +isl_surf_get_image_offset_el(const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t *x_offset_el, + uint32_t *y_offset_el); + +/** + * @brief Calculate the intratile offsets to a subimage in the surface. + * + * In @a base_address_offset return the offset from the base of the surface to + * the base address of the first tile of the subimage. In @a x_offset_el and + * @a y_offset_el, return the offset, in units of surface elements, from the + * tile's base to the subimage's first surface element. The x and y offsets + * are intratile offsets; that is, they do not exceed the boundary of the + * surface's tiling format. + */ +void +isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, + const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset, + uint32_t *base_address_offset, + uint32_t *x_offset_el, + uint32_t *y_offset_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/isl/isl_format.c b/src/isl/isl_format.c new file mode 100644 index 00000000000..0fe6e9b83ab --- /dev/null +++ b/src/isl/isl_format.c @@ -0,0 +1,95 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> + +#include "isl.h" + +bool +isl_format_has_sint_channel(enum isl_format fmt) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->channels.r.type == ISL_SINT || + fmtl->channels.g.type == ISL_SINT || + fmtl->channels.b.type == ISL_SINT || + fmtl->channels.a.type == ISL_SINT || + fmtl->channels.l.type == ISL_SINT || + fmtl->channels.i.type == ISL_SINT || + fmtl->channels.p.type == ISL_SINT || + fmtl->channels.g.type == ISL_SINT; +} + +enum isl_format +isl_format_rgb_to_rgba(enum isl_format rgb) +{ + assert(isl_format_is_rgb(rgb)); + + switch (rgb) { + case ISL_FORMAT_R32G32B32_FLOAT: return ISL_FORMAT_R32G32B32A32_FLOAT; + case ISL_FORMAT_R32G32B32_SINT: return ISL_FORMAT_R32G32B32A32_SINT; + case ISL_FORMAT_R32G32B32_UINT: return ISL_FORMAT_R32G32B32A32_UINT; + case ISL_FORMAT_R32G32B32_UNORM: return ISL_FORMAT_R32G32B32A32_UNORM; + case ISL_FORMAT_R32G32B32_SNORM: return ISL_FORMAT_R32G32B32A32_SNORM; + case ISL_FORMAT_R32G32B32_SSCALED: return ISL_FORMAT_R32G32B32A32_SSCALED; + case ISL_FORMAT_R32G32B32_USCALED: return ISL_FORMAT_R32G32B32A32_USCALED; + case ISL_FORMAT_R32G32B32_SFIXED: return ISL_FORMAT_R32G32B32A32_SFIXED; + case ISL_FORMAT_R8G8B8_UNORM: return ISL_FORMAT_R8G8B8A8_UNORM; + case ISL_FORMAT_R8G8B8_SNORM: return ISL_FORMAT_R8G8B8A8_SNORM; + case ISL_FORMAT_R8G8B8_SSCALED: return ISL_FORMAT_R8G8B8A8_SSCALED; + case ISL_FORMAT_R8G8B8_USCALED: return ISL_FORMAT_R8G8B8A8_USCALED; + case ISL_FORMAT_R16G16B16_FLOAT: return ISL_FORMAT_R16G16B16A16_FLOAT; + case ISL_FORMAT_R16G16B16_UNORM: return ISL_FORMAT_R16G16B16A16_UNORM; + case ISL_FORMAT_R16G16B16_SNORM: return ISL_FORMAT_R16G16B16A16_SNORM; + case ISL_FORMAT_R16G16B16_SSCALED: return ISL_FORMAT_R16G16B16A16_SSCALED; + case ISL_FORMAT_R16G16B16_USCALED: return ISL_FORMAT_R16G16B16A16_USCALED; + case ISL_FORMAT_R8G8B8_UNORM_SRGB: return ISL_FORMAT_R8G8B8A8_UNORM_SRGB; + case ISL_FORMAT_R16G16B16_UINT: return ISL_FORMAT_R16G16B16A16_UINT; + case ISL_FORMAT_R16G16B16_SINT: return ISL_FORMAT_R16G16B16A16_SINT; + case ISL_FORMAT_R8G8B8_UINT: return ISL_FORMAT_R8G8B8A8_UINT; + case ISL_FORMAT_R8G8B8_SINT: return ISL_FORMAT_R8G8B8A8_SINT; + default: + return ISL_FORMAT_UNSUPPORTED; + } +} + +enum isl_format +isl_format_rgb_to_rgbx(enum isl_format rgb) +{ + assert(isl_format_is_rgb(rgb)); + + switch (rgb) { + case ISL_FORMAT_R32G32B32_FLOAT: + return ISL_FORMAT_R32G32B32X32_FLOAT; + case ISL_FORMAT_R16G16B16_UNORM: + return ISL_FORMAT_R16G16B16X16_UNORM; + case ISL_FORMAT_R16G16B16_FLOAT: + return ISL_FORMAT_R16G16B16X16_FLOAT; + case ISL_FORMAT_R8G8B8_UNORM: + return ISL_FORMAT_R8G8B8X8_UNORM; + case ISL_FORMAT_R8G8B8_UNORM_SRGB: + return ISL_FORMAT_R8G8B8X8_UNORM_SRGB; + default: + return ISL_FORMAT_UNSUPPORTED; + } +} diff --git a/src/isl/isl_format_layout.csv b/src/isl/isl_format_layout.csv new file mode 100644 index 00000000000..af2786ae630 --- /dev/null +++ b/src/isl/isl_format_layout.csv @@ -0,0 +1,287 @@ +# Copyright 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +# +# @file +# @brief Layout of all hardware surface formats +# +# For the official list, see Broadwell PRM: Volume 2b: Command Reference: +# Enumerations: SURFACE_FORMAT. +# + + +# Columns: +# name: format name in PRM +# bpb: bits per block +# bw: block width, in pixels +# bh: block height, in pixels +# bd: block depth, in pixels +# r: red channel, data type and bitwidth +# g: green channel +# b: blue channel +# a: alpha channel +# l: luminance channel +# i: intensity channel +# p: palette channel +# space: colorspace +# txc: texture compression +# +# Data Types: +# x: void +# r: raw +# un: unorm +# sn: snorm +# uf: ufloat +# sf: sfloat +# ux: ufixed +# sx: sfixed +# ui: uint +# si: sint +# us: uscaled +# ss: sscaled + + +# Table is aligned with the Vim commands below, using the Align plugin: +# :AlignCtrl lr+ p8000000000000P1 +# /^# name/,$ Align, + +# name , bpb, bw, bh, bd, r, g, b, a, l, i, p, space, txc +R32G32B32A32_FLOAT , 128, 1, 1, 1, sf32, sf32, sf32, sf32, , , , linear, +R32G32B32A32_SINT , 128, 1, 1, 1, si32, si32, si32, si32, , , , linear, +R32G32B32A32_UINT , 128, 1, 1, 1, ui32, ui32, ui32, ui32, , , , linear, +R32G32B32A32_UNORM , 128, 1, 1, 1, un32, un32, un32, un32, , , , linear, +R32G32B32A32_SNORM , 128, 1, 1, 1, sn32, sn32, sn32, sn32, , , , linear, +R64G64_FLOAT , 128, 1, 1, 1, sf64, sf64, , , , , , linear, +R32G32B32X32_FLOAT , 128, 1, 1, 1, sf32, sf32, sf32, x32, , , , linear, +R32G32B32A32_SSCALED , 128, 1, 1, 1, ss32, ss32, ss32, ss32, , , , linear, +R32G32B32A32_USCALED , 128, 1, 1, 1, us32, us32, us32, us32, , , , linear, +R32G32B32A32_SFIXED , 128, 1, 1, 1, sx32, sx32, sx32, sx32, , , , linear, +R64G64_PASSTHRU , 128, 1, 1, 1, r64, r64, , , , , , , +R32G32B32_FLOAT , 96, 1, 1, 1, sf32, sf32, sf32, , , , , linear, +R32G32B32_SINT , 96, 1, 1, 1, si32, si32, si32, , , , , linear, +R32G32B32_UINT , 96, 1, 1, 1, ui32, ui32, ui32, , , , , linear, +R32G32B32_UNORM , 96, 1, 1, 1, un32, un32, un32, , , , , linear, +R32G32B32_SNORM , 96, 1, 1, 1, sn32, sn32, sn32, , , , , linear, +R32G32B32_SSCALED , 96, 1, 1, 1, ss32, ss32, ss32, , , , , linear, +R32G32B32_USCALED , 96, 1, 1, 1, us32, us32, us32, , , , , linear, +R32G32B32_SFIXED , 96, 1, 1, 1, sx32, sx32, sx32, , , , , linear, +R16G16B16A16_UNORM , 64, 1, 1, 1, un16, un16, un16, un16, , , , linear, +R16G16B16A16_SNORM , 64, 1, 1, 1, sn16, sn16, sn16, sn16, , , , linear, +R16G16B16A16_SINT , 64, 1, 1, 1, si16, si16, si16, si16, , , , linear, +R16G16B16A16_UINT , 64, 1, 1, 1, ui16, ui16, ui16, ui16, , , , linear, +R16G16B16A16_FLOAT , 64, 1, 1, 1, sf16, sf16, sf16, sf16, , , , linear, +R32G32_FLOAT , 64, 1, 1, 1, sf32, sf32, , , , , , linear, +R32G32_SINT , 64, 1, 1, 1, si32, si32, , , , , , linear, +R32G32_UINT , 64, 1, 1, 1, ui32, ui32, , , , , , linear, +R32_FLOAT_X8X24_TYPELESS , 64, 1, 1, 1, sf32, x8, x24, , , , , linear, +X32_TYPELESS_G8X24_UINT , 64, 1, 1, 1, x32, ui8, x24, , , , , linear, +L32A32_FLOAT , 64, 1, 1, 1, , , , sf32, sf32, , , linear, +R32G32_UNORM , 64, 1, 1, 1, un32, un32, , , , , , linear, +R32G32_SNORM , 64, 1, 1, 1, sn32, sn32, , , , , , linear, +R64_FLOAT , 64, 1, 1, 1, sf64, , , , , , , linear, +R16G16B16X16_UNORM , 64, 1, 1, 1, un16, un16, un16, x16, , , , linear, +R16G16B16X16_FLOAT , 64, 1, 1, 1, sf16, sf16, sf16, x16, , , , linear, +A32X32_FLOAT , 64, 1, 1, 1, , , , sf32, x32, , , alpha, +L32X32_FLOAT , 64, 1, 1, 1, , , , x32, sf32, , , linear, +I32X32_FLOAT , 64, 1, 1, 1, , , , x32, , sf32, , linear, +R16G16B16A16_SSCALED , 64, 1, 1, 1, ss16, ss16, ss16, ss16, , , , linear, +R16G16B16A16_USCALED , 64, 1, 1, 1, us16, us16, us16, us16, , , , linear, +R32G32_SSCALED , 64, 1, 1, 1, ss32, ss32, , , , , , linear, +R32G32_USCALED , 64, 1, 1, 1, us32, us32, , , , , , linear, +R32G32_SFIXED , 64, 1, 1, 1, sx32, sx32, , , , , , linear, +R64_PASSTHRU , 64, 1, 1, 1, r64, , , , , , , , +B8G8R8A8_UNORM , 32, 1, 1, 1, un8, un8, un8, un8, , , , linear, +B8G8R8A8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, un8, , , , srgb, +R10G10B10A2_UNORM , 32, 1, 1, 1, un10, un10, un10, un2, , , , linear, +R10G10B10A2_UNORM_SRGB , 32, 1, 1, 1, un10, un10, un10, un2, , , , srgb, +R10G10B10A2_UINT , 32, 1, 1, 1, ui10, ui10, ui10, ui2, , , , linear, +R10G10B10_SNORM_A2_UNORM , 32, 1, 1, 1, sn10, sn10, sn10, un2, , , , linear, +R8G8B8A8_UNORM , 32, 1, 1, 1, un8, un8, un8, un8, , , , linear, +R8G8B8A8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, un8, , , , srgb, +R8G8B8A8_SNORM , 32, 1, 1, 1, sn8, sn8, sn8, sn8, , , , linear, +R8G8B8A8_SINT , 32, 1, 1, 1, si8, si8, si8, si8, , , , linear, +R8G8B8A8_UINT , 32, 1, 1, 1, ui8, ui8, ui8, ui8, , , , linear, +R16G16_UNORM , 32, 1, 1, 1, un16, un16, , , , , , linear, +R16G16_SNORM , 32, 1, 1, 1, sn16, sn16, , , , , , linear, +R16G16_SINT , 32, 1, 1, 1, si16, si16, , , , , , linear, +R16G16_UINT , 32, 1, 1, 1, ui16, ui16, , , , , , linear, +R16G16_FLOAT , 32, 1, 1, 1, sf16, sf16, , , , , , linear, +B10G10R10A2_UNORM , 32, 1, 1, 1, un10, un10, un10, un2, , , , linear, +B10G10R10A2_UNORM_SRGB , 32, 1, 1, 1, un10, un10, un10, un2, , , , srgb, +R11G11B10_FLOAT , 32, 1, 1, 1, uf11, uf11, uf10, , , , , linear, +R32_SINT , 32, 1, 1, 1, si32, , , , , , , linear, +R32_UINT , 32, 1, 1, 1, ui32, , , , , , , linear, +R32_FLOAT , 32, 1, 1, 1, sf32, , , , , , , linear, +R24_UNORM_X8_TYPELESS , 32, 1, 1, 1, un24, x8, , , , , , linear, +X24_TYPELESS_G8_UINT , 32, 1, 1, 1, x24, ui8, , , , , , linear, +L32_UNORM , 32, 1, 1, 1, , , , , un32, , , linear, +A32_UNORM , 32, 1, 1, 1, , , , un32, , , , alpha, +L16A16_UNORM , 32, 1, 1, 1, , , , un16, un16, , , linear, +I24X8_UNORM , 32, 1, 1, 1, , , , x8, , un24, , linear, +L24X8_UNORM , 32, 1, 1, 1, , , , x8, un24, , , linear, +A24X8_UNORM , 32, 1, 1, 1, , , , un24, x8, , , alpha, +I32_FLOAT , 32, 1, 1, 1, , , , , , sf32, , linear, +L32_FLOAT , 32, 1, 1, 1, , , , , sf32, , , linear, +A32_FLOAT , 32, 1, 1, 1, , , , sf32, , , , alpha, +X8B8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, un8, x8, , , , linear, +A8X8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, x8, un8, , , , linear, +B8X8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, un8, x8, , , , linear, +B8G8R8X8_UNORM , 32, 1, 1, 1, un8, un8, un8, x8, , , , linear, +B8G8R8X8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, x8, , , , srgb, +R8G8B8X8_UNORM , 32, 1, 1, 1, un8, un8, un8, x8, , , , linear, +R8G8B8X8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, x8, , , , srgb, +R9G9B9E5_SHAREDEXP , 32, 1, 1, 1, ui9, ui9, ui9, , , , , linear, +B10G10R10X2_UNORM , 32, 1, 1, 1, un10, un10, un10, x2, , , , linear, +L16A16_FLOAT , 32, 1, 1, 1, , , , sf16, sf16, , , linear, +R32_UNORM , 32, 1, 1, 1, un32, , , , , , , linear, +R32_SNORM , 32, 1, 1, 1, sn32, , , , , , , linear, +R10G10B10X2_USCALED , 32, 1, 1, 1, us10, us10, us10, x2, , , , linear, +R8G8B8A8_SSCALED , 32, 1, 1, 1, ss8, ss8, ss8, ss8, , , , linear, +R8G8B8A8_USCALED , 32, 1, 1, 1, us8, us8, us8, us8, , , , linear, +R16G16_SSCALED , 32, 1, 1, 1, ss16, ss6, , , , , , linear, +R16G16_USCALED , 32, 1, 1, 1, us16, us16, , , , , , linear, +R32_SSCALED , 32, 1, 1, 1, ss32, , , , , , , linear, +R32_USCALED , 32, 1, 1, 1, us32, , , , , , , linear, +B5G6R5_UNORM , 16, 1, 1, 1, un5, un6, un5, , , , , linear, +B5G6R5_UNORM_SRGB , 16, 1, 1, 1, un5, un6, un5, , , , , srgb, +B5G5R5A1_UNORM , 16, 1, 1, 1, un5, un5, un5, un1, , , , linear, +B5G5R5A1_UNORM_SRGB , 16, 1, 1, 1, un5, un5, un5, un1, , , , srgb, +B4G4R4A4_UNORM , 16, 1, 1, 1, un4, un4, un4, un4, , , , linear, +B4G4R4A4_UNORM_SRGB , 16, 1, 1, 1, un4, un4, un4, un4, , , , srgb, +R8G8_UNORM , 16, 1, 1, 1, un8, un8, , , , , , linear, +R8G8_SNORM , 16, 1, 1, 1, sn8, sn8, , , , , , linear, +R8G8_SINT , 16, 1, 1, 1, si8, si8, , , , , , linear, +R8G8_UINT , 16, 1, 1, 1, ui8, ui8, , , , , , linear, +R16_UNORM , 16, 1, 1, 1, un16, , , , , , , linear, +R16_SNORM , 16, 1, 1, 1, sn16, , , , , , , linear, +R16_SINT , 16, 1, 1, 1, si16, , , , , , , linear, +R16_UINT , 16, 1, 1, 1, ui16, , , , , , , linear, +R16_FLOAT , 16, 1, 1, 1, sf16, , , , , , , linear, +A8P8_UNORM_PALETTE0 , 16, 1, 1, 1, , , , un8, , , un8, linear, +A8P8_UNORM_PALETTE1 , 16, 1, 1, 1, , , , un8, , , un8, linear, +I16_UNORM , 16, 1, 1, 1, , , , , , un16, , linear, +L16_UNORM , 16, 1, 1, 1, , , , , un16, , , linear, +A16_UNORM , 16, 1, 1, 1, , , , un16, , , , alpha, +L8A8_UNORM , 16, 1, 1, 1, , , , un8, un8, , , linear, +I16_FLOAT , 16, 1, 1, 1, , , , , , sf16, , linear, +L16_FLOAT , 16, 1, 1, 1, , , , , sf16, , , linear, +A16_FLOAT , 16, 1, 1, 1, , , , sf16, , , , alpha, +L8A8_UNORM_SRGB , 16, 1, 1, 1, , , , un8, un8, , , srgb, +R5G5_SNORM_B6_UNORM , 16, 1, 1, 1, sn5, sn5, un6, , , , , linear, +B5G5R5X1_UNORM , 16, 1, 1, 1, un5, un5, un5, x1, , , , linear, +B5G5R5X1_UNORM_SRGB , 16, 1, 1, 1, un5, un5, un5, x1, , , , srgb, +R8G8_SSCALED , 16, 1, 1, 1, ss8, ss8, , , , , , linear, +R8G8_USCALED , 16, 1, 1, 1, us8, us8, , , , , , linear, +R16_SSCALED , 16, 1, 1, 1, ss16, , , , , , , linear, +R16_USCALED , 16, 1, 1, 1, us16, , , , , , , linear, +P8A8_UNORM_PALETTE0 , 16, 1, 1, 1, , , , un8, , , un8, linear, +P8A8_UNORM_PALETTE1 , 16, 1, 1, 1, , , , un8, , , un8, linear, +A1B5G5R5_UNORM , 16, 1, 1, 1, un5, un5, un5, un1, , , , linear, +A4B4G4R4_UNORM , 16, 1, 1, 1, un4, un4, un4, un4, , , , linear, +L8A8_UINT , 16, 1, 1, 1, , , , ui8, ui8, , , linear, +L8A8_SINT , 16, 1, 1, 1, , , , si8, si8, , , linear, +R8_UNORM , 8, 1, 1, 1, un8, , , , , , , linear, +R8_SNORM , 8, 1, 1, 1, sn8, , , , , , , linear, +R8_SINT , 8, 1, 1, 1, si8, , , , , , , linear, +R8_UINT , 8, 1, 1, 1, ui8, , , , , , , linear, +A8_UNORM , 8, 1, 1, 1, , , , un8, , , , alpha, +I8_UNORM , 8, 1, 1, 1, , , , , , un8, , linear, +L8_UNORM , 8, 1, 1, 1, , , , , un8, , , linear, +P4A4_UNORM_PALETTE0 , 8, 1, 1, 1, , , , un4, , , un4, linear, +A4P4_UNORM_PALETTE0 , 8, 1, 1, 1, , , , un4, , , un4, linear, +R8_SSCALED , 8, 1, 1, 1, ss8, , , , , , , linear, +R8_USCALED , 8, 1, 1, 1, us8, , , , , , , linear, +P8_UNORM_PALETTE0 , 8, 1, 1, 1, , , , , , , un8, linear, +L8_UNORM_SRGB , 8, 1, 1, 1, , , , , un8, , , linear, +P8_UNORM_PALETTE1 , 8, 1, 1, 1, , , , , , , un8, linear, +P4A4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, linear, +A4P4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, linear, +Y8_UNORM , 0, 0, 0, 0, , , , , , , , yuv, +L8_UINT , 8, 1, 1, 1, , , , , ui8, , , linear, +L8_SINT , 8, 1, 1, 1, , , , , si8, , , linear, +I8_UINT , 8, 1, 1, 1, , , , , , ui8, , linear, +I8_SINT , 8, 1, 1, 1, , , , , , si8, , linear, +DXT1_RGB_SRGB , 64, 4, 4, 1, un4, un4, un4, , , , , srgb, dxt1 +R1_UNORM , 1, 1, 1, 1, un1, , , , , , , linear, +YCRCB_NORMAL , 0, 0, 0, 0, , , , , , , , yuv, +YCRCB_SWAPUVY , 0, 0, 0, 0, , , , , , , , yuv, +P2_UNORM_PALETTE0 , 2, 1, 1, 1, , , , , , , un2, linear, +P2_UNORM_PALETTE1 , 2, 1, 1, 1, , , , , , , un2, linear, +BC1_UNORM , 64, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt1 +BC2_UNORM , 128, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt3 +BC3_UNORM , 128, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt5 +BC4_UNORM , 64, 4, 4, 1, un8, , , , , , , linear, rgtc1 +BC5_UNORM , 128, 4, 4, 1, un8, un8, , , , , , linear, rgtc2 +BC1_UNORM_SRGB , 64, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt1 +BC2_UNORM_SRGB , 128, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt3 +BC3_UNORM_SRGB , 128, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt5 +MONO8 , 1, 1, 1, 1, , , , , , , , , +YCRCB_SWAPUV , 0, 0, 0, 0, , , , , , , , yuv, +YCRCB_SWAPY , 0, 0, 0, 0, , , , , , , , yuv, +DXT1_RGB , 64, 4, 4, 1, un4, un4, un4, , , , , linear, dxt1 +FXT1 , 128, 8, 4, 1, un4, un4, un4, , , , , linear, fxt1 +R8G8B8_UNORM , 24, 1, 1, 1, un8, un8, un8, , , , , linear, +R8G8B8_SNORM , 24, 1, 1, 1, sn8, sn8, sn8, , , , , linear, +R8G8B8_SSCALED , 24, 1, 1, 1, ss8, ss8, ss8, , , , , linear, +R8G8B8_USCALED , 24, 1, 1, 1, us8, us8, us8, , , , , linear, +R64G64B64A64_FLOAT , 256, 1, 1, 1, sf64, sf64, sf64, sf64, , , , linear, +R64G64B64_FLOAT , 196, 1, 1, 1, sf64, sf64, sf64, , , , , linear, +BC4_SNORM , 64, 4, 4, 1, sn8, , , , , , , linear, rgtc1 +BC5_SNORM , 128, 4, 4, 1, sn8, sn8, , , , , , linear, rgtc2 +R16G16B16_FLOAT , 48, 1, 1, 1, sf16, sf16, sf16, , , , , linear, +R16G16B16_UNORM , 48, 1, 1, 1, un16, un16, un16, , , , , linear, +R16G16B16_SNORM , 48, 1, 1, 1, sn16, sn16, sn16, , , , , linear, +R16G16B16_SSCALED , 48, 1, 1, 1, ss16, ss16, ss16, , , , , linear, +R16G16B16_USCALED , 48, 1, 1, 1, us16, us16, us16, , , , , linear, +BC6H_SF16 , 128, 4, 4, 1, sf16, sf16, sf16, , , , , linear, bptc +BC7_UNORM , 128, 4, 4, 1, un8, un8, un8, un8, , , , linear, bptc +BC7_UNORM_SRGB , 128, 4, 4, 1, un8, un8, un8, un8, , , , srgb, bptc +BC6H_UF16 , 128, 4, 4, 1, uf16, uf16, uf16, , , , , linear, bptc +PLANAR_420_8 , 0, 0, 0, 0, , , , , , , , yuv, +R8G8B8_UNORM_SRGB , 24, 1, 1, 1, un8, un8, un8, , , , , srgb, +ETC1_RGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , linear, etc1 +ETC2_RGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , linear, etc2 +EAC_R11 , 64, 4, 4, 1, un11, , , , , , , linear, etc2 +EAC_RG11 , 128, 4, 4, 1, un11, un11, , , , , , linear, etc2 +EAC_SIGNED_R11 , 64, 4, 4, 1, sn11, , , , , , , linear, etc2 +EAC_SIGNED_RG11 , 128, 4, 4, 1, sn11, sn11, , , , , , linear, etc2 +ETC2_SRGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , srgb, etc2 +R16G16B16_UINT , 48, 1, 1, 1, ui16, ui16, ui16, , , , , linear, +R16G16B16_SINT , 48, 1, 1, 1, si16, si16, si16, , , , , linear, +R32_SFIXED , 32, 1, 1, 1, sx16, , , , , , , linear, +R10G10B10A2_SNORM , 32, 1, 1, 1, sn10, sn10, sn10, sn2, , , , linear, +R10G10B10A2_USCALED , 32, 1, 1, 1, us10, us10, us10, us2, , , , linear, +R10G10B10A2_SSCALED , 32, 1, 1, 1, ss10, ss10, ss10, ss2, , , , linear, +R10G10B10A2_SINT , 32, 1, 1, 1, si10, si10, si10, si2, , , , linear, +B10G10R10A2_SNORM , 32, 1, 1, 1, sn10, sn10, sn10, sn2, , , , linear, +B10G10R10A2_USCALED , 32, 1, 1, 1, us10, us10, us10, us2, , , , linear, +B10G10R10A2_SSCALED , 32, 1, 1, 1, ss10, ss10, ss10, ss2, , , , linear, +B10G10R10A2_UINT , 32, 1, 1, 1, ui10, ui10, ui10, ui2, , , , linear, +B10G10R10A2_SINT , 32, 1, 1, 1, si10, si10, si10, si2, , , , linear, +R64G64B64A64_PASSTHRU , 256, 1, 1, 1, r64, r64, r64, r64, , , , , +R64G64B64_PASSTHRU , 192, 1, 1, 1, r64, r64, r64, , , , , , +ETC2_RGB8_PTA , 64, 4, 4, 1, un8, un8, un8, un1, , , , linear, etc2 +ETC2_SRGB8_PTA , 64, 4, 4, 1, un8, un8, un8, un1, , , , srgb, etc2 +ETC2_EAC_RGBA8 , 128, 4, 4, 1, un8, un8, un8, un8, , , , linear, etc2 +ETC2_EAC_SRGB8_A8 , 128, 4, 4, 1, un8, un8, un8, un8, , , , srgb, etc2 +R8G8B8_UINT , 24, 1, 1, 1, ui8, ui8, ui8, , , , , linear, +R8G8B8_SINT , 24, 1, 1, 1, si8, si8, si8, , , , , linear, +RAW , 0, 0, 0, 0, , , , , , , , , diff --git a/src/isl/isl_format_layout_gen.bash b/src/isl/isl_format_layout_gen.bash new file mode 100755 index 00000000000..db883827376 --- /dev/null +++ b/src/isl/isl_format_layout_gen.bash @@ -0,0 +1,128 @@ +#!/usr/bin/env bash +# +# Copyright 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +set -eu +set -o pipefail + +cat <<'EOF' +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "isl.h" + +const struct isl_format_layout +isl_format_layouts[] = { +EOF + +sed -r ' +# Delete comment lines and empty lines +/^[[:space:]]*#/d +/^[[:space:]]*$/d + +# Delete spaces +s/[[:space:]]//g + +# Translate formats +s/^([A-Za-z0-9_]+),*/ISL_FORMAT_\1,/ + +# Translate data type of channels +s/\<x([0-9]+),/ISL_VOID@\1,/g +s/\<r([0-9]+),/ISL_RAW@\1,/g +s/\<un([0-9]+),/ISL_UNORM@\1,/g +s/\<sn([0-9]+),/ISL_SNORM@\1,/g +s/\<uf([0-9]+),/ISL_UFLOAT@\1,/g +s/\<sf([0-9]+),/ISL_SFLOAT@\1,/g +s/\<ux([0-9]+),/ISL_UFIXED@\1,/g +s/\<sx([0-9]+),/ISL_SFIXED@\1,/g +s/\<ui([0-9]+),/ISL_UINT@\1,/g +s/\<si([0-9]+),/ISL_SINT@\1,/g +s/\<us([0-9]+),/ISL_USCALED@\1,/g +s/\<ss([0-9]+),/ISL_SSCALED@\1,/g + +# Translate colorspaces +# Interpret alpha-only formats as having no colorspace. +s/\<(linear|srgb|yuv)\>/ISL_COLORSPACE_\1/ +s/\<alpha\>// + +# Translate texture compression +s/\<(dxt|fxt|rgtc|bptc|etc)([0-9]*)\>/ISL_TXC_\1\2/ +' | +tr 'a-z' 'A-Z' | # Convert to uppersace +while IFS=, read -r format bpb bw bh bd \ + red green blue alpha \ + luminance intensity palette \ + colorspace txc +do + : ${colorspace:=ISL_COLORSPACE_NONE} + : ${txc:=ISL_TXC_NONE} + + cat <<EOF + [$format] = { + $format, + .bs = $((bpb/8)), + .bw = $bw, .bh = $bh, .bd = $bd, + .channels = { + .r = { $red }, + .g = { $green }, + .b = { $blue }, + .a = { $alpha }, + .l = { $luminance }, + .i = { $intensity }, + .p = { $palette }, + }, + .colorspace = $colorspace, + .txc = $txc, + }, + +EOF +done | +sed -r ' +# Collapse empty channels +s/\{ \}/{}/ + +# Split non-empty channels into two members: base type and bit size +s/@/, / +' + +# Terminate the table +printf '};\n' diff --git a/src/isl/isl_gen4.c b/src/isl/isl_gen4.c new file mode 100644 index 00000000000..52aa5655bb2 --- /dev/null +++ b/src/isl/isl_gen4.c @@ -0,0 +1,74 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "isl_gen4.h" +#include "isl_priv.h" + +bool +gen4_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout) +{ + /* Gen4 and Gen5 do not support MSAA */ + assert(info->samples >= 1); + + *msaa_layout = ISL_MSAA_LAYOUT_NONE; + return true; +} + +void +gen4_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) +{ + assert(info->samples == 1); + assert(msaa_layout == ISL_MSAA_LAYOUT_NONE); + assert(!isl_tiling_is_std_y(tiling)); + + /* Note that neither the surface's horizontal nor vertical image alignment + * is programmable on gen4 nor gen5. + * + * From the G35 PRM (2008-01), Volume 1 Graphics Core, Section 6.17.3.4 + * Alignment Unit Size: + * + * Note that the compressed formats are padded to a full compression + * cell. + * + * +------------------------+--------+--------+ + * | format | halign | valign | + * +------------------------+--------+--------+ + * | YUV 4:2:2 formats | 4 | 2 | + * | uncompressed formats | 4 | 2 | + * +------------------------+--------+--------+ + */ + + if (isl_format_is_compressed(info->format)) { + *image_align_el = isl_extent3d(1, 1, 1); + return; + } + + *image_align_el = isl_extent3d(4, 2, 1); +} diff --git a/src/isl/isl_gen4.h b/src/isl/isl_gen4.h new file mode 100644 index 00000000000..06cd70b9206 --- /dev/null +++ b/src/isl/isl_gen4.h @@ -0,0 +1,47 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +gen4_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout); + +void +gen4_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/isl/isl_gen6.c b/src/isl/isl_gen6.c new file mode 100644 index 00000000000..24c393925ed --- /dev/null +++ b/src/isl/isl_gen6.c @@ -0,0 +1,160 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "isl_gen6.h" +#include "isl_priv.h" + +bool +gen6_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + assert(ISL_DEV_GEN(dev) == 6); + assert(info->samples >= 1); + + if (info->samples == 1) { + *msaa_layout = ISL_MSAA_LAYOUT_NONE; + return false; + } + + /* From the Sandybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Surface + * Format: + * + * If Number of Multisamples is set to a value other than + * MULTISAMPLECOUNT_1, this field cannot be set to the following + * formats: + * + * - any format with greater than 64 bits per element + * - any compressed texture format (BC*) + * - any YCRCB* format + */ + if (fmtl->bs > 8) + return false; + if (isl_format_is_compressed(info->format)) + return false; + if (isl_format_is_yuv(info->format)) + return false; + + /* From the Sandybridge PRM, Volume 4 Part 1 p85, SURFACE_STATE, Number of + * Multisamples: + * + * If this field is any value other than MULTISAMPLECOUNT_1 the + * following restrictions apply: + * + * - the Surface Type must be SURFTYPE_2D + * - [...] + */ + if (info->dim != ISL_SURF_DIM_2D) + return false; + + /* More obvious restrictions */ + if (isl_surf_usage_is_display(info->usage)) + return false; + if (tiling == ISL_TILING_LINEAR) + return false; + if (info->levels > 1) + return false; + + *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED; + return true; +} + +void +gen6_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) +{ + /* Note that the surface's horizontal image alignment is not programmable + * on Sandybridge. + * + * From the Sandybridge PRM (2011-05), Volume 1, Part 1, Section 7.18.3.4 + * Alignment Unit Size: + * + * Note that the compressed formats are padded to a full compression cell. + * + * +------------------------+--------+--------+ + * | format | halign | valign | + * +------------------------+--------+--------+ + * | YUV 4:2:2 formats | 4 | * | + * | uncompressed formats | 4 | * | + * +------------------------+--------+--------+ + * + * * For these formats, the vertical alignment factor “j” is determined + * as follows: + * - j = 4 for any depth buffer + * - j = 2 for separate stencil buffer + * - j = 4 for any render target surface is multisampled (4x) + * - j = 2 for all other render target surface + * + * From the Sandrybridge PRM (2011-05), Volume 4, Part 1, Section 2.11.2 + * SURFACE_STATE, Surface Vertical Alignment: + * + * - This field must be set to VALIGN_2 if the Surface Format is 96 bits + * per element (BPE). + * + * - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL + * (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY + * (0x190) + */ + + if (isl_format_is_compressed(info->format)) { + *image_align_el = isl_extent3d(1, 1, 1); + return; + } + + if (isl_format_is_yuv(info->format)) { + *image_align_el = isl_extent3d(4, 2, 1); + return; + } + + if (info->samples > 1) { + *image_align_el = isl_extent3d(4, 4, 1); + return; + } + + if (isl_surf_usage_is_depth_or_stencil(info->usage) && + !ISL_DEV_USE_SEPARATE_STENCIL(dev)) { + /* interleaved depthstencil buffer */ + *image_align_el = isl_extent3d(4, 4, 1); + return; + } + + if (isl_surf_usage_is_depth(info->usage)) { + /* separate depth buffer */ + *image_align_el = isl_extent3d(4, 4, 1); + return; + } + + if (isl_surf_usage_is_stencil(info->usage)) { + /* separate stencil buffer */ + *image_align_el = isl_extent3d(4, 2, 1); + return; + } + + *image_align_el = isl_extent3d(4, 2, 1); +} diff --git a/src/isl/isl_gen6.h b/src/isl/isl_gen6.h new file mode 100644 index 00000000000..0779c674940 --- /dev/null +++ b/src/isl/isl_gen6.h @@ -0,0 +1,47 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +gen6_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout); + +void +gen6_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/isl/isl_gen7.c b/src/isl/isl_gen7.c new file mode 100644 index 00000000000..7064e852e65 --- /dev/null +++ b/src/isl/isl_gen7.c @@ -0,0 +1,395 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "isl_gen7.h" +#include "isl_priv.h" + +bool +gen7_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + bool require_array = false; + bool require_interleaved = false; + + assert(ISL_DEV_GEN(dev) == 7); + assert(info->samples >= 1); + + if (info->samples == 1) { + *msaa_layout = ISL_MSAA_LAYOUT_NONE; + return true; + } + + /* From the Ivybridge PRM, Volume 4 Part 1 p63, SURFACE_STATE, Surface + * Format: + * + * If Number of Multisamples is set to a value other than + * MULTISAMPLECOUNT_1, this field cannot be set to the following + * formats: any format with greater than 64 bits per element, any + * compressed texture format (BC*), and any YCRCB* format. + */ + if (fmtl->bs > 8) + return false; + if (isl_format_is_compressed(info->format)) + return false; + if (isl_format_is_yuv(info->format)) + return false; + + /* From the Ivybridge PRM, Volume 4 Part 1 p73, SURFACE_STATE, Number of + * Multisamples: + * + * - If this field is any value other than MULTISAMPLECOUNT_1, the + * Surface Type must be SURFTYPE_2D. + * + * - If this field is any value other than MULTISAMPLECOUNT_1, Surface + * Min LOD, Mip Count / LOD, and Resource Min LOD must be set to zero + */ + if (info->dim != ISL_SURF_DIM_2D) + return false; + if (info->levels > 1) + return false; + + /* The Ivyrbridge PRM insists twice that signed integer formats cannot be + * multisampled. + * + * From the Ivybridge PRM, Volume 4 Part 1 p73, SURFACE_STATE, Number of + * Multisamples: + * + * - This field must be set to MULTISAMPLECOUNT_1 for SINT MSRTs when + * all RT channels are not written. + * + * And errata from the Ivybridge PRM, Volume 4 Part 1 p77, + * RENDER_SURFACE_STATE, MCS Enable: + * + * This field must be set to 0 [MULTISAMPLECOUNT_1] for all SINT MSRTs + * when all RT channels are not written. + * + * Note that the above SINT restrictions apply only to *MSRTs* (that is, + * *multisampled* render targets). The restrictions seem to permit an MCS + * if the render target is singlesampled. + */ + if (isl_format_has_sint_channel(info->format)) + return false; + + /* More obvious restrictions */ + if (isl_surf_usage_is_display(info->usage)) + return false; + if (tiling == ISL_TILING_LINEAR) + return false; + + /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled + * Suface Storage Format: + * + * +---------------------+----------------------------------------------------------------+ + * | MSFMT_MSS | Multsampled surface was/is rendered as a render target | + * | MSFMT_DEPTH_STENCIL | Multisampled surface was rendered as a depth or stencil buffer | + * +---------------------+----------------------------------------------------------------+ + * + * In the table above, MSFMT_MSS refers to ISL_MSAA_LAYOUT_ARRAY, and + * MSFMT_DEPTH_STENCIL refers to ISL_MSAA_LAYOUT_INTERLEAVED. + */ + if (isl_surf_usage_is_depth_or_stencil(info->usage)) + require_interleaved = true; + + /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled + * Suface Storage Format: + * + * If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8, Width + * is >= 8192 (meaning the actual surface width is >= 8193 pixels), this + * field must be set to MSFMT_MSS. + */ + if (info->samples == 8 && info->width == 8192) + require_array = true; + + /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled + * Suface Storage Format: + * + * If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8, + * ((Depth+1) * (Height+1)) is > 4,194,304, OR if the surface’s Number + * of Multisamples is MULTISAMPLECOUNT_4, ((Depth+1) * (Height+1)) is + * > 8,388,608, this field must be set to MSFMT_DEPTH_STENCIL. + */ + if ((info->samples == 8 && info->height > 4194304u) || + (info->samples == 4 && info->height > 8388608u)) + require_interleaved = true; + + /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled + * Suface Storage Format: + * + * This field must be set to MSFMT_DEPTH_STENCIL if Surface Format is + * one of the following: I24X8_UNORM, L24X8_UNORM, A24X8_UNORM, or + * R24_UNORM_X8_TYPELESS. + */ + if (info->format == ISL_FORMAT_I24X8_UNORM || + info->format == ISL_FORMAT_L24X8_UNORM || + info->format == ISL_FORMAT_A24X8_UNORM || + info->format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) + require_interleaved = true; + + if (require_array && require_interleaved) + return false; + + if (require_interleaved) { + *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED; + return true; + } + + /* Default to the array layout because it permits multisample + * compression. + */ + *msaa_layout = ISL_MSAA_LAYOUT_ARRAY; + return true; +} + +static bool +gen7_format_needs_valign2(const struct isl_device *dev, + enum isl_format format) +{ + /* This workaround applies only to gen7 */ + if (ISL_DEV_GEN(dev) > 7) + return false; + + /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1, + * RENDER_SURFACE_STATE Surface Vertical Alignment: + * + * - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL + * (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY + * (0x190) + * + * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT. + */ + return isl_format_is_yuv(format) || + format == ISL_FORMAT_R32G32B32_FLOAT; +} + +/** + * @brief Filter out tiling flags that are incompatible with the surface. + * + * The resultant outgoing @a flags is a subset of the incoming @a flags. The + * outgoing flags may be empty (0x0) if the incoming flags were too + * restrictive. + * + * For example, if the surface will be used for a display + * (ISL_SURF_USAGE_DISPLAY_BIT), then this function filters out all tiling + * flags except ISL_TILING_X_BIT and ISL_TILING_LINEAR_BIT. + */ +void +gen7_filter_tiling(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + isl_tiling_flags_t *flags) +{ + /* IVB+ requires separate stencil */ + assert(ISL_DEV_USE_SEPARATE_STENCIL(dev)); + + /* Clear flags unsupported on this hardware */ + if (ISL_DEV_GEN(dev) < 9) { + *flags &= ~ISL_TILING_Yf_BIT; + *flags &= ~ISL_TILING_Ys_BIT; + } + + /* And... clear the Yf and Ys bits anyway because Anvil doesn't support + * them yet. + */ + *flags &= ~ISL_TILING_Yf_BIT; /* FINISHME[SKL]: Support Yf */ + *flags &= ~ISL_TILING_Ys_BIT; /* FINISHME[SKL]: Support Ys */ + + if (isl_surf_usage_is_depth(info->usage)) { + /* Depth requires Y. */ + *flags &= ISL_TILING_ANY_Y_MASK; + } + + /* Separate stencil requires W tiling, and W tiling requires separate + * stencil. + */ + if (isl_surf_usage_is_stencil(info->usage)) { + *flags &= ISL_TILING_W_BIT; + } else { + *flags &= ~ISL_TILING_W_BIT; + } + + if (info->usage & (ISL_SURF_USAGE_DISPLAY_ROTATE_90_BIT | + ISL_SURF_USAGE_DISPLAY_ROTATE_180_BIT | + ISL_SURF_USAGE_DISPLAY_ROTATE_270_BIT)) { + assert(*flags & ISL_SURF_USAGE_DISPLAY_BIT); + isl_finishme("%s:%s: handle rotated display surfaces", + __FILE__, __func__); + } + + if (info->usage & (ISL_SURF_USAGE_DISPLAY_FLIP_X_BIT | + ISL_SURF_USAGE_DISPLAY_FLIP_Y_BIT)) { + assert(*flags & ISL_SURF_USAGE_DISPLAY_BIT); + isl_finishme("%s:%s: handle flipped display surfaces", + __FILE__, __func__); + } + + if (info->usage & ISL_SURF_USAGE_DISPLAY_BIT) { + /* Before Skylake, the display engine does not accept Y */ + /* FINISHME[SKL]: Y tiling for display surfaces */ + *flags &= (ISL_TILING_LINEAR_BIT | ISL_TILING_X_BIT); + } + + if (info->samples > 1) { + /* From the Sandybridge PRM, Volume 4 Part 1, SURFACE_STATE Tiled + * Surface: + * + * For multisample render targets, this field must be 1 (true). MSRTs + * can only be tiled. + * + * Multisample surfaces never require X tiling, and Y tiling generally + * performs better than X. So choose Y. (Unless it's stencil, then it + * must be W). + */ + *flags &= (ISL_TILING_ANY_Y_MASK | ISL_TILING_W_BIT); + } + + /* workaround */ + if (ISL_DEV_GEN(dev) == 7 && + gen7_format_needs_valign2(dev, info->format) && + (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) && + info->samples == 1) { + /* Y tiling is illegal. From the Ivybridge PRM, Vol4 Part1 2.12.2.1, + * SURFACE_STATE Surface Vertical Alignment: + * + * This field must be set to VALIGN_4 for all tiled Y Render Target + * surfaces. + */ + *flags &= ~ISL_TILING_Y0_BIT; + } +} + +/** + * Choose horizontal subimage alignment, in units of surface elements. + */ +static uint32_t +gen7_choose_halign_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info) +{ + if (isl_format_is_compressed(info->format)) + return 1; + + /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1, + * RENDER_SURFACE_STATE Surface Hoizontal Alignment: + * + * - This field is intended to be set to HALIGN_8 only if the surface + * was rendered as a depth buffer with Z16 format or a stencil buffer, + * since these surfaces support only alignment of 8. Use of HALIGN_8 + * for other surfaces is supported, but uses more memory. + */ + if (isl_surf_info_is_z16(info) || + isl_surf_usage_is_stencil(info->usage)) + return 8; + + return 4; +} + +/** + * Choose vertical subimage alignment, in units of surface elements. + */ +static uint32_t +gen7_choose_valign_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling) +{ + bool require_valign2 = false; + bool require_valign4 = false; + + if (isl_format_is_compressed(info->format)) + return 1; + + if (gen7_format_needs_valign2(dev, info->format)) + require_valign2 = true; + + /* From the Ivybridge PRM, Volume 4, Part 1, Section 2.12.1: + * RENDER_SURFACE_STATE Surface Vertical Alignment: + * + * - This field is intended to be set to VALIGN_4 if the surface was + * rendered as a depth buffer, for a multisampled (4x) render target, + * or for a multisampled (8x) render target, since these surfaces + * support only alignment of 4. Use of VALIGN_4 for other surfaces is + * supported, but uses more memory. This field must be set to + * VALIGN_4 for all tiled Y Render Target surfaces. + * + */ + if (isl_surf_usage_is_depth(info->usage) || + info->samples > 1 || + tiling == ISL_TILING_Y0) { + require_valign4 = true; + } + + if (isl_surf_usage_is_stencil(info->usage)) { + /* The Ivybridge PRM states that the stencil buffer's vertical alignment + * is 8 [Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.4 Alignment + * Unit Size]. However, valign=8 is outside the set of valid values of + * RENDER_SURFACE_STATE.SurfaceVerticalAlignment, which is VALIGN_2 + * (0x0) and VALIGN_4 (0x1). + * + * The PRM is generally confused about the width, height, and alignment + * of the stencil buffer; and this confusion appears elsewhere. For + * example, the following PRM text effectively converts the stencil + * buffer's 8-pixel alignment to a 4-pixel alignment [Ivybridge PRM, + * Volume 1, Part 1, Section + * 6.18.4.2 Base Address and LOD Calculation]: + * + * For separate stencil buffer, the width must be mutiplied by 2 and + * height divided by 2 as follows: + * + * w_L = 2*i*ceil(W_L/i) + * h_L = 1/2*j*ceil(H_L/j) + * + * The root of the confusion is that, in W tiling, each pair of rows is + * interleaved into one. + * + * FINISHME(chadv): Decide to set valign=4 or valign=8 after isl's API + * is more polished. + */ + require_valign4 = true; + } + + assert(!require_valign2 || !require_valign4); + + if (require_valign4) + return 4; + + /* Prefer VALIGN_2 because it conserves memory. */ + return 2; +} + +void +gen7_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) +{ + /* IVB+ does not support combined depthstencil. */ + assert(!isl_surf_usage_is_depth_and_stencil(info->usage)); + + *image_align_el = (struct isl_extent3d) { + .w = gen7_choose_halign_el(dev, info), + .h = gen7_choose_valign_el(dev, info, tiling), + .d = 1, + }; +} diff --git a/src/isl/isl_gen7.h b/src/isl/isl_gen7.h new file mode 100644 index 00000000000..2a95b68a9bd --- /dev/null +++ b/src/isl/isl_gen7.h @@ -0,0 +1,52 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void +gen7_filter_tiling(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + isl_tiling_flags_t *flags); + +bool +gen7_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout); + +void +gen7_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/isl/isl_gen8.c b/src/isl/isl_gen8.c new file mode 100644 index 00000000000..a46427aacc8 --- /dev/null +++ b/src/isl/isl_gen8.c @@ -0,0 +1,229 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "isl_gen8.h" +#include "isl_priv.h" + +bool +gen8_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout) +{ + bool require_array = false; + bool require_interleaved = false; + + assert(info->samples >= 1); + + if (info->samples == 1) { + *msaa_layout = ISL_MSAA_LAYOUT_NONE; + return true; + } + + /* From the Broadwell PRM >> Volume2d: Command Structures >> + * RENDER_SURFACE_STATE Tile Mode: + * + * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field + * must be YMAJOR. + * + * As usual, though, stencil is special. + */ + if (!isl_tiling_is_any_y(tiling) && !isl_surf_usage_is_stencil(info->usage)) + return false; + + /* From the Broadwell PRM >> Volume2d: Command Structures >> + * RENDER_SURFACE_STATE Multisampled Surface Storage Format: + * + * All multisampled render target surfaces must have this field set to + * MSFMT_MSS + */ + if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) + require_array = true; + + /* From the Broadwell PRM >> Volume2d: Command Structures >> + * RENDER_SURFACE_STATE Number of Multisamples: + * + * - If this field is any value other than MULTISAMPLECOUNT_1, the + * Surface Type must be SURFTYPE_2D This field must be set to + * MULTISAMPLECOUNT_1 unless the surface is a Sampling Engine surface + * or Render Target surface. + * + * - If this field is any value other than MULTISAMPLECOUNT_1, Surface + * Min LOD, Mip Count / LOD, and Resource Min LOD must be set to zero. + */ + if (info->dim != ISL_SURF_DIM_2D) + return false; + if (info->levels > 1) + return false; + + /* More obvious restrictions */ + if (isl_surf_usage_is_display(info->usage)) + return false; + if (isl_format_is_compressed(info->format)) + return false; + if (isl_format_is_yuv(info->format)) + return false; + + if (isl_surf_usage_is_depth_or_stencil(info->usage)) + require_interleaved = true; + + if (require_array && require_interleaved) + return false; + + if (require_interleaved) { + *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED; + return true; + } + + *msaa_layout = ISL_MSAA_LAYOUT_ARRAY; + return true; +} + +/** + * Choose horizontal subimage alignment, in units of surface elements. + */ +static uint32_t +gen8_choose_halign_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info) +{ + if (isl_format_is_compressed(info->format)) + return 1; + + /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", + * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: + * + * - This field is intended to be set to HALIGN_8 only if the surface + * was rendered as a depth buffer with Z16 format or a stencil buffer. + * In this case it must be set to HALIGN_8 since these surfaces + * support only alignment of 8. [...] + */ + if (isl_surf_info_is_z16(info)) + return 8; + if (isl_surf_usage_is_stencil(info->usage)) + return 8; + + /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", + * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: + * + * [...] For Z32 formats it must be set to HALIGN_4. + */ + if (isl_surf_usage_is_depth(info->usage)) + return 4; + + if (!(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) { + /* From the Broadwell PRM, Volume 2d "Command Reference: Structures", + * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326: + * + * - When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, + * HALIGN 16 must be used. + * + * This case handles color surfaces that may own an auxiliary MCS, CCS_D, + * or CCS_E. Depth buffers, including those that own an auxiliary HiZ + * surface, are handled above and do not require HALIGN_16. + */ + assert(!isl_surf_usage_is_depth(info->usage)); + return 16; + } + + /* XXX(chadv): I believe the hardware requires each image to be + * cache-aligned. If that's true, then defaulting to halign=4 is wrong for + * many formats. Depending on the format's block size, we may need to + * increase halign to 8. + */ + return 4; +} + +/** + * Choose vertical subimage alignment, in units of surface elements. + */ +static uint32_t +gen8_choose_valign_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info) +{ + /* From the Broadwell PRM > Volume 2d: Command Reference: Structures + * > RENDER_SURFACE_STATE Surface Vertical Alignment (p325): + * + * - For Sampling Engine and Render Target Surfaces: This field + * specifies the vertical alignment requirement in elements for the + * surface. [...] An element is defined as a pixel in uncompresed + * surface formats, and as a compression block in compressed surface + * formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an + * element is a sample. + * + * - This field is intended to be set to VALIGN_4 if the surface was + * rendered as a depth buffer, for a multisampled (4x) render target, + * or for a multisampled (8x) render target, since these surfaces + * support only alignment of 4. Use of VALIGN_4 for other surfaces is + * supported, but increases memory usage. + * + * - This field is intended to be set to VALIGN_8 only if the surface + * was rendered as a stencil buffer, since stencil buffer surfaces + * support only alignment of 8. If set to VALIGN_8, Surface Format + * must be R8_UINT. + */ + + if (isl_format_is_compressed(info->format)) + return 1; + + if (isl_surf_usage_is_stencil(info->usage)) + return 8; + + return 4; +} + +void +gen8_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) +{ + assert(!isl_tiling_is_std_y(tiling)); + + /* The below text from the Broadwell PRM provides some insight into the + * hardware's requirements for LOD alignment. From the Broadwell PRM >> + * Volume 5: Memory Views >> Surface Layout >> 2D Surfaces: + * + * These [2D surfaces] must adhere to the following memory organization + * rules: + * + * - For non-compressed texture formats, each mipmap must start on an + * even row within the monolithic rectangular area. For + * 1-texel-high mipmaps, this may require a row of padding below + * the previous mipmap. This restriction does not apply to any + * compressed texture formats; each subsequent (lower-res) + * compressed mipmap is positioned directly below the previous + * mipmap. + * + * - Vertical alignment restrictions vary with memory tiling type: + * 1 DWord for linear, 16-byte (DQWord) for tiled. (Note that tiled + * mipmaps are not required to start at the left edge of a tile + * row.) + */ + + *image_align_el = (struct isl_extent3d) { + .w = gen8_choose_halign_el(dev, info), + .h = gen8_choose_valign_el(dev, info), + .d = 1, + }; +} diff --git a/src/isl/isl_gen8.h b/src/isl/isl_gen8.h new file mode 100644 index 00000000000..2017ea8ddc1 --- /dev/null +++ b/src/isl/isl_gen8.h @@ -0,0 +1,47 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +gen8_choose_msaa_layout(const struct isl_device *dev, + const struct isl_surf_init_info *info, + enum isl_tiling tiling, + enum isl_msaa_layout *msaa_layout); + +void +gen8_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/isl/isl_gen9.c b/src/isl/isl_gen9.c new file mode 100644 index 00000000000..aa290aa1c35 --- /dev/null +++ b/src/isl/isl_gen9.c @@ -0,0 +1,185 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "isl_gen8.h" +#include "isl_gen9.h" +#include "isl_priv.h" + +/** + * Calculate the surface's subimage alignment, in units of surface samples, + * for the standard tiling formats Yf and Ys. + */ +static void +gen9_calc_std_image_alignment_sa(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *align_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); + + assert(isl_tiling_is_std_y(tiling)); + + const uint32_t bs = fmtl->bs; + const uint32_t is_Ys = tiling == ISL_TILING_Ys; + + switch (info->dim) { + case ISL_SURF_DIM_1D: + /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface + * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements. + */ + *align_sa = (struct isl_extent3d) { + .w = 1 << (12 - (ffs(bs) - 1) + (4 * is_Ys)), + .h = 1, + .d = 1, + }; + return; + case ISL_SURF_DIM_2D: + /* See the Skylake BSpec > Memory Views > Common Surface Formats > + * Surface Layout and Tiling > 2D Surfaces > 2D/CUBE Alignment + * Requirements. + */ + *align_sa = (struct isl_extent3d) { + .w = 1 << (6 - ((ffs(bs) - 1) / 2) + (4 * is_Ys)), + .h = 1 << (6 - ((ffs(bs) - 0) / 2) + (4 * is_Ys)), + .d = 1, + }; + + if (is_Ys) { + /* FINISHME(chadv): I don't trust this code. Untested. */ + isl_finishme("%s:%s: [SKL+] multisample TileYs", __FILE__, __func__); + + switch (msaa_layout) { + case ISL_MSAA_LAYOUT_NONE: + case ISL_MSAA_LAYOUT_INTERLEAVED: + break; + case ISL_MSAA_LAYOUT_ARRAY: + align_sa->w >>= (ffs(info->samples) - 0) / 2; + align_sa->h >>= (ffs(info->samples) - 1) / 2; + break; + } + } + return; + + case ISL_SURF_DIM_3D: + /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface + * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements. + */ + *align_sa = (struct isl_extent3d) { + .w = 1 << (4 - ((ffs(bs) + 1) / 3) + (4 * is_Ys)), + .h = 1 << (4 - ((ffs(bs) - 1) / 3) + (2 * is_Ys)), + .d = 1 << (4 - ((ffs(bs) - 0) / 3) + (2 * is_Ys)), + }; + return; + } + + unreachable("bad isl_surface_type"); +} + +void +gen9_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el) +{ + /* This BSpec text provides some insight into the hardware's alignment + * requirements [Skylake BSpec > Memory Views > Common Surface Formats > + * Surface Layout and Tiling > 2D Surfaces]: + * + * An LOD must be aligned to a cache-line except for some special cases + * related to Planar YUV surfaces. In general, the cache-alignment + * restriction implies there is a minimum height for an LOD of 4 texels. + * So, LODs which are smaller than 4 high are padded. + * + * From the Skylake BSpec, RENDER_SURFACE_STATE Surface Vertical Alignment: + * + * - For Sampling Engine and Render Target Surfaces: This field + * specifies the vertical alignment requirement in elements for the + * surface. [...] An element is defined as a pixel in uncompresed + * surface formats, and as a compression block in compressed surface + * formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an + * element is a sample. + * + * - This field is used for 2D, CUBE, and 3D surface alignment when Tiled + * Resource Mode is TRMODE_NONE (Tiled Resource Mode is disabled). + * This field is ignored for 1D surfaces and also when Tiled Resource + * Mode is not TRMODE_NONE (e.g. Tiled Resource Mode is enabled). + * + * See the appropriate Alignment table in the "Surface Layout and + * Tiling" section under Common Surface Formats for the table of + * alignment values for Tiled Resrouces. + * + * - For uncompressed surfaces, the units of "j" are rows of pixels on + * the physical surface. For compressed texture formats, the units of + * "j" are in compression blocks, thus each increment in "j" is equal + * to h pixels, where h is the height of the compression block in + * pixels. + * + * - Valid Values: VALIGN_4, VALIGN_8, VALIGN_16 + * + * From the Skylake BSpec, RENDER_SURFACE_STATE Surface Horizontal + * Alignment: + * + * - For uncompressed surfaces, the units of "i" are pixels on the + * physical surface. For compressed texture formats, the units of "i" + * are in compression blocks, thus each increment in "i" is equal to + * w pixels, where w is the width of the compression block in pixels. + * + * - Valid Values: HALIGN_4, HALIGN_8, HALIGN_16 + */ + + if (isl_tiling_is_std_y(tiling)) { + struct isl_extent3d image_align_sa; + gen9_calc_std_image_alignment_sa(dev, info, tiling, msaa_layout, + &image_align_sa); + + *image_align_el = isl_extent3d_sa_to_el(info->format, image_align_sa); + return; + } + + if (info->dim == ISL_SURF_DIM_1D) { + /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface + * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements. + */ + *image_align_el = isl_extent3d(64, 1, 1); + return; + } + + if (isl_format_is_compressed(info->format)) { + /* On Gen9, the meaning of RENDER_SURFACE_STATE's + * SurfaceHorizontalAlignment and SurfaceVerticalAlignment changed for + * compressed formats. They now indicate a multiple of the compression + * block. For example, if the compression mode is ETC2 then HALIGN_4 + * indicates a horizontal alignment of 16 pixels. + * + * To avoid wasting memory, choose the smallest alignment possible: + * HALIGN_4 and VALIGN_4. + */ + *image_align_el = isl_extent3d(4, 4, 1); + return; + } + + gen8_choose_image_alignment_el(dev, info, tiling, msaa_layout, + image_align_el); +} diff --git a/src/isl/isl_gen9.h b/src/isl/isl_gen9.h new file mode 100644 index 00000000000..64ed0aa44ef --- /dev/null +++ b/src/isl/isl_gen9.h @@ -0,0 +1,41 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include "isl_priv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void +gen9_choose_image_alignment_el(const struct isl_device *dev, + const struct isl_surf_init_info *restrict info, + enum isl_tiling tiling, + enum isl_msaa_layout msaa_layout, + struct isl_extent3d *image_align_el); + +#ifdef __cplusplus +} +#endif diff --git a/src/isl/isl_image.c b/src/isl/isl_image.c new file mode 100644 index 00000000000..773160432b9 --- /dev/null +++ b/src/isl/isl_image.c @@ -0,0 +1,188 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "isl.h" +#include "brw_compiler.h" + +bool +isl_is_storage_image_format(enum isl_format format) +{ + /* XXX: Maybe we should put this in the CSV? */ + + switch (format) { + case ISL_FORMAT_R32G32B32A32_UINT: + case ISL_FORMAT_R32G32B32A32_SINT: + case ISL_FORMAT_R32G32B32A32_FLOAT: + case ISL_FORMAT_R32_UINT: + case ISL_FORMAT_R32_SINT: + case ISL_FORMAT_R32_FLOAT: + case ISL_FORMAT_R16G16B16A16_UINT: + case ISL_FORMAT_R16G16B16A16_SINT: + case ISL_FORMAT_R16G16B16A16_FLOAT: + case ISL_FORMAT_R32G32_UINT: + case ISL_FORMAT_R32G32_SINT: + case ISL_FORMAT_R32G32_FLOAT: + case ISL_FORMAT_R8G8B8A8_UINT: + case ISL_FORMAT_R8G8B8A8_SINT: + case ISL_FORMAT_R16G16_UINT: + case ISL_FORMAT_R16G16_SINT: + case ISL_FORMAT_R16G16_FLOAT: + case ISL_FORMAT_R8G8_UINT: + case ISL_FORMAT_R8G8_SINT: + case ISL_FORMAT_R16_UINT: + case ISL_FORMAT_R16_FLOAT: + case ISL_FORMAT_R16_SINT: + case ISL_FORMAT_R8_UINT: + case ISL_FORMAT_R8_SINT: + case ISL_FORMAT_R10G10B10A2_UINT: + case ISL_FORMAT_R10G10B10A2_UNORM: + case ISL_FORMAT_R11G11B10_FLOAT: + case ISL_FORMAT_R16G16B16A16_UNORM: + case ISL_FORMAT_R16G16B16A16_SNORM: + case ISL_FORMAT_R8G8B8A8_UNORM: + case ISL_FORMAT_R8G8B8A8_SNORM: + case ISL_FORMAT_R16G16_UNORM: + case ISL_FORMAT_R16G16_SNORM: + case ISL_FORMAT_R8G8_UNORM: + case ISL_FORMAT_R8G8_SNORM: + case ISL_FORMAT_R16_UNORM: + case ISL_FORMAT_R16_SNORM: + case ISL_FORMAT_R8_UNORM: + case ISL_FORMAT_R8_SNORM: + return true; + default: + return false; + } +} + +enum isl_format +isl_lower_storage_image_format(const struct isl_device *dev, + enum isl_format format) +{ + switch (format) { + /* These are never lowered. Up to BDW we'll have to fall back to untyped + * surface access for 128bpp formats. + */ + case ISL_FORMAT_R32G32B32A32_UINT: + case ISL_FORMAT_R32G32B32A32_SINT: + case ISL_FORMAT_R32G32B32A32_FLOAT: + case ISL_FORMAT_R32_UINT: + case ISL_FORMAT_R32_SINT: + case ISL_FORMAT_R32_FLOAT: + return format; + + /* From HSW to BDW the only 64bpp format supported for typed access is + * RGBA_UINT16. IVB falls back to untyped. + */ + case ISL_FORMAT_R16G16B16A16_UINT: + case ISL_FORMAT_R16G16B16A16_SINT: + case ISL_FORMAT_R16G16B16A16_FLOAT: + case ISL_FORMAT_R32G32_UINT: + case ISL_FORMAT_R32G32_SINT: + case ISL_FORMAT_R32G32_FLOAT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16B16A16_UINT : + ISL_FORMAT_R32G32_UINT); + + /* Up to BDW no SINT or FLOAT formats of less than 32 bits per component + * are supported. IVB doesn't support formats with more than one component + * for typed access. For 8 and 16 bpp formats IVB relies on the + * undocumented behavior that typed reads from R_UINT8 and R_UINT16 + * surfaces actually do a 32-bit misaligned read. The alternative would be + * to use two surface state entries with different formats for each image, + * one for reading (using R_UINT32) and another one for writing (using + * R_UINT8 or R_UINT16), but that would complicate the shaders we generate + * even more. + */ + case ISL_FORMAT_R8G8B8A8_UINT: + case ISL_FORMAT_R8G8B8A8_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R16G16_UINT: + case ISL_FORMAT_R16G16_SINT: + case ISL_FORMAT_R16G16_FLOAT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R8G8_UINT: + case ISL_FORMAT_R8G8_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : + ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT); + + case ISL_FORMAT_R16_UINT: + case ISL_FORMAT_R16_FLOAT: + case ISL_FORMAT_R16_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R16_UINT); + + case ISL_FORMAT_R8_UINT: + case ISL_FORMAT_R8_SINT: + return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R8_UINT); + + /* Neither the 2/10/10/10 nor the 11/11/10 packed formats are supported + * by the hardware. + */ + case ISL_FORMAT_R10G10B10A2_UINT: + case ISL_FORMAT_R10G10B10A2_UNORM: + case ISL_FORMAT_R11G11B10_FLOAT: + return ISL_FORMAT_R32_UINT; + + /* No normalized fixed-point formats are supported by the hardware. */ + case ISL_FORMAT_R16G16B16A16_UNORM: + case ISL_FORMAT_R16G16B16A16_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16B16A16_UINT : + ISL_FORMAT_R32G32_UINT); + + case ISL_FORMAT_R8G8B8A8_UNORM: + case ISL_FORMAT_R8G8B8A8_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R16G16_UNORM: + case ISL_FORMAT_R16G16_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT); + + case ISL_FORMAT_R8G8_UNORM: + case ISL_FORMAT_R8G8_SNORM: + return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ? + ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT); + + case ISL_FORMAT_R16_UNORM: + case ISL_FORMAT_R16_SNORM: + return ISL_FORMAT_R16_UINT; + + case ISL_FORMAT_R8_UNORM: + case ISL_FORMAT_R8_SNORM: + return ISL_FORMAT_R8_UINT; + + default: + assert(!"Unknown image format"); + return ISL_FORMAT_UNSUPPORTED; + } +} diff --git a/src/isl/isl_priv.h b/src/isl/isl_priv.h new file mode 100644 index 00000000000..b399e0f8116 --- /dev/null +++ b/src/isl/isl_priv.h @@ -0,0 +1,141 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include <assert.h> + +#include "brw_device_info.h" +#include "mesa/main/imports.h" +#include "util/macros.h" + +#include "isl.h" + +#define isl_finishme(format, ...) \ + __isl_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__) + +void PRINTFLIKE(3, 4) UNUSED +__isl_finishme(const char *file, int line, const char *fmt, ...); + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +static inline uint32_t +ffs(uint32_t n) { + return __builtin_ffs(n); +} + +static inline bool +isl_is_pow2(uintmax_t n) +{ + return !(n & (n - 1)); +} + +/** + * Alignment must be a power of 2. + */ +static inline bool +isl_is_aligned(uintmax_t n, uintmax_t a) +{ + assert(isl_is_pow2(a)); + return (n & (a - 1)) == 0; +} + +/** + * Alignment must be a power of 2. + */ +static inline uintmax_t +isl_align(uintmax_t n, uintmax_t a) +{ + assert(a != 0 && isl_is_pow2(a)); + return (n + a - 1) & ~(a - 1); +} + +static inline uintmax_t +isl_align_npot(uintmax_t n, uintmax_t a) +{ + assert(a > 0); + return ((n + a - 1) / a) * a; +} + +/** + * Alignment must be a power of 2. + */ +static inline uintmax_t +isl_align_div(uintmax_t n, uintmax_t a) +{ + return isl_align(n, a) / a; +} + +static inline uintmax_t +isl_align_div_npot(uintmax_t n, uintmax_t a) +{ + return isl_align_npot(n, a) / a; +} + +/** + * Log base 2, rounding towards zero. + */ +static inline uint32_t +isl_log2u(uint32_t n) +{ + assert(n != 0); + return 31 - __builtin_clz(n); +} + +static inline uint32_t +isl_minify(uint32_t n, uint32_t levels) +{ + if (unlikely(n == 0)) + return 0; + else + return MAX(n >> levels, 1); +} + +static inline struct isl_extent3d +isl_extent3d_sa_to_el(enum isl_format fmt, struct isl_extent3d extent_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + assert(extent_sa.w % fmtl->bw == 0); + assert(extent_sa.h % fmtl->bh == 0); + assert(extent_sa.d % fmtl->bd == 0); + + return (struct isl_extent3d) { + .w = extent_sa.w / fmtl->bw, + .h = extent_sa.h / fmtl->bh, + .d = extent_sa.d / fmtl->bd, + }; +} + +static inline struct isl_extent3d +isl_extent3d_el_to_sa(enum isl_format fmt, struct isl_extent3d extent_el) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return (struct isl_extent3d) { + .w = extent_el.w * fmtl->bw, + .h = extent_el.h * fmtl->bh, + .d = extent_el.d * fmtl->bd, + }; +} diff --git a/src/isl/tests/.gitignore b/src/isl/tests/.gitignore new file mode 100644 index 00000000000..ba70ecfbee4 --- /dev/null +++ b/src/isl/tests/.gitignore @@ -0,0 +1 @@ +/isl_surf_get_image_offset_test diff --git a/src/isl/tests/isl_surf_get_image_offset_test.c b/src/isl/tests/isl_surf_get_image_offset_test.c new file mode 100644 index 00000000000..cda8583daeb --- /dev/null +++ b/src/isl/tests/isl_surf_get_image_offset_test.c @@ -0,0 +1,353 @@ +/* + * Copyright 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> + +#include "brw_device_info.h" +#include "isl.h" +#include "isl_priv.h" + +#define BDW_GT2_DEVID 0x161a + +// An asssert that works regardless of NDEBUG. +#define t_assert(cond) \ + do { \ + if (!(cond)) { \ + fprintf(stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + abort(); \ + } \ + } while (0) + +static void +t_assert_extent4d(const struct isl_extent4d *e, uint32_t width, + uint32_t height, uint32_t depth, uint32_t array_len) +{ + t_assert(e->width == width); + t_assert(e->height == height); + t_assert(e->depth == depth); + t_assert(e->array_len == array_len); +} + +static void +t_assert_image_alignment_el(const struct isl_surf *surf, + uint32_t w, uint32_t h, uint32_t d) +{ + struct isl_extent3d align_el; + + align_el = isl_surf_get_image_alignment_el(surf); + t_assert(align_el.w == w); + t_assert(align_el.h == h); + t_assert(align_el.d == d); + +} + +static void +t_assert_image_alignment_sa(const struct isl_surf *surf, + uint32_t w, uint32_t h, uint32_t d) +{ + struct isl_extent3d align_sa; + + align_sa = isl_surf_get_image_alignment_sa(surf); + t_assert(align_sa.w == w); + t_assert(align_sa.h == h); + t_assert(align_sa.d == d); + +} + +static void +t_assert_offset_el(const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t expected_x_offset_el, + uint32_t expected_y_offset_el) +{ + uint32_t x, y; + isl_surf_get_image_offset_el(surf, level, logical_array_layer, + logical_z_offset_px, &x, &y); + + t_assert(x == expected_x_offset_el); + t_assert(y == expected_y_offset_el); +} + +static void +t_assert_intratile_offset_el(const struct isl_device *dev, + const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t expected_base_address_offset, + uint32_t expected_x_offset_el, + uint32_t expected_y_offset_el) +{ + uint32_t base_address_offset; + uint32_t x_offset_el, y_offset_el; + isl_surf_get_image_intratile_offset_el(dev, surf, + level, + logical_array_layer, + logical_z_offset_px, + &base_address_offset, + &x_offset_el, + &y_offset_el); + + t_assert(base_address_offset == expected_base_address_offset); + t_assert(x_offset_el == expected_x_offset_el); + t_assert(y_offset_el == expected_y_offset_el); +} + +static void +t_assert_phys_level0_sa(const struct isl_surf *surf, uint32_t width, + uint32_t height, uint32_t depth, uint32_t array_len) +{ + t_assert_extent4d(&surf->phys_level0_sa, width, height, depth, array_len); +} + +static void +t_assert_gen4_3d_layer(const struct isl_surf *surf, + uint32_t level, + uint32_t aligned_width, + uint32_t aligned_height, + uint32_t depth, + uint32_t horiz_layers, + uint32_t vert_layers, + uint32_t *base_y) +{ + for (uint32_t z = 0; z < depth; ++z) { + t_assert_offset_el(surf, level, 0, z, + aligned_width * (z % horiz_layers), + *base_y + aligned_height * (z / horiz_layers)); + } + + *base_y += aligned_height * vert_layers; +} + +static void +test_bdw_2d_r8g8b8a8_unorm_512x512_array01_samples01_noaux_tiley0(void) +{ + bool ok; + + struct isl_device dev; + isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID), + /*bit6_swizzle*/ false); + + struct isl_surf surf; + ok = isl_surf_init(&dev, &surf, + .dim = ISL_SURF_DIM_2D, + .format = ISL_FORMAT_R8G8B8A8_UNORM, + .width = 512, + .height = 512, + .depth = 1, + .levels = 10, + .array_len = 1, + .samples = 1, + .usage = ISL_SURF_USAGE_TEXTURE_BIT | + ISL_SURF_USAGE_DISABLE_AUX_BIT, + .tiling_flags = ISL_TILING_Y0_BIT); + t_assert(ok); + + t_assert_image_alignment_el(&surf, 4, 4, 1); + t_assert_image_alignment_sa(&surf, 4, 4, 1); + t_assert_phys_level0_sa(&surf, 512, 512, 1, 1); + t_assert(isl_surf_get_array_pitch_el_rows(&surf) >= 772); + t_assert(isl_surf_get_array_pitch_el_rows(&surf) == + isl_surf_get_array_pitch_sa_rows(&surf)); + + /* Row pitch should be minimal possible */ + t_assert(surf.row_pitch == 2048); + + t_assert_offset_el(&surf, 0, 0, 0, 0, 0); // +0, +0 + t_assert_offset_el(&surf, 1, 0, 0, 0, 512); // +0, +512 + t_assert_offset_el(&surf, 2, 0, 0, 256, 512); // +256, +0 + t_assert_offset_el(&surf, 3, 0, 0, 256, 640); // +0, +128 + t_assert_offset_el(&surf, 4, 0, 0, 256, 704); // +0, +64 + t_assert_offset_el(&surf, 5, 0, 0, 256, 736); // +0, +32 + t_assert_offset_el(&surf, 6, 0, 0, 256, 752); // +0, +16 + t_assert_offset_el(&surf, 7, 0, 0, 256, 760); // +0, +8 + t_assert_offset_el(&surf, 8, 0, 0, 256, 764); // +0, +4 + t_assert_offset_el(&surf, 9, 0, 0, 256, 768); // +0, +4 + + t_assert_intratile_offset_el(&dev, &surf, 0, 0, 0, 0x0, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 1, 0, 0, 0x100000, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 2, 0, 0, 0x100400, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 3, 0, 0, 0x140400, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 4, 0, 0, 0x160400, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 5, 0, 0, 0x170400, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 6, 0, 0, 0x170400, 0, 16); + t_assert_intratile_offset_el(&dev, &surf, 7, 0, 0, 0x170400, 0, 24); + t_assert_intratile_offset_el(&dev, &surf, 8, 0, 0, 0x170400, 0, 28); + t_assert_intratile_offset_el(&dev, &surf, 9, 0, 0, 0x180400, 0, 0); +} + +static void +test_bdw_2d_r8g8b8a8_unorm_1024x1024_array06_samples01_noaux_tiley0(void) +{ + bool ok; + + struct isl_device dev; + isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID), + /*bit6_swizzle*/ false); + + struct isl_surf surf; + ok = isl_surf_init(&dev, &surf, + .dim = ISL_SURF_DIM_2D, + .format = ISL_FORMAT_R8G8B8A8_UNORM, + .width = 1024, + .height = 1024, + .depth = 1, + .levels = 11, + .array_len = 6, + .samples = 1, + .usage = ISL_SURF_USAGE_TEXTURE_BIT | + ISL_SURF_USAGE_DISABLE_AUX_BIT, + .tiling_flags = ISL_TILING_Y0_BIT); + t_assert(ok); + + t_assert_image_alignment_el(&surf, 4, 4, 1); + t_assert_image_alignment_sa(&surf, 4, 4, 1); + + t_assert(isl_surf_get_array_pitch_el_rows(&surf) >= 1540); + t_assert(isl_surf_get_array_pitch_el_rows(&surf) == + isl_surf_get_array_pitch_sa_rows(&surf)); + + /* Row pitch should be minimal possible */ + t_assert(surf.row_pitch == 4096); + + for (uint32_t a = 0; a < 6; ++a) { + uint32_t b = a * isl_surf_get_array_pitch_sa_rows(&surf); + + t_assert_offset_el(&surf, 0, a, 0, 0, b + 0); // +0, +0 + t_assert_offset_el(&surf, 1, a, 0, 0, b + 1024); // +0, +1024 + t_assert_offset_el(&surf, 2, a, 0, 512, b + 1024); // +512, +0 + t_assert_offset_el(&surf, 3, a, 0, 512, b + 1280); // +0, +256 + t_assert_offset_el(&surf, 4, a, 0, 512, b + 1408); // +0, +128 + t_assert_offset_el(&surf, 5, a, 0, 512, b + 1472); // +0, +64 + t_assert_offset_el(&surf, 6, a, 0, 512, b + 1504); // +0, +32 + t_assert_offset_el(&surf, 7, a, 0, 512, b + 1520); // +0, +16 + t_assert_offset_el(&surf, 8, a, 0, 512, b + 1528); // +0, +8 + t_assert_offset_el(&surf, 9, a, 0, 512, b + 1532); // +0, +4 + t_assert_offset_el(&surf, 10, a, 0, 512, b + 1536); // +0, +4 + + } + + /* The layout below assumes a specific array pitch. It will need updating + * if isl's array pitch calculations ever change. + */ + t_assert(isl_surf_get_array_pitch_el_rows(&surf) == 1540); + + /* array layer 0 */ + t_assert_intratile_offset_el(&dev, &surf, 0, 0, 0, 0x0, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 1, 0, 0, 0x400000, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 2, 0, 0, 0x400800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 3, 0, 0, 0x500800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 4, 0, 0, 0x580800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 5, 0, 0, 0x5c0800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 6, 0, 0, 0x5e0800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 7, 0, 0, 0x5e0800, 0, 16); + t_assert_intratile_offset_el(&dev, &surf, 8, 0, 0, 0x5e0800, 0, 24); + t_assert_intratile_offset_el(&dev, &surf, 9, 0, 0, 0x5e0800, 0, 28); + t_assert_intratile_offset_el(&dev, &surf, 10, 0, 0, 0x600800, 0, 0); + + /* array layer 1 */ + t_assert_intratile_offset_el(&dev, &surf, 0, 1, 0, 0x600000, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 1, 1, 0, 0xa00000, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 2, 1, 0, 0xa00800, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 3, 1, 0, 0xb00800, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 4, 1, 0, 0xb80800, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 5, 1, 0, 0xbc0800, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 6, 1, 0, 0xbe0800, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 7, 1, 0, 0xbe0800, 0, 20); + t_assert_intratile_offset_el(&dev, &surf, 8, 1, 0, 0xbe0800, 0, 28); + t_assert_intratile_offset_el(&dev, &surf, 9, 1, 0, 0xc00800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 10, 1, 0, 0xc00800, 0, 4); + + /* array layer 2 */ + t_assert_intratile_offset_el(&dev, &surf, 0, 2, 0, 0xc00000, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 1, 2, 0, 0x1000000, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 2, 2, 0, 0x1000800, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 3, 2, 0, 0x1100800, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 4, 2, 0, 0x1180800, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 5, 2, 0, 0x11c0800, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 6, 2, 0, 0x11e0800, 0, 8); + t_assert_intratile_offset_el(&dev, &surf, 7, 2, 0, 0x11e0800, 0, 24); + t_assert_intratile_offset_el(&dev, &surf, 8, 2, 0, 0x1200800, 0, 0); + t_assert_intratile_offset_el(&dev, &surf, 9, 2, 0, 0x1200800, 0, 4); + t_assert_intratile_offset_el(&dev, &surf, 10, 2, 0, 0x1200800, 0, 8); + + /* skip the remaining array layers */ +} + +static void +test_bdw_3d_r8g8b8a8_unorm_256x256x256_levels09_tiley0(void) +{ + bool ok; + + struct isl_device dev; + isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID), + /*bit6_swizzle*/ false); + + struct isl_surf surf; + ok = isl_surf_init(&dev, &surf, + .dim = ISL_SURF_DIM_3D, + .format = ISL_FORMAT_R8G8B8A8_UNORM, + .width = 256, + .height = 256, + .depth = 256, + .levels = 9, + .array_len = 1, + .samples = 1, + .usage = ISL_SURF_USAGE_TEXTURE_BIT | + ISL_SURF_USAGE_DISABLE_AUX_BIT, + .tiling_flags = ISL_TILING_Y0_BIT); + t_assert(ok); + + t_assert_image_alignment_el(&surf, 4, 4, 1); + t_assert_image_alignment_sa(&surf, 4, 4, 1); + t_assert(isl_surf_get_array_pitch_el_rows(&surf) == 74916); + t_assert(isl_surf_get_array_pitch_sa_rows(&surf) == + isl_surf_get_array_pitch_el_rows(&surf)); + + uint32_t base_y = 0; + + t_assert_gen4_3d_layer(&surf, 0, 256, 256, 256, 1, 256, &base_y); + t_assert_gen4_3d_layer(&surf, 1, 128, 128, 128, 2, 64, &base_y); + t_assert_gen4_3d_layer(&surf, 2, 64, 64, 64, 4, 16, &base_y); + t_assert_gen4_3d_layer(&surf, 3, 32, 32, 32, 8, 4, &base_y); + t_assert_gen4_3d_layer(&surf, 4, 16, 16, 16, 16, 1, &base_y); + t_assert_gen4_3d_layer(&surf, 5, 8, 8, 8, 32, 1, &base_y); + t_assert_gen4_3d_layer(&surf, 6, 4, 4, 4, 64, 1, &base_y); + t_assert_gen4_3d_layer(&surf, 7, 4, 4, 2, 128, 1, &base_y); + t_assert_gen4_3d_layer(&surf, 8, 4, 4, 1, 256, 1, &base_y); +} + +int main(void) +{ + /* FINISHME: Add tests for npot sizes */ + /* FINISHME: Add tests for 1D surfaces */ + + test_bdw_2d_r8g8b8a8_unorm_512x512_array01_samples01_noaux_tiley0(); + test_bdw_2d_r8g8b8a8_unorm_1024x1024_array06_samples01_noaux_tiley0(); + test_bdw_3d_r8g8b8a8_unorm_256x256x256_levels09_tiley0(); +} diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index cea1e87ccde..5d69039d1af 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -1,6 +1,7 @@ i965_compiler_FILES = \ brw_cfg.cpp \ brw_cfg.h \ + brw_compiler.c \ brw_compiler.h \ brw_dead_control_flow.cpp \ brw_dead_control_flow.h \ @@ -51,6 +52,7 @@ i965_compiler_FILES = \ brw_shader.cpp \ brw_shader.h \ brw_surface_formats.c \ + brw_surface_formats.h \ brw_util.c \ brw_util.h \ brw_vec4_builder.h \ diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp index c6ae3d848bf..fd23e23b6f8 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp @@ -85,7 +85,7 @@ brw_blorp_eu_emitter::emit_texture_lookup(const struct brw_reg &dst, unsigned msg_length) { fs_inst *inst = new (mem_ctx) fs_inst(op, 16, dst, brw_message_reg(base_mrf), - brw_imm_ud(0u)); + brw_imm_ud(0u), brw_imm_ud(0u)); inst->base_mrf = base_mrf; inst->mlen = msg_length; diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c new file mode 100644 index 00000000000..0401e397031 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_compiler.c @@ -0,0 +1,180 @@ +/* + * Copyright © 2015-2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_compiler.h" +#include "brw_context.h" +#include "nir.h" +#include "main/errors.h" +#include "util/debug.h" + +static void +shader_debug_log_mesa(void *data, const char *fmt, ...) +{ + struct brw_context *brw = (struct brw_context *)data; + va_list args; + + va_start(args, fmt); + GLuint msg_id = 0; + _mesa_gl_vdebug(&brw->ctx, &msg_id, + MESA_DEBUG_SOURCE_SHADER_COMPILER, + MESA_DEBUG_TYPE_OTHER, + MESA_DEBUG_SEVERITY_NOTIFICATION, fmt, args); + va_end(args); +} + +static void +shader_perf_log_mesa(void *data, const char *fmt, ...) +{ + struct brw_context *brw = (struct brw_context *)data; + + va_list args; + va_start(args, fmt); + + if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { + va_list args_copy; + va_copy(args_copy, args); + vfprintf(stderr, fmt, args_copy); + va_end(args_copy); + } + + if (brw->perf_debug) { + GLuint msg_id = 0; + _mesa_gl_vdebug(&brw->ctx, &msg_id, + MESA_DEBUG_SOURCE_SHADER_COMPILER, + MESA_DEBUG_TYPE_PERFORMANCE, + MESA_DEBUG_SEVERITY_MEDIUM, fmt, args); + } + va_end(args); +} + +#define COMMON_OPTIONS \ + /* In order to help allow for better CSE at the NIR level we tell NIR to \ + * split all ffma instructions during opt_algebraic and we then re-combine \ + * them as a later step. \ + */ \ + .lower_ffma = true, \ + .lower_sub = true, \ + .lower_fdiv = true, \ + .lower_scmp = true, \ + .lower_fmod = true, \ + .lower_bitfield_extract = true, \ + .lower_bitfield_insert = true, \ + .lower_uadd_carry = true, \ + .lower_usub_borrow = true, \ + .lower_fdiv = true, \ + .native_integers = true, \ + .vertex_id_zero_based = true + +static const struct nir_shader_compiler_options scalar_nir_options = { + COMMON_OPTIONS, + .lower_pack_half_2x16 = true, + .lower_pack_snorm_2x16 = true, + .lower_pack_snorm_4x8 = true, + .lower_pack_unorm_2x16 = true, + .lower_pack_unorm_4x8 = true, + .lower_unpack_half_2x16 = true, + .lower_unpack_snorm_2x16 = true, + .lower_unpack_snorm_4x8 = true, + .lower_unpack_unorm_2x16 = true, + .lower_unpack_unorm_4x8 = true, +}; + +static const struct nir_shader_compiler_options vector_nir_options = { + COMMON_OPTIONS, + + /* In the vec4 backend, our dpN instruction replicates its result to all the + * components of a vec4. We would like NIR to give us replicated fdot + * instructions because it can optimize better for us. + */ + .fdot_replicates = true, + + .lower_pack_snorm_2x16 = true, + .lower_pack_unorm_2x16 = true, + .lower_unpack_snorm_2x16 = true, + .lower_unpack_unorm_2x16 = true, + .lower_extract_byte = true, + .lower_extract_word = true, +}; + +struct brw_compiler * +brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) +{ + struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler); + + compiler->devinfo = devinfo; + compiler->shader_debug_log = shader_debug_log_mesa; + compiler->shader_perf_log = shader_perf_log_mesa; + + brw_fs_alloc_reg_sets(compiler); + brw_vec4_alloc_reg_set(compiler); + + compiler->scalar_stage[MESA_SHADER_VERTEX] = + devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS); + compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false; + compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = + devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TES", true); + compiler->scalar_stage[MESA_SHADER_GEOMETRY] = + devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", true); + compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true; + compiler->scalar_stage[MESA_SHADER_COMPUTE] = true; + + /* We want the GLSL compiler to emit code that uses condition codes */ + for (int i = 0; i < MESA_SHADER_STAGES; i++) { + compiler->glsl_compiler_options[i].MaxUnrollIterations = 32; + compiler->glsl_compiler_options[i].MaxIfDepth = + devinfo->gen < 6 ? 16 : UINT_MAX; + + compiler->glsl_compiler_options[i].EmitCondCodes = true; + compiler->glsl_compiler_options[i].EmitNoNoise = true; + compiler->glsl_compiler_options[i].EmitNoMainReturn = true; + compiler->glsl_compiler_options[i].EmitNoIndirectInput = true; + compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false; + compiler->glsl_compiler_options[i].LowerClipDistance = true; + + bool is_scalar = compiler->scalar_stage[i]; + + compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar; + compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar; + compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar; + + /* !ARB_gpu_shader5 */ + if (devinfo->gen < 7) + compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true; + + compiler->glsl_compiler_options[i].NirOptions = + is_scalar ? &scalar_nir_options : &vector_nir_options; + + compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true; + } + + compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectInput = false; + compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = false; + + if (compiler->scalar_stage[MESA_SHADER_GEOMETRY]) + compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false; + + compiler->glsl_compiler_options[MESA_SHADER_COMPUTE] + .LowerShaderSharedVariables = true; + + return compiler; +} diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index 748ffe58b0c..cd28bbb6bbf 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -94,6 +94,9 @@ struct brw_compiler { struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES]; }; +struct brw_compiler * +brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo); + /** * Program key structures. @@ -687,6 +690,9 @@ struct brw_gs_prog_data /** @} */ +struct brw_compiler * +brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo); + /** * Compile a vertex shader. * diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 10a6d39db85..9edb6f54204 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -63,6 +63,7 @@ # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8) # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8) +#ifndef _3DPRIM_POINTLIST /* FIXME: Avoid clashing with defines from bdw_pack.h */ #define _3DPRIM_POINTLIST 0x01 #define _3DPRIM_LINELIST 0x02 #define _3DPRIM_LINESTRIP 0x03 @@ -86,6 +87,7 @@ #define _3DPRIM_TRIFAN_NOSTIPPLE 0x16 #define _3DPRIM_PATCHLIST(n) ({ assert(n > 0 && n <= 32); 0x20 + (n - 1); }) +#endif /* bdw_pack.h */ /* We use this offset to be able to pass native primitive types in struct * _mesa_prim::mode. Native primitive types are BRW_PRIM_OFFSET + @@ -1085,6 +1087,18 @@ enum opcode { */ SHADER_OPCODE_BROADCAST, + /** + * Pick the byte from its first source register given by the index + * specified as second source. + */ + SHADER_OPCODE_EXTRACT_BYTE, + + /** + * Pick the word from its first source register given by the index + * specified as second source. + */ + SHADER_OPCODE_EXTRACT_WORD, + VEC4_OPCODE_MOV_BYTES, VEC4_OPCODE_PACK_BYTES, VEC4_OPCODE_UNPACK_UNIFORM, diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c index 7ab70fef1e6..05872255865 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.c +++ b/src/mesa/drivers/dri/i965/brw_device_info.c @@ -483,3 +483,15 @@ brw_get_device_info(int devid) return devinfo; } + +const char * +brw_get_device_name(int devid) +{ + switch (devid) { +#undef CHIPSET +#define CHIPSET(id, family, name) case id: return name; +#include "pci_ids/i965_pci_ids.h" + default: + return NULL; + } +} diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h b/src/mesa/drivers/dri/i965/brw_device_info.h index 73d682083a1..48e0dee9084 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.h +++ b/src/mesa/drivers/dri/i965/brw_device_info.h @@ -98,3 +98,4 @@ struct brw_device_info }; const struct brw_device_info *brw_get_device_info(int devid); +const char *brw_get_device_name(int devid); diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 8737c6468e5..23e71fd9ba6 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -54,23 +54,6 @@ #define FILE_DEBUG_FLAG DEBUG_PRIMS -static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = { - [GL_POINTS] =_3DPRIM_POINTLIST, - [GL_LINES] = _3DPRIM_LINELIST, - [GL_LINE_LOOP] = _3DPRIM_LINELOOP, - [GL_LINE_STRIP] = _3DPRIM_LINESTRIP, - [GL_TRIANGLES] = _3DPRIM_TRILIST, - [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, - [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN, - [GL_QUADS] = _3DPRIM_QUADLIST, - [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP, - [GL_POLYGON] = _3DPRIM_POLYGON, - [GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ, - [GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, - [GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ, - [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, -}; - static const GLenum reduced_prim[GL_POLYGON+1] = { [GL_POINTS] = GL_POINTS, @@ -85,18 +68,6 @@ static const GLenum reduced_prim[GL_POLYGON+1] = { [GL_POLYGON] = GL_TRIANGLES }; -uint32_t -get_hw_prim_for_gl_prim(int mode) -{ - if (mode >= BRW_PRIM_OFFSET) - return mode - BRW_PRIM_OFFSET; - else { - assert(mode < ARRAY_SIZE(prim_to_hw_prim)); - return prim_to_hw_prim[mode]; - } -} - - /* When the primitive changes, set a state bit and re-validate. Not * the nicest and would rather deal with this by having all the * programs be immune to the active primitive (ie. cope with all diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 41a3f81b9d8..b7ea1bf8d93 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -174,7 +174,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, * CSE can later notice that those loads are all the same and eliminate * the redundant ones. */ - fs_reg vec4_offset = vgrf(glsl_type::int_type); + fs_reg vec4_offset = vgrf(glsl_type::uint_type); bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~0xf)); int scale = 1; @@ -433,7 +433,6 @@ fs_reg::fs_reg(struct ::brw_reg reg) : { this->reg_offset = 0; this->subreg_offset = 0; - this->reladdr = NULL; this->stride = 1; if (this->file == IMM && (this->type != BRW_REGISTER_TYPE_V && @@ -448,7 +447,6 @@ fs_reg::equals(const fs_reg &r) const { return (this->backend_reg::equals(r) && subreg_offset == r.subreg_offset && - !reladdr && !r.reladdr && stride == r.stride); } @@ -510,6 +508,7 @@ type_size_scalar(const struct glsl_type *type) case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_FUNCTION: unreachable("not reached"); } @@ -739,15 +738,15 @@ fs_inst::components_read(unsigned i) const case SHADER_OPCODE_LOD_LOGICAL: case SHADER_OPCODE_TG4_LOGICAL: case SHADER_OPCODE_TG4_OFFSET_LOGICAL: - assert(src[8].file == IMM && src[9].file == IMM); + assert(src[9].file == IMM && src[10].file == IMM); /* Texture coordinates. */ if (i == 0) - return src[8].ud; + return src[9].ud; /* Texture derivatives. */ else if ((i == 2 || i == 3) && opcode == SHADER_OPCODE_TXD_LOGICAL) - return src[9].ud; + return src[10].ud; /* Texture offset. */ - else if (i == 7) + else if (i == 8) return 2; /* MCS */ else if (i == 5 && opcode == SHADER_OPCODE_TXF_CMS_W_LOGICAL) @@ -850,7 +849,10 @@ fs_inst::regs_read(int arg) const assert(src[2].file == IMM); unsigned region_length = src[2].ud; - if (src[0].file == FIXED_GRF) { + if (src[0].file == UNIFORM) { + assert(region_length % 4 == 0); + return region_length / 4; + } else if (src[0].file == FIXED_GRF) { /* If the start of the region is not register aligned, then * there's some portion of the register that's technically * unread at the beginning. @@ -864,7 +866,7 @@ fs_inst::regs_read(int arg) const * unread portion at the beginning. */ if (src[0].subnr) - region_length += src[0].subnr * type_sz(src[0].type); + region_length += src[0].subnr; return DIV_ROUND_UP(region_length, REG_SIZE); } else { @@ -1020,7 +1022,6 @@ fs_visitor::import_uniforms(fs_visitor *v) this->push_constant_loc = v->push_constant_loc; this->pull_constant_loc = v->pull_constant_loc; this->uniforms = v->uniforms; - this->param_size = v->param_size; } fs_reg * @@ -1923,9 +1924,7 @@ fs_visitor::compact_virtual_grfs() * maximum number of fragment shader uniform components (64). If * there are too many of these, they'd fill up all of register space. * So, this will push some of them out to the pull constant buffer and - * update the program to load them. We also use pull constants for all - * indirect constant loads because we don't support indirect accesses in - * registers yet. + * update the program to load them. */ void fs_visitor::assign_constant_locations() @@ -1934,20 +1933,21 @@ fs_visitor::assign_constant_locations() if (dispatch_width != 8) return; - unsigned int num_pull_constants = 0; - - pull_constant_loc = ralloc_array(mem_ctx, int, uniforms); - memset(pull_constant_loc, -1, sizeof(pull_constant_loc[0]) * uniforms); - bool is_live[uniforms]; memset(is_live, 0, sizeof(is_live)); + /* For each uniform slot, a value of true indicates that the given slot and + * the next slot must remain contiguous. This is used to keep us from + * splitting arrays apart. + */ + bool contiguous[uniforms]; + memset(contiguous, 0, sizeof(contiguous)); + /* First, we walk through the instructions and do two things: * * 1) Figure out which uniforms are live. * - * 2) Find all indirect access of uniform arrays and flag them as needing - * to go into the pull constant buffer. + * 2) Mark any indirectly used ranges of registers as contiguous. * * Note that we don't move constant-indexed accesses to arrays. No * testing has been done of the performance impact of this choice. @@ -1957,20 +1957,19 @@ fs_visitor::assign_constant_locations() if (inst->src[i].file != UNIFORM) continue; - if (inst->src[i].reladdr) { - int uniform = inst->src[i].nr; + int constant_nr = inst->src[i].nr + inst->src[i].reg_offset; - /* If this array isn't already present in the pull constant buffer, - * add it. - */ - if (pull_constant_loc[uniform] == -1) { - assert(param_size[uniform]); - for (int j = 0; j < param_size[uniform]; j++) - pull_constant_loc[uniform + j] = num_pull_constants++; + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) { + assert(inst->src[2].ud % 4 == 0); + unsigned last = constant_nr + (inst->src[2].ud / 4) - 1; + assert(last < uniforms); + + for (unsigned j = constant_nr; j < last; j++) { + is_live[j] = true; + contiguous[j] = true; } + is_live[last] = true; } else { - /* Mark the the one accessed uniform as live */ - int constant_nr = inst->src[i].nr + inst->src[i].reg_offset; if (constant_nr >= 0 && constant_nr < (int) uniforms) is_live[constant_nr] = true; } @@ -1985,29 +1984,48 @@ fs_visitor::assign_constant_locations() * If changing this value, note the limitation about total_regs in * brw_curbe.c. */ - unsigned int max_push_components = 16 * 8; + const unsigned int max_push_components = 16 * 8; + + /* For vulkan we don't limit the max_chunk_size. We set it to 32 float = + * 128 bytes, which is the maximum vulkan push constant size. + */ + const unsigned int max_chunk_size = 32; + unsigned int num_push_constants = 0; + unsigned int num_pull_constants = 0; push_constant_loc = ralloc_array(mem_ctx, int, uniforms); + pull_constant_loc = ralloc_array(mem_ctx, int, uniforms); - for (unsigned int i = 0; i < uniforms; i++) { - if (!is_live[i] || pull_constant_loc[i] != -1) { - /* This UNIFORM register is either dead, or has already been demoted - * to a pull const. Mark it as no longer living in the param[] array. - */ - push_constant_loc[i] = -1; + int chunk_start = -1; + for (unsigned u = 0; u < uniforms; u++) { + push_constant_loc[u] = -1; + pull_constant_loc[u] = -1; + + if (!is_live[u]) continue; - } - if (num_push_constants < max_push_components) { - /* Retain as a push constant. Record the location in the params[] - * array. - */ - push_constant_loc[i] = num_push_constants++; - } else { - /* Demote to a pull constant. */ - push_constant_loc[i] = -1; - pull_constant_loc[i] = num_pull_constants++; + /* This is the first live uniform in the chunk */ + if (chunk_start < 0) + chunk_start = u; + + /* If this element does not need to be contiguous with the next, we + * split at this point and everthing between chunk_start and u forms a + * single chunk. + */ + if (!contiguous[u]) { + unsigned chunk_size = u - chunk_start + 1; + + if (num_push_constants + chunk_size <= max_push_components && + chunk_size <= max_chunk_size) { + for (unsigned j = chunk_start; j <= u; j++) + push_constant_loc[j] = num_push_constants++; + } else { + for (unsigned j = chunk_start; j <= u; j++) + pull_constant_loc[j] = num_pull_constants++; + } + + chunk_start = -1; } } @@ -2038,51 +2056,67 @@ fs_visitor::assign_constant_locations() * or VARYING_PULL_CONSTANT_LOAD instructions which load values into VGRFs. */ void -fs_visitor::demote_pull_constants() +fs_visitor::lower_constant_loads() { - foreach_block_and_inst (block, fs_inst, inst, cfg) { + const unsigned index = stage_prog_data->binding_table.pull_constants_start; + + foreach_block_and_inst_safe (block, fs_inst, inst, cfg) { + /* Set up the annotation tracking for new generated instructions. */ + const fs_builder ibld(this, block, inst); + for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file != UNIFORM) continue; - int pull_index; + /* We'll handle this case later */ + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) + continue; + unsigned location = inst->src[i].nr + inst->src[i].reg_offset; - if (location >= uniforms) /* Out of bounds access */ - pull_index = -1; - else - pull_index = pull_constant_loc[location]; + if (location >= uniforms) + continue; /* Out of bounds access */ + + int pull_index = pull_constant_loc[location]; if (pull_index == -1) continue; - /* Set up the annotation tracking for new generated instructions. */ - const fs_builder ibld(this, block, inst); - const unsigned index = stage_prog_data->binding_table.pull_constants_start; - fs_reg dst = vgrf(glsl_type::float_type); - assert(inst->src[i].stride == 0); - /* Generate a pull load into dst. */ - if (inst->src[i].reladdr) { - VARYING_PULL_CONSTANT_LOAD(ibld, dst, - brw_imm_ud(index), - *inst->src[i].reladdr, - pull_index * 4); - inst->src[i].reladdr = NULL; - inst->src[i].stride = 1; - } else { - const fs_builder ubld = ibld.exec_all().group(8, 0); - struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15); - ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, - dst, brw_imm_ud(index), offset); - inst->src[i].set_smear(pull_index & 3); - } - brw_mark_surface_used(prog_data, index); + fs_reg dst = vgrf(glsl_type::float_type); + const fs_builder ubld = ibld.exec_all().group(8, 0); + struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15); + ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, + dst, brw_imm_ud(index), offset); /* Rewrite the instruction to use the temporary VGRF. */ inst->src[i].file = VGRF; inst->src[i].nr = dst.nr; inst->src[i].reg_offset = 0; + inst->src[i].set_smear(pull_index & 3); + + brw_mark_surface_used(prog_data, index); + } + + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && + inst->src[0].file == UNIFORM) { + + unsigned location = inst->src[0].nr + inst->src[0].reg_offset; + if (location >= uniforms) + continue; /* Out of bounds access */ + + int pull_index = pull_constant_loc[location]; + + if (pull_index == -1) + continue; + + VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst, + brw_imm_ud(index), + inst->src[1], + pull_index * 4); + inst->remove(block); + + brw_mark_surface_used(prog_data, index); } } invalidate_live_intervals(); @@ -2792,10 +2826,23 @@ fs_visitor::emit_repclear_shader() brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; int base_mrf = 1; int color_mrf = base_mrf + 2; + fs_inst *mov; - fs_inst *mov = bld.exec_all().group(4, 0) - .MOV(brw_message_reg(color_mrf), - fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F)); + if (uniforms == 1) { + mov = bld.exec_all().group(4, 0) + .MOV(brw_message_reg(color_mrf), + fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F)); + } else { + struct brw_reg reg = + brw_reg(BRW_GENERAL_REGISTER_FILE, + 2, 3, 0, 0, BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + + mov = bld.exec_all().group(4, 0) + .MOV(vec4(brw_message_reg(color_mrf)), fs_reg(reg)); + } fs_inst *write; if (key->nr_color_regions == 1) { @@ -2824,8 +2871,10 @@ fs_visitor::emit_repclear_shader() assign_curb_setup(); /* Now that we have the uniform assigned, go ahead and force it to a vec4. */ - assert(mov->src[0].file == FIXED_GRF); - mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0); + if (uniforms == 1) { + assert(mov->src[0].file == FIXED_GRF); + mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0); + } } /** @@ -3651,6 +3700,7 @@ lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op, const fs_reg &coordinate, const fs_reg &shadow_c, const fs_reg &lod, const fs_reg &lod2, + const fs_reg &surface, const fs_reg &sampler, unsigned coord_components, unsigned grad_components) @@ -3743,8 +3793,9 @@ lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op, inst->opcode = op; inst->src[0] = reg_undef; - inst->src[1] = sampler; - inst->resize_sources(2); + inst->src[1] = surface; + inst->src[2] = sampler; + inst->resize_sources(3); inst->base_mrf = msg_begin.nr; inst->mlen = msg_end.nr - msg_begin.nr; inst->header_size = 1; @@ -3756,6 +3807,7 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op, const fs_reg &shadow_c, fs_reg lod, fs_reg lod2, const fs_reg &sample_index, + const fs_reg &surface, const fs_reg &sampler, const fs_reg &offset_value, unsigned coord_components, @@ -3838,8 +3890,9 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op, inst->opcode = op; inst->src[0] = reg_undef; - inst->src[1] = sampler; - inst->resize_sources(2); + inst->src[1] = surface; + inst->src[2] = sampler; + inst->resize_sources(3); inst->base_mrf = message.nr; inst->mlen = msg_end.nr - message.nr; inst->header_size = header_size; @@ -3863,7 +3916,9 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, const fs_reg &shadow_c, fs_reg lod, fs_reg lod2, const fs_reg &sample_index, - const fs_reg &mcs, const fs_reg &sampler, + const fs_reg &mcs, + const fs_reg &surface, + const fs_reg &sampler, fs_reg offset_value, unsigned coord_components, unsigned grad_components) @@ -4066,8 +4121,9 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, /* Generate the SEND. */ inst->opcode = op; inst->src[0] = src_payload; - inst->src[1] = sampler; - inst->resize_sources(2); + inst->src[1] = surface; + inst->src[2] = sampler; + inst->resize_sources(3); inst->base_mrf = -1; inst->mlen = mlen; inst->header_size = header_size; @@ -4086,25 +4142,27 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op) const fs_reg &lod2 = inst->src[3]; const fs_reg &sample_index = inst->src[4]; const fs_reg &mcs = inst->src[5]; - const fs_reg &sampler = inst->src[6]; - const fs_reg &offset_value = inst->src[7]; - assert(inst->src[8].file == IMM && inst->src[9].file == IMM); - const unsigned coord_components = inst->src[8].ud; - const unsigned grad_components = inst->src[9].ud; + const fs_reg &surface = inst->src[6]; + const fs_reg &sampler = inst->src[7]; + const fs_reg &offset_value = inst->src[8]; + assert(inst->src[9].file == IMM && inst->src[10].file == IMM); + const unsigned coord_components = inst->src[9].ud; + const unsigned grad_components = inst->src[10].ud; if (devinfo->gen >= 7) { lower_sampler_logical_send_gen7(bld, inst, op, coordinate, shadow_c, lod, lod2, sample_index, - mcs, sampler, offset_value, + mcs, surface, sampler, offset_value, coord_components, grad_components); } else if (devinfo->gen >= 5) { lower_sampler_logical_send_gen5(bld, inst, op, coordinate, shadow_c, lod, lod2, sample_index, - sampler, offset_value, + surface, sampler, offset_value, coord_components, grad_components); } else { lower_sampler_logical_send_gen4(bld, inst, op, coordinate, - shadow_c, lod, lod2, sampler, + shadow_c, lod, lod2, + surface, sampler, coord_components, grad_components); } } @@ -4431,6 +4489,10 @@ get_lowered_simd_width(const struct brw_device_info *devinfo, case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: return 8; + case SHADER_OPCODE_MOV_INDIRECT: + /* Prior to Broadwell, we only have 8 address subregisters */ + return devinfo->gen < 8 ? 8 : inst->exec_size; + default: return inst->exec_size; } @@ -4713,9 +4775,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) break; case UNIFORM: fprintf(file, "u%d", inst->src[i].nr + inst->src[i].reg_offset); - if (inst->src[i].reladdr) { - fprintf(file, "+reladdr"); - } else if (inst->src[i].subreg_offset) { + if (inst->src[i].subreg_offset) { fprintf(file, "+%d.%d", inst->src[i].reg_offset, inst->src[i].subreg_offset); } @@ -4826,7 +4886,6 @@ fs_visitor::get_instruction_generating_reg(fs_inst *start, { if (end == start || end->is_partial_write() || - reg.reladdr || !reg.equals(end->dst)) { return NULL; } else { @@ -5039,7 +5098,7 @@ fs_visitor::optimize() bld = fs_builder(this, 64); assign_constant_locations(); - demote_pull_constants(); + lower_constant_loads(); validate(); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 4612a287bb7..ccdf9433798 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -139,7 +139,7 @@ public: void split_virtual_grfs(); bool compact_virtual_grfs(); void assign_constant_locations(); - void demote_pull_constants(); + void lower_constant_loads(); void invalidate_live_intervals(); void calculate_live_intervals(); void calculate_register_pressure(); @@ -207,6 +207,8 @@ public: fs_reg mcs, int gather_component, bool is_cube_array, + uint32_t surface, + fs_reg surface_reg, uint32_t sampler, fs_reg sampler_reg); fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components, @@ -222,7 +224,7 @@ public: void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg, uint32_t spill_offset, int count); void emit_spill(bblock_t *block, fs_inst *inst, fs_reg reg, - uint32_t spill_offset, int count); + uint32_t spill_offset, int count, bool we_all); void emit_nir_code(); void nir_setup_inputs(); @@ -321,8 +323,6 @@ public: const struct brw_vue_map *input_vue_map; - int *param_size; - int *virtual_grf_start; int *virtual_grf_end; brw::fs_live_variables *live_intervals; @@ -448,6 +448,7 @@ private: void generate_linterp(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src, + struct brw_reg surface_index, struct brw_reg sampler_index); void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst, struct brw_reg src, diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index 689e0911b17..4c2e360edf9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -72,6 +72,13 @@ channel_expressions_predicate(ir_instruction *ir) return false; switch (expr->operation) { + case ir_unop_pack_half_2x16: + case ir_unop_pack_snorm_2x16: + case ir_unop_pack_snorm_4x8: + case ir_unop_pack_unorm_2x16: + case ir_unop_pack_unorm_4x8: + return false; + /* these opcodes need to act on the whole vector, * just like texturing. */ @@ -162,6 +169,11 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) return visit_continue; switch (expr->operation) { + case ir_unop_pack_half_2x16: + case ir_unop_pack_snorm_2x16: + case ir_unop_pack_snorm_4x8: + case ir_unop_pack_unorm_2x16: + case ir_unop_pack_unorm_4x8: case ir_unop_interpolate_at_centroid: case ir_binop_interpolate_at_offset: case ir_binop_interpolate_at_sample: @@ -399,9 +411,6 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_ssbo_unsized_array_length: unreachable("should have been lowered"); - case ir_unop_unpack_half_2x16_split_x: - case ir_unop_unpack_half_2x16_split_y: - case ir_binop_pack_half_2x16_split: case ir_unop_interpolate_at_centroid: case ir_binop_interpolate_at_offset: case ir_binop_interpolate_at_sample: diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 3b65a382dc8..cde6566c05c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -78,6 +78,8 @@ is_expression(const fs_visitor *v, const fs_inst *const inst) case FS_OPCODE_LINTERP: case SHADER_OPCODE_FIND_LIVE_CHANNEL: case SHADER_OPCODE_BROADCAST: + case SHADER_OPCODE_EXTRACT_BYTE: + case SHADER_OPCODE_EXTRACT_WORD: case SHADER_OPCODE_MOV_INDIRECT: return true; case SHADER_OPCODE_RCP: diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index e05622ae7a8..cac92b37bd5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -351,23 +351,47 @@ fs_generator::generate_mov_indirect(fs_inst *inst, unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr; - /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */ - struct brw_reg addr = vec8(brw_address_reg(0)); + if (indirect_byte_offset.file == BRW_IMMEDIATE_VALUE) { + imm_byte_offset += indirect_byte_offset.ud; - /* The destination stride of an instruction (in bytes) must be greater - * than or equal to the size of the rest of the instruction. Since the - * address register is of type UW, we can't use a D-type instruction. - * In order to get around this, re re-type to UW and use a stride. - */ - indirect_byte_offset = - retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW); + reg.nr = imm_byte_offset / REG_SIZE; + reg.subnr = imm_byte_offset % REG_SIZE; + brw_MOV(p, dst, reg); + } else { + /* Prior to Broadwell, there are only 8 address registers. */ + assert(inst->exec_size == 8 || devinfo->gen >= 8); - /* Prior to Broadwell, there are only 8 address registers. */ - assert(inst->exec_size == 8 || devinfo->gen >= 8); + /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */ + struct brw_reg addr = vec8(brw_address_reg(0)); - brw_MOV(p, addr, indirect_byte_offset); - brw_inst_set_mask_control(devinfo, brw_last_inst, BRW_MASK_DISABLE); - brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type)); + /* The destination stride of an instruction (in bytes) must be greater + * than or equal to the size of the rest of the instruction. Since the + * address register is of type UW, we can't use a D-type instruction. + * In order to get around this, re re-type to UW and use a stride. + */ + indirect_byte_offset = + retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW); + + if (devinfo->gen < 8) { + /* Prior to broadwell, we have a restriction that the bottom 5 bits + * of the base offset and the bottom 5 bits of the indirect must add + * to less than 32. In other words, the hardware needs to be able to + * add the bottom five bits of the two to get the subnumber and add + * the next 7 bits of each to get the actual register number. Since + * the indirect may cause us to cross a register boundary, this makes + * it almost useless. We could try and do something clever where we + * use a actual base offset if base_offset % 32 == 0 but that would + * mean we were generating different code depending on the base + * offset. Instead, for the sake of consistency, we'll just do the + * add ourselves. + */ + brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset)); + brw_MOV(p, dst, retype(brw_VxH_indirect(0, 0), dst.type)); + } else { + brw_MOV(p, addr, indirect_byte_offset); + brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type)); + } + } } void @@ -678,6 +702,7 @@ fs_generator::generate_get_buffer_size(fs_inst *inst, void fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src, + struct brw_reg surface_index, struct brw_reg sampler_index) { int msg_type = -1; @@ -933,14 +958,16 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src ? prog_data->binding_table.gather_texture_start : prog_data->binding_table.texture_start; - if (sampler_index.file == BRW_IMMEDIATE_VALUE) { + if (surface_index.file == BRW_IMMEDIATE_VALUE && + sampler_index.file == BRW_IMMEDIATE_VALUE) { + uint32_t surface = surface_index.ud; uint32_t sampler = sampler_index.ud; brw_SAMPLE(p, retype(dst, BRW_REGISTER_TYPE_UW), inst->base_mrf, src, - sampler + base_binding_table_index, + surface + base_binding_table_index, sampler % 16, msg_type, rlen, @@ -949,19 +976,24 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src simd_mode, return_format); - brw_mark_surface_used(prog_data, sampler + base_binding_table_index); + brw_mark_surface_used(prog_data, surface + base_binding_table_index); } else { /* Non-const sampler index */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); + struct brw_reg surface_reg = vec1(retype(surface_index, BRW_REGISTER_TYPE_UD)); struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */ - brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); + if (memcmp(&surface_reg, &sampler_reg, sizeof(surface_reg)) == 0) { + brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); + } else { + brw_SHL(p, addr, sampler_reg, brw_imm_ud(8)); + brw_OR(p, addr, addr, surface_reg); + } if (base_binding_table_index) brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); brw_AND(p, addr, addr, brw_imm_ud(0xfff)); @@ -2070,7 +2102,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: case SHADER_OPCODE_SAMPLEINFO: - generate_tex(inst, dst, src[0], src[1]); + generate_tex(inst, dst, src[0], src[1], src[2]); break; case FS_OPCODE_DDX_COARSE: case FS_OPCODE_DDX_FINE: @@ -2201,6 +2233,28 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) brw_broadcast(p, dst, src[0], src[1]); break; + case SHADER_OPCODE_EXTRACT_BYTE: { + assert(src[0].type == BRW_REGISTER_TYPE_D || + src[0].type == BRW_REGISTER_TYPE_UD); + + enum brw_reg_type type = + src[0].type == BRW_REGISTER_TYPE_D ? BRW_REGISTER_TYPE_B + : BRW_REGISTER_TYPE_UB; + brw_MOV(p, dst, spread(suboffset(retype(src[0], type), src[1].ud), 4)); + break; + } + + case SHADER_OPCODE_EXTRACT_WORD: { + assert(src[0].type == BRW_REGISTER_TYPE_D || + src[0].type == BRW_REGISTER_TYPE_UD); + + enum brw_reg_type type = + src[0].type == BRW_REGISTER_TYPE_D ? BRW_REGISTER_TYPE_W + : BRW_REGISTER_TYPE_UW; + brw_MOV(p, dst, spread(suboffset(retype(src[0], type), src[1].ud), 2)); + break; + } + case FS_OPCODE_SET_SAMPLE_ID: generate_set_sample_id(inst, dst, src[0], src[1]); break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 68a86a9a85e..6c3a8d70677 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -179,15 +179,6 @@ fs_visitor::nir_setup_uniforms() return; uniforms = nir->num_uniforms / 4; - - nir_foreach_variable(var, &nir->uniforms) { - /* UBO's and atomics don't take up space in the uniform file */ - if (var->interface_type != NULL || var->type->contains_atomic()) - continue; - - if (type_size_scalar(var->type) > 0) - param_size[var->data.driver_location / 4] = type_size_scalar(var->type); - } } static bool @@ -728,15 +719,29 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) inst->saturate = instr->dest.saturate; break; - case nir_op_fsin: - inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]); - inst->saturate = instr->dest.saturate; + case nir_op_fsin: { + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F); + inst = bld.emit(SHADER_OPCODE_SIN, tmp, op[0]); + if (instr->dest.saturate) { + inst->dst = result; + inst->saturate = true; + } else { + bld.MUL(result, tmp, brw_imm_f(0.99997)); + } break; + } - case nir_op_fcos: - inst = bld.emit(SHADER_OPCODE_COS, result, op[0]); - inst->saturate = instr->dest.saturate; + case nir_op_fcos: { + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F); + inst = bld.emit(SHADER_OPCODE_COS, tmp, op[0]); + if (instr->dest.saturate) { + inst->dst = result; + inst->saturate = true; + } else { + bld.MUL(result, tmp, brw_imm_f(0.99997)); + } break; + } case nir_op_fddx: if (fs_key->high_quality_derivatives) { @@ -807,9 +812,41 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) unreachable("Should have been lowered by borrow_to_arith()."); case nir_op_umod: + case nir_op_irem: + /* According to the sign table for INT DIV in the Ivy Bridge PRM, it + * appears that our hardware just does the right thing for signed + * remainder. + */ bld.emit(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]); break; + case nir_op_imod: { + /* Get a regular C-style remainder. If a % b == 0, set the predicate. */ + bld.emit(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]); + + /* Math instructions don't support conditional mod */ + inst = bld.MOV(bld.null_reg_d(), result); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + /* Now, we need to determine if signs of the sources are different. + * When we XOR the sources, the top bit is 0 if they are the same and 1 + * if they are different. We can then use a conditional modifier to + * turn that into a predicate. This leads us to an XOR.l instruction. + */ + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_D); + inst = bld.XOR(tmp, op[0], op[1]); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->conditional_mod = BRW_CONDITIONAL_L; + + /* If the result of the initial remainder operation is non-zero and the + * two sources have different signs, add in a copy of op[1] to get the + * final integer modulus value. + */ + inst = bld.ADD(result, result, op[1]); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + } + case nir_op_flt: case nir_op_ilt: case nir_op_ult: @@ -947,6 +984,34 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) inst->saturate = instr->dest.saturate; break; + case nir_op_fquantize2f16: { + fs_reg tmp16 = bld.vgrf(BRW_REGISTER_TYPE_D); + fs_reg tmp32 = bld.vgrf(BRW_REGISTER_TYPE_F); + fs_reg zero = bld.vgrf(BRW_REGISTER_TYPE_F); + + /* The destination stride must be at least as big as the source stride. */ + tmp16.type = BRW_REGISTER_TYPE_W; + tmp16.stride = 2; + + /* Check for denormal */ + fs_reg abs_src0 = op[0]; + abs_src0.abs = true; + bld.CMP(bld.null_reg_f(), abs_src0, brw_imm_f(ldexpf(1.0, -14)), + BRW_CONDITIONAL_L); + /* Get the appropriately signed zero */ + bld.AND(retype(zero, BRW_REGISTER_TYPE_UD), + retype(op[0], BRW_REGISTER_TYPE_UD), + brw_imm_ud(0x80000000)); + /* Do the actual F32 -> F16 -> F32 conversion */ + bld.emit(BRW_OPCODE_F32TO16, tmp16, op[0]); + bld.emit(BRW_OPCODE_F16TO32, tmp32, tmp16); + /* Select that or zero based on normal status */ + inst = bld.SEL(result, zero, tmp32); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->saturate = instr->dest.saturate; + break; + } + case nir_op_fmin: case nir_op_imin: case nir_op_umin: @@ -1079,6 +1144,22 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) inst->predicate = BRW_PREDICATE_NORMAL; break; + case nir_op_extract_ubyte: + case nir_op_extract_ibyte: { + nir_const_value *byte = nir_src_as_const_value(instr->src[1].src); + bld.emit(SHADER_OPCODE_EXTRACT_BYTE, + result, op[0], brw_imm_ud(byte->u[0])); + break; + } + + case nir_op_extract_uword: + case nir_op_extract_iword: { + nir_const_value *word = nir_src_as_const_value(instr->src[1].src); + bld.emit(SHADER_OPCODE_EXTRACT_WORD, + result, op[0], brw_imm_ud(word->u[0])); + break; + } + default: unreachable("unhandled instruction"); } @@ -1154,6 +1235,8 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref) { fs_reg image(UNIFORM, deref->var->data.driver_location / 4, BRW_REGISTER_TYPE_UD); + fs_reg indirect; + unsigned indirect_max = 0; for (const nir_deref *tail = &deref->deref; tail->child; tail = tail->child) { @@ -1165,7 +1248,7 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref) image = offset(image, bld, base * element_size); if (deref_array->deref_array_type == nir_deref_array_type_indirect) { - fs_reg tmp = vgrf(glsl_type::int_type); + fs_reg tmp = vgrf(glsl_type::uint_type); if (devinfo->gen == 7 && !devinfo->is_haswell) { /* IVB hangs when trying to access an invalid surface index with @@ -1183,15 +1266,31 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref) bld.MOV(tmp, get_nir_src(deref_array->indirect)); } + indirect_max += element_size * (tail->type->length - 1); + bld.MUL(tmp, tmp, brw_imm_ud(element_size * 4)); - if (image.reladdr) - bld.ADD(*image.reladdr, *image.reladdr, tmp); - else - image.reladdr = new(mem_ctx) fs_reg(tmp); + if (indirect.file == BAD_FILE) { + indirect = tmp; + } else { + bld.ADD(indirect, indirect, tmp); + } } } - return image; + if (indirect.file == BAD_FILE) { + return image; + } else { + /* Emit a pile of MOVs to load the uniform into a temporary. The + * dead-code elimination pass will get rid of what we don't use. + */ + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, BRW_IMAGE_PARAM_SIZE); + for (unsigned j = 0; j < BRW_IMAGE_PARAM_SIZE; j++) { + bld.emit(SHADER_OPCODE_MOV_INDIRECT, + offset(tmp, bld, j), offset(image, bld, j), + indirect, brw_imm_ud((indirect_max + 1) * 4)); + } + return tmp; + } } void @@ -2280,6 +2379,82 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, nir_emit_shared_atomic(bld, BRW_AOP_CMPWR, instr); break; + case nir_intrinsic_load_shared: { + assert(devinfo->gen >= 7); + + fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); + + /* Get the offset to read from */ + fs_reg offset_reg; + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); + if (const_offset) { + offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0]); + } else { + offset_reg = vgrf(glsl_type::uint_type); + bld.ADD(offset_reg, + retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD), + brw_imm_ud(instr->const_index[0])); + } + + /* Read the vector */ + fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, + 1 /* dims */, + instr->num_components, + BRW_PREDICATE_NONE); + read_result.type = dest.type; + for (int i = 0; i < instr->num_components; i++) + bld.MOV(offset(dest, bld, i), offset(read_result, bld, i)); + + break; + } + + case nir_intrinsic_store_shared: { + assert(devinfo->gen >= 7); + + /* Block index */ + fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); + + /* Value */ + fs_reg val_reg = get_nir_src(instr->src[0]); + + /* Writemask */ + unsigned writemask = instr->const_index[1]; + + /* Combine groups of consecutive enabled channels in one write + * message. We use ffs to find the first enabled channel and then ffs on + * the bit-inverse, down-shifted writemask to determine the length of + * the block of enabled bits. + */ + while (writemask) { + unsigned first_component = ffs(writemask) - 1; + unsigned length = ffs(~(writemask >> first_component)) - 1; + fs_reg offset_reg; + + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + if (const_offset) { + offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0] + + 4 * first_component); + } else { + offset_reg = vgrf(glsl_type::uint_type); + bld.ADD(offset_reg, + retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD), + brw_imm_ud(instr->const_index[0] + 4 * first_component)); + } + + emit_untyped_write(bld, surf_index, offset_reg, + offset(val_reg, bld, first_component), + 1 /* dims */, length, + BRW_PREDICATE_NONE); + + /* Clear the bits in the writemask that we just wrote, then try + * again to see if more channels are left. + */ + writemask &= (15 << (first_component + length)); + } + + break; + } + default: nir_emit_intrinsic(bld, instr); break; @@ -2492,12 +2667,28 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr /* Offsets are in bytes but they should always be multiples of 4 */ assert(const_offset->u[0] % 4 == 0); src.reg_offset = const_offset->u[0] / 4; + + for (unsigned j = 0; j < instr->num_components; j++) { + bld.MOV(offset(dest, bld, j), offset(src, bld, j)); + } } else { - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); - } + fs_reg indirect = retype(get_nir_src(instr->src[0]), + BRW_REGISTER_TYPE_UD); - for (unsigned j = 0; j < instr->num_components; j++) { - bld.MOV(offset(dest, bld, j), offset(src, bld, j)); + /* We need to pass a size to the MOV_INDIRECT but we don't want it to + * go past the end of the uniform. In order to keep the n'th + * component from running past, we subtract off the size of all but + * one component of the vector. + */ + assert(instr->const_index[1] >= instr->num_components * 4); + unsigned read_size = instr->const_index[1] - + (instr->num_components - 1) * 4; + + for (unsigned j = 0; j < instr->num_components; j++) { + bld.emit(SHADER_OPCODE_MOV_INDIRECT, + offset(dest, bld, j), offset(src, bld, j), + indirect, brw_imm_ud(read_size)); + } } break; } @@ -2605,82 +2796,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_load_shared: { - assert(devinfo->gen >= 7); - - fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); - - /* Get the offset to read from */ - fs_reg offset_reg; - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - if (const_offset) { - offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0]); - } else { - offset_reg = vgrf(glsl_type::uint_type); - bld.ADD(offset_reg, - retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD), - brw_imm_ud(instr->const_index[0])); - } - - /* Read the vector */ - fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, - 1 /* dims */, - instr->num_components, - BRW_PREDICATE_NONE); - read_result.type = dest.type; - for (int i = 0; i < instr->num_components; i++) - bld.MOV(offset(dest, bld, i), offset(read_result, bld, i)); - - break; - } - - case nir_intrinsic_store_shared: { - assert(devinfo->gen >= 7); - - /* Block index */ - fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); - - /* Value */ - fs_reg val_reg = get_nir_src(instr->src[0]); - - /* Writemask */ - unsigned writemask = instr->const_index[1]; - - /* Combine groups of consecutive enabled channels in one write - * message. We use ffs to find the first enabled channel and then ffs on - * the bit-inverse, down-shifted writemask to determine the length of - * the block of enabled bits. - */ - while (writemask) { - unsigned first_component = ffs(writemask) - 1; - unsigned length = ffs(~(writemask >> first_component)) - 1; - fs_reg offset_reg; - - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); - if (const_offset) { - offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0] + - 4 * first_component); - } else { - offset_reg = vgrf(glsl_type::uint_type); - bld.ADD(offset_reg, - retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD), - brw_imm_ud(instr->const_index[0] + 4 * first_component)); - } - - emit_untyped_write(bld, surf_index, offset_reg, - offset(val_reg, bld, first_component), - 1 /* dims */, length, - BRW_PREDICATE_NONE); - - /* Clear the bits in the writemask that we just wrote, then try - * again to see if more channels are left. - */ - writemask &= (15 << (first_component + length)); - } - - break; - } - case nir_intrinsic_load_input: { fs_reg src; if (stage == MESA_SHADER_VERTEX) { @@ -2924,7 +3039,9 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld, void fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) { + unsigned texture = instr->texture_index; unsigned sampler = instr->sampler_index; + fs_reg texture_reg(brw_imm_ud(texture)); fs_reg sampler_reg(brw_imm_ud(sampler)); int gather_component = instr->component; @@ -2937,6 +3054,10 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, tex_offset; + /* Our hardware requires a LOD for buffer textures */ + if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) + lod = brw_imm_d(0); + for (unsigned i = 0; i < instr->num_srcs; i++) { fs_reg src = get_nir_src(instr->src[i].src); switch (instr->src[i].src_type) { @@ -2991,9 +3112,9 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) case nir_tex_src_projector: unreachable("should be lowered"); - case nir_tex_src_sampler_offset: { - /* Figure out the highest possible sampler index and mark it as used */ - uint32_t max_used = sampler + instr->sampler_array_size - 1; + case nir_tex_src_texture_offset: { + /* Figure out the highest possible texture index and mark it as used */ + uint32_t max_used = texture + instr->texture_array_size - 1; if (instr->op == nir_texop_tg4 && devinfo->gen < 8) { max_used += stage_prog_data->binding_table.gather_texture_start; } else { @@ -3002,6 +3123,14 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) brw_mark_surface_used(prog_data, max_used); /* Emit code to evaluate the actual indexing expression */ + texture_reg = vgrf(glsl_type::uint_type); + bld.ADD(texture_reg, src, brw_imm_ud(texture)); + texture_reg = bld.emit_uniformize(texture_reg); + break; + } + + case nir_tex_src_sampler_offset: { + /* Emit code to evaluate the actual indexing expression */ sampler_reg = vgrf(glsl_type::uint_type); bld.ADD(sampler_reg, src, brw_imm_ud(sampler)); sampler_reg = bld.emit_uniformize(sampler_reg); @@ -3016,8 +3145,8 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) if (instr->op == nir_texop_txf_ms || instr->op == nir_texop_samples_identical) { if (devinfo->gen >= 7 && - key_tex->compressed_multisample_layout_mask & (1 << sampler)) { - mcs = emit_mcs_fetch(coordinate, instr->coord_components, sampler_reg); + key_tex->compressed_multisample_layout_mask & (1 << texture)) { + mcs = emit_mcs_fetch(coordinate, instr->coord_components, texture_reg); } else { mcs = brw_imm_ud(0u); } @@ -3054,7 +3183,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) fs_reg dst = retype(get_nir_dest(instr->dest), BRW_REGISTER_TYPE_D); fs_inst *inst = bld.emit(SHADER_OPCODE_SAMPLEINFO, dst, bld.vgrf(BRW_REGISTER_TYPE_D, 1), - sampler_reg); + texture_reg, texture_reg); inst->mlen = 1; inst->header_size = 1; inst->base_mrf = -1; @@ -3068,7 +3197,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) emit_texture(op, dest_type, coordinate, instr->coord_components, shadow_comparitor, lod, lod2, lod_components, sample_index, tex_offset, mcs, gather_component, - is_cube_array, sampler, sampler_reg); + is_cube_array, texture, texture_reg, sampler, sampler_reg); fs_reg dest = get_nir_dest(instr->dest); dest.type = this->result.type; diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 2347cd5d33f..8396854fcb1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -751,6 +751,7 @@ fs_visitor::emit_unspill(bblock_t *block, fs_inst *inst, fs_reg dst, dst); unspill_inst->offset = spill_offset; unspill_inst->regs_written = reg_size; + unspill_inst->force_writemask_all = true; if (!gen7_read) { unspill_inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1; @@ -764,11 +765,11 @@ fs_visitor::emit_unspill(bblock_t *block, fs_inst *inst, fs_reg dst, void fs_visitor::emit_spill(bblock_t *block, fs_inst *inst, fs_reg src, - uint32_t spill_offset, int count) + uint32_t spill_offset, int count, bool we_all) { int reg_size = 1; int spill_base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1; - if (dispatch_width == 16 && count % 2 == 0) { + if (inst->exec_size == 16 && count % 2 == 0) { spill_base_mrf = FIRST_SPILL_MRF(devinfo->gen); reg_size = 2; } @@ -784,6 +785,8 @@ fs_visitor::emit_spill(bblock_t *block, fs_inst *inst, fs_reg src, spill_inst->offset = spill_offset + i * reg_size * REG_SIZE; spill_inst->mlen = 1 + reg_size; /* header, value */ spill_inst->base_mrf = spill_base_mrf; + spill_inst->force_writemask_all = we_all; + spill_inst->force_sechalf = inst->force_sechalf; } } @@ -805,30 +808,13 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) */ foreach_block_and_inst(block, fs_inst, inst, cfg) { for (unsigned int i = 0; i < inst->sources; i++) { - if (inst->src[i].file == VGRF) { + if (inst->src[i].file == VGRF) spill_costs[inst->src[i].nr] += loop_scale; - - /* Register spilling logic assumes full-width registers; smeared - * registers have a width of 1 so if we try to spill them we'll - * generate invalid assembly. This shouldn't be a problem because - * smeared registers are only used as short-term temporaries when - * loading pull constants, so spilling them is unlikely to reduce - * register pressure anyhow. - */ - if (!inst->src[i].is_contiguous()) { - no_spill[inst->src[i].nr] = true; - } - } } - if (inst->dst.file == VGRF) { + if (inst->dst.file == VGRF) spill_costs[inst->dst.nr] += inst->regs_written * loop_scale; - if (!inst->dst.is_contiguous()) { - no_spill[inst->dst.nr] = true; - } - } - switch (inst->opcode) { case BRW_OPCODE_DO: @@ -938,12 +924,15 @@ fs_visitor::spill_reg(int spill_reg) * inst->regs_written(), then we need to unspill the destination * since we write back out all of the regs_written(). */ - if (inst->is_partial_write()) + bool need_unspill = inst->is_partial_write() || + type_sz(inst->dst.type) != 4; + if (need_unspill) emit_unspill(block, inst, spill_src, subset_spill_offset, inst->regs_written); emit_spill(block, inst, spill_src, subset_spill_offset, - inst->regs_written); + inst->regs_written, + need_unspill || inst->force_writemask_all); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp index 45694ec0894..f2faceeb579 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp @@ -717,6 +717,15 @@ namespace { bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c), brw_imm_d(-(int)scale(widths[c] - s) - 1), BRW_CONDITIONAL_GE); + + /* Mask off all but the bits we actually want. Otherwise, if + * we pass a negative number into the hardware when it's + * expecting something like UINT8, it will happily clamp it to + * +255 for us. + */ + if (is_signed && widths[c] < 32) + bld.AND(offset(dst, bld, c), offset(dst, bld, c), + brw_imm_d((1 << widths[c]) - 1)); } } @@ -787,6 +796,15 @@ namespace { /* Convert to integer. */ bld.RNDE(offset(fdst, bld, c), offset(fdst, bld, c)); bld.MOV(offset(dst, bld, c), offset(fdst, bld, c)); + + /* Mask off all but the bits we actually want. Otherwise, if + * we pass a negative number into the hardware when it's + * expecting something like UINT8, it will happily clamp it to + * +255 for us. + */ + if (is_signed && widths[c] < 32) + bld.AND(offset(dst, bld, c), offset(dst, bld, c), + brw_imm_d((1 << widths[c]) - 1)); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index aad512f4be6..6b9bfcf0b85 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -79,12 +79,12 @@ fs_visitor::emit_vs_system_value(int location) /* Sample from the MCS surface attached to this multisample texture. */ fs_reg fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components, - const fs_reg &sampler) + const fs_reg &texture) { const fs_reg dest = vgrf(glsl_type::uvec4_type); const fs_reg srcs[] = { coordinate, fs_reg(), fs_reg(), fs_reg(), fs_reg(), fs_reg(), - sampler, fs_reg(), brw_imm_ud(components), brw_imm_d(0) + texture, texture, fs_reg(), brw_imm_ud(components), brw_imm_d(0) }; fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs, ARRAY_SIZE(srcs)); @@ -108,6 +108,8 @@ fs_visitor::emit_texture(ir_texture_opcode op, fs_reg mcs, int gather_component, bool is_cube_array, + uint32_t surface, + fs_reg surface_reg, uint32_t sampler, fs_reg sampler_reg) { @@ -147,7 +149,7 @@ fs_visitor::emit_texture(ir_texture_opcode op, fs_reg dst = vgrf(glsl_type::get_instance(dest_type->base_type, 4, 1)); const fs_reg srcs[] = { coordinate, shadow_c, lod, lod2, - sample_index, mcs, sampler_reg, offset_value, + sample_index, mcs, surface_reg, sampler_reg, offset_value, brw_imm_d(coord_components), brw_imm_d(grad_components) }; enum opcode opcode; @@ -200,7 +202,7 @@ fs_visitor::emit_texture(ir_texture_opcode op, if (op == ir_tg4) { if (gather_component == 1 && - key_tex->gather_channel_quirk_mask & (1 << sampler)) { + key_tex->gather_channel_quirk_mask & (1 << surface)) { /* gather4 sampler is broken for green channel on RG32F -- * we must ask for blue instead. */ @@ -210,7 +212,7 @@ fs_visitor::emit_texture(ir_texture_opcode op, } if (devinfo->gen == 6) - emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], dst); + emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], dst); } /* fixup #layers for cube map arrays */ @@ -924,6 +926,8 @@ void fs_visitor::emit_barrier() { assert(devinfo->gen >= 7); + const uint32_t barrier_id_mask = + devinfo->gen >= 9 ? 0x8f000000u : 0x0f000000u; /* We are getting the barrier ID from the compute shader header */ assert(stage == MESA_SHADER_COMPUTE); @@ -937,7 +941,7 @@ fs_visitor::emit_barrier() /* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */ fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD)); - pbld.AND(component(payload, 2), r0_2, brw_imm_ud(0x0f000000u)); + pbld.AND(component(payload, 2), r0_2, brw_imm_ud(barrier_id_mask)); /* Emit a gateway "barrier" message using the payload we set up, followed * by a wait instruction. @@ -1035,9 +1039,6 @@ fs_visitor::init() this->spilled_any_registers = false; this->do_dual_src = false; - - if (dispatch_width == 8) - this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params); } fs_visitor::~fs_visitor() diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index c3eec2efb42..e4f20f4ffc9 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -58,8 +58,6 @@ public: */ int subreg_offset; - fs_reg *reladdr; - /** Register region horizontal stride */ uint8_t stride; }; @@ -136,8 +134,7 @@ component(fs_reg reg, unsigned idx) static inline bool is_uniform(const fs_reg ®) { - return (reg.stride == 0 || reg.is_null()) && - (!reg.reladdr || is_uniform(*reg.reladdr)); + return (reg.stride == 0 || reg.is_null()); } /** diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp index db4ba88cb1c..f48c6fe12a1 100644 --- a/src/mesa/drivers/dri/i965/brw_link.cpp +++ b/src/mesa/drivers/dri/i965/brw_link.cpp @@ -73,36 +73,13 @@ brw_lower_packing_builtins(struct brw_context *brw, gl_shader_stage shader_type, exec_list *ir) { - const struct brw_compiler *compiler = brw->intelScreen->compiler; - - int ops = LOWER_PACK_SNORM_2x16 - | LOWER_UNPACK_SNORM_2x16 - | LOWER_PACK_UNORM_2x16 - | LOWER_UNPACK_UNORM_2x16; - - if (compiler->scalar_stage[shader_type]) { - ops |= LOWER_UNPACK_UNORM_4x8 - | LOWER_UNPACK_SNORM_4x8 - | LOWER_PACK_UNORM_4x8 - | LOWER_PACK_SNORM_4x8; - } - - if (brw->gen >= 7) { - /* Gen7 introduced the f32to16 and f16to32 instructions, which can be - * used to execute packHalf2x16 and unpackHalf2x16. For AOS code, no - * lowering is needed. For SOA code, the Half2x16 ops must be - * scalarized. - */ - if (compiler->scalar_stage[shader_type]) { - ops |= LOWER_PACK_HALF_2x16_TO_SPLIT - | LOWER_UNPACK_HALF_2x16_TO_SPLIT; - } - } else { - ops |= LOWER_PACK_HALF_2x16 - | LOWER_UNPACK_HALF_2x16; - } + /* Gens < 7 don't have instructions to convert to or from half-precision, + * and Gens < 6 don't expose that functionality. + */ + if (brw->gen != 6) + return; - lower_packing_builtins(ir, ops); + lower_packing_builtins(ir, LOWER_PACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16); } static void diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 287f935d539..ef5b34cc687 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -409,6 +409,14 @@ brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar) } } +static void +brw_nir_lower_shared(nir_shader *nir) +{ + nir_assign_var_locations(&nir->shared, &nir->num_shared, + type_size_scalar_bytes); + nir_lower_io(nir, nir_var_shared, type_size_scalar_bytes); +} + #define OPT(pass, ...) ({ \ bool this_progress = false; \ NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ @@ -488,7 +496,7 @@ brw_preprocess_nir(nir_shader *nir, bool is_scalar) /* Get rid of split copies */ nir = nir_optimize(nir, is_scalar); - OPT(nir_remove_dead_variables); + OPT(nir_remove_dead_variables, nir_var_local); return nir; } @@ -504,6 +512,8 @@ brw_nir_lower_io(nir_shader *nir, OPT_V(brw_nir_lower_inputs, devinfo, is_scalar); OPT_V(brw_nir_lower_outputs, devinfo, is_scalar); + if (nir->stage == MESA_SHADER_COMPUTE) + OPT_V(brw_nir_lower_shared); OPT_V(nir_lower_io, nir_var_all, is_scalar ? type_size_scalar : type_size_vec4); return nir_optimize(nir, is_scalar); @@ -529,7 +539,7 @@ brw_postprocess_nir(nir_shader *nir, if (devinfo->gen >= 6) { /* Try and fuse multiply-adds */ - OPT(brw_nir_opt_peephole_ffma); +// OPT(brw_nir_opt_peephole_ffma); } OPT(nir_opt_algebraic_late); diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 368efeecb2d..c9872b68d75 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -279,7 +279,7 @@ brw_get_scratch_bo(struct brw_context *brw, void brwInitFragProgFuncs( struct dd_function_table *functions ) { - assert(functions->ProgramStringNotify == _tnl_program_string); + /* assert(functions->ProgramStringNotify == _tnl_program_string); */ functions->NewProgram = brwNewProgram; functions->DeleteProgram = brwDeleteProgram; diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index d92bad25a72..e4ce8cbf748 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -29,136 +29,6 @@ #include "brw_vec4_tes.h" #include "main/shaderobj.h" #include "main/uniforms.h" -#include "util/debug.h" - -static void -shader_debug_log_mesa(void *data, const char *fmt, ...) -{ - struct brw_context *brw = (struct brw_context *)data; - va_list args; - - va_start(args, fmt); - GLuint msg_id = 0; - _mesa_gl_vdebug(&brw->ctx, &msg_id, - MESA_DEBUG_SOURCE_SHADER_COMPILER, - MESA_DEBUG_TYPE_OTHER, - MESA_DEBUG_SEVERITY_NOTIFICATION, fmt, args); - va_end(args); -} - -static void -shader_perf_log_mesa(void *data, const char *fmt, ...) -{ - struct brw_context *brw = (struct brw_context *)data; - - va_list args; - va_start(args, fmt); - - if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { - va_list args_copy; - va_copy(args_copy, args); - vfprintf(stderr, fmt, args_copy); - va_end(args_copy); - } - - if (brw->perf_debug) { - GLuint msg_id = 0; - _mesa_gl_vdebug(&brw->ctx, &msg_id, - MESA_DEBUG_SOURCE_SHADER_COMPILER, - MESA_DEBUG_TYPE_PERFORMANCE, - MESA_DEBUG_SEVERITY_MEDIUM, fmt, args); - } - va_end(args); -} - -struct brw_compiler * -brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) -{ - struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler); - - compiler->devinfo = devinfo; - compiler->shader_debug_log = shader_debug_log_mesa; - compiler->shader_perf_log = shader_perf_log_mesa; - - brw_fs_alloc_reg_sets(compiler); - brw_vec4_alloc_reg_set(compiler); - - compiler->scalar_stage[MESA_SHADER_VERTEX] = - devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS); - compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false; - compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = - devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TES", true); - compiler->scalar_stage[MESA_SHADER_GEOMETRY] = - devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false); - compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true; - compiler->scalar_stage[MESA_SHADER_COMPUTE] = true; - - nir_shader_compiler_options *nir_options = - rzalloc(compiler, nir_shader_compiler_options); - nir_options->native_integers = true; - nir_options->lower_fdiv = true; - /* In order to help allow for better CSE at the NIR level we tell NIR - * to split all ffma instructions during opt_algebraic and we then - * re-combine them as a later step. - */ - nir_options->lower_ffma = true; - nir_options->lower_sub = true; - nir_options->lower_fdiv = true; - nir_options->lower_scmp = true; - nir_options->lower_fmod = true; - nir_options->lower_bitfield_extract = true; - nir_options->lower_bitfield_insert = true; - nir_options->lower_uadd_carry = true; - nir_options->lower_usub_borrow = true; - - /* In the vec4 backend, our dpN instruction replicates its result to all - * the components of a vec4. We would like NIR to give us replicated fdot - * instructions because it can optimize better for us. - * - * For the FS backend, it should be lowered away by the scalarizing pass so - * we should never see fdot anyway. - */ - nir_options->fdot_replicates = true; - - /* We want the GLSL compiler to emit code that uses condition codes */ - for (int i = 0; i < MESA_SHADER_STAGES; i++) { - compiler->glsl_compiler_options[i].MaxUnrollIterations = 32; - compiler->glsl_compiler_options[i].MaxIfDepth = - devinfo->gen < 6 ? 16 : UINT_MAX; - - compiler->glsl_compiler_options[i].EmitCondCodes = true; - compiler->glsl_compiler_options[i].EmitNoNoise = true; - compiler->glsl_compiler_options[i].EmitNoMainReturn = true; - compiler->glsl_compiler_options[i].EmitNoIndirectInput = true; - compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false; - compiler->glsl_compiler_options[i].LowerClipDistance = true; - - bool is_scalar = compiler->scalar_stage[i]; - - compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar; - compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar; - compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar; - - /* !ARB_gpu_shader5 */ - if (devinfo->gen < 7) - compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true; - - compiler->glsl_compiler_options[i].NirOptions = nir_options; - - compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true; - } - - compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectInput = false; - compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = false; - - if (compiler->scalar_stage[MESA_SHADER_GEOMETRY]) - compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false; - - compiler->glsl_compiler_options[MESA_SHADER_COMPUTE] - .LowerShaderSharedVariables = true; - - return compiler; -} extern "C" struct gl_shader * brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) @@ -214,6 +84,7 @@ brw_type_for_base_type(const struct glsl_type *type) case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_FUNCTION: unreachable("not reached"); } @@ -442,6 +313,10 @@ brw_instruction_name(enum opcode op) case SHADER_OPCODE_BROADCAST: return "broadcast"; + case SHADER_OPCODE_EXTRACT_BYTE: + return "extract_byte"; + case SHADER_OPCODE_EXTRACT_WORD: + return "extract_word"; case VEC4_OPCODE_MOV_BYTES: return "mov_bytes"; case VEC4_OPCODE_PACK_BYTES: diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 593361348fd..82374a46c18 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -259,9 +259,6 @@ struct brw_gs_compile unsigned control_data_header_size_bits; }; -struct brw_compiler * -brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo); - void brw_assign_common_binding_table_offsets(gl_shader_stage stage, const struct brw_device_info *devinfo, diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c index b5c1a3531c2..f42a9531683 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -25,21 +25,8 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" - -struct surface_format_info { - bool exists; - int sampling; - int filtering; - int shadow_compare; - int chroma_key; - int render_target; - int alpha_blend; - int input_vb; - int streamed_output_vb; - int color_processing; - int lossless_compression; - const char *name; -}; +#include "brw_wm.h" +#include "brw_surface_formats.h" /* This macro allows us to write the table almost as it appears in the PRM, * while restructuring it to turn it into the C code we want. @@ -86,7 +73,7 @@ struct surface_format_info { * - VOL4_Part1 section 3.9.11 Render Target Write. * - Render Target Surface Types [SKL+] */ -const struct surface_format_info surface_formats[] = { +const struct brw_surface_format_info surface_formats[] = { /* smpl filt shad CK RT AB VB SO color ccs_e */ SF( Y, 50, x, x, Y, Y, Y, Y, x, 90, R32G32B32A32_FLOAT) SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32B32A32_SINT) @@ -218,7 +205,7 @@ const struct surface_format_info surface_formats[] = { SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE0) SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1) SF( x, x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM) - SF( x, x, x, x, x, x, x, x, x, x, A4B4G4R4_UNORM) + SF( Y, Y, x, Y, 90, x, x, x, x, x, A4B4G4R4_UNORM) SF( x, x, x, x, x, x, x, x, x, x, L8A8_UINT) SF( x, x, x, x, x, x, x, x, x, x, L8A8_SINT) SF( Y, Y, x, 45, Y, Y, Y, x, x, x, R8_UNORM) @@ -281,13 +268,13 @@ const struct surface_format_info surface_formats[] = { SF(70, 70, x, x, x, x, x, x, x, x, BC6H_UF16) SF( x, x, x, x, x, x, x, x, x, x, PLANAR_420_8) SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB) - SF( x, x, x, x, x, x, x, x, x, x, ETC1_RGB8) - SF( x, x, x, x, x, x, x, x, x, x, ETC2_RGB8) - SF( x, x, x, x, x, x, x, x, x, x, EAC_R11) - SF( x, x, x, x, x, x, x, x, x, x, EAC_RG11) - SF( x, x, x, x, x, x, x, x, x, x, EAC_SIGNED_R11) - SF( x, x, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11) - SF( x, x, x, x, x, x, x, x, x, x, ETC2_SRGB8) + SF(80, 80, x, x, x, x, x, x, x, x, ETC1_RGB8) + SF(80, 80, x, x, x, x, x, x, x, x, ETC2_RGB8) + SF(80, 80, x, x, x, x, x, x, x, x, EAC_R11) + SF(80, 80, x, x, x, x, x, x, x, x, EAC_RG11) + SF(80, 80, x, x, x, x, x, x, x, x, EAC_SIGNED_R11) + SF(80, 80, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11) + SF(80, 80, x, x, x, x, x, x, x, x, ETC2_SRGB8) SF( x, x, x, x, x, x, x, x, x, x, R16G16B16_UINT) SF( x, x, x, x, x, x, x, x, x, x, R16G16B16_SINT) SF( x, x, x, x, x, x, x, x, x, x, R32_SFIXED) @@ -302,10 +289,10 @@ const struct surface_format_info surface_formats[] = { SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_SINT) SF( x, x, x, x, x, x, x, x, x, x, R64G64B64A64_PASSTHRU) SF( x, x, x, x, x, x, x, x, x, x, R64G64B64_PASSTHRU) - SF( x, x, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA) - SF( x, x, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA) - SF( x, x, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8) - SF( x, x, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8) + SF(80, 80, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA) + SF(80, 80, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA) + SF(80, 80, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8) + SF(80, 80, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8) SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_UINT) SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_SINT) SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_FLT16) @@ -618,7 +605,7 @@ brw_init_surface_formats(struct brw_context *brw) for (format = MESA_FORMAT_NONE + 1; format < MESA_FORMAT_COUNT; format++) { uint32_t texture, render; - const struct surface_format_info *rinfo, *tinfo; + const struct brw_surface_format_info *rinfo, *tinfo; bool is_integer = _mesa_is_format_integer_color(format); render = texture = brw_format_for_mesa_format(format); @@ -827,7 +814,7 @@ bool brw_losslessly_compressible_format(struct brw_context *brw, uint32_t brw_format) { - const struct surface_format_info * const sinfo = + const struct brw_surface_format_info * const sinfo = &surface_formats[brw_format]; const int gen = brw->gen * 10; diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.h b/src/mesa/drivers/dri/i965/brw_surface_formats.h new file mode 100644 index 00000000000..a5cd49f5260 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.h @@ -0,0 +1,41 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +struct brw_surface_format_info { + bool exists; + int sampling; + int filtering; + int shadow_compare; + int chroma_key; + int render_target; + int alpha_blend; + int input_vb; + int streamed_output_vb; + int color_processing; + int lossless_compression; + const char *name; +}; + +extern const struct brw_surface_format_info surface_formats[]; diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c index bf7f9c61c84..934b6b8d627 100644 --- a/src/mesa/drivers/dri/i965/brw_util.c +++ b/src/mesa/drivers/dri/i965/brw_util.c @@ -98,3 +98,31 @@ GLuint brw_translate_blend_factor( GLenum factor ) unreachable("not reached"); } } + +static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = { + [GL_POINTS] =_3DPRIM_POINTLIST, + [GL_LINES] = _3DPRIM_LINELIST, + [GL_LINE_LOOP] = _3DPRIM_LINELOOP, + [GL_LINE_STRIP] = _3DPRIM_LINESTRIP, + [GL_TRIANGLES] = _3DPRIM_TRILIST, + [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [GL_QUADS] = _3DPRIM_QUADLIST, + [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP, + [GL_POLYGON] = _3DPRIM_POLYGON, + [GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ, + [GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, + [GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ, + [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, +}; + +uint32_t +get_hw_prim_for_gl_prim(int mode) +{ + if (mode >= BRW_PRIM_OFFSET) + return mode - BRW_PRIM_OFFSET; + else { + assert(mode < ARRAY_SIZE(prim_to_hw_prim)); + return prim_to_hw_prim[mode]; + } +} diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index e7aec1f1a9d..394e32169d9 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -477,11 +477,6 @@ vec4_visitor::split_uniform_registers() inst->src[i].reg_offset = 0; } } - - /* Update that everything is now vector-sized. */ - for (int i = 0; i < this->uniforms; i++) { - this->uniform_size[i] = 1; - } } void @@ -539,7 +534,6 @@ vec4_visitor::pack_uniform_registers() * push constants. */ for (int src = 0; src < uniforms; src++) { - assert(src < uniform_array_size); int size = chans_used[src]; if (size == 0) @@ -786,7 +780,7 @@ vec4_visitor::move_push_constants_to_pull_constants() dst_reg temp = dst_reg(this, glsl_type::vec4_type); emit_pull_constant_load(block, inst, temp, inst->src[i], - pull_constant_loc[uniform]); + pull_constant_loc[uniform], src_reg()); inst->src[i].file = temp.file; inst->src[i].nr = temp.nr; @@ -1606,8 +1600,6 @@ vec4_visitor::setup_uniforms(int reg) * matter what, or the GPU would hang. */ if (devinfo->gen < 6 && this->uniforms == 0) { - assert(this->uniforms < this->uniform_array_size); - stage_prog_data->param = reralloc(NULL, stage_prog_data->param, const gl_constant_value *, 4); for (unsigned int i = 0; i < 4; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 1460f4599bf..e5ae9f24156 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -115,8 +115,6 @@ public: */ dst_reg output_reg[BRW_VARYING_SLOT_COUNT]; const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT]; - int *uniform_size; - int uniform_array_size; /*< Size of the uniform_size array */ int uniforms; src_reg shader_start_time; @@ -260,6 +258,7 @@ public: src_reg offset_value, src_reg mcs, bool is_cube_array, + uint32_t surface, src_reg surface_reg, uint32_t sampler, src_reg sampler_reg); src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate, @@ -284,8 +283,6 @@ public: src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst, src_reg *reladdr, int reg_offset); - src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst, - src_reg *reladdr, int reg_offset); void emit_scratch_read(bblock_t *block, vec4_instruction *inst, dst_reg dst, src_reg orig_src, @@ -295,7 +292,8 @@ public: void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, dst_reg dst, src_reg orig_src, - int base_offset); + int base_offset, + src_reg indirect); void emit_pull_constant_load_reg(dst_reg dst, src_reg surf_index, src_reg offset, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 730be2142aa..237534decba 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -109,6 +109,7 @@ generate_tex(struct brw_codegen *p, vec4_instruction *inst, struct brw_reg dst, struct brw_reg src, + struct brw_reg surface_index, struct brw_reg sampler_index) { const struct brw_device_info *devinfo = p->devinfo; @@ -264,14 +265,16 @@ generate_tex(struct brw_codegen *p, ? prog_data->base.binding_table.gather_texture_start : prog_data->base.binding_table.texture_start; - if (sampler_index.file == BRW_IMMEDIATE_VALUE) { + if (surface_index.file == BRW_IMMEDIATE_VALUE && + sampler_index.file == BRW_IMMEDIATE_VALUE) { + uint32_t surface = surface_index.ud; uint32_t sampler = sampler_index.ud; brw_SAMPLE(p, dst, inst->base_mrf, src, - sampler + base_binding_table_index, + surface + base_binding_table_index, sampler % 16, msg_type, 1, /* response length */ @@ -285,14 +288,19 @@ generate_tex(struct brw_codegen *p, /* Non-constant sampler index. */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); + struct brw_reg surface_reg = vec1(retype(surface_index, BRW_REGISTER_TYPE_UD)); struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */ - brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); + if (memcmp(&surface_reg, &sampler_reg, sizeof(surface_reg)) == 0) { + brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); + } else { + brw_SHL(p, addr, sampler_reg, brw_imm_ud(8)); + brw_OR(p, addr, addr, surface_reg); + } if (base_binding_table_index) brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); brw_AND(p, addr, addr, brw_imm_ud(0xfff)); @@ -1383,6 +1391,48 @@ generate_set_simd4x2_header_gen9(struct brw_codegen *p, } static void +generate_mov_indirect(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst, struct brw_reg reg, + struct brw_reg indirect, struct brw_reg length) +{ + assert(indirect.type == BRW_REGISTER_TYPE_UD); + + unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr * (REG_SIZE / 2); + + /* This instruction acts in align1 mode */ + assert(inst->force_writemask_all || reg.writemask == 0xf); + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + + struct brw_reg addr = vec2(brw_address_reg(0)); + + /* We need to move the indirect value into the address register. In order + * to make things make some sense, we want to respect at least the X + * component of the swizzle. In order to do that, we need to convert the + * subnr (probably 0) to an align1 subnr and add in the swizzle. We then + * use a region of <8,4,0>:uw to pick off the first 2 bytes of the indirect + * and splat it out to all four channels of the given half of a0. + */ + assert(brw_is_single_value_swizzle(indirect.swizzle)); + indirect.subnr = (indirect.subnr * 4 + BRW_GET_SWZ(indirect.swizzle, 0)) * 2; + indirect = stride(retype(indirect, BRW_REGISTER_TYPE_UW), 8, 4, 0); + + brw_ADD(p, addr, indirect, brw_imm_uw(imm_byte_offset)); + + /* Use a <4,1> region Vx1 region*/ + struct brw_reg src = brw_VxH_indirect(0, 0); + src.width = BRW_WIDTH_4; + src.hstride = BRW_HORIZONTAL_STRIDE_1; + + brw_MOV(p, dst, retype(src, reg.type)); + + brw_pop_insn_state(p); +} + +static void generate_code(struct brw_codegen *p, const struct brw_compiler *compiler, void *log_data, @@ -1664,7 +1714,7 @@ generate_code(struct brw_codegen *p, case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: case SHADER_OPCODE_SAMPLEINFO: - generate_tex(p, prog_data, inst, dst, src[0], src[1]); + generate_tex(p, prog_data, inst, dst, src[0], src[1], src[2]); break; case VS_OPCODE_URB_WRITE: @@ -1928,6 +1978,9 @@ generate_code(struct brw_codegen *p, brw_WAIT(p); break; + case SHADER_OPCODE_MOV_INDIRECT: + generate_mov_indirect(p, inst, dst, src[0], src[1], src[2]); + default: unreachable("Unsupported opcode"); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 1b87e3044c2..2b261fcc952 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -132,15 +132,6 @@ void vec4_visitor::nir_setup_uniforms() { uniforms = nir->num_uniforms / 16; - - nir_foreach_variable(var, &nir->uniforms) { - /* UBO's and atomics don't take up space in the uniform file */ - if (var->interface_type != NULL || var->type->contains_atomic()) - continue; - - if (type_size_vec4(var->type) > 0) - uniform_size[var->data.driver_location / 16] = type_size_vec4(var->type); - } } void @@ -710,12 +701,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) /* Offsets are in bytes but they should always be multiples of 16 */ assert(const_offset->u[0] % 16 == 0); src.reg_offset = const_offset->u[0] / 16; + + emit(MOV(dest, src)); } else { - src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1); - src.reladdr = new(mem_ctx) src_reg(tmp); - } + src_reg indirect = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1); - emit(MOV(dest, src)); + emit(SHADER_OPCODE_MOV_INDIRECT, dest, src, + indirect, brw_imm_ud(instr->const_index[1])); + } break; } @@ -1093,15 +1086,29 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) inst->saturate = instr->dest.saturate; break; - case nir_op_fsin: - inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]); - inst->saturate = instr->dest.saturate; + case nir_op_fsin: { + src_reg tmp = src_reg(this, glsl_type::vec4_type); + inst = emit_math(SHADER_OPCODE_SIN, dst_reg(tmp), op[0]); + if (instr->dest.saturate) { + inst->dst = dst; + inst->saturate = true; + } else { + emit(MUL(dst, tmp, brw_imm_f(0.99997))); + } break; + } - case nir_op_fcos: - inst = emit_math(SHADER_OPCODE_COS, dst, op[0]); - inst->saturate = instr->dest.saturate; + case nir_op_fcos: { + src_reg tmp = src_reg(this, glsl_type::vec4_type); + inst = emit_math(SHADER_OPCODE_COS, dst_reg(tmp), op[0]); + if (instr->dest.saturate) { + inst->dst = dst; + inst->saturate = true; + } else { + emit(MUL(dst, tmp, brw_imm_f(0.99997))); + } break; + } case nir_op_idiv: case nir_op_udiv: @@ -1109,9 +1116,41 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_umod: + case nir_op_irem: + /* According to the sign table for INT DIV in the Ivy Bridge PRM, it + * appears that our hardware just does the right thing for signed + * remainder. + */ emit_math(SHADER_OPCODE_INT_REMAINDER, dst, op[0], op[1]); break; + case nir_op_imod: { + /* Get a regular C-style remainder. If a % b == 0, set the predicate. */ + inst = emit_math(SHADER_OPCODE_INT_REMAINDER, dst, op[0], op[1]); + + /* Math instructions don't support conditional mod */ + inst = emit(MOV(dst_null_d(), src_reg(dst))); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + /* Now, we need to determine if signs of the sources are different. + * When we XOR the sources, the top bit is 0 if they are the same and 1 + * if they are different. We can then use a conditional modifier to + * turn that into a predicate. This leads us to an XOR.l instruction. + */ + src_reg tmp = src_reg(this, glsl_type::ivec4_type); + inst = emit(XOR(dst_reg(tmp), op[0], op[1])); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->conditional_mod = BRW_CONDITIONAL_L; + + /* If the result of the initial remainder operation is non-zero and the + * two sources have different signs, add in a copy of op[1] to get the + * final integer modulus value. + */ + inst = emit(ADD(dst, src_reg(dst), op[1])); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + } + case nir_op_ldexp: unreachable("not reached: should be handled by ldexp_to_arith()"); @@ -1181,6 +1220,32 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) inst->saturate = instr->dest.saturate; break; + case nir_op_fquantize2f16: { + /* See also vec4_visitor::emit_pack_half_2x16() */ + src_reg tmp16 = src_reg(this, glsl_type::uvec4_type); + src_reg tmp32 = src_reg(this, glsl_type::vec4_type); + src_reg zero = src_reg(this, glsl_type::vec4_type); + + /* Check for denormal */ + src_reg abs_src0 = op[0]; + abs_src0.abs = true; + emit(CMP(dst_null_f(), abs_src0, brw_imm_f(ldexpf(1.0, -14)), + BRW_CONDITIONAL_L)); + /* Get the appropriately signed zero */ + emit(AND(retype(dst_reg(zero), BRW_REGISTER_TYPE_UD), + retype(op[0], BRW_REGISTER_TYPE_UD), + brw_imm_ud(0x80000000))); + /* Do the actual F32 -> F16 -> F32 conversion */ + emit(F32TO16(dst_reg(tmp16), op[0])); + emit(F16TO32(dst_reg(tmp32), tmp16)); + /* Select that or zero based on normal status */ + inst = emit(BRW_OPCODE_SEL, dst, zero, tmp32); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->predicate_inverse = true; + inst->saturate = instr->dest.saturate; + break; + } + case nir_op_fmin: case nir_op_imin: case nir_op_umin: @@ -1325,6 +1390,24 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_pack_unorm_2x16: unreachable("not reached: should be handled by lower_packing_builtins"); + case nir_op_pack_uvec4_to_uint: + unreachable("not reached"); + + case nir_op_pack_uvec2_to_uint: { + dst_reg tmp1 = dst_reg(this, glsl_type::uint_type); + tmp1.writemask = WRITEMASK_X; + op[0].swizzle = BRW_SWIZZLE_YYYY; + emit(SHL(tmp1, op[0], src_reg(brw_imm_ud(16u)))); + + dst_reg tmp2 = dst_reg(this, glsl_type::uint_type); + tmp2.writemask = WRITEMASK_X; + op[0].swizzle = BRW_SWIZZLE_XXXX; + emit(AND(tmp2, op[0], src_reg(brw_imm_ud(0xffffu)))); + + emit(OR(dst, src_reg(tmp1), src_reg(tmp2))); + break; + } + case nir_op_unpack_half_2x16: /* As NIR does not guarantee that we have a correct swizzle outside the * boundaries of a vector, and the implementation of emit_unpack_half_2x16 @@ -1568,7 +1651,6 @@ vec4_visitor::nir_emit_jump(nir_jump_instr *instr) break; case nir_jump_return: - /* fall through */ default: unreachable("unknown jump"); } @@ -1621,7 +1703,9 @@ glsl_type_for_nir_alu_type(nir_alu_type alu_type, void vec4_visitor::nir_emit_texture(nir_tex_instr *instr) { + unsigned texture = instr->texture_index; unsigned sampler = instr->sampler_index; + src_reg texture_reg = brw_imm_ud(texture); src_reg sampler_reg = brw_imm_ud(sampler); src_reg coordinate; const glsl_type *coord_type = NULL; @@ -1636,6 +1720,10 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) nir_tex_instr_dest_size(instr)); dst_reg dest = get_nir_dest(instr->dest, instr->dest_type); + /* Our hardware requires a LOD for buffer textures */ + if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) + lod = brw_imm_d(0); + /* Load the texture operation sources */ for (unsigned i = 0; i < instr->num_srcs; i++) { switch (instr->src[i].src_type) { @@ -1697,13 +1785,12 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) offset_value = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 2); break; - case nir_tex_src_sampler_offset: { - /* The highest sampler which may be used by this operation is + case nir_tex_src_texture_offset: { + /* The highest texture which may be used by this operation is * the last element of the array. Mark it here, because the generator * doesn't have enough information to determine the bound. */ - uint32_t array_size = instr->sampler_array_size; - uint32_t max_used = sampler + array_size - 1; + uint32_t max_used = texture + instr->texture_array_size - 1; if (instr->op == nir_texop_tg4) { max_used += prog_data->base.binding_table.gather_texture_start; } else { @@ -1715,6 +1802,15 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) /* Emit code to evaluate the actual indexing expression */ src_reg src = get_nir_src(instr->src[i].src, 1); src_reg temp(this, glsl_type::uint_type); + emit(ADD(dst_reg(temp), src, brw_imm_ud(texture))); + texture_reg = emit_uniformize(temp); + break; + } + + case nir_tex_src_sampler_offset: { + /* Emit code to evaluate the actual indexing expression */ + src_reg src = get_nir_src(instr->src[i].src, 1); + src_reg temp(this, glsl_type::uint_type); emit(ADD(dst_reg(temp), src, brw_imm_ud(sampler))); sampler_reg = emit_uniformize(temp); break; @@ -1753,7 +1849,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) /* Stuff the channel select bits in the top of the texture offset */ if (instr->op == nir_texop_tg4) { if (instr->component == 1 && - (key_tex->gather_channel_quirk_mask & (1 << sampler))) { + (key_tex->gather_channel_quirk_mask & (1 << texture))) { /* gather4 sampler is broken for green channel on RG32F -- * we must ask for blue instead. */ @@ -1774,7 +1870,8 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) shadow_comparitor, lod, lod2, sample_index, constant_offset, offset_value, - mcs, is_cube_array, sampler, sampler_reg); + mcs, is_cube_array, + texture, texture_reg, sampler, sampler_reg); } void diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index 9b75f45edb0..b7d02e90a86 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -59,8 +59,6 @@ vec4_tcs_visitor::emit_nir_code() * copies VS outputs to TES inputs. */ uniforms = 2; - uniform_size[0] = 1; - uniform_size[1] = 1; uint64_t varyings = key->outputs_written; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 443d0eb5387..0c5bfb8579c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -622,6 +622,7 @@ type_size_vec4(const struct glsl_type *type) case GLSL_TYPE_DOUBLE: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: unreachable("not reached"); } @@ -877,6 +878,8 @@ vec4_visitor::emit_texture(ir_texture_opcode op, src_reg offset_value, src_reg mcs, bool is_cube_array, + uint32_t surface, + src_reg surface_reg, uint32_t sampler, src_reg sampler_reg) { @@ -942,7 +945,8 @@ vec4_visitor::emit_texture(ir_texture_opcode op, inst->dst.writemask = WRITEMASK_XYZW; inst->shadow_compare = shadow_comparitor.file != BAD_FILE; - inst->src[1] = sampler_reg; + inst->src[1] = surface_reg; + inst->src[2] = sampler_reg; /* MRF for the first parameter */ int param_base = inst->base_mrf + inst->header_size; @@ -1068,7 +1072,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op, } if (devinfo->gen == 6 && op == ir_tg4) { - emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], inst->dst); + emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], inst->dst); } if (op == ir_query_levels) { @@ -1464,27 +1468,6 @@ vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst, } } -src_reg -vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst, - src_reg *reladdr, int reg_offset) -{ - if (reladdr) { - src_reg index = src_reg(this, glsl_type::int_type); - - emit_before(block, inst, ADD(dst_reg(index), *reladdr, - brw_imm_d(reg_offset * 16))); - - return index; - } else if (devinfo->gen >= 8) { - /* Store the offset in a GRF so we can send-from-GRF. */ - src_reg offset = src_reg(this, glsl_type::int_type); - emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset * 16))); - return offset; - } else { - return brw_imm_d(reg_offset * 16); - } -} - /** * Emits an instruction before @inst to load the value named by @orig_src * from scratch space at @base_offset to @temp. @@ -1662,12 +1645,24 @@ vec4_visitor::move_grf_array_access_to_scratch() void vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, dst_reg temp, src_reg orig_src, - int base_offset) + int base_offset, src_reg indirect) { int reg_offset = base_offset + orig_src.reg_offset; const unsigned index = prog_data->base.binding_table.pull_constants_start; - src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr, - reg_offset); + + src_reg offset; + if (indirect.file != BAD_FILE) { + offset = src_reg(this, glsl_type::int_type); + + emit_before(block, inst, ADD(dst_reg(offset), indirect, + brw_imm_d(reg_offset * 16))); + } else if (devinfo->gen >= 8) { + /* Store the offset in a GRF so we can send-from-GRF. */ + offset = src_reg(this, glsl_type::int_type); + emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset * 16))); + } else { + offset = brw_imm_d(reg_offset * 16); + } emit_pull_constant_load_reg(temp, brw_imm_ud(index), @@ -1694,59 +1689,55 @@ vec4_visitor::move_uniform_array_access_to_pull_constants() { int pull_constant_loc[this->uniforms]; memset(pull_constant_loc, -1, sizeof(pull_constant_loc)); - bool nested_reladdr; - /* Walk through and find array access of uniforms. Put a copy of that - * uniform in the pull constant buffer. - * - * Note that we don't move constant-indexed accesses to arrays. No - * testing has been done of the performance impact of this choice. + /* First, walk through the instructions and determine which things need to + * be pulled. We mark something as needing to be pulled by setting + * pull_constant_loc to 0. */ - do { - nested_reladdr = false; - - foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { - for (int i = 0 ; i < 3; i++) { - if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr) - continue; + foreach_block_and_inst(block, vec4_instruction, inst, cfg) { + /* We only care about MOV_INDIRECT of a uniform */ + if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT || + inst->src[0].file != UNIFORM) + continue; - int uniform = inst->src[i].nr; + int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset; - if (inst->src[i].reladdr->reladdr) - nested_reladdr = true; /* will need another pass */ + for (unsigned j = 0; j < DIV_ROUND_UP(inst->src[2].ud, 16); j++) + pull_constant_loc[uniform_nr + j] = 0; + } - /* If this array isn't already present in the pull constant buffer, - * add it. - */ - if (pull_constant_loc[uniform] == -1) { - const gl_constant_value **values = - &stage_prog_data->param[uniform * 4]; + /* Next, we walk the list of uniforms and assign real pull constant + * locations and set their corresponding entries in pull_param. + */ + for (int j = 0; j < this->uniforms; j++) { + if (pull_constant_loc[j] < 0) + continue; - pull_constant_loc[uniform] = stage_prog_data->nr_pull_params / 4; + pull_constant_loc[j] = stage_prog_data->nr_pull_params / 4; - assert(uniform < uniform_array_size); - for (int j = 0; j < uniform_size[uniform] * 4; j++) { - stage_prog_data->pull_param[stage_prog_data->nr_pull_params++] - = values[j]; - } - } + for (int i = 0; i < 4; i++) { + stage_prog_data->pull_param[stage_prog_data->nr_pull_params++] + = stage_prog_data->param[j * 4 + i]; + } + } - /* Set up the annotation tracking for new generated instructions. */ - base_ir = inst->ir; - current_annotation = inst->annotation; + /* Finally, we can walk through the instructions and lower MOV_INDIRECT + * instructions to actual uniform pulls. + */ + foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { + /* We only care about MOV_INDIRECT of a uniform */ + if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT || + inst->src[0].file != UNIFORM) + continue; - dst_reg temp = dst_reg(this, glsl_type::vec4_type); + int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset; - emit_pull_constant_load(block, inst, temp, inst->src[i], - pull_constant_loc[uniform]); + assert(inst->src[0].swizzle == BRW_SWIZZLE_NOOP); - inst->src[i].file = temp.file; - inst->src[i].nr = temp.nr; - inst->src[i].reg_offset = temp.reg_offset; - inst->src[i].reladdr = NULL; - } - } - } while (nested_reladdr); + emit_pull_constant_load(block, inst, inst->dst, inst->src[0], + pull_constant_loc[uniform_nr], inst->src[1]); + inst->remove(block); + } /* Now there are no accesses of the UNIFORM file with a reladdr, so * no need to track them as larger-than-vec4 objects. This will be @@ -1799,17 +1790,6 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; this->uniforms = 0; - - /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires - * at least one. See setup_uniforms() in brw_vec4.cpp. - */ - this->uniform_array_size = 1; - if (prog_data) { - this->uniform_array_size = - MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 4), 1); - } - - this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size); } vec4_visitor::~vec4_visitor() diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp index 1d6914902b3..86701f3fbd8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp @@ -270,7 +270,6 @@ void vec4_vs_visitor::setup_uniform_clipplane_values() { for (int i = 0; i < key->nr_userclip_plane_consts; ++i) { - assert(this->uniforms < uniform_array_size); this->userplane[i] = dst_reg(UNIFORM, this->uniforms); this->userplane[i].type = BRW_REGISTER_TYPE_F; for (int j = 0; j < 4; ++j) { diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index f3102dc21e3..56dce2d1b81 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2485,6 +2485,11 @@ struct gl_uniform_block GLuint Binding; /** + * Vulkan descriptor set qualifier for this block. + */ + GLuint Set; + + /** * Minimum size (in bytes) of a buffer object to back this uniform buffer * (GL_UNIFORM_BLOCK_DATA_SIZE). */ diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index c9c30449734..3c51d18ed62 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -541,6 +541,7 @@ type_size(const struct glsl_type *type) case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: assert(!"Invalid type in type_size"); break; } @@ -1244,10 +1245,7 @@ ir_to_mesa_visitor::visit(ir_expression *ir) case ir_unop_unpack_unorm_2x16: case ir_unop_unpack_unorm_4x8: case ir_unop_unpack_half_2x16: - case ir_unop_unpack_half_2x16_split_x: - case ir_unop_unpack_half_2x16_split_y: case ir_unop_unpack_double_2x32: - case ir_binop_pack_half_2x16_split: case ir_unop_bitfield_reverse: case ir_unop_bit_count: case ir_unop_find_msb: @@ -2446,6 +2444,7 @@ _mesa_associate_uniform_storage(struct gl_context *ctx, case GLSL_TYPE_STRUCT: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: assert(!"Should not get here."); break; } diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index ebcc5288c2e..f8659f65339 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -609,6 +609,7 @@ ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src, instr->op = op; instr->dest_type = nir_type_float; instr->is_shadow = prog_inst->TexShadow; + instr->texture_index = prog_inst->TexSrcUnit; instr->sampler_index = prog_inst->TexSrcUnit; switch (prog_inst->TexSrcTarget) { diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index f5b8c33622e..cf91d39ff92 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -2177,12 +2177,9 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_unpack_snorm_2x16: case ir_unop_unpack_unorm_2x16: - case ir_unop_unpack_half_2x16_split_x: - case ir_unop_unpack_half_2x16_split_y: case ir_unop_unpack_snorm_4x8: case ir_unop_unpack_unorm_4x8: - case ir_binop_pack_half_2x16_split: case ir_quadop_vector: case ir_binop_vector_extract: case ir_triop_vector_insert: diff --git a/src/util/bitset.h b/src/util/bitset.h index c452819414f..2404ce7f630 100644 --- a/src/util/bitset.h +++ b/src/util/bitset.h @@ -98,7 +98,7 @@ __bitset_ffs(const BITSET_WORD *x, int n) static inline unsigned __bitset_next_set(unsigned i, BITSET_WORD *tmp, - BITSET_WORD *set, unsigned size) + const BITSET_WORD *set, unsigned size) { unsigned bit, word; diff --git a/src/util/list.h b/src/util/list.h index 066f9b8dfe5..f0dec5da608 100644 --- a/src/util/list.h +++ b/src/util/list.h @@ -116,6 +116,28 @@ static inline unsigned list_length(struct list_head *list) return length; } +static inline void list_splice(struct list_head *src, struct list_head *dst) +{ + if (list_empty(src)) + return; + + src->next->prev = dst; + src->prev->next = dst->next; + dst->next->prev = src->prev; + dst->next = src->next; +} + +static inline void list_splicetail(struct list_head *src, struct list_head *dst) +{ + if (list_empty(src)) + return; + + src->prev->next = dst; + src->next->prev = dst->prev; + dst->prev->next = src->next; + dst->prev = src->prev; +} + static inline void list_validate(struct list_head *list) { struct list_head *node; diff --git a/src/vulkan/.gitignore b/src/vulkan/.gitignore new file mode 100644 index 00000000000..8f9477c4c63 --- /dev/null +++ b/src/vulkan/.gitignore @@ -0,0 +1,7 @@ +# Generated source files +/*_spirv_autogen.h +/anv_entrypoints.c +/anv_entrypoints.h +/wayland-drm-protocol.c +/wayland-drm-client-protocol.h +/anv_icd.json diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am new file mode 100644 index 00000000000..58668c7cf88 --- /dev/null +++ b/src/vulkan/Makefile.am @@ -0,0 +1,180 @@ +# Copyright © 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +SUBDIRS = . tests + +vulkan_includedir = $(includedir)/vulkan + +vulkan_include_HEADERS = \ + $(top_srcdir)/include/vulkan/vk_platform.h \ + $(top_srcdir)/include/vulkan/vulkan.h \ + $(top_srcdir)/include/vulkan/vulkan_intel.h + +# Used when generating entrypoints to filter out unwanted extensions +VULKAN_ENTRYPOINT_CPPFLAGS = \ + -I$(top_srcdir)/include/vulkan \ + -DVK_USE_PLATFORM_XCB_KHR \ + -DVK_USE_PLATFORM_WAYLAND_KHR + +lib_LTLIBRARIES = libvulkan.la + +check_LTLIBRARIES = libvulkan-test.la + +PER_GEN_LIBS = \ + libanv-gen7.la \ + libanv-gen75.la \ + libanv-gen8.la \ + libanv-gen9.la + +noinst_LTLIBRARIES = $(PER_GEN_LIBS) + +# The gallium includes are for the util/u_math.h include from main/macros.h + +AM_CPPFLAGS = \ + $(INTEL_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $(DEFINES) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/compiler \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ + -I$(top_srcdir)/src/mesa/drivers/dri/common \ + -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/isl/ \ + -I$(top_builddir)/src \ + -I$(top_builddir)/src/compiler \ + -I$(top_builddir)/src/vulkan + +libvulkan_la_CFLAGS = $(CFLAGS) -Wno-override-init + +VULKAN_SOURCES = \ + anv_allocator.c \ + anv_cmd_buffer.c \ + anv_batch_chain.c \ + anv_descriptor_set.c \ + anv_device.c \ + anv_dump.c \ + anv_entrypoints.c \ + anv_entrypoints.h \ + anv_formats.c \ + anv_image.c \ + anv_intel.c \ + anv_meta.c \ + anv_meta_clear.c \ + anv_meta_resolve.c \ + anv_nir_apply_dynamic_offsets.c \ + anv_nir_apply_pipeline_layout.c \ + anv_nir_lower_push_constants.c \ + anv_pass.c \ + anv_pipeline.c \ + anv_private.h \ + anv_query.c \ + anv_util.c \ + anv_wsi.c \ + anv_wsi_x11.c + +BUILT_SOURCES = \ + anv_entrypoints.h \ + anv_entrypoints.c + +libanv_gen7_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen7_cmd_buffer.c \ + gen7_pipeline.c \ + gen7_state.c +libanv_gen7_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=70 + +libanv_gen75_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen7_cmd_buffer.c \ + gen7_pipeline.c \ + gen7_state.c +libanv_gen75_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=75 + +libanv_gen8_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen8_cmd_buffer.c \ + gen8_pipeline.c \ + gen8_state.c +libanv_gen8_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=80 + +libanv_gen9_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen8_cmd_buffer.c \ + gen8_pipeline.c \ + gen8_state.c +libanv_gen9_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=90 + +if HAVE_EGL_PLATFORM_WAYLAND +BUILT_SOURCES += \ + wayland-drm-protocol.c \ + wayland-drm-client-protocol.h + +%-protocol.c : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml + $(AM_V_GEN)$(WAYLAND_SCANNER) code < $< > $@ + +%-client-protocol.h : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml + $(AM_V_GEN)$(WAYLAND_SCANNER) client-header < $< > $@ + +AM_CPPFLAGS += -I$(top_srcdir)/src/egl/wayland/wayland-drm +VULKAN_SOURCES += \ + wayland-drm-protocol.c \ + anv_wsi_wayland.c +libvulkan_la_CFLAGS += -DHAVE_WAYLAND_PLATFORM +endif + +libvulkan_la_SOURCES = \ + $(VULKAN_SOURCES) \ + anv_gem.c + +anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS) + $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< header > $@ + +anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) + $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< code > $@ + +CLEANFILES = $(BUILT_SOURCES) + +libvulkan_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ + $(top_builddir)/src/isl/libisl.la \ + $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \ + ../mesa/libmesa.la \ + ../mesa/drivers/dri/common/libdri_test_stubs.la \ + -lpthread -ldl -lstdc++ \ + $(PER_GEN_LIBS) + +# Libvulkan with dummy gem. Used for unit tests. + +libvulkan_test_la_SOURCES = \ + $(VULKAN_SOURCES) \ + anv_gem_stubs.c + +libvulkan_test_la_CFLAGS = $(libvulkan_la_CFLAGS) +libvulkan_test_la_LIBADD = $(libvulkan_la_LIBADD) + +include $(top_srcdir)/install-lib-links.mk diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c new file mode 100644 index 00000000000..e935cd71df0 --- /dev/null +++ b/src/vulkan/anv_allocator.c @@ -0,0 +1,862 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define _DEFAULT_SOURCE + +#include <stdint.h> +#include <stdlib.h> +#include <unistd.h> +#include <values.h> +#include <assert.h> +#include <linux/futex.h> +#include <linux/memfd.h> +#include <sys/time.h> +#include <sys/mman.h> +#include <sys/syscall.h> + +#include "anv_private.h" + +#ifdef HAVE_VALGRIND +#define VG_NOACCESS_READ(__ptr) ({ \ + VALGRIND_MAKE_MEM_DEFINED((__ptr), sizeof(*(__ptr))); \ + __typeof(*(__ptr)) __val = *(__ptr); \ + VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr)));\ + __val; \ +}) +#define VG_NOACCESS_WRITE(__ptr, __val) ({ \ + VALGRIND_MAKE_MEM_UNDEFINED((__ptr), sizeof(*(__ptr))); \ + *(__ptr) = (__val); \ + VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr))); \ +}) +#else +#define VG_NOACCESS_READ(__ptr) (*(__ptr)) +#define VG_NOACCESS_WRITE(__ptr, __val) (*(__ptr) = (__val)) +#endif + +/* Design goals: + * + * - Lock free (except when resizing underlying bos) + * + * - Constant time allocation with typically only one atomic + * + * - Multiple allocation sizes without fragmentation + * + * - Can grow while keeping addresses and offset of contents stable + * + * - All allocations within one bo so we can point one of the + * STATE_BASE_ADDRESS pointers at it. + * + * The overall design is a two-level allocator: top level is a fixed size, big + * block (8k) allocator, which operates out of a bo. Allocation is done by + * either pulling a block from the free list or growing the used range of the + * bo. Growing the range may run out of space in the bo which we then need to + * grow. Growing the bo is tricky in a multi-threaded, lockless environment: + * we need to keep all pointers and contents in the old map valid. GEM bos in + * general can't grow, but we use a trick: we create a memfd and use ftruncate + * to grow it as necessary. We mmap the new size and then create a gem bo for + * it using the new gem userptr ioctl. Without heavy-handed locking around + * our allocation fast-path, there isn't really a way to munmap the old mmap, + * so we just keep it around until garbage collection time. While the block + * allocator is lockless for normal operations, we block other threads trying + * to allocate while we're growing the map. It sholdn't happen often, and + * growing is fast anyway. + * + * At the next level we can use various sub-allocators. The state pool is a + * pool of smaller, fixed size objects, which operates much like the block + * pool. It uses a free list for freeing objects, but when it runs out of + * space it just allocates a new block from the block pool. This allocator is + * intended for longer lived state objects such as SURFACE_STATE and most + * other persistent state objects in the API. We may need to track more info + * with these object and a pointer back to the CPU object (eg VkImage). In + * those cases we just allocate a slightly bigger object and put the extra + * state after the GPU state object. + * + * The state stream allocator works similar to how the i965 DRI driver streams + * all its state. Even with Vulkan, we need to emit transient state (whether + * surface state base or dynamic state base), and for that we can just get a + * block and fill it up. These cases are local to a command buffer and the + * sub-allocator need not be thread safe. The streaming allocator gets a new + * block when it runs out of space and chains them together so they can be + * easily freed. + */ + +/* Allocations are always at least 64 byte aligned, so 1 is an invalid value. + * We use it to indicate the free list is empty. */ +#define EMPTY 1 + +struct anv_mmap_cleanup { + void *map; + size_t size; + uint32_t gem_handle; +}; + +#define ANV_MMAP_CLEANUP_INIT ((struct anv_mmap_cleanup){0}) + +static inline long +sys_futex(void *addr1, int op, int val1, + struct timespec *timeout, void *addr2, int val3) +{ + return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3); +} + +static inline int +futex_wake(uint32_t *addr, int count) +{ + return sys_futex(addr, FUTEX_WAKE, count, NULL, NULL, 0); +} + +static inline int +futex_wait(uint32_t *addr, int32_t value) +{ + return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0); +} + +static inline int +memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +static inline uint32_t +ilog2_round_up(uint32_t value) +{ + assert(value != 0); + return 32 - __builtin_clz(value - 1); +} + +static inline uint32_t +round_to_power_of_two(uint32_t value) +{ + return 1 << ilog2_round_up(value); +} + +static bool +anv_free_list_pop(union anv_free_list *list, void **map, int32_t *offset) +{ + union anv_free_list current, new, old; + + current.u64 = list->u64; + while (current.offset != EMPTY) { + /* We have to add a memory barrier here so that the list head (and + * offset) gets read before we read the map pointer. This way we + * know that the map pointer is valid for the given offset at the + * point where we read it. + */ + __sync_synchronize(); + + int32_t *next_ptr = *map + current.offset; + new.offset = VG_NOACCESS_READ(next_ptr); + new.count = current.count + 1; + old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); + if (old.u64 == current.u64) { + *offset = current.offset; + return true; + } + current = old; + } + + return false; +} + +static void +anv_free_list_push(union anv_free_list *list, void *map, int32_t offset) +{ + union anv_free_list current, old, new; + int32_t *next_ptr = map + offset; + + old = *list; + do { + current = old; + VG_NOACCESS_WRITE(next_ptr, current.offset); + new.offset = offset; + new.count = current.count + 1; + old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); + } while (old.u64 != current.u64); +} + +/* All pointers in the ptr_free_list are assumed to be page-aligned. This + * means that the bottom 12 bits should all be zero. + */ +#define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff) +#define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~0xfff)) +#define PFL_PACK(ptr, count) ({ \ + assert(((uintptr_t)(ptr) & 0xfff) == 0); \ + (void *)((uintptr_t)(ptr) | (uintptr_t)((count) & 0xfff)); \ +}) + +static bool +anv_ptr_free_list_pop(void **list, void **elem) +{ + void *current = *list; + while (PFL_PTR(current) != NULL) { + void **next_ptr = PFL_PTR(current); + void *new_ptr = VG_NOACCESS_READ(next_ptr); + unsigned new_count = PFL_COUNT(current) + 1; + void *new = PFL_PACK(new_ptr, new_count); + void *old = __sync_val_compare_and_swap(list, current, new); + if (old == current) { + *elem = PFL_PTR(current); + return true; + } + current = old; + } + + return false; +} + +static void +anv_ptr_free_list_push(void **list, void *elem) +{ + void *old, *current; + void **next_ptr = elem; + + old = *list; + do { + current = old; + VG_NOACCESS_WRITE(next_ptr, PFL_PTR(current)); + unsigned new_count = PFL_COUNT(current) + 1; + void *new = PFL_PACK(elem, new_count); + old = __sync_val_compare_and_swap(list, current, new); + } while (old != current); +} + +static uint32_t +anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state); + +void +anv_block_pool_init(struct anv_block_pool *pool, + struct anv_device *device, uint32_t block_size) +{ + assert(util_is_power_of_two(block_size)); + + pool->device = device; + pool->bo.gem_handle = 0; + pool->bo.offset = 0; + pool->bo.size = 0; + pool->block_size = block_size; + pool->free_list = ANV_FREE_LIST_EMPTY; + pool->back_free_list = ANV_FREE_LIST_EMPTY; + + pool->fd = memfd_create("block pool", MFD_CLOEXEC); + if (pool->fd == -1) + return; + + /* Just make it 2GB up-front. The Linux kernel won't actually back it + * with pages until we either map and fault on one of them or we use + * userptr and send a chunk of it off to the GPU. + */ + if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1) + return; + + anv_vector_init(&pool->mmap_cleanups, + round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128); + + pool->state.next = 0; + pool->state.end = 0; + pool->back_state.next = 0; + pool->back_state.end = 0; + + /* Immediately grow the pool so we'll have a backing bo. */ + pool->state.end = anv_block_pool_grow(pool, &pool->state); +} + +void +anv_block_pool_finish(struct anv_block_pool *pool) +{ + struct anv_mmap_cleanup *cleanup; + + anv_vector_foreach(cleanup, &pool->mmap_cleanups) { + if (cleanup->map) + munmap(cleanup->map, cleanup->size); + if (cleanup->gem_handle) + anv_gem_close(pool->device, cleanup->gem_handle); + } + + anv_vector_finish(&pool->mmap_cleanups); + + close(pool->fd); +} + +#define PAGE_SIZE 4096 + +/** Grows and re-centers the block pool. + * + * We grow the block pool in one or both directions in such a way that the + * following conditions are met: + * + * 1) The size of the entire pool is always a power of two. + * + * 2) The pool only grows on both ends. Neither end can get + * shortened. + * + * 3) At the end of the allocation, we have about twice as much space + * allocated for each end as we have used. This way the pool doesn't + * grow too far in one direction or the other. + * + * 4) If the _alloc_back() has never been called, then the back portion of + * the pool retains a size of zero. (This makes it easier for users of + * the block pool that only want a one-sided pool.) + * + * 5) We have enough space allocated for at least one more block in + * whichever side `state` points to. + * + * 6) The center of the pool is always aligned to both the block_size of + * the pool and a 4K CPU page. + */ +static uint32_t +anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) +{ + size_t size; + void *map; + uint32_t gem_handle; + struct anv_mmap_cleanup *cleanup; + + pthread_mutex_lock(&pool->device->mutex); + + assert(state == &pool->state || state == &pool->back_state); + + /* Gather a little usage information on the pool. Since we may have + * threadsd waiting in queue to get some storage while we resize, it's + * actually possible that total_used will be larger than old_size. In + * particular, block_pool_alloc() increments state->next prior to + * calling block_pool_grow, so this ensures that we get enough space for + * which ever side tries to grow the pool. + * + * We align to a page size because it makes it easier to do our + * calculations later in such a way that we state page-aigned. + */ + uint32_t back_used = align_u32(pool->back_state.next, PAGE_SIZE); + uint32_t front_used = align_u32(pool->state.next, PAGE_SIZE); + uint32_t total_used = front_used + back_used; + + assert(state == &pool->state || back_used > 0); + + size_t old_size = pool->bo.size; + + if (old_size != 0 && + back_used * 2 <= pool->center_bo_offset && + front_used * 2 <= (old_size - pool->center_bo_offset)) { + /* If we're in this case then this isn't the firsta allocation and we + * already have enough space on both sides to hold double what we + * have allocated. There's nothing for us to do. + */ + goto done; + } + + if (old_size == 0) { + /* This is the first allocation */ + size = MAX2(32 * pool->block_size, PAGE_SIZE); + } else { + size = old_size * 2; + } + + /* We can't have a block pool bigger than 1GB because we use signed + * 32-bit offsets in the free list and we don't want overflow. We + * should never need a block pool bigger than 1GB anyway. + */ + assert(size <= (1u << 31)); + + /* We compute a new center_bo_offset such that, when we double the size + * of the pool, we maintain the ratio of how much is used by each side. + * This way things should remain more-or-less balanced. + */ + uint32_t center_bo_offset; + if (back_used == 0) { + /* If we're in this case then we have never called alloc_back(). In + * this case, we want keep the offset at 0 to make things as simple + * as possible for users that don't care about back allocations. + */ + center_bo_offset = 0; + } else { + /* Try to "center" the allocation based on how much is currently in + * use on each side of the center line. + */ + center_bo_offset = ((uint64_t)size * back_used) / total_used; + + /* Align down to a multiple of both the block size and page size */ + uint32_t granularity = MAX2(pool->block_size, PAGE_SIZE); + assert(util_is_power_of_two(granularity)); + center_bo_offset &= ~(granularity - 1); + + assert(center_bo_offset >= back_used); + + /* Make sure we don't shrink the back end of the pool */ + if (center_bo_offset < pool->back_state.end) + center_bo_offset = pool->back_state.end; + + /* Make sure that we don't shrink the front end of the pool */ + if (size - center_bo_offset < pool->state.end) + center_bo_offset = size - pool->state.end; + } + + assert(center_bo_offset % pool->block_size == 0); + assert(center_bo_offset % PAGE_SIZE == 0); + + /* Assert that we only ever grow the pool */ + assert(center_bo_offset >= pool->back_state.end); + assert(size - center_bo_offset >= pool->state.end); + + cleanup = anv_vector_add(&pool->mmap_cleanups); + if (!cleanup) + goto fail; + *cleanup = ANV_MMAP_CLEANUP_INIT; + + /* Just leak the old map until we destroy the pool. We can't munmap it + * without races or imposing locking on the block allocate fast path. On + * the whole the leaked maps adds up to less than the size of the + * current map. MAP_POPULATE seems like the right thing to do, but we + * should try to get some numbers. + */ + map = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, pool->fd, + BLOCK_POOL_MEMFD_CENTER - center_bo_offset); + cleanup->map = map; + cleanup->size = size; + + if (map == MAP_FAILED) + goto fail; + + gem_handle = anv_gem_userptr(pool->device, map, size); + if (gem_handle == 0) + goto fail; + cleanup->gem_handle = gem_handle; + +#if 0 + /* Regular objects are created I915_CACHING_CACHED on LLC platforms and + * I915_CACHING_NONE on non-LLC platforms. However, userptr objects are + * always created as I915_CACHING_CACHED, which on non-LLC means + * snooped. That can be useful but comes with a bit of overheard. Since + * we're eplicitly clflushing and don't want the overhead we need to turn + * it off. */ + if (!pool->device->info.has_llc) { + anv_gem_set_caching(pool->device, gem_handle, I915_CACHING_NONE); + anv_gem_set_domain(pool->device, gem_handle, + I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); + } +#endif + + /* Now that we successfull allocated everything, we can write the new + * values back into pool. */ + pool->map = map + center_bo_offset; + pool->center_bo_offset = center_bo_offset; + pool->bo.gem_handle = gem_handle; + pool->bo.size = size; + pool->bo.map = map; + pool->bo.index = 0; + +done: + pthread_mutex_unlock(&pool->device->mutex); + + /* Return the appropreate new size. This function never actually + * updates state->next. Instead, we let the caller do that because it + * needs to do so in order to maintain its concurrency model. + */ + if (state == &pool->state) { + return pool->bo.size - pool->center_bo_offset; + } else { + assert(pool->center_bo_offset > 0); + return pool->center_bo_offset; + } + +fail: + pthread_mutex_unlock(&pool->device->mutex); + + return 0; +} + +static uint32_t +anv_block_pool_alloc_new(struct anv_block_pool *pool, + struct anv_block_state *pool_state) +{ + struct anv_block_state state, old, new; + + while (1) { + state.u64 = __sync_fetch_and_add(&pool_state->u64, pool->block_size); + if (state.next < state.end) { + assert(pool->map); + return state.next; + } else if (state.next == state.end) { + /* We allocated the first block outside the pool, we have to grow it. + * pool_state->next acts a mutex: threads who try to allocate now will + * get block indexes above the current limit and hit futex_wait + * below. */ + new.next = state.next + pool->block_size; + new.end = anv_block_pool_grow(pool, pool_state); + assert(new.end >= new.next && new.end % pool->block_size == 0); + old.u64 = __sync_lock_test_and_set(&pool_state->u64, new.u64); + if (old.next != state.next) + futex_wake(&pool_state->end, INT_MAX); + return state.next; + } else { + futex_wait(&pool_state->end, state.end); + continue; + } + } +} + +int32_t +anv_block_pool_alloc(struct anv_block_pool *pool) +{ + int32_t offset; + + /* Try free list first. */ + if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) { + assert(offset >= 0); + assert(pool->map); + return offset; + } + + return anv_block_pool_alloc_new(pool, &pool->state); +} + +/* Allocates a block out of the back of the block pool. + * + * This will allocated a block earlier than the "start" of the block pool. + * The offsets returned from this function will be negative but will still + * be correct relative to the block pool's map pointer. + * + * If you ever use anv_block_pool_alloc_back, then you will have to do + * gymnastics with the block pool's BO when doing relocations. + */ +int32_t +anv_block_pool_alloc_back(struct anv_block_pool *pool) +{ + int32_t offset; + + /* Try free list first. */ + if (anv_free_list_pop(&pool->back_free_list, &pool->map, &offset)) { + assert(offset < 0); + assert(pool->map); + return offset; + } + + offset = anv_block_pool_alloc_new(pool, &pool->back_state); + + /* The offset we get out of anv_block_pool_alloc_new() is actually the + * number of bytes downwards from the middle to the end of the block. + * We need to turn it into a (negative) offset from the middle to the + * start of the block. + */ + assert(offset >= 0); + return -(offset + pool->block_size); +} + +void +anv_block_pool_free(struct anv_block_pool *pool, int32_t offset) +{ + if (offset < 0) { + anv_free_list_push(&pool->back_free_list, pool->map, offset); + } else { + anv_free_list_push(&pool->free_list, pool->map, offset); + } +} + +static void +anv_fixed_size_state_pool_init(struct anv_fixed_size_state_pool *pool, + size_t state_size) +{ + /* At least a cache line and must divide the block size. */ + assert(state_size >= 64 && util_is_power_of_two(state_size)); + + pool->state_size = state_size; + pool->free_list = ANV_FREE_LIST_EMPTY; + pool->block.next = 0; + pool->block.end = 0; +} + +static uint32_t +anv_fixed_size_state_pool_alloc(struct anv_fixed_size_state_pool *pool, + struct anv_block_pool *block_pool) +{ + int32_t offset; + struct anv_block_state block, old, new; + + /* Try free list first. */ + if (anv_free_list_pop(&pool->free_list, &block_pool->map, &offset)) { + assert(offset >= 0); + return offset; + } + + /* If free list was empty (or somebody raced us and took the items) we + * allocate a new item from the end of the block */ + restart: + block.u64 = __sync_fetch_and_add(&pool->block.u64, pool->state_size); + + if (block.next < block.end) { + return block.next; + } else if (block.next == block.end) { + offset = anv_block_pool_alloc(block_pool); + new.next = offset + pool->state_size; + new.end = offset + block_pool->block_size; + old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64); + if (old.next != block.next) + futex_wake(&pool->block.end, INT_MAX); + return offset; + } else { + futex_wait(&pool->block.end, block.end); + goto restart; + } +} + +static void +anv_fixed_size_state_pool_free(struct anv_fixed_size_state_pool *pool, + struct anv_block_pool *block_pool, + uint32_t offset) +{ + anv_free_list_push(&pool->free_list, block_pool->map, offset); +} + +void +anv_state_pool_init(struct anv_state_pool *pool, + struct anv_block_pool *block_pool) +{ + pool->block_pool = block_pool; + for (unsigned i = 0; i < ANV_STATE_BUCKETS; i++) { + size_t size = 1 << (ANV_MIN_STATE_SIZE_LOG2 + i); + anv_fixed_size_state_pool_init(&pool->buckets[i], size); + } + VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false)); +} + +void +anv_state_pool_finish(struct anv_state_pool *pool) +{ + VG(VALGRIND_DESTROY_MEMPOOL(pool)); +} + +struct anv_state +anv_state_pool_alloc(struct anv_state_pool *pool, size_t size, size_t align) +{ + unsigned size_log2 = ilog2_round_up(size < align ? align : size); + assert(size_log2 <= ANV_MAX_STATE_SIZE_LOG2); + if (size_log2 < ANV_MIN_STATE_SIZE_LOG2) + size_log2 = ANV_MIN_STATE_SIZE_LOG2; + unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; + + struct anv_state state; + state.alloc_size = 1 << size_log2; + state.offset = anv_fixed_size_state_pool_alloc(&pool->buckets[bucket], + pool->block_pool); + state.map = pool->block_pool->map + state.offset; + VG(VALGRIND_MEMPOOL_ALLOC(pool, state.map, size)); + return state; +} + +void +anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state) +{ + assert(util_is_power_of_two(state.alloc_size)); + unsigned size_log2 = ilog2_round_up(state.alloc_size); + assert(size_log2 >= ANV_MIN_STATE_SIZE_LOG2 && + size_log2 <= ANV_MAX_STATE_SIZE_LOG2); + unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; + + VG(VALGRIND_MEMPOOL_FREE(pool, state.map)); + anv_fixed_size_state_pool_free(&pool->buckets[bucket], + pool->block_pool, state.offset); +} + +#define NULL_BLOCK 1 +struct anv_state_stream_block { + /* The next block */ + struct anv_state_stream_block *next; + + /* The offset into the block pool at which this block starts */ + uint32_t offset; + +#ifdef HAVE_VALGRIND + /* A pointer to the first user-allocated thing in this block. This is + * what valgrind sees as the start of the block. + */ + void *_vg_ptr; +#endif +}; + +/* The state stream allocator is a one-shot, single threaded allocator for + * variable sized blocks. We use it for allocating dynamic state. + */ +void +anv_state_stream_init(struct anv_state_stream *stream, + struct anv_block_pool *block_pool) +{ + stream->block_pool = block_pool; + stream->block = NULL; + + /* Ensure that next + whatever > end. This way the first call to + * state_stream_alloc fetches a new block. + */ + stream->next = 1; + stream->end = 0; + + VG(VALGRIND_CREATE_MEMPOOL(stream, 0, false)); +} + +void +anv_state_stream_finish(struct anv_state_stream *stream) +{ + const uint32_t block_size = stream->block_pool->block_size; + + struct anv_state_stream_block *next = stream->block; + while (next != NULL) { + VG(VALGRIND_MAKE_MEM_DEFINED(next, sizeof(*next))); + struct anv_state_stream_block sb = VG_NOACCESS_READ(next); + VG(VALGRIND_MEMPOOL_FREE(stream, sb._vg_ptr)); + VG(VALGRIND_MAKE_MEM_UNDEFINED(next, block_size)); + anv_block_pool_free(stream->block_pool, sb.offset); + next = sb.next; + } + + VG(VALGRIND_DESTROY_MEMPOOL(stream)); +} + +struct anv_state +anv_state_stream_alloc(struct anv_state_stream *stream, + uint32_t size, uint32_t alignment) +{ + struct anv_state_stream_block *sb = stream->block; + + struct anv_state state; + + state.offset = align_u32(stream->next, alignment); + if (state.offset + size > stream->end) { + uint32_t block = anv_block_pool_alloc(stream->block_pool); + sb = stream->block_pool->map + block; + + VG(VALGRIND_MAKE_MEM_UNDEFINED(sb, sizeof(*sb))); + sb->next = stream->block; + sb->offset = block; + VG(sb->_vg_ptr = NULL); + VG(VALGRIND_MAKE_MEM_NOACCESS(sb, stream->block_pool->block_size)); + + stream->block = sb; + stream->start = block; + stream->next = block + sizeof(*sb); + stream->end = block + stream->block_pool->block_size; + + state.offset = align_u32(stream->next, alignment); + assert(state.offset + size <= stream->end); + } + + assert(state.offset > stream->start); + state.map = (void *)sb + (state.offset - stream->start); + state.alloc_size = size; + +#ifdef HAVE_VALGRIND + void *vg_ptr = VG_NOACCESS_READ(&sb->_vg_ptr); + if (vg_ptr == NULL) { + vg_ptr = state.map; + VG_NOACCESS_WRITE(&sb->_vg_ptr, vg_ptr); + VALGRIND_MEMPOOL_ALLOC(stream, vg_ptr, size); + } else { + void *state_end = state.map + state.alloc_size; + /* This only updates the mempool. The newly allocated chunk is still + * marked as NOACCESS. */ + VALGRIND_MEMPOOL_CHANGE(stream, vg_ptr, vg_ptr, state_end - vg_ptr); + /* Mark the newly allocated chunk as undefined */ + VALGRIND_MAKE_MEM_UNDEFINED(state.map, state.alloc_size); + } +#endif + + stream->next = state.offset + size; + + return state; +} + +struct bo_pool_bo_link { + struct bo_pool_bo_link *next; + struct anv_bo bo; +}; + +void +anv_bo_pool_init(struct anv_bo_pool *pool, + struct anv_device *device, uint32_t bo_size) +{ + pool->device = device; + pool->bo_size = bo_size; + pool->free_list = NULL; + + VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false)); +} + +void +anv_bo_pool_finish(struct anv_bo_pool *pool) +{ + struct bo_pool_bo_link *link = PFL_PTR(pool->free_list); + while (link != NULL) { + struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link); + + anv_gem_munmap(link_copy.bo.map, pool->bo_size); + anv_gem_close(pool->device, link_copy.bo.gem_handle); + link = link_copy.next; + } + + VG(VALGRIND_DESTROY_MEMPOOL(pool)); +} + +VkResult +anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo) +{ + VkResult result; + + void *next_free_void; + if (anv_ptr_free_list_pop(&pool->free_list, &next_free_void)) { + struct bo_pool_bo_link *next_free = next_free_void; + *bo = VG_NOACCESS_READ(&next_free->bo); + assert(bo->map == next_free); + assert(bo->size == pool->bo_size); + + VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); + + return VK_SUCCESS; + } + + struct anv_bo new_bo; + + result = anv_bo_init_new(&new_bo, pool->device, pool->bo_size); + if (result != VK_SUCCESS) + return result; + + assert(new_bo.size == pool->bo_size); + + new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pool->bo_size, 0); + if (new_bo.map == NULL) { + anv_gem_close(pool->device, new_bo.gem_handle); + return vk_error(VK_ERROR_MEMORY_MAP_FAILED); + } + + *bo = new_bo; + + VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); + + return VK_SUCCESS; +} + +void +anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo) +{ + struct bo_pool_bo_link *link = bo->map; + link->bo = *bo; + + VG(VALGRIND_MEMPOOL_FREE(pool, bo->map)); + anv_ptr_free_list_push(&pool->free_list, link); +} diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c new file mode 100644 index 00000000000..d74c5995168 --- /dev/null +++ b/src/vulkan/anv_batch_chain.c @@ -0,0 +1,1074 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +#include "gen7_pack.h" +#include "gen8_pack.h" + +/** \file anv_batch_chain.c + * + * This file contains functions related to anv_cmd_buffer as a data + * structure. This involves everything required to create and destroy + * the actual batch buffers as well as link them together and handle + * relocations and surface state. It specifically does *not* contain any + * handling of actual vkCmd calls beyond vkCmdExecuteCommands. + */ + +/*-----------------------------------------------------------------------* + * Functions related to anv_reloc_list + *-----------------------------------------------------------------------*/ + +static VkResult +anv_reloc_list_init_clone(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, + const struct anv_reloc_list *other_list) +{ + if (other_list) { + list->num_relocs = other_list->num_relocs; + list->array_length = other_list->array_length; + } else { + list->num_relocs = 0; + list->array_length = 256; + } + + list->relocs = + anv_alloc(alloc, list->array_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (list->relocs == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + list->reloc_bos = + anv_alloc(alloc, list->array_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (list->reloc_bos == NULL) { + anv_free(alloc, list->relocs); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (other_list) { + memcpy(list->relocs, other_list->relocs, + list->array_length * sizeof(*list->relocs)); + memcpy(list->reloc_bos, other_list->reloc_bos, + list->array_length * sizeof(*list->reloc_bos)); + } + + return VK_SUCCESS; +} + +VkResult +anv_reloc_list_init(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc) +{ + return anv_reloc_list_init_clone(list, alloc, NULL); +} + +void +anv_reloc_list_finish(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc) +{ + anv_free(alloc, list->relocs); + anv_free(alloc, list->reloc_bos); +} + +static VkResult +anv_reloc_list_grow(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, + size_t num_additional_relocs) +{ + if (list->num_relocs + num_additional_relocs <= list->array_length) + return VK_SUCCESS; + + size_t new_length = list->array_length * 2; + while (new_length < list->num_relocs + num_additional_relocs) + new_length *= 2; + + struct drm_i915_gem_relocation_entry *new_relocs = + anv_alloc(alloc, new_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (new_relocs == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_reloc_bos = + anv_alloc(alloc, new_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (new_relocs == NULL) { + anv_free(alloc, new_relocs); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs)); + memcpy(new_reloc_bos, list->reloc_bos, + list->num_relocs * sizeof(*list->reloc_bos)); + + anv_free(alloc, list->relocs); + anv_free(alloc, list->reloc_bos); + + list->array_length = new_length; + list->relocs = new_relocs; + list->reloc_bos = new_reloc_bos; + + return VK_SUCCESS; +} + +uint64_t +anv_reloc_list_add(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, + uint32_t offset, struct anv_bo *target_bo, uint32_t delta) +{ + struct drm_i915_gem_relocation_entry *entry; + int index; + + anv_reloc_list_grow(list, alloc, 1); + /* TODO: Handle failure */ + + /* XXX: Can we use I915_EXEC_HANDLE_LUT? */ + index = list->num_relocs++; + list->reloc_bos[index] = target_bo; + entry = &list->relocs[index]; + entry->target_handle = target_bo->gem_handle; + entry->delta = delta; + entry->offset = offset; + entry->presumed_offset = target_bo->offset; + entry->read_domains = 0; + entry->write_domain = 0; + VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry))); + + return target_bo->offset + delta; +} + +static void +anv_reloc_list_append(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, + struct anv_reloc_list *other, uint32_t offset) +{ + anv_reloc_list_grow(list, alloc, other->num_relocs); + /* TODO: Handle failure */ + + memcpy(&list->relocs[list->num_relocs], &other->relocs[0], + other->num_relocs * sizeof(other->relocs[0])); + memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0], + other->num_relocs * sizeof(other->reloc_bos[0])); + + for (uint32_t i = 0; i < other->num_relocs; i++) + list->relocs[i + list->num_relocs].offset += offset; + + list->num_relocs += other->num_relocs; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch + *-----------------------------------------------------------------------*/ + +void * +anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) +{ + if (batch->next + num_dwords * 4 > batch->end) + batch->extend_cb(batch, batch->user_data); + + void *p = batch->next; + + batch->next += num_dwords * 4; + assert(batch->next <= batch->end); + + return p; +} + +uint64_t +anv_batch_emit_reloc(struct anv_batch *batch, + void *location, struct anv_bo *bo, uint32_t delta) +{ + return anv_reloc_list_add(batch->relocs, batch->alloc, + location - batch->start, bo, delta); +} + +void +anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) +{ + uint32_t size, offset; + + size = other->next - other->start; + assert(size % 4 == 0); + + if (batch->next + size > batch->end) + batch->extend_cb(batch, batch->user_data); + + assert(batch->next + size <= batch->end); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size)); + memcpy(batch->next, other->start, size); + + offset = batch->next - batch->start; + anv_reloc_list_append(batch->relocs, batch->alloc, + other->relocs, offset); + + batch->next += size; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch_bo + *-----------------------------------------------------------------------*/ + +static VkResult +anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer, + struct anv_batch_bo **bbo_out) +{ + VkResult result; + + struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (bbo == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + if (result != VK_SUCCESS) + goto fail_alloc; + + result = anv_reloc_list_init(&bbo->relocs, &cmd_buffer->pool->alloc); + if (result != VK_SUCCESS) + goto fail_bo_alloc; + + *bbo_out = bbo; + + return VK_SUCCESS; + + fail_bo_alloc: + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + fail_alloc: + anv_free(&cmd_buffer->pool->alloc, bbo); + + return result; +} + +static VkResult +anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer, + const struct anv_batch_bo *other_bbo, + struct anv_batch_bo **bbo_out) +{ + VkResult result; + + struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (bbo == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + if (result != VK_SUCCESS) + goto fail_alloc; + + result = anv_reloc_list_init_clone(&bbo->relocs, &cmd_buffer->pool->alloc, + &other_bbo->relocs); + if (result != VK_SUCCESS) + goto fail_bo_alloc; + + bbo->length = other_bbo->length; + memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length); + + bbo->last_ss_pool_bo_offset = other_bbo->last_ss_pool_bo_offset; + + *bbo_out = bbo; + + return VK_SUCCESS; + + fail_bo_alloc: + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + fail_alloc: + anv_free(&cmd_buffer->pool->alloc, bbo); + + return result; +} + +static void +anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, + size_t batch_padding) +{ + batch->next = batch->start = bbo->bo.map; + batch->end = bbo->bo.map + bbo->bo.size - batch_padding; + batch->relocs = &bbo->relocs; + bbo->last_ss_pool_bo_offset = 0; + bbo->relocs.num_relocs = 0; +} + +static void +anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch, + size_t batch_padding) +{ + batch->start = bbo->bo.map; + batch->next = bbo->bo.map + bbo->length; + batch->end = bbo->bo.map + bbo->bo.size - batch_padding; + batch->relocs = &bbo->relocs; +} + +static void +anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) +{ + assert(batch->start == bbo->bo.map); + bbo->length = batch->next - batch->start; + VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); +} + +static void +anv_batch_bo_destroy(struct anv_batch_bo *bbo, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_reloc_list_finish(&bbo->relocs, &cmd_buffer->pool->alloc); + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + anv_free(&cmd_buffer->pool->alloc, bbo); +} + +static VkResult +anv_batch_bo_list_clone(const struct list_head *list, + struct anv_cmd_buffer *cmd_buffer, + struct list_head *new_list) +{ + VkResult result = VK_SUCCESS; + + list_inithead(new_list); + + struct anv_batch_bo *prev_bbo = NULL; + list_for_each_entry(struct anv_batch_bo, bbo, list, link) { + struct anv_batch_bo *new_bbo; + result = anv_batch_bo_clone(cmd_buffer, bbo, &new_bbo); + if (result != VK_SUCCESS) + break; + list_addtail(&new_bbo->link, new_list); + + if (prev_bbo) { + /* As we clone this list of batch_bo's, they chain one to the + * other using MI_BATCH_BUFFER_START commands. We need to fix up + * those relocations as we go. Fortunately, this is pretty easy + * as it will always be the last relocation in the list. + */ + uint32_t last_idx = prev_bbo->relocs.num_relocs - 1; + assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo); + prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo; + } + + prev_bbo = new_bbo; + } + + if (result != VK_SUCCESS) { + list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link) + anv_batch_bo_destroy(bbo, cmd_buffer); + } + + return result; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch_bo + *-----------------------------------------------------------------------*/ + +static inline struct anv_batch_bo * +anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer) +{ + return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link); +} + +struct anv_address +anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer) +{ + return (struct anv_address) { + .bo = &cmd_buffer->device->surface_state_block_pool.bo, + .offset = *(int32_t *)anv_vector_head(&cmd_buffer->bt_blocks), + }; +} + +static void +emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, uint32_t offset) +{ + /* In gen8+ the address field grew to two dwords to accomodate 48 bit + * offsets. The high 16 bits are in the last dword, so we can use the gen8 + * version in either case, as long as we set the instruction length in the + * header accordingly. This means that we always emit three dwords here + * and all the padding and adjustment we do in this file works for all + * gens. + */ + + const uint32_t gen7_length = + GEN7_MI_BATCH_BUFFER_START_length - GEN7_MI_BATCH_BUFFER_START_length_bias; + const uint32_t gen8_length = + GEN8_MI_BATCH_BUFFER_START_length - GEN8_MI_BATCH_BUFFER_START_length_bias; + + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START, + .DwordLength = cmd_buffer->device->info.gen < 8 ? + gen7_length : gen8_length, + ._2ndLevelBatchBuffer = _1stlevelbatch, + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = { bo, offset }); +} + +static void +cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_batch_bo *bbo) +{ + struct anv_batch *batch = &cmd_buffer->batch; + struct anv_batch_bo *current_bbo = + anv_cmd_buffer_current_batch_bo(cmd_buffer); + + /* We set the end of the batch a little short so we would be sure we + * have room for the chaining command. Since we're about to emit the + * chaining command, let's set it back where it should go. + */ + batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; + assert(batch->end == current_bbo->bo.map + current_bbo->bo.size); + + emit_batch_buffer_start(cmd_buffer, &bbo->bo, 0); + + anv_batch_bo_finish(current_bbo, batch); +} + +static VkResult +anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) +{ + struct anv_cmd_buffer *cmd_buffer = _data; + struct anv_batch_bo *new_bbo; + + VkResult result = anv_batch_bo_create(cmd_buffer, &new_bbo); + if (result != VK_SUCCESS) + return result; + + struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); + if (seen_bbo == NULL) { + anv_batch_bo_destroy(new_bbo, cmd_buffer); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + *seen_bbo = new_bbo; + + cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo); + + list_addtail(&new_bbo->link, &cmd_buffer->batch_bos); + + anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4); + + return VK_SUCCESS; +} + +struct anv_state +anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, + uint32_t entries, uint32_t *state_offset) +{ + struct anv_block_pool *block_pool = + &cmd_buffer->device->surface_state_block_pool; + int32_t *bt_block = anv_vector_head(&cmd_buffer->bt_blocks); + struct anv_state state; + + state.alloc_size = align_u32(entries * 4, 32); + + if (cmd_buffer->bt_next + state.alloc_size > block_pool->block_size) + return (struct anv_state) { 0 }; + + state.offset = cmd_buffer->bt_next; + state.map = block_pool->map + *bt_block + state.offset; + + cmd_buffer->bt_next += state.alloc_size; + + assert(*bt_block < 0); + *state_offset = -(*bt_block); + + return state; +} + +struct anv_state +anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer) +{ + return anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); +} + +struct anv_state +anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment) +{ + return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, + size, alignment); +} + +VkResult +anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_block_pool *block_pool = + &cmd_buffer->device->surface_state_block_pool; + + int32_t *offset = anv_vector_add(&cmd_buffer->bt_blocks); + if (offset == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + *offset = anv_block_pool_alloc_back(block_pool); + cmd_buffer->bt_next = 0; + + return VK_SUCCESS; +} + +VkResult +anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *batch_bo; + VkResult result; + + list_inithead(&cmd_buffer->batch_bos); + + result = anv_batch_bo_create(cmd_buffer, &batch_bo); + if (result != VK_SUCCESS) + return result; + + list_addtail(&batch_bo->link, &cmd_buffer->batch_bos); + + cmd_buffer->batch.alloc = &cmd_buffer->pool->alloc; + cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; + cmd_buffer->batch.user_data = cmd_buffer; + + anv_batch_bo_start(batch_bo, &cmd_buffer->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + int success = anv_vector_init(&cmd_buffer->seen_bbos, + sizeof(struct anv_bo *), + 8 * sizeof(struct anv_bo *)); + if (!success) + goto fail_batch_bo; + + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = batch_bo; + + success = anv_vector_init(&cmd_buffer->bt_blocks, sizeof(int32_t), + 8 * sizeof(int32_t)); + if (!success) + goto fail_seen_bbos; + + result = anv_reloc_list_init(&cmd_buffer->surface_relocs, + &cmd_buffer->pool->alloc); + if (result != VK_SUCCESS) + goto fail_bt_blocks; + + anv_cmd_buffer_new_binding_table_block(cmd_buffer); + + cmd_buffer->execbuf2.objects = NULL; + cmd_buffer->execbuf2.bos = NULL; + cmd_buffer->execbuf2.array_length = 0; + + return VK_SUCCESS; + + fail_bt_blocks: + anv_vector_finish(&cmd_buffer->bt_blocks); + fail_seen_bbos: + anv_vector_finish(&cmd_buffer->seen_bbos); + fail_batch_bo: + anv_batch_bo_destroy(batch_bo, cmd_buffer); + + return result; +} + +void +anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + int32_t *bt_block; + anv_vector_foreach(bt_block, &cmd_buffer->bt_blocks) { + anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool, + *bt_block); + } + anv_vector_finish(&cmd_buffer->bt_blocks); + + anv_reloc_list_finish(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc); + + anv_vector_finish(&cmd_buffer->seen_bbos); + + /* Destroy all of the batch buffers */ + list_for_each_entry_safe(struct anv_batch_bo, bbo, + &cmd_buffer->batch_bos, link) { + anv_batch_bo_destroy(bbo, cmd_buffer); + } + + anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.objects); + anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.bos); +} + +void +anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + /* Delete all but the first batch bo */ + assert(!list_empty(&cmd_buffer->batch_bos)); + while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) { + struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer); + list_del(&bbo->link); + anv_batch_bo_destroy(bbo, cmd_buffer); + } + assert(!list_empty(&cmd_buffer->batch_bos)); + + anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer), + &cmd_buffer->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + while (anv_vector_length(&cmd_buffer->bt_blocks) > 1) { + int32_t *bt_block = anv_vector_remove(&cmd_buffer->bt_blocks); + anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool, + *bt_block); + } + assert(anv_vector_length(&cmd_buffer->bt_blocks) == 1); + cmd_buffer->bt_next = 0; + + cmd_buffer->surface_relocs.num_relocs = 0; + + /* Reset the list of seen buffers */ + cmd_buffer->seen_bbos.head = 0; + cmd_buffer->seen_bbos.tail = 0; + + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = + anv_cmd_buffer_current_batch_bo(cmd_buffer); +} + +void +anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); + + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { + /* When we start a batch buffer, we subtract a certain amount of + * padding from the end to ensure that we always have room to emit a + * BATCH_BUFFER_START to chain to the next BO. We need to remove + * that padding before we end the batch; otherwise, we may end up + * with our BATCH_BUFFER_END in another BO. + */ + cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4; + assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size); + + anv_batch_emit(&cmd_buffer->batch, GEN7_MI_BATCH_BUFFER_END); + + /* Round batch up to an even number of dwords. */ + if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4) + anv_batch_emit(&cmd_buffer->batch, GEN7_MI_NOOP); + + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY; + } + + anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); + + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { + /* If this is a secondary command buffer, we need to determine the + * mode in which it will be executed with vkExecuteCommands. We + * determine this statically here so that this stays in sync with the + * actual ExecuteCommands implementation. + */ + if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) && + (batch_bo->length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) { + /* If the secondary has exactly one batch buffer in its list *and* + * that batch buffer is less than half of the maximum size, we're + * probably better of simply copying it into our batch. + */ + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT; + } else if (!(cmd_buffer->usage_flags & + VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) { + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN; + + /* When we chain, we need to add an MI_BATCH_BUFFER_START command + * with its relocation. In order to handle this we'll increment here + * so we can unconditionally decrement right before adding the + * MI_BATCH_BUFFER_START command. + */ + batch_bo->relocs.num_relocs++; + cmd_buffer->batch.next += GEN8_MI_BATCH_BUFFER_START_length * 4; + } else { + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN; + } + } +} + +static inline VkResult +anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer, + struct list_head *list) +{ + list_for_each_entry(struct anv_batch_bo, bbo, list, link) { + struct anv_batch_bo **bbo_ptr = anv_vector_add(&cmd_buffer->seen_bbos); + if (bbo_ptr == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + *bbo_ptr = bbo; + } + + return VK_SUCCESS; +} + +void +anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, + struct anv_cmd_buffer *secondary) +{ + switch (secondary->exec_mode) { + case ANV_CMD_BUFFER_EXEC_MODE_EMIT: + anv_batch_emit_batch(&primary->batch, &secondary->batch); + anv_cmd_buffer_emit_state_base_address(primary); + break; + case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: { + struct anv_batch_bo *first_bbo = + list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link); + struct anv_batch_bo *last_bbo = + list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link); + + emit_batch_buffer_start(primary, &first_bbo->bo, 0); + + struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary); + assert(primary->batch.start == this_bbo->bo.map); + uint32_t offset = primary->batch.next - primary->batch.start; + const uint32_t inst_size = GEN8_MI_BATCH_BUFFER_START_length * 4; + + /* Roll back the previous MI_BATCH_BUFFER_START and its relocation so we + * can emit a new command and relocation for the current splice. In + * order to handle the initial-use case, we incremented next and + * num_relocs in end_batch_buffer() so we can alyways just subtract + * here. + */ + last_bbo->relocs.num_relocs--; + secondary->batch.next -= inst_size; + emit_batch_buffer_start(secondary, &this_bbo->bo, offset); + anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); + + /* After patching up the secondary buffer, we need to clflush the + * modified instruction in case we're on a !llc platform. We use a + * little loop to handle the case where the instruction crosses a cache + * line boundary. + */ + if (!primary->device->info.has_llc) { + void *inst = secondary->batch.next - inst_size; + void *p = (void *) (((uintptr_t) inst) & ~CACHELINE_MASK); + __builtin_ia32_mfence(); + while (p < secondary->batch.next) { + __builtin_ia32_clflush(p); + p += CACHELINE_SIZE; + } + } + + anv_cmd_buffer_emit_state_base_address(primary); + break; + } + case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: { + struct list_head copy_list; + VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos, + secondary, + ©_list); + if (result != VK_SUCCESS) + return; /* FIXME */ + + anv_cmd_buffer_add_seen_bbos(primary, ©_list); + + struct anv_batch_bo *first_bbo = + list_first_entry(©_list, struct anv_batch_bo, link); + struct anv_batch_bo *last_bbo = + list_last_entry(©_list, struct anv_batch_bo, link); + + cmd_buffer_chain_to_batch_bo(primary, first_bbo); + + list_splicetail(©_list, &primary->batch_bos); + + anv_batch_bo_continue(last_bbo, &primary->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + anv_cmd_buffer_emit_state_base_address(primary); + break; + } + default: + assert(!"Invalid execution mode"); + } + + anv_reloc_list_append(&primary->surface_relocs, &primary->pool->alloc, + &secondary->surface_relocs, 0); +} + +static VkResult +anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, + struct anv_reloc_list *relocs) +{ + struct drm_i915_gem_exec_object2 *obj = NULL; + + if (bo->index < cmd_buffer->execbuf2.bo_count && + cmd_buffer->execbuf2.bos[bo->index] == bo) + obj = &cmd_buffer->execbuf2.objects[bo->index]; + + if (obj == NULL) { + /* We've never seen this one before. Add it to the list and assign + * an id that we can use later. + */ + if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) { + uint32_t new_len = cmd_buffer->execbuf2.objects ? + cmd_buffer->execbuf2.array_length * 2 : 64; + + struct drm_i915_gem_exec_object2 *new_objects = + anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_objects), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (new_objects == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_bos = + anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_bos), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (new_objects == NULL) { + anv_free(&cmd_buffer->pool->alloc, new_objects); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (cmd_buffer->execbuf2.objects) { + memcpy(new_objects, cmd_buffer->execbuf2.objects, + cmd_buffer->execbuf2.bo_count * sizeof(*new_objects)); + memcpy(new_bos, cmd_buffer->execbuf2.bos, + cmd_buffer->execbuf2.bo_count * sizeof(*new_bos)); + } + + cmd_buffer->execbuf2.objects = new_objects; + cmd_buffer->execbuf2.bos = new_bos; + cmd_buffer->execbuf2.array_length = new_len; + } + + assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length); + + bo->index = cmd_buffer->execbuf2.bo_count++; + obj = &cmd_buffer->execbuf2.objects[bo->index]; + cmd_buffer->execbuf2.bos[bo->index] = bo; + + obj->handle = bo->gem_handle; + obj->relocation_count = 0; + obj->relocs_ptr = 0; + obj->alignment = 0; + obj->offset = bo->offset; + obj->flags = 0; + obj->rsvd1 = 0; + obj->rsvd2 = 0; + } + + if (relocs != NULL && obj->relocation_count == 0) { + /* This is the first time we've ever seen a list of relocations for + * this BO. Go ahead and set the relocations and then walk the list + * of relocations and add them all. + */ + obj->relocation_count = relocs->num_relocs; + obj->relocs_ptr = (uintptr_t) relocs->relocs; + + for (size_t i = 0; i < relocs->num_relocs; i++) { + /* A quick sanity check on relocations */ + assert(relocs->relocs[i].offset < bo->size); + anv_cmd_buffer_add_bo(cmd_buffer, relocs->reloc_bos[i], NULL); + } + } + + return VK_SUCCESS; +} + +static void +anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, + struct anv_reloc_list *list) +{ + struct anv_bo *bo; + + /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in + * struct drm_i915_gem_exec_object2 against the bos current offset and if + * all bos haven't moved it will skip relocation processing alltogether. + * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming + * value of offset so we can set it either way. For that to work we need + * to make sure all relocs use the same presumed offset. + */ + + for (size_t i = 0; i < list->num_relocs; i++) { + bo = list->reloc_bos[i]; + if (bo->offset != list->relocs[i].presumed_offset) + cmd_buffer->execbuf2.need_reloc = true; + + list->relocs[i].target_handle = bo->index; + } +} + +static uint64_t +read_reloc(const struct anv_device *device, const void *p) +{ + if (device->info.gen >= 8) + return *(uint64_t *)p; + else + return *(uint32_t *)p; +} + +static void +write_reloc(const struct anv_device *device, void *p, uint64_t v) +{ + if (device->info.gen >= 8) + *(uint64_t *)p = v; + else + *(uint32_t *)p = v; +} + +static void +adjust_relocations_from_block_pool(struct anv_block_pool *pool, + struct anv_reloc_list *relocs) +{ + for (size_t i = 0; i < relocs->num_relocs; i++) { + /* In general, we don't know how stale the relocated value is. It + * may have been used last time or it may not. Since we don't want + * to stomp it while the GPU may be accessing it, we haven't updated + * it anywhere else in the code. Instead, we just set the presumed + * offset to what it is now based on the delta and the data in the + * block pool. Then the kernel will update it for us if needed. + */ + assert(relocs->relocs[i].offset < pool->state.end); + const void *p = pool->map + relocs->relocs[i].offset; + + /* We're reading back the relocated value from potentially incoherent + * memory here. However, any change to the value will be from the kernel + * writing out relocations, which will keep the CPU cache up to date. + */ + relocs->relocs[i].presumed_offset = + read_reloc(pool->device, p) - relocs->relocs[i].delta; + + /* All of the relocations from this block pool to other BO's should + * have been emitted relative to the surface block pool center. We + * need to add the center offset to make them relative to the + * beginning of the actual GEM bo. + */ + relocs->relocs[i].offset += pool->center_bo_offset; + } +} + +static void +adjust_relocations_to_block_pool(struct anv_block_pool *pool, + struct anv_bo *from_bo, + struct anv_reloc_list *relocs, + uint32_t *last_pool_center_bo_offset) +{ + assert(*last_pool_center_bo_offset <= pool->center_bo_offset); + uint32_t delta = pool->center_bo_offset - *last_pool_center_bo_offset; + + /* When we initially emit relocations into a block pool, we don't + * actually know what the final center_bo_offset will be so we just emit + * it as if center_bo_offset == 0. Now that we know what the center + * offset is, we need to walk the list of relocations and adjust any + * relocations that point to the pool bo with the correct offset. + */ + for (size_t i = 0; i < relocs->num_relocs; i++) { + if (relocs->reloc_bos[i] == &pool->bo) { + /* Adjust the delta value in the relocation to correctly + * correspond to the new delta. Initially, this value may have + * been negative (if treated as unsigned), but we trust in + * uint32_t roll-over to fix that for us at this point. + */ + relocs->relocs[i].delta += delta; + + /* Since the delta has changed, we need to update the actual + * relocated value with the new presumed value. This function + * should only be called on batch buffers, so we know it isn't in + * use by the GPU at the moment. + */ + assert(relocs->relocs[i].offset < from_bo->size); + write_reloc(pool->device, from_bo->map + relocs->relocs[i].offset, + relocs->relocs[i].presumed_offset + + relocs->relocs[i].delta); + } + } + + *last_pool_center_bo_offset = pool->center_bo_offset; +} + +void +anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch *batch = &cmd_buffer->batch; + struct anv_block_pool *ss_pool = + &cmd_buffer->device->surface_state_block_pool; + + cmd_buffer->execbuf2.bo_count = 0; + cmd_buffer->execbuf2.need_reloc = false; + + adjust_relocations_from_block_pool(ss_pool, &cmd_buffer->surface_relocs); + anv_cmd_buffer_add_bo(cmd_buffer, &ss_pool->bo, &cmd_buffer->surface_relocs); + + /* First, we walk over all of the bos we've seen and add them and their + * relocations to the validate list. + */ + struct anv_batch_bo **bbo; + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) { + adjust_relocations_to_block_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs, + &(*bbo)->last_ss_pool_bo_offset); + + anv_cmd_buffer_add_bo(cmd_buffer, &(*bbo)->bo, &(*bbo)->relocs); + } + + struct anv_batch_bo *first_batch_bo = + list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link); + + /* The kernel requires that the last entry in the validation list be the + * batch buffer to execute. We can simply swap the element + * corresponding to the first batch_bo in the chain with the last + * element in the list. + */ + if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) { + uint32_t idx = first_batch_bo->bo.index; + uint32_t last_idx = cmd_buffer->execbuf2.bo_count - 1; + + struct drm_i915_gem_exec_object2 tmp_obj = + cmd_buffer->execbuf2.objects[idx]; + assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo); + + cmd_buffer->execbuf2.objects[idx] = cmd_buffer->execbuf2.objects[last_idx]; + cmd_buffer->execbuf2.bos[idx] = cmd_buffer->execbuf2.bos[last_idx]; + cmd_buffer->execbuf2.bos[idx]->index = idx; + + cmd_buffer->execbuf2.objects[last_idx] = tmp_obj; + cmd_buffer->execbuf2.bos[last_idx] = &first_batch_bo->bo; + first_batch_bo->bo.index = last_idx; + } + + /* Now we go through and fixup all of the relocation lists to point to + * the correct indices in the object array. We have to do this after we + * reorder the list above as some of the indices may have changed. + */ + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) + anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs); + + anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); + + if (!cmd_buffer->device->info.has_llc) { + __builtin_ia32_mfence(); + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) { + for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE) + __builtin_ia32_clflush((*bbo)->bo.map + i); + } + } + + cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) { + .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects, + .buffer_count = cmd_buffer->execbuf2.bo_count, + .batch_start_offset = 0, + .batch_len = batch->next - batch->start, + .cliprects_ptr = 0, + .num_cliprects = 0, + .DR1 = 0, + .DR4 = 0, + .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER | + I915_EXEC_CONSTANTS_REL_GENERAL, + .rsvd1 = cmd_buffer->device->context_id, + .rsvd2 = 0, + }; + + if (!cmd_buffer->execbuf2.need_reloc) + cmd_buffer->execbuf2.execbuf.flags |= I915_EXEC_NO_RELOC; +} diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c new file mode 100644 index 00000000000..a0f9bab8e1e --- /dev/null +++ b/src/vulkan/anv_cmd_buffer.c @@ -0,0 +1,1198 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +/** \file anv_cmd_buffer.c + * + * This file contains all of the stuff for emitting commands into a command + * buffer. This includes implementations of most of the vkCmd* + * entrypoints. This file is concerned entirely with state emission and + * not with the command buffer data structure itself. As far as this file + * is concerned, most of anv_cmd_buffer is magic. + */ + +/* TODO: These are taken from GLES. We should check the Vulkan spec */ +const struct anv_dynamic_state default_dynamic_state = { + .viewport = { + .count = 0, + }, + .scissor = { + .count = 0, + }, + .line_width = 1.0f, + .depth_bias = { + .bias = 0.0f, + .clamp = 0.0f, + .slope = 0.0f, + }, + .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f }, + .depth_bounds = { + .min = 0.0f, + .max = 1.0f, + }, + .stencil_compare_mask = { + .front = ~0u, + .back = ~0u, + }, + .stencil_write_mask = { + .front = ~0u, + .back = ~0u, + }, + .stencil_reference = { + .front = 0u, + .back = 0u, + }, +}; + +void +anv_dynamic_state_copy(struct anv_dynamic_state *dest, + const struct anv_dynamic_state *src, + uint32_t copy_mask) +{ + if (copy_mask & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { + dest->viewport.count = src->viewport.count; + typed_memcpy(dest->viewport.viewports, src->viewport.viewports, + src->viewport.count); + } + + if (copy_mask & (1 << VK_DYNAMIC_STATE_SCISSOR)) { + dest->scissor.count = src->scissor.count; + typed_memcpy(dest->scissor.scissors, src->scissor.scissors, + src->scissor.count); + } + + if (copy_mask & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) + dest->line_width = src->line_width; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) + dest->depth_bias = src->depth_bias; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) + typed_memcpy(dest->blend_constants, src->blend_constants, 4); + + if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) + dest->depth_bounds = src->depth_bounds; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) + dest->stencil_compare_mask = src->stencil_compare_mask; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) + dest->stencil_write_mask = src->stencil_write_mask; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) + dest->stencil_reference = src->stencil_reference; +} + +static void +anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_cmd_state *state = &cmd_buffer->state; + + memset(&state->descriptors, 0, sizeof(state->descriptors)); + memset(&state->push_constants, 0, sizeof(state->push_constants)); + memset(state->binding_tables, 0, sizeof(state->binding_tables)); + memset(state->samplers, 0, sizeof(state->samplers)); + + /* 0 isn't a valid config. This ensures that we always configure L3$. */ + cmd_buffer->state.current_l3_config = 0; + + state->dirty = ~0; + state->vb_dirty = 0; + state->descriptors_dirty = 0; + state->push_constants_dirty = 0; + state->pipeline = NULL; + state->restart_index = UINT32_MAX; + state->dynamic = default_dynamic_state; + state->need_query_wa = true; + + if (state->attachments != NULL) { + anv_free(&cmd_buffer->pool->alloc, state->attachments); + state->attachments = NULL; + } + + state->gen7.index_buffer = NULL; +} + +/** + * Setup anv_cmd_state::attachments for vkCmdBeginRenderPass. + */ +void +anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, + const VkRenderPassBeginInfo *info) +{ + struct anv_cmd_state *state = &cmd_buffer->state; + ANV_FROM_HANDLE(anv_render_pass, pass, info->renderPass); + + anv_free(&cmd_buffer->pool->alloc, state->attachments); + + if (pass->attachment_count == 0) { + state->attachments = NULL; + return; + } + + state->attachments = anv_alloc(&cmd_buffer->pool->alloc, + pass->attachment_count * + sizeof(state->attachments[0]), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (state->attachments == NULL) { + /* FIXME: Propagate VK_ERROR_OUT_OF_HOST_MEMORY to vkEndCommandBuffer */ + abort(); + } + + for (uint32_t i = 0; i < pass->attachment_count; ++i) { + struct anv_render_pass_attachment *att = &pass->attachments[i]; + VkImageAspectFlags clear_aspects = 0; + + if (anv_format_is_color(att->format)) { + /* color attachment */ + if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT; + } + } else { + /* depthstencil attachment */ + if (att->format->depth_format && + att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; + } + if (att->format->has_stencil && + att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; + } + } + + state->attachments[i].pending_clear_aspects = clear_aspects; + if (clear_aspects) { + assert(info->clearValueCount > i); + state->attachments[i].clear_value = info->pClearValues[i]; + } + } +} + +static VkResult +anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage, uint32_t size) +{ + struct anv_push_constants **ptr = &cmd_buffer->state.push_constants[stage]; + + if (*ptr == NULL) { + *ptr = anv_alloc(&cmd_buffer->pool->alloc, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (*ptr == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } else if ((*ptr)->size < size) { + *ptr = anv_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (*ptr == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + (*ptr)->size = size; + + return VK_SUCCESS; +} + +#define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \ + anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage, \ + (offsetof(struct anv_push_constants, field) + \ + sizeof(cmd_buffer->state.push_constants[0]->field))) + +static VkResult anv_create_cmd_buffer( + struct anv_device * device, + struct anv_cmd_pool * pool, + VkCommandBufferLevel level, + VkCommandBuffer* pCommandBuffer) +{ + struct anv_cmd_buffer *cmd_buffer; + VkResult result; + + cmd_buffer = anv_alloc(&pool->alloc, sizeof(*cmd_buffer), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (cmd_buffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + cmd_buffer->device = device; + cmd_buffer->pool = pool; + cmd_buffer->level = level; + cmd_buffer->state.attachments = NULL; + + result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); + if (result != VK_SUCCESS) + goto fail; + + anv_state_stream_init(&cmd_buffer->surface_state_stream, + &device->surface_state_block_pool); + anv_state_stream_init(&cmd_buffer->dynamic_state_stream, + &device->dynamic_state_block_pool); + + if (pool) { + list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); + } else { + /* Init the pool_link so we can safefly call list_del when we destroy + * the command buffer + */ + list_inithead(&cmd_buffer->pool_link); + } + + *pCommandBuffer = anv_cmd_buffer_to_handle(cmd_buffer); + + return VK_SUCCESS; + + fail: + anv_free(&cmd_buffer->pool->alloc, cmd_buffer); + + return result; +} + +VkResult anv_AllocateCommandBuffers( + VkDevice _device, + const VkCommandBufferAllocateInfo* pAllocateInfo, + VkCommandBuffer* pCommandBuffers) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_pool, pool, pAllocateInfo->commandPool); + + VkResult result = VK_SUCCESS; + uint32_t i; + + for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { + result = anv_create_cmd_buffer(device, pool, pAllocateInfo->level, + &pCommandBuffers[i]); + if (result != VK_SUCCESS) + break; + } + + if (result != VK_SUCCESS) + anv_FreeCommandBuffers(_device, pAllocateInfo->commandPool, + i, pCommandBuffers); + + return result; +} + +static void +anv_cmd_buffer_destroy(struct anv_cmd_buffer *cmd_buffer) +{ + list_del(&cmd_buffer->pool_link); + + anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer); + + anv_state_stream_finish(&cmd_buffer->surface_state_stream); + anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); + + anv_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); + anv_free(&cmd_buffer->pool->alloc, cmd_buffer); +} + +void anv_FreeCommandBuffers( + VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers) +{ + for (uint32_t i = 0; i < commandBufferCount; i++) { + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCommandBuffers[i]); + + anv_cmd_buffer_destroy(cmd_buffer); + } +} + +VkResult anv_ResetCommandBuffer( + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->usage_flags = 0; + cmd_buffer->state.current_pipeline = UINT32_MAX; + anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); + anv_cmd_state_reset(cmd_buffer); + + return VK_SUCCESS; +} + +void +anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) +{ + switch (cmd_buffer->device->info.gen) { + case 7: + if (cmd_buffer->device->info.is_haswell) + return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); + else + return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); + case 8: + return gen8_cmd_buffer_emit_state_base_address(cmd_buffer); + case 9: + return gen9_cmd_buffer_emit_state_base_address(cmd_buffer); + default: + unreachable("unsupported gen\n"); + } +} + +VkResult anv_BeginCommandBuffer( + VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo* pBeginInfo) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + /* If this is the first vkBeginCommandBuffer, we must *initialize* the + * command buffer's state. Otherwise, we must *reset* its state. In both + * cases we reset it. + * + * From the Vulkan 1.0 spec: + * + * If a command buffer is in the executable state and the command buffer + * was allocated from a command pool with the + * VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then + * vkBeginCommandBuffer implicitly resets the command buffer, behaving + * as if vkResetCommandBuffer had been called with + * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts + * the command buffer in the recording state. + */ + anv_ResetCommandBuffer(commandBuffer, /*flags*/ 0); + + cmd_buffer->usage_flags = pBeginInfo->flags; + + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY || + !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)); + + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + if (cmd_buffer->usage_flags & + VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { + cmd_buffer->state.framebuffer = + anv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer); + cmd_buffer->state.pass = + anv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass); + + struct anv_subpass *subpass = + &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass]; + + anv_cmd_buffer_set_subpass(cmd_buffer, subpass); + } + + return VK_SUCCESS; +} + +VkResult anv_EndCommandBuffer( + VkCommandBuffer commandBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_device *device = cmd_buffer->device; + + anv_cmd_buffer_end_batch_buffer(cmd_buffer); + + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { + /* The algorithm used to compute the validate list is not threadsafe as + * it uses the bo->index field. We have to lock the device around it. + * Fortunately, the chances for contention here are probably very low. + */ + pthread_mutex_lock(&device->mutex); + anv_cmd_buffer_prepare_execbuf(cmd_buffer); + pthread_mutex_unlock(&device->mutex); + } + + return VK_SUCCESS; +} + +void anv_CmdBindPipeline( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipeline _pipeline) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + + switch (pipelineBindPoint) { + case VK_PIPELINE_BIND_POINT_COMPUTE: + cmd_buffer->state.compute_pipeline = pipeline; + cmd_buffer->state.compute_dirty |= ANV_CMD_DIRTY_PIPELINE; + cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; + break; + + case VK_PIPELINE_BIND_POINT_GRAPHICS: + cmd_buffer->state.pipeline = pipeline; + cmd_buffer->state.vb_dirty |= pipeline->vb_used; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE; + cmd_buffer->state.push_constants_dirty |= pipeline->active_stages; + + /* Apply the dynamic state from the pipeline */ + cmd_buffer->state.dirty |= pipeline->dynamic_state_mask; + anv_dynamic_state_copy(&cmd_buffer->state.dynamic, + &pipeline->dynamic_state, + pipeline->dynamic_state_mask); + break; + + default: + assert(!"invalid bind point"); + break; + } +} + +void anv_CmdSetViewport( + VkCommandBuffer commandBuffer, + uint32_t firstViewport, + uint32_t viewportCount, + const VkViewport* pViewports) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + const uint32_t total_count = firstViewport + viewportCount; + if (cmd_buffer->state.dynamic.viewport.count < total_count); + cmd_buffer->state.dynamic.viewport.count = total_count; + + memcpy(cmd_buffer->state.dynamic.viewport.viewports + firstViewport, + pViewports, viewportCount * sizeof(*pViewports)); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_VIEWPORT; +} + +void anv_CmdSetScissor( + VkCommandBuffer commandBuffer, + uint32_t firstScissor, + uint32_t scissorCount, + const VkRect2D* pScissors) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + const uint32_t total_count = firstScissor + scissorCount; + if (cmd_buffer->state.dynamic.scissor.count < total_count); + cmd_buffer->state.dynamic.scissor.count = total_count; + + memcpy(cmd_buffer->state.dynamic.scissor.scissors + firstScissor, + pScissors, scissorCount * sizeof(*pScissors)); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_SCISSOR; +} + +void anv_CmdSetLineWidth( + VkCommandBuffer commandBuffer, + float lineWidth) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.dynamic.line_width = lineWidth; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH; +} + +void anv_CmdSetDepthBias( + VkCommandBuffer commandBuffer, + float depthBiasConstantFactor, + float depthBiasClamp, + float depthBiasSlopeFactor) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.dynamic.depth_bias.bias = depthBiasConstantFactor; + cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp; + cmd_buffer->state.dynamic.depth_bias.slope = depthBiasSlopeFactor; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; +} + +void anv_CmdSetBlendConstants( + VkCommandBuffer commandBuffer, + const float blendConstants[4]) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + memcpy(cmd_buffer->state.dynamic.blend_constants, + blendConstants, sizeof(float) * 4); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; +} + +void anv_CmdSetDepthBounds( + VkCommandBuffer commandBuffer, + float minDepthBounds, + float maxDepthBounds) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds; + cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; +} + +void anv_CmdSetStencilCompareMask( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t compareMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; +} + +void anv_CmdSetStencilWriteMask( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t writeMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; +} + +void anv_CmdSetStencilReference( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t reference) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd_buffer->state.dynamic.stencil_reference.front = reference; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd_buffer->state.dynamic.stencil_reference.back = reference; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; +} + +void anv_CmdBindDescriptorSets( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, + uint32_t firstSet, + uint32_t descriptorSetCount, + const VkDescriptorSet* pDescriptorSets, + uint32_t dynamicOffsetCount, + const uint32_t* pDynamicOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); + struct anv_descriptor_set_layout *set_layout; + + assert(firstSet + descriptorSetCount < MAX_SETS); + + uint32_t dynamic_slot = 0; + for (uint32_t i = 0; i < descriptorSetCount; i++) { + ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); + set_layout = layout->set[firstSet + i].layout; + + if (cmd_buffer->state.descriptors[firstSet + i] != set) { + cmd_buffer->state.descriptors[firstSet + i] = set; + cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; + } + + if (set_layout->dynamic_offset_count > 0) { + anv_foreach_stage(s, set_layout->shader_stages) { + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, dynamic); + + struct anv_push_constants *push = + cmd_buffer->state.push_constants[s]; + + unsigned d = layout->set[firstSet + i].dynamic_offset_start; + const uint32_t *offsets = pDynamicOffsets + dynamic_slot; + struct anv_descriptor *desc = set->descriptors; + + for (unsigned b = 0; b < set_layout->binding_count; b++) { + if (set_layout->binding[b].dynamic_offset_index < 0) + continue; + + unsigned array_size = set_layout->binding[b].array_size; + for (unsigned j = 0; j < array_size; j++) { + uint32_t range = 0; + if (desc->buffer_view) + range = desc->buffer_view->range; + push->dynamic[d].offset = *(offsets++); + push->dynamic[d].range = range; + desc++; + d++; + } + } + } + cmd_buffer->state.push_constants_dirty |= set_layout->shader_stages; + } + } +} + +void anv_CmdBindVertexBuffers( + VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings; + + /* We have to defer setting up vertex buffer since we need the buffer + * stride from the pipeline. */ + + assert(firstBinding + bindingCount < MAX_VBS); + for (uint32_t i = 0; i < bindingCount; i++) { + vb[firstBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); + vb[firstBinding + i].offset = pOffsets[i]; + cmd_buffer->state.vb_dirty |= 1 << (firstBinding + i); + } +} + +static void +add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer, + struct anv_state state, struct anv_bo *bo, uint32_t offset) +{ + /* The address goes in SURFACE_STATE dword 1 for gens < 8 and dwords 8 and + * 9 for gen8+. We only write the first dword for gen8+ here and rely on + * the initial state to set the high bits to 0. */ + + const uint32_t dword = cmd_buffer->device->info.gen < 8 ? 1 : 8; + + anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc, + state.offset + dword * 4, bo, offset); +} + +const struct anv_format * +anv_format_for_descriptor_type(VkDescriptorType type) +{ + switch (type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + return anv_format_for_vk_format(VK_FORMAT_R32G32B32A32_SFLOAT); + + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + return anv_format_for_vk_format(VK_FORMAT_UNDEFINED); + + default: + unreachable("Invalid descriptor type"); + } +} + +VkResult +anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage, + struct anv_state *bt_state) +{ + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_subpass *subpass = cmd_buffer->state.subpass; + struct anv_pipeline_layout *layout; + uint32_t color_count, bias, state_offset; + + switch (stage) { + case MESA_SHADER_FRAGMENT: + layout = cmd_buffer->state.pipeline->layout; + bias = MAX_RTS; + color_count = subpass->color_count; + break; + case MESA_SHADER_COMPUTE: + layout = cmd_buffer->state.compute_pipeline->layout; + bias = 1; + color_count = 0; + break; + default: + layout = cmd_buffer->state.pipeline->layout; + bias = 0; + color_count = 0; + break; + } + + /* This is a little awkward: layout can be NULL but we still have to + * allocate and set a binding table for the PS stage for render + * targets. */ + uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0; + + if (color_count + surface_count == 0) { + *bt_state = (struct anv_state) { 0, }; + return VK_SUCCESS; + } + + *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, + bias + surface_count, + &state_offset); + uint32_t *bt_map = bt_state->map; + + if (bt_state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + for (uint32_t a = 0; a < color_count; a++) { + const struct anv_image_view *iview = + fb->attachments[subpass->color_attachments[a]]; + + assert(iview->color_rt_surface_state.alloc_size); + bt_map[a] = iview->color_rt_surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state, + iview->bo, iview->offset); + } + + if (stage == MESA_SHADER_COMPUTE && + cmd_buffer->state.compute_pipeline->cs_prog_data.uses_num_work_groups) { + struct anv_bo *bo = cmd_buffer->state.num_workgroups_bo; + uint32_t bo_offset = cmd_buffer->state.num_workgroups_offset; + + struct anv_state surface_state; + surface_state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer); + + const struct anv_format *format = + anv_format_for_descriptor_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + anv_fill_buffer_surface_state(cmd_buffer->device, surface_state, + format->surface_format, bo_offset, 12, 1); + + bt_map[0] = surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); + } + + if (layout == NULL) + goto out; + + if (layout->stage[stage].image_count > 0) { + VkResult result = + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images); + if (result != VK_SUCCESS) + return result; + + cmd_buffer->state.push_constants_dirty |= 1 << stage; + } + + uint32_t image = 0; + for (uint32_t s = 0; s < layout->stage[stage].surface_count; s++) { + struct anv_pipeline_binding *binding = + &layout->stage[stage].surface_to_descriptor[s]; + struct anv_descriptor_set *set = + cmd_buffer->state.descriptors[binding->set]; + struct anv_descriptor *desc = &set->descriptors[binding->offset]; + + struct anv_state surface_state; + struct anv_bo *bo; + uint32_t bo_offset; + + switch (desc->type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + /* Nothing for us to do here */ + continue; + + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + surface_state = desc->image_view->sampler_surface_state; + assert(surface_state.alloc_size); + bo = desc->image_view->bo; + bo_offset = desc->image_view->offset; + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: { + surface_state = desc->image_view->storage_surface_state; + assert(surface_state.alloc_size); + bo = desc->image_view->bo; + bo_offset = desc->image_view->offset; + + struct brw_image_param *image_param = + &cmd_buffer->state.push_constants[stage]->images[image++]; + + anv_image_view_fill_image_param(cmd_buffer->device, desc->image_view, + image_param); + image_param->surface_idx = bias + s; + break; + } + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + surface_state = desc->buffer_view->surface_state; + assert(surface_state.alloc_size); + bo = desc->buffer_view->bo; + bo_offset = desc->buffer_view->offset; + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + surface_state = desc->buffer_view->storage_surface_state; + assert(surface_state.alloc_size); + bo = desc->buffer_view->bo; + bo_offset = desc->buffer_view->offset; + + struct brw_image_param *image_param = + &cmd_buffer->state.push_constants[stage]->images[image++]; + + anv_buffer_view_fill_image_param(cmd_buffer->device, desc->buffer_view, + image_param); + image_param->surface_idx = bias + s; + break; + + default: + assert(!"Invalid descriptor type"); + continue; + } + + bt_map[bias + s] = surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); + } + assert(image == layout->stage[stage].image_count); + + out: + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(*bt_state); + + return VK_SUCCESS; +} + +VkResult +anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage, struct anv_state *state) +{ + struct anv_pipeline_layout *layout; + uint32_t sampler_count; + + if (stage == MESA_SHADER_COMPUTE) + layout = cmd_buffer->state.compute_pipeline->layout; + else + layout = cmd_buffer->state.pipeline->layout; + + sampler_count = layout ? layout->stage[stage].sampler_count : 0; + if (sampler_count == 0) { + *state = (struct anv_state) { 0, }; + return VK_SUCCESS; + } + + uint32_t size = sampler_count * 16; + *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32); + + if (state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + for (uint32_t s = 0; s < layout->stage[stage].sampler_count; s++) { + struct anv_pipeline_binding *binding = + &layout->stage[stage].sampler_to_descriptor[s]; + struct anv_descriptor_set *set = + cmd_buffer->state.descriptors[binding->set]; + struct anv_descriptor *desc = &set->descriptors[binding->offset]; + + if (desc->type != VK_DESCRIPTOR_TYPE_SAMPLER && + desc->type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + continue; + + struct anv_sampler *sampler = desc->sampler; + + /* This can happen if we have an unfilled slot since TYPE_SAMPLER + * happens to be zero. + */ + if (sampler == NULL) + continue; + + memcpy(state->map + (s * 16), + sampler->state, sizeof(sampler->state)); + } + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(*state); + + return VK_SUCCESS; +} + +struct anv_state +anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, + const void *data, uint32_t size, uint32_t alignment) +{ + struct anv_state state; + + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment); + memcpy(state.map, data, size); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size)); + + return state; +} + +struct anv_state +anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t *b, + uint32_t dwords, uint32_t alignment) +{ + struct anv_state state; + uint32_t *p; + + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + dwords * 4, alignment); + p = state.map; + for (uint32_t i = 0; i < dwords; i++) + p[i] = a[i] | b[i]; + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); + + return state; +} + +/** + * @brief Setup the command buffer for recording commands inside the given + * subpass. + * + * This does not record all commands needed for starting the subpass. + * Starting the subpass may require additional commands. + * + * Note that vkCmdBeginRenderPass, vkCmdNextSubpass, and vkBeginCommandBuffer + * with VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT, all setup the + * command buffer for recording commands for some subpass. But only the first + * two, vkCmdBeginRenderPass and vkCmdNextSubpass, can start a subpass. + */ +void +anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) +{ + switch (cmd_buffer->device->info.gen) { + case 7: + gen7_cmd_buffer_set_subpass(cmd_buffer, subpass); + break; + case 8: + gen8_cmd_buffer_set_subpass(cmd_buffer, subpass); + break; + case 9: + gen9_cmd_buffer_set_subpass(cmd_buffer, subpass); + break; + default: + unreachable("unsupported gen\n"); + } +} + +struct anv_state +anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage) +{ + struct anv_push_constants *data = + cmd_buffer->state.push_constants[stage]; + struct brw_stage_prog_data *prog_data = + cmd_buffer->state.pipeline->prog_data[stage]; + + /* If we don't actually have any push constants, bail. */ + if (data == NULL || prog_data->nr_params == 0) + return (struct anv_state) { .offset = 0 }; + + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + prog_data->nr_params * sizeof(float), + 32 /* bottom 5 bits MBZ */); + + /* Walk through the param array and fill the buffer with data */ + uint32_t *u32_map = state.map; + for (unsigned i = 0; i < prog_data->nr_params; i++) { + uint32_t offset = (uintptr_t)prog_data->param[i]; + u32_map[i] = *(uint32_t *)((uint8_t *)data + offset); + } + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + return state; +} + +struct anv_state +anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_push_constants *data = + cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + const unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; + const unsigned push_constant_data_size = + (local_id_dwords + prog_data->nr_params) * 4; + const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); + const unsigned param_aligned_count = + reg_aligned_constant_size / sizeof(uint32_t); + + /* If we don't actually have any push constants, bail. */ + if (reg_aligned_constant_size == 0) + return (struct anv_state) { .offset = 0 }; + + const unsigned threads = pipeline->cs_thread_width_max; + const unsigned total_push_constants_size = + reg_aligned_constant_size * threads; + const unsigned push_constant_alignment = + cmd_buffer->device->info.gen < 8 ? 32 : 64; + const unsigned aligned_total_push_constants_size = + ALIGN(total_push_constants_size, push_constant_alignment); + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + aligned_total_push_constants_size, + push_constant_alignment); + + /* Walk through the param array and fill the buffer with data */ + uint32_t *u32_map = state.map; + + brw_cs_fill_local_id_payload(cs_prog_data, u32_map, threads, + reg_aligned_constant_size); + + /* Setup uniform data for the first thread */ + for (unsigned i = 0; i < prog_data->nr_params; i++) { + uint32_t offset = (uintptr_t)prog_data->param[i]; + u32_map[local_id_dwords + i] = *(uint32_t *)((uint8_t *)data + offset); + } + + /* Copy uniform data from the first thread to every other thread */ + const size_t uniform_data_size = prog_data->nr_params * sizeof(uint32_t); + for (unsigned t = 1; t < threads; t++) { + memcpy(&u32_map[t * param_aligned_count + local_id_dwords], + &u32_map[local_id_dwords], + uniform_data_size); + } + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + return state; +} + +void anv_CmdPushConstants( + VkCommandBuffer commandBuffer, + VkPipelineLayout layout, + VkShaderStageFlags stageFlags, + uint32_t offset, + uint32_t size, + const void* pValues) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + anv_foreach_stage(stage, stageFlags) { + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, client_data); + + memcpy(cmd_buffer->state.push_constants[stage]->client_data + offset, + pValues, size); + } + + cmd_buffer->state.push_constants_dirty |= stageFlags; +} + +void anv_CmdExecuteCommands( + VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer* pCmdBuffers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, primary, commandBuffer); + + assert(primary->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + + for (uint32_t i = 0; i < commandBufferCount; i++) { + ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); + + assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); + + anv_cmd_buffer_add_secondary(primary, secondary); + } +} + +VkResult anv_CreateCommandPool( + VkDevice _device, + const VkCommandPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkCommandPool* pCmdPool) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_cmd_pool *pool; + + pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pool == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + if (pAllocator) + pool->alloc = *pAllocator; + else + pool->alloc = device->alloc; + + list_inithead(&pool->cmd_buffers); + + *pCmdPool = anv_cmd_pool_to_handle(pool); + + return VK_SUCCESS; +} + +void anv_DestroyCommandPool( + VkDevice _device, + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); + + anv_ResetCommandPool(_device, commandPool, 0); + + anv_free2(&device->alloc, pAllocator, pool); +} + +VkResult anv_ResetCommandPool( + VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); + + /* FIXME: vkResetCommandPool must not destroy its command buffers. The + * Vulkan 1.0 spec requires that it only reset them: + * + * Resetting a command pool recycles all of the resources from all of + * the command buffers allocated from the command pool back to the + * command pool. All command buffers that have been allocated from the + * command pool are put in the initial state. + */ + list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, + &pool->cmd_buffers, pool_link) { + anv_cmd_buffer_destroy(cmd_buffer); + } + + return VK_SUCCESS; +} + +/** + * Return NULL if the current subpass has no depthstencil attachment. + */ +const struct anv_image_view * +anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) +{ + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + + if (subpass->depth_stencil_attachment == VK_ATTACHMENT_UNUSED) + return NULL; + + const struct anv_image_view *iview = + fb->attachments[subpass->depth_stencil_attachment]; + + assert(iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + + return iview; +} diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c new file mode 100644 index 00000000000..0ddd4bf1bc2 --- /dev/null +++ b/src/vulkan/anv_descriptor_set.c @@ -0,0 +1,604 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +/* + * Descriptor set layouts. + */ + +VkResult anv_CreateDescriptorSetLayout( + VkDevice _device, + const VkDescriptorSetLayoutCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDescriptorSetLayout* pSetLayout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_descriptor_set_layout *set_layout; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); + + uint32_t max_binding = 0; + uint32_t immutable_sampler_count = 0; + for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { + max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding); + if (pCreateInfo->pBindings[j].pImmutableSamplers) + immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount; + } + + size_t size = sizeof(struct anv_descriptor_set_layout) + + (max_binding + 1) * sizeof(set_layout->binding[0]) + + immutable_sampler_count * sizeof(struct anv_sampler *); + + set_layout = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!set_layout) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* We just allocate all the samplers at the end of the struct */ + struct anv_sampler **samplers = + (struct anv_sampler **)&set_layout->binding[max_binding + 1]; + + set_layout->binding_count = max_binding + 1; + set_layout->shader_stages = 0; + set_layout->size = 0; + + for (uint32_t b = 0; b <= max_binding; b++) { + /* Initialize all binding_layout entries to -1 */ + memset(&set_layout->binding[b], -1, sizeof(set_layout->binding[b])); + + set_layout->binding[b].immutable_samplers = NULL; + } + + /* Initialize all samplers to 0 */ + memset(samplers, 0, immutable_sampler_count * sizeof(*samplers)); + + uint32_t sampler_count[MESA_SHADER_STAGES] = { 0, }; + uint32_t surface_count[MESA_SHADER_STAGES] = { 0, }; + uint32_t image_count[MESA_SHADER_STAGES] = { 0, }; + uint32_t buffer_count = 0; + uint32_t dynamic_offset_count = 0; + + for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { + const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j]; + uint32_t b = binding->binding; + + assert(binding->descriptorCount > 0); + set_layout->binding[b].array_size = binding->descriptorCount; + set_layout->binding[b].descriptor_index = set_layout->size; + set_layout->size += binding->descriptorCount; + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + anv_foreach_stage(s, binding->stageFlags) { + set_layout->binding[b].stage[s].sampler_index = sampler_count[s]; + sampler_count[s] += binding->descriptorCount; + } + break; + default: + break; + } + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + set_layout->binding[b].buffer_index = buffer_count; + buffer_count += binding->descriptorCount; + /* fall through */ + + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + anv_foreach_stage(s, binding->stageFlags) { + set_layout->binding[b].stage[s].surface_index = surface_count[s]; + surface_count[s] += binding->descriptorCount; + } + break; + default: + break; + } + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + set_layout->binding[b].dynamic_offset_index = dynamic_offset_count; + dynamic_offset_count += binding->descriptorCount; + break; + default: + break; + } + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + anv_foreach_stage(s, binding->stageFlags) { + set_layout->binding[b].stage[s].image_index = image_count[s]; + image_count[s] += binding->descriptorCount; + } + break; + default: + break; + } + + if (binding->pImmutableSamplers) { + set_layout->binding[b].immutable_samplers = samplers; + samplers += binding->descriptorCount; + + for (uint32_t i = 0; i < binding->descriptorCount; i++) + set_layout->binding[b].immutable_samplers[i] = + anv_sampler_from_handle(binding->pImmutableSamplers[i]); + } else { + set_layout->binding[b].immutable_samplers = NULL; + } + + set_layout->shader_stages |= binding->stageFlags; + } + + set_layout->buffer_count = buffer_count; + set_layout->dynamic_offset_count = dynamic_offset_count; + + *pSetLayout = anv_descriptor_set_layout_to_handle(set_layout); + + return VK_SUCCESS; +} + +void anv_DestroyDescriptorSetLayout( + VkDevice _device, + VkDescriptorSetLayout _set_layout, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout); + + anv_free2(&device->alloc, pAllocator, set_layout); +} + +/* + * Pipeline layouts. These have nothing to do with the pipeline. They are + * just muttiple descriptor set layouts pasted together + */ + +VkResult anv_CreatePipelineLayout( + VkDevice _device, + const VkPipelineLayoutCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipelineLayout* pPipelineLayout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline_layout l, *layout; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); + + l.num_sets = pCreateInfo->setLayoutCount; + + unsigned dynamic_offset_count = 0; + + memset(l.stage, 0, sizeof(l.stage)); + for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) { + ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, + pCreateInfo->pSetLayouts[set]); + l.set[set].layout = set_layout; + + l.set[set].dynamic_offset_start = dynamic_offset_count; + for (uint32_t b = 0; b < set_layout->binding_count; b++) { + if (set_layout->binding[b].dynamic_offset_index >= 0) + dynamic_offset_count += set_layout->binding[b].array_size; + } + + for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) { + l.set[set].stage[s].surface_start = l.stage[s].surface_count; + l.set[set].stage[s].sampler_start = l.stage[s].sampler_count; + l.set[set].stage[s].image_start = l.stage[s].image_count; + + for (uint32_t b = 0; b < set_layout->binding_count; b++) { + unsigned array_size = set_layout->binding[b].array_size; + + if (set_layout->binding[b].stage[s].surface_index >= 0) { + l.stage[s].surface_count += array_size; + + if (set_layout->binding[b].dynamic_offset_index >= 0) + l.stage[s].has_dynamic_offsets = true; + } + + if (set_layout->binding[b].stage[s].sampler_index >= 0) + l.stage[s].sampler_count += array_size; + + if (set_layout->binding[b].stage[s].image_index >= 0) + l.stage[s].image_count += array_size; + } + } + } + + unsigned num_bindings = 0; + for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) { + num_bindings += l.stage[s].surface_count + + l.stage[s].sampler_count + + l.stage[s].image_count; + } + + size_t size = sizeof(*layout) + num_bindings * sizeof(layout->entries[0]); + + layout = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (layout == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* Now we can actually build our surface and sampler maps */ + struct anv_pipeline_binding *entry = layout->entries; + for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) { + l.stage[s].surface_to_descriptor = entry; + entry += l.stage[s].surface_count; + l.stage[s].sampler_to_descriptor = entry; + entry += l.stage[s].sampler_count; + entry += l.stage[s].image_count; + + int surface = 0; + int sampler = 0; + for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) { + struct anv_descriptor_set_layout *set_layout = l.set[set].layout; + + for (uint32_t b = 0; b < set_layout->binding_count; b++) { + unsigned array_size = set_layout->binding[b].array_size; + unsigned set_offset = set_layout->binding[b].descriptor_index; + + if (set_layout->binding[b].stage[s].surface_index >= 0) { + assert(surface == l.set[set].stage[s].surface_start + + set_layout->binding[b].stage[s].surface_index); + for (unsigned i = 0; i < array_size; i++) { + l.stage[s].surface_to_descriptor[surface + i].set = set; + l.stage[s].surface_to_descriptor[surface + i].offset = set_offset + i; + } + surface += array_size; + } + + if (set_layout->binding[b].stage[s].sampler_index >= 0) { + assert(sampler == l.set[set].stage[s].sampler_start + + set_layout->binding[b].stage[s].sampler_index); + for (unsigned i = 0; i < array_size; i++) { + l.stage[s].sampler_to_descriptor[sampler + i].set = set; + l.stage[s].sampler_to_descriptor[sampler + i].offset = set_offset + i; + } + sampler += array_size; + } + } + } + } + + /* Finally, we're done setting it up, copy into the allocated version */ + *layout = l; + + *pPipelineLayout = anv_pipeline_layout_to_handle(layout); + + return VK_SUCCESS; +} + +void anv_DestroyPipelineLayout( + VkDevice _device, + VkPipelineLayout _pipelineLayout, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout); + + anv_free2(&device->alloc, pAllocator, pipeline_layout); +} + +/* + * Descriptor pools. These are a no-op for now. + */ + +VkResult anv_CreateDescriptorPool( + VkDevice device, + const VkDescriptorPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDescriptorPool* pDescriptorPool) +{ + anv_finishme("VkDescriptorPool is a stub"); + *pDescriptorPool = (VkDescriptorPool)1; + return VK_SUCCESS; +} + +void anv_DestroyDescriptorPool( + VkDevice _device, + VkDescriptorPool _pool, + const VkAllocationCallbacks* pAllocator) +{ + anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); +} + +VkResult anv_ResetDescriptorPool( + VkDevice device, + VkDescriptorPool descriptorPool, + VkDescriptorPoolResetFlags flags) +{ + anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); + return VK_SUCCESS; +} + +VkResult +anv_descriptor_set_create(struct anv_device *device, + const struct anv_descriptor_set_layout *layout, + struct anv_descriptor_set **out_set) +{ + struct anv_descriptor_set *set; + size_t size = sizeof(*set) + layout->size * sizeof(set->descriptors[0]); + + set = anv_alloc(&device->alloc /* XXX: Use the pool */, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!set) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* A descriptor set may not be 100% filled. Clear the set so we can can + * later detect holes in it. + */ + memset(set, 0, size); + + set->layout = layout; + + /* Go through and fill out immutable samplers if we have any */ + struct anv_descriptor *desc = set->descriptors; + for (uint32_t b = 0; b < layout->binding_count; b++) { + if (layout->binding[b].immutable_samplers) { + for (uint32_t i = 0; i < layout->binding[b].array_size; i++) + desc[i].sampler = layout->binding[b].immutable_samplers[i]; + } + desc += layout->binding[b].array_size; + } + + /* XXX: Use the pool */ + set->buffer_views = + anv_alloc(&device->alloc, + sizeof(set->buffer_views[0]) * layout->buffer_count, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!set->buffer_views) { + anv_free(&device->alloc, set); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + for (uint32_t b = 0; b < layout->buffer_count; b++) { + set->buffer_views[b].surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } + set->buffer_count = layout->buffer_count; + *out_set = set; + + return VK_SUCCESS; +} + +void +anv_descriptor_set_destroy(struct anv_device *device, + struct anv_descriptor_set *set) +{ + /* XXX: Use the pool */ + for (uint32_t b = 0; b < set->buffer_count; b++) + anv_state_pool_free(&device->surface_state_pool, + set->buffer_views[b].surface_state); + + anv_free(&device->alloc, set->buffer_views); + anv_free(&device->alloc, set); +} + +VkResult anv_AllocateDescriptorSets( + VkDevice _device, + const VkDescriptorSetAllocateInfo* pAllocateInfo, + VkDescriptorSet* pDescriptorSets) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + VkResult result = VK_SUCCESS; + struct anv_descriptor_set *set; + uint32_t i; + + for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { + ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, + pAllocateInfo->pSetLayouts[i]); + + result = anv_descriptor_set_create(device, layout, &set); + if (result != VK_SUCCESS) + break; + + pDescriptorSets[i] = anv_descriptor_set_to_handle(set); + } + + if (result != VK_SUCCESS) + anv_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool, + i, pDescriptorSets); + + return result; +} + +VkResult anv_FreeDescriptorSets( + VkDevice _device, + VkDescriptorPool descriptorPool, + uint32_t count, + const VkDescriptorSet* pDescriptorSets) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + for (uint32_t i = 0; i < count; i++) { + ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); + + anv_descriptor_set_destroy(device, set); + } + + return VK_SUCCESS; +} + +void anv_UpdateDescriptorSets( + VkDevice _device, + uint32_t descriptorWriteCount, + const VkWriteDescriptorSet* pDescriptorWrites, + uint32_t descriptorCopyCount, + const VkCopyDescriptorSet* pDescriptorCopies) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + for (uint32_t i = 0; i < descriptorWriteCount; i++) { + const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; + ANV_FROM_HANDLE(anv_descriptor_set, set, write->dstSet); + const struct anv_descriptor_set_binding_layout *bind_layout = + &set->layout->binding[write->dstBinding]; + struct anv_descriptor *desc = + &set->descriptors[bind_layout->descriptor_index]; + desc += write->dstArrayElement; + + switch (write->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + ANV_FROM_HANDLE(anv_sampler, sampler, + write->pImageInfo[j].sampler); + + desc[j] = (struct anv_descriptor) { + .type = VK_DESCRIPTOR_TYPE_SAMPLER, + .sampler = sampler, + }; + } + break; + + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + ANV_FROM_HANDLE(anv_image_view, iview, + write->pImageInfo[j].imageView); + ANV_FROM_HANDLE(anv_sampler, sampler, + write->pImageInfo[j].sampler); + + desc[j].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + desc[j].image_view = iview; + + /* If this descriptor has an immutable sampler, we don't want + * to stomp on it. + */ + if (sampler) + desc[j].sampler = sampler; + } + break; + + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + ANV_FROM_HANDLE(anv_image_view, iview, + write->pImageInfo[j].imageView); + + desc[j] = (struct anv_descriptor) { + .type = write->descriptorType, + .image_view = iview, + }; + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + ANV_FROM_HANDLE(anv_buffer_view, bview, + write->pTexelBufferView[j]); + + desc[j] = (struct anv_descriptor) { + .type = write->descriptorType, + .buffer_view = bview, + }; + } + break; + + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + anv_finishme("input attachments not implemented"); + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + assert(write->pBufferInfo[j].buffer); + ANV_FROM_HANDLE(anv_buffer, buffer, write->pBufferInfo[j].buffer); + assert(buffer); + + struct anv_buffer_view *view = + &set->buffer_views[bind_layout->buffer_index]; + view += write->dstArrayElement + j; + + const struct anv_format *format = + anv_format_for_descriptor_type(write->descriptorType); + + view->format = format->surface_format; + view->bo = buffer->bo; + view->offset = buffer->offset + write->pBufferInfo[j].offset; + + /* For buffers with dynamic offsets, we use the full possible + * range in the surface state and do the actual range-checking + * in the shader. + */ + if (bind_layout->dynamic_offset_index >= 0 || + write->pBufferInfo[j].range == VK_WHOLE_SIZE) + view->range = buffer->size - write->pBufferInfo[j].offset; + else + view->range = write->pBufferInfo[j].range; + + anv_fill_buffer_surface_state(device, view->surface_state, + view->format, + view->offset, view->range, 1); + + desc[j] = (struct anv_descriptor) { + .type = write->descriptorType, + .buffer_view = view, + }; + + } + + default: + break; + } + } + + for (uint32_t i = 0; i < descriptorCopyCount; i++) { + const VkCopyDescriptorSet *copy = &pDescriptorCopies[i]; + ANV_FROM_HANDLE(anv_descriptor_set, src, copy->dstSet); + ANV_FROM_HANDLE(anv_descriptor_set, dst, copy->dstSet); + + const struct anv_descriptor_set_binding_layout *src_layout = + &src->layout->binding[copy->srcBinding]; + struct anv_descriptor *src_desc = + &src->descriptors[src_layout->descriptor_index]; + src_desc += copy->srcArrayElement; + + const struct anv_descriptor_set_binding_layout *dst_layout = + &dst->layout->binding[copy->dstBinding]; + struct anv_descriptor *dst_desc = + &dst->descriptors[dst_layout->descriptor_index]; + dst_desc += copy->dstArrayElement; + + for (uint32_t j = 0; j < copy->descriptorCount; j++) + dst_desc[j] = src_desc[j]; + } +} diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c new file mode 100644 index 00000000000..c39c506c78f --- /dev/null +++ b/src/vulkan/anv_device.c @@ -0,0 +1,1732 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" +#include "mesa/main/git_sha1.h" +#include "util/strtod.h" +#include "util/debug.h" + +#include "gen7_pack.h" + +struct anv_dispatch_table dtable; + +static void +compiler_debug_log(void *data, const char *fmt, ...) +{ } + +static void +compiler_perf_log(void *data, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + + if (unlikely(INTEL_DEBUG & DEBUG_PERF)) + vfprintf(stderr, fmt, args); + + va_end(args); +} + +static VkResult +anv_physical_device_init(struct anv_physical_device *device, + struct anv_instance *instance, + const char *path) +{ + VkResult result; + int fd; + + fd = open(path, O_RDWR | O_CLOEXEC); + if (fd < 0) + return vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to open %s: %m", path); + + device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + device->instance = instance; + device->path = path; + + device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID); + if (!device->chipset_id) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to get chipset id: %m"); + goto fail; + } + + device->name = brw_get_device_name(device->chipset_id); + device->info = brw_get_device_info(device->chipset_id); + if (!device->info) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to get device info"); + goto fail; + } + + if (device->info->is_haswell) { + fprintf(stderr, "WARNING: Haswell Vulkan support is incomplete\n"); + } else if (device->info->gen == 7 && !device->info->is_baytrail) { + fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n"); + } else if (device->info->gen == 7 && device->info->is_baytrail) { + fprintf(stderr, "WARNING: Bay Trail Vulkan support is incomplete\n"); + } else if (device->info->gen >= 8) { + /* Broadwell, Cherryview, Skylake, Broxton, Kabylake is as fully + * supported as anything */ + } else { + result = vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, + "Vulkan not yet supported on %s", device->name); + goto fail; + } + + if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to get aperture size: %m"); + goto fail; + } + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "kernel missing gem wait"); + goto fail; + } + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "kernel missing execbuf2"); + goto fail; + } + + if (!device->info->has_llc && + anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "kernel missing wc mmap"); + goto fail; + } + + bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X); + + close(fd); + + brw_process_intel_debug_variable(); + + device->compiler = brw_compiler_create(NULL, device->info); + if (device->compiler == NULL) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + device->compiler->shader_debug_log = compiler_debug_log; + device->compiler->shader_perf_log = compiler_perf_log; + + /* XXX: Actually detect bit6 swizzling */ + isl_device_init(&device->isl_dev, device->info, swizzled); + + return VK_SUCCESS; + +fail: + close(fd); + return result; +} + +static void +anv_physical_device_finish(struct anv_physical_device *device) +{ + ralloc_free(device->compiler); +} + +static const VkExtensionProperties global_extensions[] = { + { + .extensionName = VK_KHR_SURFACE_EXTENSION_NAME, + .specVersion = 25, + }, + { + .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME, + .specVersion = 5, + }, +#ifdef HAVE_WAYLAND_PLATFORM + { + .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, + .specVersion = 4, + }, +#endif +}; + +static const VkExtensionProperties device_extensions[] = { + { + .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME, + .specVersion = 67, + }, +}; + +static void * +default_alloc_func(void *pUserData, size_t size, size_t align, + VkSystemAllocationScope allocationScope) +{ + return malloc(size); +} + +static void * +default_realloc_func(void *pUserData, void *pOriginal, size_t size, + size_t align, VkSystemAllocationScope allocationScope) +{ + return realloc(pOriginal, size); +} + +static void +default_free_func(void *pUserData, void *pMemory) +{ + free(pMemory); +} + +static const VkAllocationCallbacks default_alloc = { + .pUserData = NULL, + .pfnAllocation = default_alloc_func, + .pfnReallocation = default_realloc_func, + .pfnFree = default_free_func, +}; + +VkResult anv_CreateInstance( + const VkInstanceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkInstance* pInstance) +{ + struct anv_instance *instance; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); + + uint32_t client_version = pCreateInfo->pApplicationInfo->apiVersion; + if (VK_MAKE_VERSION(1, 0, 0) > client_version || + client_version > VK_MAKE_VERSION(1, 0, 2)) { + return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, + "Client requested version %d.%d.%d", + VK_VERSION_MAJOR(client_version), + VK_VERSION_MINOR(client_version), + VK_VERSION_PATCH(client_version)); + } + + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + bool found = false; + for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) { + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + global_extensions[j].extensionName) == 0) { + found = true; + break; + } + } + if (!found) + return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); + } + + instance = anv_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!instance) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + + if (pAllocator) + instance->alloc = *pAllocator; + else + instance->alloc = default_alloc; + + instance->apiVersion = pCreateInfo->pApplicationInfo->apiVersion; + instance->physicalDeviceCount = -1; + + _mesa_locale_init(); + + VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); + + anv_init_wsi(instance); + + *pInstance = anv_instance_to_handle(instance); + + return VK_SUCCESS; +} + +void anv_DestroyInstance( + VkInstance _instance, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + + if (instance->physicalDeviceCount > 0) { + /* We support at most one physical device. */ + assert(instance->physicalDeviceCount == 1); + anv_physical_device_finish(&instance->physicalDevice); + } + + anv_finish_wsi(instance); + + VG(VALGRIND_DESTROY_MEMPOOL(instance)); + + _mesa_locale_fini(); + + anv_free(&instance->alloc, instance); +} + +VkResult anv_EnumeratePhysicalDevices( + VkInstance _instance, + uint32_t* pPhysicalDeviceCount, + VkPhysicalDevice* pPhysicalDevices) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + VkResult result; + + if (instance->physicalDeviceCount < 0) { + result = anv_physical_device_init(&instance->physicalDevice, + instance, "/dev/dri/renderD128"); + if (result == VK_ERROR_INCOMPATIBLE_DRIVER) { + instance->physicalDeviceCount = 0; + } else if (result == VK_SUCCESS) { + instance->physicalDeviceCount = 1; + } else { + return result; + } + } + + /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL; + * otherwise it's an inout parameter. + * + * The Vulkan spec (git aaed022) says: + * + * pPhysicalDeviceCount is a pointer to an unsigned integer variable + * that is initialized with the number of devices the application is + * prepared to receive handles to. pname:pPhysicalDevices is pointer to + * an array of at least this many VkPhysicalDevice handles [...]. + * + * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices + * overwrites the contents of the variable pointed to by + * pPhysicalDeviceCount with the number of physical devices in in the + * instance; otherwise, vkEnumeratePhysicalDevices overwrites + * pPhysicalDeviceCount with the number of physical handles written to + * pPhysicalDevices. + */ + if (!pPhysicalDevices) { + *pPhysicalDeviceCount = instance->physicalDeviceCount; + } else if (*pPhysicalDeviceCount >= 1) { + pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice); + *pPhysicalDeviceCount = 1; + } else { + *pPhysicalDeviceCount = 0; + } + + return VK_SUCCESS; +} + +void anv_GetPhysicalDeviceFeatures( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures* pFeatures) +{ + anv_finishme("Get correct values for PhysicalDeviceFeatures"); + + *pFeatures = (VkPhysicalDeviceFeatures) { + .robustBufferAccess = true, + .fullDrawIndexUint32 = false, + .imageCubeArray = false, + .independentBlend = false, + .geometryShader = true, + .tessellationShader = false, + .sampleRateShading = false, + .dualSrcBlend = true, + .logicOp = true, + .multiDrawIndirect = false, + .drawIndirectFirstInstance = false, + .depthClamp = false, + .depthBiasClamp = false, + .fillModeNonSolid = true, + .depthBounds = false, + .wideLines = true, + .largePoints = true, + .alphaToOne = true, + .multiViewport = true, + .samplerAnisotropy = false, /* FINISHME */ + .textureCompressionETC2 = true, + .textureCompressionASTC_LDR = true, + .textureCompressionBC = true, + .occlusionQueryPrecise = false, /* FINISHME */ + .pipelineStatisticsQuery = true, + .vertexPipelineStoresAndAtomics = false, + .fragmentStoresAndAtomics = true, + .shaderTessellationAndGeometryPointSize = true, + .shaderImageGatherExtended = true, + .shaderStorageImageExtendedFormats = false, + .shaderStorageImageMultisample = false, + .shaderUniformBufferArrayDynamicIndexing = true, + .shaderSampledImageArrayDynamicIndexing = false, + .shaderStorageBufferArrayDynamicIndexing = false, + .shaderStorageImageArrayDynamicIndexing = false, + .shaderStorageImageReadWithoutFormat = false, + .shaderStorageImageWriteWithoutFormat = true, + .shaderClipDistance = false, + .shaderCullDistance = false, + .shaderFloat64 = false, + .shaderInt64 = false, + .shaderInt16 = false, + .alphaToOne = true, + .variableMultisampleRate = false, + .inheritedQueries = false, + }; +} + +void anv_GetPhysicalDeviceProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties* pProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); + const struct brw_device_info *devinfo = pdevice->info; + + anv_finishme("Get correct values for VkPhysicalDeviceLimits"); + + const float time_stamp_base = devinfo->gen >= 9 ? 83.333 : 80.0; + + VkSampleCountFlags sample_counts = + isl_device_get_sample_counts(&pdevice->isl_dev); + + VkPhysicalDeviceLimits limits = { + .maxImageDimension1D = (1 << 14), + .maxImageDimension2D = (1 << 14), + .maxImageDimension3D = (1 << 10), + .maxImageDimensionCube = (1 << 14), + .maxImageArrayLayers = (1 << 10), + .maxTexelBufferElements = 128 * 1024 * 1024, + .maxUniformBufferRange = UINT32_MAX, + .maxStorageBufferRange = UINT32_MAX, + .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, + .maxMemoryAllocationCount = UINT32_MAX, + .maxSamplerAllocationCount = 64 * 1024, + .bufferImageGranularity = 64, /* A cache line */ + .sparseAddressSpaceSize = 0, + .maxBoundDescriptorSets = MAX_SETS, + .maxPerStageDescriptorSamplers = 64, + .maxPerStageDescriptorUniformBuffers = 64, + .maxPerStageDescriptorStorageBuffers = 64, + .maxPerStageDescriptorSampledImages = 64, + .maxPerStageDescriptorStorageImages = 64, + .maxPerStageDescriptorInputAttachments = 64, + .maxPerStageResources = 128, + .maxDescriptorSetSamplers = 256, + .maxDescriptorSetUniformBuffers = 256, + .maxDescriptorSetUniformBuffersDynamic = 256, + .maxDescriptorSetStorageBuffers = 256, + .maxDescriptorSetStorageBuffersDynamic = 256, + .maxDescriptorSetSampledImages = 256, + .maxDescriptorSetStorageImages = 256, + .maxDescriptorSetInputAttachments = 256, + .maxVertexInputAttributes = 32, + .maxVertexInputBindings = 32, + .maxVertexInputAttributeOffset = 2047, + .maxVertexInputBindingStride = 2048, + .maxVertexOutputComponents = 128, + .maxTessellationGenerationLevel = 0, + .maxTessellationPatchSize = 0, + .maxTessellationControlPerVertexInputComponents = 0, + .maxTessellationControlPerVertexOutputComponents = 0, + .maxTessellationControlPerPatchOutputComponents = 0, + .maxTessellationControlTotalOutputComponents = 0, + .maxTessellationEvaluationInputComponents = 0, + .maxTessellationEvaluationOutputComponents = 0, + .maxGeometryShaderInvocations = 32, + .maxGeometryInputComponents = 64, + .maxGeometryOutputComponents = 128, + .maxGeometryOutputVertices = 256, + .maxGeometryTotalOutputComponents = 1024, + .maxFragmentInputComponents = 128, + .maxFragmentOutputAttachments = 8, + .maxFragmentDualSrcAttachments = 2, + .maxFragmentCombinedOutputResources = 8, + .maxComputeSharedMemorySize = 32768, + .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, + .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads, + .maxComputeWorkGroupSize = { + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + }, + .subPixelPrecisionBits = 4 /* FIXME */, + .subTexelPrecisionBits = 4 /* FIXME */, + .mipmapPrecisionBits = 4 /* FIXME */, + .maxDrawIndexedIndexValue = UINT32_MAX, + .maxDrawIndirectCount = UINT32_MAX, + .maxSamplerLodBias = 16, + .maxSamplerAnisotropy = 16, + .maxViewports = MAX_VIEWPORTS, + .maxViewportDimensions = { (1 << 14), (1 << 14) }, + .viewportBoundsRange = { -16384.0, 16384.0 }, + .viewportSubPixelBits = 13, /* We take a float? */ + .minMemoryMapAlignment = 4096, /* A page */ + .minTexelBufferOffsetAlignment = 1, + .minUniformBufferOffsetAlignment = 1, + .minStorageBufferOffsetAlignment = 1, + .minTexelOffset = -8, + .maxTexelOffset = 7, + .minTexelGatherOffset = -8, + .maxTexelGatherOffset = 7, + .minInterpolationOffset = 0, /* FIXME */ + .maxInterpolationOffset = 0, /* FIXME */ + .subPixelInterpolationOffsetBits = 0, /* FIXME */ + .maxFramebufferWidth = (1 << 14), + .maxFramebufferHeight = (1 << 14), + .maxFramebufferLayers = (1 << 10), + .framebufferColorSampleCounts = sample_counts, + .framebufferDepthSampleCounts = sample_counts, + .framebufferStencilSampleCounts = sample_counts, + .framebufferNoAttachmentsSampleCounts = sample_counts, + .maxColorAttachments = MAX_RTS, + .sampledImageColorSampleCounts = sample_counts, + .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, + .sampledImageDepthSampleCounts = sample_counts, + .sampledImageStencilSampleCounts = sample_counts, + .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, + .maxSampleMaskWords = 1, + .timestampComputeAndGraphics = false, + .timestampPeriod = time_stamp_base / (1000 * 1000 * 1000), + .maxClipDistances = 0 /* FIXME */, + .maxCullDistances = 0 /* FIXME */, + .maxCombinedClipAndCullDistances = 0 /* FIXME */, + .discreteQueuePriorities = 1, + .pointSizeRange = { 0.125, 255.875 }, + .lineWidthRange = { 0.0, 7.9921875 }, + .pointSizeGranularity = (1.0 / 8.0), + .lineWidthGranularity = (1.0 / 128.0), + .strictLines = false, /* FINISHME */ + .standardSampleLocations = true, + .optimalBufferCopyOffsetAlignment = 128, + .optimalBufferCopyRowPitchAlignment = 128, + .nonCoherentAtomSize = 64, + }; + + *pProperties = (VkPhysicalDeviceProperties) { + .apiVersion = VK_MAKE_VERSION(1, 0, 2), + .driverVersion = 1, + .vendorID = 0x8086, + .deviceID = pdevice->chipset_id, + .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, + .limits = limits, + .sparseProperties = {0}, /* Broadwell doesn't do sparse. */ + }; + + strcpy(pProperties->deviceName, pdevice->name); + snprintf((char *)pProperties->pipelineCacheUUID, VK_UUID_SIZE, + "anv-%s", MESA_GIT_SHA1 + 4); +} + +void anv_GetPhysicalDeviceQueueFamilyProperties( + VkPhysicalDevice physicalDevice, + uint32_t* pCount, + VkQueueFamilyProperties* pQueueFamilyProperties) +{ + if (pQueueFamilyProperties == NULL) { + *pCount = 1; + return; + } + + assert(*pCount >= 1); + + *pQueueFamilyProperties = (VkQueueFamilyProperties) { + .queueFlags = VK_QUEUE_GRAPHICS_BIT | + VK_QUEUE_COMPUTE_BIT | + VK_QUEUE_TRANSFER_BIT, + .queueCount = 1, + .timestampValidBits = 36, /* XXX: Real value here */ + .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, + }; +} + +void anv_GetPhysicalDeviceMemoryProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties* pMemoryProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + VkDeviceSize heap_size; + + /* Reserve some wiggle room for the driver by exposing only 75% of the + * aperture to the heap. + */ + heap_size = 3 * physical_device->aperture_size / 4; + + if (physical_device->info->has_llc) { + /* Big core GPUs share LLC with the CPU and thus one memory type can be + * both cached and coherent at the same time. + */ + pMemoryProperties->memoryTypeCount = 1; + pMemoryProperties->memoryTypes[0] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + .heapIndex = 0, + }; + } else { + /* The spec requires that we expose a host-visible, coherent memory + * type, but Atom GPUs don't share LLC. Thus we offer two memory types + * to give the application a choice between cached, but not coherent and + * coherent but uncached (WC though). + */ + pMemoryProperties->memoryTypeCount = 2; + pMemoryProperties->memoryTypes[0] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + .heapIndex = 0, + }; + pMemoryProperties->memoryTypes[1] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + .heapIndex = 0, + }; + } + + pMemoryProperties->memoryHeapCount = 1; + pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) { + .size = heap_size, + .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, + }; +} + +PFN_vkVoidFunction anv_GetInstanceProcAddr( + VkInstance instance, + const char* pName) +{ + return anv_lookup_entrypoint(pName); +} + +PFN_vkVoidFunction anv_GetDeviceProcAddr( + VkDevice device, + const char* pName) +{ + return anv_lookup_entrypoint(pName); +} + +static VkResult +anv_queue_init(struct anv_device *device, struct anv_queue *queue) +{ + queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + queue->device = device; + queue->pool = &device->surface_state_pool; + + return VK_SUCCESS; +} + +static void +anv_queue_finish(struct anv_queue *queue) +{ +} + +static struct anv_state +anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p) +{ + struct anv_state state; + + state = anv_state_pool_alloc(pool, size, align); + memcpy(state.map, p, size); + + if (!pool->block_pool->device->info.has_llc) + anv_state_clflush(state); + + return state; +} + +struct gen8_border_color { + union { + float float32[4]; + uint32_t uint32[4]; + }; + /* Pad out to 64 bytes */ + uint32_t _pad[12]; +}; + +static void +anv_device_init_border_colors(struct anv_device *device) +{ + static const struct gen8_border_color border_colors[] = { + [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } }, + [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } }, + [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } }, + [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } }, + [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } }, + [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } }, + }; + + device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool, + sizeof(border_colors), 64, + border_colors); +} + +VkResult anv_CreateDevice( + VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDevice* pDevice) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + VkResult result; + struct anv_device *device; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); + + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + bool found = false; + for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) { + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + device_extensions[j].extensionName) == 0) { + found = true; + break; + } + } + if (!found) + return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); + } + + anv_set_dispatch_devinfo(physical_device->info); + + device = anv_alloc2(&physical_device->instance->alloc, pAllocator, + sizeof(*device), 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!device) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + device->instance = physical_device->instance; + + if (pAllocator) + device->alloc = *pAllocator; + else + device->alloc = physical_device->instance->alloc; + + /* XXX(chadv): Can we dup() physicalDevice->fd here? */ + device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC); + if (device->fd == -1) { + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); + goto fail_device; + } + + device->context_id = anv_gem_create_context(device); + if (device->context_id == -1) { + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); + goto fail_fd; + } + + device->info = *physical_device->info; + device->isl_dev = physical_device->isl_dev; + + pthread_mutex_init(&device->mutex, NULL); + + anv_bo_pool_init(&device->batch_bo_pool, device, ANV_CMD_BUFFER_BATCH_SIZE); + + anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384); + + anv_state_pool_init(&device->dynamic_state_pool, + &device->dynamic_state_block_pool); + + anv_block_pool_init(&device->instruction_block_pool, device, 128 * 1024); + anv_pipeline_cache_init(&device->default_pipeline_cache, device); + + anv_block_pool_init(&device->surface_state_block_pool, device, 4096); + + anv_state_pool_init(&device->surface_state_pool, + &device->surface_state_block_pool); + + anv_bo_init_new(&device->workaround_bo, device, 1024); + + anv_block_pool_init(&device->scratch_block_pool, device, 0x10000); + + anv_queue_init(device, &device->queue); + + result = anv_device_init_meta(device); + if (result != VK_SUCCESS) + goto fail_fd; + + anv_device_init_border_colors(device); + + *pDevice = anv_device_to_handle(device); + + return VK_SUCCESS; + + fail_fd: + close(device->fd); + fail_device: + anv_free(&device->alloc, device); + + return result; +} + +void anv_DestroyDevice( + VkDevice _device, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + anv_queue_finish(&device->queue); + + anv_device_finish_meta(device); + +#ifdef HAVE_VALGRIND + /* We only need to free these to prevent valgrind errors. The backing + * BO will go away in a couple of lines so we don't actually leak. + */ + anv_state_pool_free(&device->dynamic_state_pool, device->border_colors); +#endif + + anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size); + anv_gem_close(device, device->workaround_bo.gem_handle); + + anv_bo_pool_finish(&device->batch_bo_pool); + anv_state_pool_finish(&device->dynamic_state_pool); + anv_block_pool_finish(&device->dynamic_state_block_pool); + anv_block_pool_finish(&device->instruction_block_pool); + anv_state_pool_finish(&device->surface_state_pool); + anv_block_pool_finish(&device->surface_state_block_pool); + anv_block_pool_finish(&device->scratch_block_pool); + + close(device->fd); + + pthread_mutex_destroy(&device->mutex); + + anv_free(&device->alloc, device); +} + +VkResult anv_EnumerateInstanceExtensionProperties( + const char* pLayerName, + uint32_t* pPropertyCount, + VkExtensionProperties* pProperties) +{ + if (pProperties == NULL) { + *pPropertyCount = ARRAY_SIZE(global_extensions); + return VK_SUCCESS; + } + + assert(*pPropertyCount >= ARRAY_SIZE(global_extensions)); + + *pPropertyCount = ARRAY_SIZE(global_extensions); + memcpy(pProperties, global_extensions, sizeof(global_extensions)); + + return VK_SUCCESS; +} + +VkResult anv_EnumerateDeviceExtensionProperties( + VkPhysicalDevice physicalDevice, + const char* pLayerName, + uint32_t* pPropertyCount, + VkExtensionProperties* pProperties) +{ + if (pProperties == NULL) { + *pPropertyCount = ARRAY_SIZE(device_extensions); + return VK_SUCCESS; + } + + assert(*pPropertyCount >= ARRAY_SIZE(device_extensions)); + + *pPropertyCount = ARRAY_SIZE(device_extensions); + memcpy(pProperties, device_extensions, sizeof(device_extensions)); + + return VK_SUCCESS; +} + +VkResult anv_EnumerateInstanceLayerProperties( + uint32_t* pPropertyCount, + VkLayerProperties* pProperties) +{ + if (pProperties == NULL) { + *pPropertyCount = 0; + return VK_SUCCESS; + } + + /* None supported at this time */ + return vk_error(VK_ERROR_LAYER_NOT_PRESENT); +} + +VkResult anv_EnumerateDeviceLayerProperties( + VkPhysicalDevice physicalDevice, + uint32_t* pPropertyCount, + VkLayerProperties* pProperties) +{ + if (pProperties == NULL) { + *pPropertyCount = 0; + return VK_SUCCESS; + } + + /* None supported at this time */ + return vk_error(VK_ERROR_LAYER_NOT_PRESENT); +} + +void anv_GetDeviceQueue( + VkDevice _device, + uint32_t queueNodeIndex, + uint32_t queueIndex, + VkQueue* pQueue) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + assert(queueIndex == 0); + + *pQueue = anv_queue_to_handle(&device->queue); +} + +VkResult anv_QueueSubmit( + VkQueue _queue, + uint32_t submitCount, + const VkSubmitInfo* pSubmits, + VkFence _fence) +{ + ANV_FROM_HANDLE(anv_queue, queue, _queue); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + struct anv_device *device = queue->device; + int ret; + + for (uint32_t i = 0; i < submitCount; i++) { + for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, + pSubmits[i].pCommandBuffers[j]); + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + + ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf); + if (ret != 0) { + /* We don't know the real error. */ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "execbuf2 failed: %m"); + } + + if (fence) { + ret = anv_gem_execbuffer(device, &fence->execbuf); + if (ret != 0) { + /* We don't know the real error. */ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "execbuf2 failed: %m"); + } + } + + for (uint32_t k = 0; k < cmd_buffer->execbuf2.bo_count; k++) + cmd_buffer->execbuf2.bos[k]->offset = cmd_buffer->execbuf2.objects[k].offset; + } + } + + return VK_SUCCESS; +} + +VkResult anv_QueueWaitIdle( + VkQueue _queue) +{ + ANV_FROM_HANDLE(anv_queue, queue, _queue); + + return ANV_CALL(DeviceWaitIdle)(anv_device_to_handle(queue->device)); +} + +VkResult anv_DeviceWaitIdle( + VkDevice _device) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_state state; + struct anv_batch batch; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec2_objects[1]; + struct anv_bo *bo = NULL; + VkResult result; + int64_t timeout; + int ret; + + state = anv_state_pool_alloc(&device->dynamic_state_pool, 32, 32); + bo = &device->dynamic_state_pool.block_pool->bo; + batch.start = batch.next = state.map; + batch.end = state.map + 32; + anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); + anv_batch_emit(&batch, GEN7_MI_NOOP); + + if (!device->info.has_llc) + anv_state_clflush(state); + + exec2_objects[0].handle = bo->gem_handle; + exec2_objects[0].relocation_count = 0; + exec2_objects[0].relocs_ptr = 0; + exec2_objects[0].alignment = 0; + exec2_objects[0].offset = bo->offset; + exec2_objects[0].flags = 0; + exec2_objects[0].rsvd1 = 0; + exec2_objects[0].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t) exec2_objects; + execbuf.buffer_count = 1; + execbuf.batch_start_offset = state.offset; + execbuf.batch_len = batch.next - state.map; + execbuf.cliprects_ptr = 0; + execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + + execbuf.flags = + I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; + execbuf.rsvd1 = device->context_id; + execbuf.rsvd2 = 0; + + ret = anv_gem_execbuffer(device, &execbuf); + if (ret != 0) { + /* We don't know the real error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); + goto fail; + } + + timeout = INT64_MAX; + ret = anv_gem_wait(device, bo->gem_handle, &timeout); + if (ret != 0) { + /* We don't know the real error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); + goto fail; + } + + anv_state_pool_free(&device->dynamic_state_pool, state); + + return VK_SUCCESS; + + fail: + anv_state_pool_free(&device->dynamic_state_pool, state); + + return result; +} + +VkResult +anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size) +{ + bo->gem_handle = anv_gem_create(device, size); + if (!bo->gem_handle) + return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + + bo->map = NULL; + bo->index = 0; + bo->offset = 0; + bo->size = size; + + return VK_SUCCESS; +} + +VkResult anv_AllocateMemory( + VkDevice _device, + const VkMemoryAllocateInfo* pAllocateInfo, + const VkAllocationCallbacks* pAllocator, + VkDeviceMemory* pMem) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_device_memory *mem; + VkResult result; + + assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); + + if (pAllocateInfo->allocationSize == 0) { + /* Apparently, this is allowed */ + *pMem = VK_NULL_HANDLE; + return VK_SUCCESS; + } + + /* We support exactly one memory heap. */ + assert(pAllocateInfo->memoryTypeIndex == 0 || + (!device->info.has_llc && pAllocateInfo->memoryTypeIndex < 2)); + + /* FINISHME: Fail if allocation request exceeds heap size. */ + + mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (mem == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* The kernel is going to give us whole pages anyway */ + uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096); + + result = anv_bo_init_new(&mem->bo, device, alloc_size); + if (result != VK_SUCCESS) + goto fail; + + mem->type_index = pAllocateInfo->memoryTypeIndex; + + *pMem = anv_device_memory_to_handle(mem); + + return VK_SUCCESS; + + fail: + anv_free2(&device->alloc, pAllocator, mem); + + return result; +} + +void anv_FreeMemory( + VkDevice _device, + VkDeviceMemory _mem, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + + if (mem == NULL) + return; + + if (mem->bo.map) + anv_gem_munmap(mem->bo.map, mem->bo.size); + + if (mem->bo.gem_handle != 0) + anv_gem_close(device, mem->bo.gem_handle); + + anv_free2(&device->alloc, pAllocator, mem); +} + +VkResult anv_MapMemory( + VkDevice _device, + VkDeviceMemory _memory, + VkDeviceSize offset, + VkDeviceSize size, + VkMemoryMapFlags flags, + void** ppData) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_device_memory, mem, _memory); + + if (mem == NULL) { + *ppData = NULL; + return VK_SUCCESS; + } + + if (size == VK_WHOLE_SIZE) + size = mem->bo.size - offset; + + /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only + * takes a VkDeviceMemory pointer, it seems like only one map of the memory + * at a time is valid. We could just mmap up front and return an offset + * pointer here, but that may exhaust virtual memory on 32 bit + * userspace. */ + + uint32_t gem_flags = 0; + if (!device->info.has_llc && mem->type_index == 0) + gem_flags |= I915_MMAP_WC; + + /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */ + uint64_t map_offset = offset & ~4095ull; + assert(offset >= map_offset); + uint64_t map_size = (offset + size) - map_offset; + + /* Let's map whole pages */ + map_size = align_u64(map_size, 4096); + + mem->map = anv_gem_mmap(device, mem->bo.gem_handle, + map_offset, map_size, gem_flags); + mem->map_size = map_size; + + *ppData = mem->map + (offset - map_offset); + + return VK_SUCCESS; +} + +void anv_UnmapMemory( + VkDevice _device, + VkDeviceMemory _memory) +{ + ANV_FROM_HANDLE(anv_device_memory, mem, _memory); + + if (mem == NULL) + return; + + anv_gem_munmap(mem->map, mem->map_size); +} + +static void +clflush_mapped_ranges(struct anv_device *device, + uint32_t count, + const VkMappedMemoryRange *ranges) +{ + for (uint32_t i = 0; i < count; i++) { + ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory); + void *p = mem->map + (ranges[i].offset & ~CACHELINE_MASK); + void *end; + + if (ranges[i].offset + ranges[i].size > mem->map_size) + end = mem->map + mem->map_size; + else + end = mem->map + ranges[i].offset + ranges[i].size; + + while (p < end) { + __builtin_ia32_clflush(p); + p += CACHELINE_SIZE; + } + } +} + +VkResult anv_FlushMappedMemoryRanges( + VkDevice _device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange* pMemoryRanges) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + if (device->info.has_llc) + return VK_SUCCESS; + + /* Make sure the writes we're flushing have landed. */ + __builtin_ia32_mfence(); + + clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges); + + return VK_SUCCESS; +} + +VkResult anv_InvalidateMappedMemoryRanges( + VkDevice _device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange* pMemoryRanges) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + if (device->info.has_llc) + return VK_SUCCESS; + + clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges); + + /* Make sure no reads get moved up above the invalidate. */ + __builtin_ia32_mfence(); + + return VK_SUCCESS; +} + +void anv_GetBufferMemoryRequirements( + VkDevice device, + VkBuffer _buffer, + VkMemoryRequirements* pMemoryRequirements) +{ + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + /* The Vulkan spec (git aaed022) says: + * + * memoryTypeBits is a bitfield and contains one bit set for every + * supported memory type for the resource. The bit `1<<i` is set if and + * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties + * structure for the physical device is supported. + * + * We support exactly one memory type. + */ + pMemoryRequirements->memoryTypeBits = 1; + + pMemoryRequirements->size = buffer->size; + pMemoryRequirements->alignment = 16; +} + +void anv_GetImageMemoryRequirements( + VkDevice device, + VkImage _image, + VkMemoryRequirements* pMemoryRequirements) +{ + ANV_FROM_HANDLE(anv_image, image, _image); + + /* The Vulkan spec (git aaed022) says: + * + * memoryTypeBits is a bitfield and contains one bit set for every + * supported memory type for the resource. The bit `1<<i` is set if and + * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties + * structure for the physical device is supported. + * + * We support exactly one memory type. + */ + pMemoryRequirements->memoryTypeBits = 1; + + pMemoryRequirements->size = image->size; + pMemoryRequirements->alignment = image->alignment; +} + +void anv_GetImageSparseMemoryRequirements( + VkDevice device, + VkImage image, + uint32_t* pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements* pSparseMemoryRequirements) +{ + stub(); +} + +void anv_GetDeviceMemoryCommitment( + VkDevice device, + VkDeviceMemory memory, + VkDeviceSize* pCommittedMemoryInBytes) +{ + *pCommittedMemoryInBytes = 0; +} + +VkResult anv_BindBufferMemory( + VkDevice device, + VkBuffer _buffer, + VkDeviceMemory _memory, + VkDeviceSize memoryOffset) +{ + ANV_FROM_HANDLE(anv_device_memory, mem, _memory); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + if (mem) { + buffer->bo = &mem->bo; + buffer->offset = memoryOffset; + } else { + buffer->bo = NULL; + buffer->offset = 0; + } + + return VK_SUCCESS; +} + +VkResult anv_BindImageMemory( + VkDevice device, + VkImage _image, + VkDeviceMemory _memory, + VkDeviceSize memoryOffset) +{ + ANV_FROM_HANDLE(anv_device_memory, mem, _memory); + ANV_FROM_HANDLE(anv_image, image, _image); + + if (mem) { + image->bo = &mem->bo; + image->offset = memoryOffset; + } else { + image->bo = NULL; + image->offset = 0; + } + + return VK_SUCCESS; +} + +VkResult anv_QueueBindSparse( + VkQueue queue, + uint32_t bindInfoCount, + const VkBindSparseInfo* pBindInfo, + VkFence fence) +{ + stub_return(VK_ERROR_INCOMPATIBLE_DRIVER); +} + +VkResult anv_CreateFence( + VkDevice _device, + const VkFenceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkFence* pFence) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_fence *fence; + struct anv_batch batch; + VkResult result; + + const uint32_t fence_size = 128; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); + + fence = anv_alloc2(&device->alloc, pAllocator, sizeof(*fence), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (fence == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_init_new(&fence->bo, device, fence_size); + if (result != VK_SUCCESS) + goto fail; + + fence->bo.map = + anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size, 0); + batch.next = batch.start = fence->bo.map; + batch.end = fence->bo.map + fence->bo.size; + anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); + anv_batch_emit(&batch, GEN7_MI_NOOP); + + if (!device->info.has_llc) { + assert(((uintptr_t) fence->bo.map & CACHELINE_MASK) == 0); + assert(batch.next - fence->bo.map <= CACHELINE_SIZE); + __builtin_ia32_mfence(); + __builtin_ia32_clflush(fence->bo.map); + } + + fence->exec2_objects[0].handle = fence->bo.gem_handle; + fence->exec2_objects[0].relocation_count = 0; + fence->exec2_objects[0].relocs_ptr = 0; + fence->exec2_objects[0].alignment = 0; + fence->exec2_objects[0].offset = fence->bo.offset; + fence->exec2_objects[0].flags = 0; + fence->exec2_objects[0].rsvd1 = 0; + fence->exec2_objects[0].rsvd2 = 0; + + fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects; + fence->execbuf.buffer_count = 1; + fence->execbuf.batch_start_offset = 0; + fence->execbuf.batch_len = batch.next - fence->bo.map; + fence->execbuf.cliprects_ptr = 0; + fence->execbuf.num_cliprects = 0; + fence->execbuf.DR1 = 0; + fence->execbuf.DR4 = 0; + + fence->execbuf.flags = + I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; + fence->execbuf.rsvd1 = device->context_id; + fence->execbuf.rsvd2 = 0; + + fence->ready = false; + + *pFence = anv_fence_to_handle(fence); + + return VK_SUCCESS; + + fail: + anv_free2(&device->alloc, pAllocator, fence); + + return result; +} + +void anv_DestroyFence( + VkDevice _device, + VkFence _fence, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + + anv_gem_munmap(fence->bo.map, fence->bo.size); + anv_gem_close(device, fence->bo.gem_handle); + anv_free2(&device->alloc, pAllocator, fence); +} + +VkResult anv_ResetFences( + VkDevice _device, + uint32_t fenceCount, + const VkFence* pFences) +{ + for (uint32_t i = 0; i < fenceCount; i++) { + ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); + fence->ready = false; + } + + return VK_SUCCESS; +} + +VkResult anv_GetFenceStatus( + VkDevice _device, + VkFence _fence) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + int64_t t = 0; + int ret; + + if (fence->ready) + return VK_SUCCESS; + + ret = anv_gem_wait(device, fence->bo.gem_handle, &t); + if (ret == 0) { + fence->ready = true; + return VK_SUCCESS; + } + + return VK_NOT_READY; +} + +VkResult anv_WaitForFences( + VkDevice _device, + uint32_t fenceCount, + const VkFence* pFences, + VkBool32 waitAll, + uint64_t timeout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed + * to block indefinitely timeouts <= 0. Unfortunately, this was broken + * for a couple of kernel releases. Since there's no way to know + * whether or not the kernel we're using is one of the broken ones, the + * best we can do is to clamp the timeout to INT64_MAX. This limits the + * maximum timeout from 584 years to 292 years - likely not a big deal. + */ + if (timeout > INT64_MAX) + timeout = INT64_MAX; + + int64_t t = timeout; + + /* FIXME: handle !waitAll */ + + for (uint32_t i = 0; i < fenceCount; i++) { + ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); + int ret = anv_gem_wait(device, fence->bo.gem_handle, &t); + if (ret == -1 && errno == ETIME) { + return VK_TIMEOUT; + } else if (ret == -1) { + /* We don't know the real error. */ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "gem wait failed: %m"); + } + } + + return VK_SUCCESS; +} + +// Queue semaphore functions + +VkResult anv_CreateSemaphore( + VkDevice device, + const VkSemaphoreCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSemaphore* pSemaphore) +{ + /* The DRM execbuffer ioctl always execute in-oder, even between different + * rings. As such, there's nothing to do for the user space semaphore. + */ + + *pSemaphore = (VkSemaphore)1; + + return VK_SUCCESS; +} + +void anv_DestroySemaphore( + VkDevice device, + VkSemaphore semaphore, + const VkAllocationCallbacks* pAllocator) +{ +} + +// Event functions + +VkResult anv_CreateEvent( + VkDevice _device, + const VkEventCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkEvent* pEvent) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_state state; + struct anv_event *event; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO); + + state = anv_state_pool_alloc(&device->dynamic_state_pool, + sizeof(*event), 4); + event = state.map; + event->state = state; + event->semaphore = VK_EVENT_RESET; + + if (!device->info.has_llc) { + /* Make sure the writes we're flushing have landed. */ + __builtin_ia32_mfence(); + __builtin_ia32_clflush(event); + } + + *pEvent = anv_event_to_handle(event); + + return VK_SUCCESS; +} + +void anv_DestroyEvent( + VkDevice _device, + VkEvent _event, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + anv_state_pool_free(&device->dynamic_state_pool, event->state); +} + +VkResult anv_GetEventStatus( + VkDevice _device, + VkEvent _event) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + if (!device->info.has_llc) { + /* Invalidate read cache before reading event written by GPU. */ + __builtin_ia32_clflush(event); + __builtin_ia32_mfence(); + + } + + return event->semaphore; +} + +VkResult anv_SetEvent( + VkDevice _device, + VkEvent _event) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + event->semaphore = VK_EVENT_SET; + + if (!device->info.has_llc) { + /* Make sure the writes we're flushing have landed. */ + __builtin_ia32_mfence(); + __builtin_ia32_clflush(event); + } + + return VK_SUCCESS; +} + +VkResult anv_ResetEvent( + VkDevice _device, + VkEvent _event) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + event->semaphore = VK_EVENT_RESET; + + if (!device->info.has_llc) { + /* Make sure the writes we're flushing have landed. */ + __builtin_ia32_mfence(); + __builtin_ia32_clflush(event); + } + + return VK_SUCCESS; +} + +// Buffer functions + +VkResult anv_CreateBuffer( + VkDevice _device, + const VkBufferCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkBuffer* pBuffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_buffer *buffer; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); + + buffer = anv_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (buffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + buffer->size = pCreateInfo->size; + buffer->usage = pCreateInfo->usage; + buffer->bo = NULL; + buffer->offset = 0; + + *pBuffer = anv_buffer_to_handle(buffer); + + return VK_SUCCESS; +} + +void anv_DestroyBuffer( + VkDevice _device, + VkBuffer _buffer, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + anv_free2(&device->alloc, pAllocator, buffer); +} + +void +anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state, + enum isl_format format, + uint32_t offset, uint32_t range, uint32_t stride) +{ + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + gen75_fill_buffer_surface_state(state.map, format, offset, range, + stride); + else + gen7_fill_buffer_surface_state(state.map, format, offset, range, + stride); + break; + case 8: + gen8_fill_buffer_surface_state(state.map, format, offset, range, stride); + break; + case 9: + gen9_fill_buffer_surface_state(state.map, format, offset, range, stride); + break; + default: + unreachable("unsupported gen\n"); + } + + if (!device->info.has_llc) + anv_state_clflush(state); +} + +void anv_DestroySampler( + VkDevice _device, + VkSampler _sampler, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_sampler, sampler, _sampler); + + anv_free2(&device->alloc, pAllocator, sampler); +} + +VkResult anv_CreateFramebuffer( + VkDevice _device, + const VkFramebufferCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkFramebuffer* pFramebuffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_framebuffer *framebuffer; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); + + size_t size = sizeof(*framebuffer) + + sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount; + framebuffer = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (framebuffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + framebuffer->attachment_count = pCreateInfo->attachmentCount; + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + VkImageView _iview = pCreateInfo->pAttachments[i]; + framebuffer->attachments[i] = anv_image_view_from_handle(_iview); + } + + framebuffer->width = pCreateInfo->width; + framebuffer->height = pCreateInfo->height; + framebuffer->layers = pCreateInfo->layers; + + *pFramebuffer = anv_framebuffer_to_handle(framebuffer); + + return VK_SUCCESS; +} + +void anv_DestroyFramebuffer( + VkDevice _device, + VkFramebuffer _fb, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_framebuffer, fb, _fb); + + anv_free2(&device->alloc, pAllocator, fb); +} + +void vkCmdDbgMarkerBegin( + VkCommandBuffer commandBuffer, + const char* pMarker) + __attribute__ ((visibility ("default"))); + +void vkCmdDbgMarkerEnd( + VkCommandBuffer commandBuffer) + __attribute__ ((visibility ("default"))); + +void vkCmdDbgMarkerBegin( + VkCommandBuffer commandBuffer, + const char* pMarker) +{ +} + +void vkCmdDbgMarkerEnd( + VkCommandBuffer commandBuffer) +{ +} diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c new file mode 100644 index 00000000000..b7fa28be787 --- /dev/null +++ b/src/vulkan/anv_dump.c @@ -0,0 +1,209 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +/* This file contains utility functions for help debugging. They can be + * called from GDB or similar to help inspect images and buffers. + */ + +void +anv_dump_image_to_ppm(struct anv_device *device, + struct anv_image *image, unsigned miplevel, + unsigned array_layer, const char *filename) +{ + VkDevice vk_device = anv_device_to_handle(device); + VkResult result; + + VkExtent2D extent = { image->extent.width, image->extent.height }; + for (unsigned i = 0; i < miplevel; i++) { + extent.width = MAX2(1, extent.width / 2); + extent.height = MAX2(1, extent.height / 2); + } + + VkImage copy_image; + result = anv_CreateImage(vk_device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .extent = (VkExtent3D) { extent.width, extent.height, 1 }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT, + .flags = 0, + }, NULL, ©_image); + assert(result == VK_SUCCESS); + + VkMemoryRequirements reqs; + anv_GetImageMemoryRequirements(vk_device, copy_image, &reqs); + + VkDeviceMemory memory; + result = anv_AllocateMemory(vk_device, + &(VkMemoryAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = reqs.size, + .memoryTypeIndex = 0, + }, NULL, &memory); + assert(result == VK_SUCCESS); + + result = anv_BindImageMemory(vk_device, copy_image, memory, 0); + assert(result == VK_SUCCESS); + + VkCommandPool commandPool; + result = anv_CreateCommandPool(vk_device, + &(VkCommandPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .queueFamilyIndex = 0, + .flags = 0, + }, NULL, &commandPool); + assert(result == VK_SUCCESS); + + VkCommandBuffer cmd; + result = anv_AllocateCommandBuffers(vk_device, + &(VkCommandBufferAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .commandPool = commandPool, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = 1, + }, &cmd); + assert(result == VK_SUCCESS); + + result = anv_BeginCommandBuffer(cmd, + &(VkCommandBufferBeginInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }); + assert(result == VK_SUCCESS); + + anv_CmdBlitImage(cmd, + anv_image_to_handle(image), VK_IMAGE_LAYOUT_GENERAL, + copy_image, VK_IMAGE_LAYOUT_GENERAL, 1, + &(VkImageBlit) { + .srcSubresource = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = miplevel, + .baseArrayLayer = array_layer, + .layerCount = 1, + }, + .srcOffsets = { + { 0, 0, 0 }, + { extent.width, extent.height, 1 }, + }, + .dstSubresource = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .dstOffsets = { + { 0, 0, 0 }, + { extent.width, extent.height, 1 }, + }, + }, VK_FILTER_NEAREST); + + ANV_CALL(CmdPipelineBarrier)(cmd, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + true, 0, NULL, 0, NULL, 1, + &(VkImageMemoryBarrier) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_HOST_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = 0, + .dstQueueFamilyIndex = 0, + .image = copy_image, + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }); + + result = anv_EndCommandBuffer(cmd); + assert(result == VK_SUCCESS); + + VkFence fence; + result = anv_CreateFence(vk_device, + &(VkFenceCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .flags = 0, + }, NULL, &fence); + assert(result == VK_SUCCESS); + + result = anv_QueueSubmit(anv_queue_to_handle(&device->queue), 1, + &(VkSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &cmd, + }, fence); + assert(result == VK_SUCCESS); + + result = anv_WaitForFences(vk_device, 1, &fence, true, UINT64_MAX); + assert(result == VK_SUCCESS); + + anv_DestroyFence(vk_device, fence, NULL); + anv_DestroyCommandPool(vk_device, commandPool, NULL); + + uint8_t *map; + result = anv_MapMemory(vk_device, memory, 0, reqs.size, 0, (void **)&map); + assert(result == VK_SUCCESS); + + VkSubresourceLayout layout; + anv_GetImageSubresourceLayout(vk_device, copy_image, + &(VkImageSubresource) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = 0, + .arrayLayer = 0, + }, &layout); + + map += layout.offset; + + /* Now we can finally write the PPM file */ + FILE *file = fopen(filename, "wb"); + assert(file); + + fprintf(file, "P6\n%d %d\n255\n", extent.width, extent.height); + for (unsigned y = 0; y < extent.height; y++) { + uint8_t row[extent.width * 3]; + for (unsigned x = 0; x < extent.width; x++) { + row[x * 3 + 0] = map[x * 4 + 0]; + row[x * 3 + 1] = map[x * 4 + 1]; + row[x * 3 + 2] = map[x * 4 + 2]; + } + fwrite(row, 3, extent.width, file); + + map += layout.rowPitch; + } + fclose(file); + + anv_UnmapMemory(vk_device, memory); + anv_DestroyImage(vk_device, copy_image, NULL); + anv_FreeMemory(vk_device, memory, NULL); +} diff --git a/src/vulkan/anv_entrypoints_gen.py b/src/vulkan/anv_entrypoints_gen.py new file mode 100644 index 00000000000..1e4cfcb1755 --- /dev/null +++ b/src/vulkan/anv_entrypoints_gen.py @@ -0,0 +1,324 @@ +# coding=utf-8 +# +# Copyright © 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# + +import fileinput, re, sys + +# Each function typedef in the vulkan.h header is all on one line and matches +# this regepx. We hope that won't change. + +p = re.compile('typedef ([^ ]*) *\((?:VKAPI_PTR)? *\*PFN_vk([^(]*)\)(.*);') + +entrypoints = [] + +# We generate a static hash table for entry point lookup +# (vkGetProcAddress). We use a linear congruential generator for our hash +# function and a power-of-two size table. The prime numbers are determined +# experimentally. + +none = 0xffff +hash_size = 256 +u32_mask = 2**32 - 1 +hash_mask = hash_size - 1 + +prime_factor = 5024183 +prime_step = 19 + +def hash(name): + h = 0; + for c in name: + h = (h * prime_factor + ord(c)) & u32_mask + + return h + +opt_header = False +opt_code = False + +if (sys.argv[1] == "header"): + opt_header = True + sys.argv.pop() +elif (sys.argv[1] == "code"): + opt_code = True + sys.argv.pop() + +# Parse the entry points in the header + +i = 0 +for line in fileinput.input(): + m = p.match(line) + if (m): + if m.group(2) == 'VoidFunction': + continue + fullname = "vk" + m.group(2) + h = hash(fullname) + entrypoints.append((m.group(1), m.group(2), m.group(3), i, h)) + i = i + 1 + +# For outputting entrypoints.h we generate a anv_EntryPoint() prototype +# per entry point. + +if opt_header: + print "/* This file generated from vk_gen.py, don't edit directly. */\n" + + print "struct anv_dispatch_table {" + print " union {" + print " void *entrypoints[%d];" % len(entrypoints) + print " struct {" + + for type, name, args, num, h in entrypoints: + print " %s (*%s)%s;" % (type, name, args) + print " };\n" + print " };\n" + print "};\n" + + print "void anv_set_dispatch_devinfo(const struct brw_device_info *info);\n" + + for type, name, args, num, h in entrypoints: + print "%s anv_%s%s;" % (type, name, args) + print "%s gen7_%s%s;" % (type, name, args) + print "%s gen75_%s%s;" % (type, name, args) + print "%s gen8_%s%s;" % (type, name, args) + print "%s gen9_%s%s;" % (type, name, args) + print "%s anv_validate_%s%s;" % (type, name, args) + exit() + + + +print """/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* DO NOT EDIT! This is a generated file. */ + +#include "anv_private.h" + +struct anv_entrypoint { + uint32_t name; + uint32_t hash; +}; + +/* We use a big string constant to avoid lots of reloctions from the entry + * point table to lots of little strings. The entries in the entry point table + * store the index into this big string. + */ + +static const char strings[] =""" + +offsets = [] +i = 0; +for type, name, args, num, h in entrypoints: + print " \"vk%s\\0\"" % name + offsets.append(i) + i += 2 + len(name) + 1 +print """ ; + +/* Weak aliases for all potential validate functions. These will resolve to + * NULL if they're not defined, which lets the resolve_entrypoint() function + * either pick a validate wrapper if available or just plug in the actual + * entry point. + */ +""" + +# Now generate the table of all entry points and their validation functions + +print "\nstatic const struct anv_entrypoint entrypoints[] = {" +for type, name, args, num, h in entrypoints: + print " { %5d, 0x%08x }," % (offsets[num], h) +print "};\n" + +for layer in [ "anv", "validate", "gen7", "gen75", "gen8", "gen9" ]: + for type, name, args, num, h in entrypoints: + print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args) + print "\nconst struct anv_dispatch_table %s_layer = {" % layer + for type, name, args, num, h in entrypoints: + print " .%s = %s_%s," % (name, layer, name) + print "};\n" + +print """ +#ifdef DEBUG +static bool enable_validate = true; +#else +static bool enable_validate = false; +#endif + +/* We can't use symbols that need resolving (like, oh, getenv) in the resolve + * function. This means that we have to determine whether or not to use the + * validation layer sometime before that. The constructor function attribute asks + * the dynamic linker to invoke determine_validate() at dlopen() time which + * works. + */ +static void __attribute__ ((constructor)) +determine_validate(void) +{ + const char *s = getenv("ANV_VALIDATE"); + + if (s) + enable_validate = atoi(s); +} + +static const struct brw_device_info *dispatch_devinfo; + +void +anv_set_dispatch_devinfo(const struct brw_device_info *devinfo) +{ + dispatch_devinfo = devinfo; +} + +void * __attribute__ ((noinline)) +anv_resolve_entrypoint(uint32_t index) +{ + if (enable_validate && validate_layer.entrypoints[index]) + return validate_layer.entrypoints[index]; + + if (dispatch_devinfo == NULL) { + assert(anv_layer.entrypoints[index]); + return anv_layer.entrypoints[index]; + } + + switch (dispatch_devinfo->gen) { + case 9: + if (gen9_layer.entrypoints[index]) + return gen9_layer.entrypoints[index]; + /* fall through */ + case 8: + if (gen8_layer.entrypoints[index]) + return gen8_layer.entrypoints[index]; + /* fall through */ + case 7: + if (dispatch_devinfo->is_haswell && gen75_layer.entrypoints[index]) + return gen75_layer.entrypoints[index]; + + if (gen7_layer.entrypoints[index]) + return gen7_layer.entrypoints[index]; + /* fall through */ + case 0: + return anv_layer.entrypoints[index]; + default: + unreachable("unsupported gen\\n"); + } +} +""" + +# Now output ifuncs and their resolve helpers for all entry points. The +# resolve helper calls resolve_entrypoint() with the entry point index, which +# lets the resolver look it up in the table. + +for type, name, args, num, h in entrypoints: + print "static void *resolve_%s(void) { return anv_resolve_entrypoint(%d); }" % (name, num) + print "%s vk%s%s\n __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % (type, name, args, name) + + +# Now generate the hash table used for entry point look up. This is a +# uint16_t table of entry point indices. We use 0xffff to indicate an entry +# in the hash table is empty. + +map = [none for f in xrange(hash_size)] +collisions = [0 for f in xrange(10)] +for type, name, args, num, h in entrypoints: + level = 0 + while map[h & hash_mask] != none: + h = h + prime_step + level = level + 1 + if level > 9: + collisions[9] += 1 + else: + collisions[level] += 1 + map[h & hash_mask] = num + +print "/* Hash table stats:" +print " * size %d entries" % hash_size +print " * collisions entries" +for i in xrange(10): + if (i == 9): + plus = "+" + else: + plus = " " + + print " * %2d%s %4d" % (i, plus, collisions[i]) +print " */\n" + +print "#define none 0x%04x\n" % none + +print "static const uint16_t map[] = {" +for i in xrange(0, hash_size, 8): + print " ", + for j in xrange(i, i + 8): + if map[j] & 0xffff == 0xffff: + print " none,", + else: + print "0x%04x," % (map[j] & 0xffff), + print + +print "};" + +# Finally we generate the hash table lookup function. The hash function and +# linear probing algorithm matches the hash table generated above. + +print """ +void * +anv_lookup_entrypoint(const char *name) +{ + static const uint32_t prime_factor = %d; + static const uint32_t prime_step = %d; + const struct anv_entrypoint *e; + uint32_t hash, h, i; + const char *p; + + hash = 0; + for (p = name; *p; p++) + hash = hash * prime_factor + *p; + + h = hash; + do { + i = map[h & %d]; + if (i == none) + return NULL; + e = &entrypoints[i]; + h += prime_step; + } while (e->hash != hash); + + if (strcmp(name, strings + e->name) != 0) + return NULL; + + return anv_resolve_entrypoint(i); +} +""" % (prime_factor, prime_step, hash_mask) diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c new file mode 100644 index 00000000000..e3c786ccca7 --- /dev/null +++ b/src/vulkan/anv_formats.c @@ -0,0 +1,605 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" +#include "brw_surface_formats.h" + +#include "gen7_pack.h" + +#define RGBA ((struct anv_format_swizzle) { 0, 1, 2, 3 }) +#define BGRA ((struct anv_format_swizzle) { 2, 1, 0, 3 }) + +#define swiz_fmt(__vk_fmt, __hw_fmt, __swizzle, ...) \ + [__vk_fmt] = { \ + .vk_format = __vk_fmt, \ + .name = #__vk_fmt, \ + .surface_format = __hw_fmt, \ + .isl_layout = &isl_format_layouts[__hw_fmt], \ + .swizzle = __swizzle, \ + __VA_ARGS__ \ + } + +#define fmt(__vk_fmt, __hw_fmt, ...) \ + swiz_fmt(__vk_fmt, __hw_fmt, RGBA, __VA_ARGS__) + +/* HINT: For array formats, the ISL name should match the VK name. For + * packed formats, they should have the channels in reverse order from each + * other. The reason for this is that, for packed formats, the ISL (and + * bspec) names are in LSB -> MSB order while VK formats are MSB -> LSB. + */ +static const struct anv_format anv_formats[] = { + fmt(VK_FORMAT_UNDEFINED, ISL_FORMAT_RAW), + fmt(VK_FORMAT_R4G4_UNORM_PACK8, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R4G4B4A4_UNORM_PACK16, ISL_FORMAT_A4B4G4R4_UNORM), + swiz_fmt(VK_FORMAT_B4G4R4A4_UNORM_PACK16, ISL_FORMAT_A4B4G4R4_UNORM, BGRA), + fmt(VK_FORMAT_R5G6B5_UNORM_PACK16, ISL_FORMAT_B5G6R5_UNORM), + swiz_fmt(VK_FORMAT_B5G6R5_UNORM_PACK16, ISL_FORMAT_B5G6R5_UNORM, BGRA), + fmt(VK_FORMAT_R5G5B5A1_UNORM_PACK16, ISL_FORMAT_A1B5G5R5_UNORM), + fmt(VK_FORMAT_B5G5R5A1_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_A1R5G5B5_UNORM_PACK16, ISL_FORMAT_B5G5R5A1_UNORM), + fmt(VK_FORMAT_R8_UNORM, ISL_FORMAT_R8_UNORM), + fmt(VK_FORMAT_R8_SNORM, ISL_FORMAT_R8_SNORM), + fmt(VK_FORMAT_R8_USCALED, ISL_FORMAT_R8_USCALED), + fmt(VK_FORMAT_R8_SSCALED, ISL_FORMAT_R8_SSCALED), + fmt(VK_FORMAT_R8_UINT, ISL_FORMAT_R8_UINT), + fmt(VK_FORMAT_R8_SINT, ISL_FORMAT_R8_SINT), + fmt(VK_FORMAT_R8_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R8G8_UNORM, ISL_FORMAT_R8G8_UNORM), + fmt(VK_FORMAT_R8G8_SNORM, ISL_FORMAT_R8G8_SNORM), + fmt(VK_FORMAT_R8G8_USCALED, ISL_FORMAT_R8G8_USCALED), + fmt(VK_FORMAT_R8G8_SSCALED, ISL_FORMAT_R8G8_SSCALED), + fmt(VK_FORMAT_R8G8_UINT, ISL_FORMAT_R8G8_UINT), + fmt(VK_FORMAT_R8G8_SINT, ISL_FORMAT_R8G8_SINT), + fmt(VK_FORMAT_R8G8_SRGB, ISL_FORMAT_UNSUPPORTED), /* L8A8_UNORM_SRGB */ + fmt(VK_FORMAT_R8G8B8_UNORM, ISL_FORMAT_R8G8B8_UNORM), + fmt(VK_FORMAT_R8G8B8_SNORM, ISL_FORMAT_R8G8B8_SNORM), + fmt(VK_FORMAT_R8G8B8_USCALED, ISL_FORMAT_R8G8B8_USCALED), + fmt(VK_FORMAT_R8G8B8_SSCALED, ISL_FORMAT_R8G8B8_SSCALED), + fmt(VK_FORMAT_R8G8B8_UINT, ISL_FORMAT_R8G8B8_UINT), + fmt(VK_FORMAT_R8G8B8_SINT, ISL_FORMAT_R8G8B8_SINT), + fmt(VK_FORMAT_R8G8B8_SRGB, ISL_FORMAT_UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ + fmt(VK_FORMAT_R8G8B8A8_UNORM, ISL_FORMAT_R8G8B8A8_UNORM), + fmt(VK_FORMAT_R8G8B8A8_SNORM, ISL_FORMAT_R8G8B8A8_SNORM), + fmt(VK_FORMAT_R8G8B8A8_USCALED, ISL_FORMAT_R8G8B8A8_USCALED), + fmt(VK_FORMAT_R8G8B8A8_SSCALED, ISL_FORMAT_R8G8B8A8_SSCALED), + fmt(VK_FORMAT_R8G8B8A8_UINT, ISL_FORMAT_R8G8B8A8_UINT), + fmt(VK_FORMAT_R8G8B8A8_SINT, ISL_FORMAT_R8G8B8A8_SINT), + fmt(VK_FORMAT_R8G8B8A8_SRGB, ISL_FORMAT_R8G8B8A8_UNORM_SRGB), + fmt(VK_FORMAT_A8B8G8R8_UNORM_PACK32, ISL_FORMAT_R8G8B8A8_UNORM), + fmt(VK_FORMAT_A8B8G8R8_SNORM_PACK32, ISL_FORMAT_R8G8B8A8_SNORM), + fmt(VK_FORMAT_A8B8G8R8_USCALED_PACK32, ISL_FORMAT_R8G8B8A8_USCALED), + fmt(VK_FORMAT_A8B8G8R8_SSCALED_PACK32, ISL_FORMAT_R8G8B8A8_SSCALED), + fmt(VK_FORMAT_A8B8G8R8_UINT_PACK32, ISL_FORMAT_R8G8B8A8_UINT), + fmt(VK_FORMAT_A8B8G8R8_SINT_PACK32, ISL_FORMAT_R8G8B8A8_SINT), + fmt(VK_FORMAT_A8B8G8R8_SRGB_PACK32, ISL_FORMAT_R8G8B8A8_UNORM_SRGB), + fmt(VK_FORMAT_A2R10G10B10_UNORM_PACK32, ISL_FORMAT_B10G10R10A2_UNORM), + fmt(VK_FORMAT_A2R10G10B10_SNORM_PACK32, ISL_FORMAT_B10G10R10A2_SNORM), + fmt(VK_FORMAT_A2R10G10B10_USCALED_PACK32, ISL_FORMAT_B10G10R10A2_USCALED), + fmt(VK_FORMAT_A2R10G10B10_SSCALED_PACK32, ISL_FORMAT_B10G10R10A2_SSCALED), + fmt(VK_FORMAT_A2R10G10B10_UINT_PACK32, ISL_FORMAT_B10G10R10A2_UINT), + fmt(VK_FORMAT_A2R10G10B10_SINT_PACK32, ISL_FORMAT_B10G10R10A2_SINT), + fmt(VK_FORMAT_A2B10G10R10_UNORM_PACK32, ISL_FORMAT_R10G10B10A2_UNORM), + fmt(VK_FORMAT_A2B10G10R10_SNORM_PACK32, ISL_FORMAT_R10G10B10A2_SNORM), + fmt(VK_FORMAT_A2B10G10R10_USCALED_PACK32, ISL_FORMAT_R10G10B10A2_USCALED), + fmt(VK_FORMAT_A2B10G10R10_SSCALED_PACK32, ISL_FORMAT_R10G10B10A2_SSCALED), + fmt(VK_FORMAT_A2B10G10R10_UINT_PACK32, ISL_FORMAT_R10G10B10A2_UINT), + fmt(VK_FORMAT_A2B10G10R10_SINT_PACK32, ISL_FORMAT_R10G10B10A2_SINT), + fmt(VK_FORMAT_R16_UNORM, ISL_FORMAT_R16_UNORM), + fmt(VK_FORMAT_R16_SNORM, ISL_FORMAT_R16_SNORM), + fmt(VK_FORMAT_R16_USCALED, ISL_FORMAT_R16_USCALED), + fmt(VK_FORMAT_R16_SSCALED, ISL_FORMAT_R16_SSCALED), + fmt(VK_FORMAT_R16_UINT, ISL_FORMAT_R16_UINT), + fmt(VK_FORMAT_R16_SINT, ISL_FORMAT_R16_SINT), + fmt(VK_FORMAT_R16_SFLOAT, ISL_FORMAT_R16_FLOAT), + fmt(VK_FORMAT_R16G16_UNORM, ISL_FORMAT_R16G16_UNORM), + fmt(VK_FORMAT_R16G16_SNORM, ISL_FORMAT_R16G16_SNORM), + fmt(VK_FORMAT_R16G16_USCALED, ISL_FORMAT_R16G16_USCALED), + fmt(VK_FORMAT_R16G16_SSCALED, ISL_FORMAT_R16G16_SSCALED), + fmt(VK_FORMAT_R16G16_UINT, ISL_FORMAT_R16G16_UINT), + fmt(VK_FORMAT_R16G16_SINT, ISL_FORMAT_R16G16_SINT), + fmt(VK_FORMAT_R16G16_SFLOAT, ISL_FORMAT_R16G16_FLOAT), + fmt(VK_FORMAT_R16G16B16_UNORM, ISL_FORMAT_R16G16B16_UNORM), + fmt(VK_FORMAT_R16G16B16_SNORM, ISL_FORMAT_R16G16B16_SNORM), + fmt(VK_FORMAT_R16G16B16_USCALED, ISL_FORMAT_R16G16B16_USCALED), + fmt(VK_FORMAT_R16G16B16_SSCALED, ISL_FORMAT_R16G16B16_SSCALED), + fmt(VK_FORMAT_R16G16B16_UINT, ISL_FORMAT_R16G16B16_UINT), + fmt(VK_FORMAT_R16G16B16_SINT, ISL_FORMAT_R16G16B16_SINT), + fmt(VK_FORMAT_R16G16B16_SFLOAT, ISL_FORMAT_R16G16B16_FLOAT), + fmt(VK_FORMAT_R16G16B16A16_UNORM, ISL_FORMAT_R16G16B16A16_UNORM), + fmt(VK_FORMAT_R16G16B16A16_SNORM, ISL_FORMAT_R16G16B16A16_SNORM), + fmt(VK_FORMAT_R16G16B16A16_USCALED, ISL_FORMAT_R16G16B16A16_USCALED), + fmt(VK_FORMAT_R16G16B16A16_SSCALED, ISL_FORMAT_R16G16B16A16_SSCALED), + fmt(VK_FORMAT_R16G16B16A16_UINT, ISL_FORMAT_R16G16B16A16_UINT), + fmt(VK_FORMAT_R16G16B16A16_SINT, ISL_FORMAT_R16G16B16A16_SINT), + fmt(VK_FORMAT_R16G16B16A16_SFLOAT, ISL_FORMAT_R16G16B16A16_FLOAT), + fmt(VK_FORMAT_R32_UINT, ISL_FORMAT_R32_UINT,), + fmt(VK_FORMAT_R32_SINT, ISL_FORMAT_R32_SINT,), + fmt(VK_FORMAT_R32_SFLOAT, ISL_FORMAT_R32_FLOAT,), + fmt(VK_FORMAT_R32G32_UINT, ISL_FORMAT_R32G32_UINT,), + fmt(VK_FORMAT_R32G32_SINT, ISL_FORMAT_R32G32_SINT,), + fmt(VK_FORMAT_R32G32_SFLOAT, ISL_FORMAT_R32G32_FLOAT,), + fmt(VK_FORMAT_R32G32B32_UINT, ISL_FORMAT_R32G32B32_UINT,), + fmt(VK_FORMAT_R32G32B32_SINT, ISL_FORMAT_R32G32B32_SINT,), + fmt(VK_FORMAT_R32G32B32_SFLOAT, ISL_FORMAT_R32G32B32_FLOAT,), + fmt(VK_FORMAT_R32G32B32A32_UINT, ISL_FORMAT_R32G32B32A32_UINT,), + fmt(VK_FORMAT_R32G32B32A32_SINT, ISL_FORMAT_R32G32B32A32_SINT,), + fmt(VK_FORMAT_R32G32B32A32_SFLOAT, ISL_FORMAT_R32G32B32A32_FLOAT,), + fmt(VK_FORMAT_R64_UINT, ISL_FORMAT_R64_PASSTHRU), + fmt(VK_FORMAT_R64_SINT, ISL_FORMAT_R64_PASSTHRU), + fmt(VK_FORMAT_R64_SFLOAT, ISL_FORMAT_R64_FLOAT), + fmt(VK_FORMAT_R64G64_UINT, ISL_FORMAT_R64G64_PASSTHRU), + fmt(VK_FORMAT_R64G64_SINT, ISL_FORMAT_R64G64_PASSTHRU), + fmt(VK_FORMAT_R64G64_SFLOAT, ISL_FORMAT_R64G64_FLOAT), + fmt(VK_FORMAT_R64G64B64_UINT, ISL_FORMAT_R64G64B64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64_SINT, ISL_FORMAT_R64G64B64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64_SFLOAT, ISL_FORMAT_R64G64B64_FLOAT), + fmt(VK_FORMAT_R64G64B64A64_UINT, ISL_FORMAT_R64G64B64A64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64A64_SINT, ISL_FORMAT_R64G64B64A64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64A64_SFLOAT, ISL_FORMAT_R64G64B64A64_FLOAT), + fmt(VK_FORMAT_B10G11R11_UFLOAT_PACK32, ISL_FORMAT_R11G11B10_FLOAT), + fmt(VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, ISL_FORMAT_R9G9B9E5_SHAREDEXP), + + fmt(VK_FORMAT_D16_UNORM, ISL_FORMAT_R16_UNORM, .depth_format = D16_UNORM), + fmt(VK_FORMAT_X8_D24_UNORM_PACK32, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .depth_format = D24_UNORM_X8_UINT), + fmt(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT, .depth_format = D32_FLOAT), + fmt(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM_S8_UINT, ISL_FORMAT_R16_UNORM, .depth_format = D16_UNORM, .has_stencil = true), + fmt(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .depth_format = D24_UNORM_X8_UINT, .has_stencil = true), + fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, .depth_format = D32_FLOAT, .has_stencil = true), + + fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK, ISL_FORMAT_DXT1_RGB), + fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK, ISL_FORMAT_DXT1_RGB_SRGB), + fmt(VK_FORMAT_BC1_RGBA_UNORM_BLOCK, ISL_FORMAT_BC1_UNORM), + fmt(VK_FORMAT_BC1_RGBA_SRGB_BLOCK, ISL_FORMAT_BC1_UNORM_SRGB), + fmt(VK_FORMAT_BC2_UNORM_BLOCK, ISL_FORMAT_BC2_UNORM), + fmt(VK_FORMAT_BC2_SRGB_BLOCK, ISL_FORMAT_BC2_UNORM_SRGB), + fmt(VK_FORMAT_BC3_UNORM_BLOCK, ISL_FORMAT_BC3_UNORM), + fmt(VK_FORMAT_BC3_SRGB_BLOCK, ISL_FORMAT_BC3_UNORM_SRGB), + fmt(VK_FORMAT_BC4_UNORM_BLOCK, ISL_FORMAT_BC4_UNORM), + fmt(VK_FORMAT_BC4_SNORM_BLOCK, ISL_FORMAT_BC4_SNORM), + fmt(VK_FORMAT_BC5_UNORM_BLOCK, ISL_FORMAT_BC5_UNORM), + fmt(VK_FORMAT_BC5_SNORM_BLOCK, ISL_FORMAT_BC5_SNORM), + fmt(VK_FORMAT_BC6H_UFLOAT_BLOCK, ISL_FORMAT_BC6H_UF16), + fmt(VK_FORMAT_BC6H_SFLOAT_BLOCK, ISL_FORMAT_BC6H_SF16), + fmt(VK_FORMAT_BC7_UNORM_BLOCK, ISL_FORMAT_BC7_UNORM), + fmt(VK_FORMAT_BC7_SRGB_BLOCK, ISL_FORMAT_BC7_UNORM_SRGB), + fmt(VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, ISL_FORMAT_ETC2_RGB8), + fmt(VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, ISL_FORMAT_ETC2_SRGB8), + fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, ISL_FORMAT_ETC2_RGB8_PTA), + fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, ISL_FORMAT_ETC2_SRGB8_PTA), + fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, ISL_FORMAT_ETC2_EAC_RGBA8), + fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, ISL_FORMAT_ETC2_EAC_SRGB8_A8), + fmt(VK_FORMAT_EAC_R11_UNORM_BLOCK, ISL_FORMAT_EAC_R11), + fmt(VK_FORMAT_EAC_R11_SNORM_BLOCK, ISL_FORMAT_EAC_SIGNED_R11), + fmt(VK_FORMAT_EAC_R11G11_UNORM_BLOCK, ISL_FORMAT_EAC_RG11), + fmt(VK_FORMAT_EAC_R11G11_SNORM_BLOCK, ISL_FORMAT_EAC_SIGNED_RG11), + fmt(VK_FORMAT_ASTC_4x4_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_4x4_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x8_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x8_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x10_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x10_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x10_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x10_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x12_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x12_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_USCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SSCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_UINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_UNORM, ISL_FORMAT_B8G8R8A8_UNORM), + fmt(VK_FORMAT_B8G8R8A8_SNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_USCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SSCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_UINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SRGB, ISL_FORMAT_B8G8R8A8_UNORM_SRGB), +}; + +#undef fmt + +const struct anv_format * +anv_format_for_vk_format(VkFormat format) +{ + return &anv_formats[format]; +} + +/** + * Exactly one bit must be set in \a aspect. + */ +enum isl_format +anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, + VkImageTiling tiling, struct anv_format_swizzle *swizzle) +{ + const struct anv_format *anv_fmt = &anv_formats[format]; + + if (swizzle) + *swizzle = anv_fmt->swizzle; + + switch (aspect) { + case VK_IMAGE_ASPECT_COLOR_BIT: + if (anv_fmt->surface_format == ISL_FORMAT_UNSUPPORTED) { + return ISL_FORMAT_UNSUPPORTED; + } else if (tiling == VK_IMAGE_TILING_OPTIMAL && + !util_is_power_of_two(anv_fmt->isl_layout->bs)) { + /* Tiled formats *must* be power-of-two because we need up upload + * them with the render pipeline. For 3-channel formats, we fix + * this by switching them over to RGBX or RGBA formats under the + * hood. + */ + enum isl_format rgbx = isl_format_rgb_to_rgbx(anv_fmt->surface_format); + if (rgbx != ISL_FORMAT_UNSUPPORTED) + return rgbx; + else + return isl_format_rgb_to_rgba(anv_fmt->surface_format); + } else { + return anv_fmt->surface_format; + } + + case VK_IMAGE_ASPECT_DEPTH_BIT: + case (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT): + assert(anv_fmt->depth_format != 0); + return anv_fmt->surface_format; + + case VK_IMAGE_ASPECT_STENCIL_BIT: + assert(anv_fmt->has_stencil); + return ISL_FORMAT_R8_UINT; + + default: + unreachable("bad VkImageAspect"); + return ISL_FORMAT_UNSUPPORTED; + } +} + +// Format capabilities + +void anv_validate_GetPhysicalDeviceFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat _format, + VkFormatProperties* pFormatProperties) +{ + const struct anv_format *format = anv_format_for_vk_format(_format); + fprintf(stderr, "vkGetFormatProperties(%s)\n", format->name); + anv_GetPhysicalDeviceFormatProperties(physicalDevice, _format, pFormatProperties); +} + +static VkFormatFeatureFlags +get_image_format_properties(int gen, enum isl_format base, + enum isl_format actual, + struct anv_format_swizzle swizzle) +{ + const struct brw_surface_format_info *info = &surface_formats[actual]; + + if (actual == ISL_FORMAT_UNSUPPORTED || !info->exists) + return 0; + + VkFormatFeatureFlags flags = 0; + if (info->sampling <= gen) { + flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_BLIT_SRC_BIT; + + if (info->filtering <= gen) + flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; + } + + /* We can render to swizzled formats. However, if the alpha channel is + * moved, then blending won't work correctly. The PRM tells us + * straight-up not to render to such a surface. + */ + if (info->render_target <= gen && swizzle.a == 3) { + flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_BLIT_DST_BIT; + } + + if (info->alpha_blend <= gen && swizzle.a == 3) + flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; + + /* Load/store is determined based on base format. This prevents RGB + * formats from showing up as load/store capable. + */ + if (isl_is_storage_image_format(base)) + flags |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; + + if (base == ISL_FORMAT_R32_SINT || base == ISL_FORMAT_R32_UINT) + flags |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT; + + return flags; +} + +static VkFormatFeatureFlags +get_buffer_format_properties(int gen, enum isl_format format) +{ + const struct brw_surface_format_info *info = &surface_formats[format]; + + if (format == ISL_FORMAT_UNSUPPORTED || !info->exists) + return 0; + + VkFormatFeatureFlags flags = 0; + if (info->sampling <= gen && !isl_format_is_compressed(format)) + flags |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; + + if (info->input_vb <= gen) + flags |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; + + if (isl_is_storage_image_format(format)) + flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT; + + if (format == ISL_FORMAT_R32_SINT || format == ISL_FORMAT_R32_UINT) + flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT; + + return flags; +} + +static void +anv_physical_device_get_format_properties(struct anv_physical_device *physical_device, + VkFormat format, + VkFormatProperties *out_properties) +{ + int gen = physical_device->info->gen * 10; + if (physical_device->info->is_haswell) + gen += 5; + + VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0; + if (anv_format_is_depth_or_stencil(&anv_formats[format])) { + tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; + if (physical_device->info->gen >= 8) { + tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT; + } + if (anv_formats[format].depth_format) { + tiled |= VK_FORMAT_FEATURE_BLIT_DST_BIT; + } + } else { + enum isl_format linear_fmt, tiled_fmt; + struct anv_format_swizzle linear_swizzle, tiled_swizzle; + linear_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_TILING_LINEAR, &linear_swizzle); + tiled_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_TILING_OPTIMAL, &tiled_swizzle); + + linear = get_image_format_properties(gen, linear_fmt, linear_fmt, + linear_swizzle); + tiled = get_image_format_properties(gen, linear_fmt, tiled_fmt, + tiled_swizzle); + buffer = get_buffer_format_properties(gen, linear_fmt); + + /* XXX: We handle 3-channel formats by switching them out for RGBX or + * RGBA formats behind-the-scenes. This works fine for textures + * because the upload process will fill in the extra channel. + * We could also support it for render targets, but it will take + * substantially more work and we have enough RGBX formats to handle + * what most clients will want. + */ + if (linear_fmt != ISL_FORMAT_UNSUPPORTED && + !util_is_power_of_two(isl_format_layouts[linear_fmt].bs) && + isl_format_rgb_to_rgbx(linear_fmt) == ISL_FORMAT_UNSUPPORTED) { + tiled &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT & + ~VK_FORMAT_FEATURE_BLIT_DST_BIT; + } + } + + out_properties->linearTilingFeatures = linear; + out_properties->optimalTilingFeatures = tiled; + out_properties->bufferFeatures = buffer; + + return; +} + + +void anv_GetPhysicalDeviceFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkFormatProperties* pFormatProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + + anv_physical_device_get_format_properties( + physical_device, + format, + pFormatProperties); +} + +VkResult anv_GetPhysicalDeviceImageFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + VkImageTiling tiling, + VkImageUsageFlags usage, + VkImageCreateFlags createFlags, + VkImageFormatProperties* pImageFormatProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + VkFormatProperties format_props; + VkFormatFeatureFlags format_feature_flags; + VkExtent3D maxExtent; + uint32_t maxMipLevels; + uint32_t maxArraySize; + VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT; + + anv_physical_device_get_format_properties(physical_device, format, + &format_props); + + /* Extract the VkFormatFeatureFlags that are relevant for the queried + * tiling. + */ + if (tiling == VK_IMAGE_TILING_LINEAR) { + format_feature_flags = format_props.linearTilingFeatures; + } else if (tiling == VK_IMAGE_TILING_OPTIMAL) { + format_feature_flags = format_props.optimalTilingFeatures; + } else { + unreachable("bad VkImageTiling"); + } + + switch (type) { + default: + unreachable("bad VkImageType"); + case VK_IMAGE_TYPE_1D: + maxExtent.width = 16384; + maxExtent.height = 1; + maxExtent.depth = 1; + maxMipLevels = 15; /* log2(maxWidth) + 1 */ + maxArraySize = 2048; + sampleCounts = VK_SAMPLE_COUNT_1_BIT; + break; + case VK_IMAGE_TYPE_2D: + /* FINISHME: Does this really differ for cube maps? The documentation + * for RENDER_SURFACE_STATE suggests so. + */ + maxExtent.width = 16384; + maxExtent.height = 16384; + maxExtent.depth = 1; + maxMipLevels = 15; /* log2(maxWidth) + 1 */ + maxArraySize = 2048; + break; + case VK_IMAGE_TYPE_3D: + maxExtent.width = 2048; + maxExtent.height = 2048; + maxExtent.depth = 2048; + maxMipLevels = 12; /* log2(maxWidth) + 1 */ + maxArraySize = 1; + break; + } + + if (tiling == VK_IMAGE_TILING_OPTIMAL && + type == VK_IMAGE_TYPE_2D && + (format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) && + !(createFlags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && + !(usage & VK_IMAGE_USAGE_STORAGE_BIT)) { + sampleCounts = isl_device_get_sample_counts(&physical_device->isl_dev); + } + + if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { + /* Meta implements transfers by sampling from the source image. */ + if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + goto unsupported; + } + } + +#if 0 + if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { + if (anv_format_for_vk_format(format)->has_stencil) { + /* Not yet implemented because copying to a W-tiled surface is crazy + * hard. + */ + anv_finishme("support VK_IMAGE_USAGE_TRANSFER_DST_BIT for " + "stencil format"); + goto unsupported; + } + } +#endif + + if (usage & VK_IMAGE_USAGE_SAMPLED_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_STORAGE_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) { + /* Nothing to check. */ + } + + if (usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) { + /* Ignore this flag because it was removed from the + * provisional_I_20150910 header. + */ + } + + *pImageFormatProperties = (VkImageFormatProperties) { + .maxExtent = maxExtent, + .maxMipLevels = maxMipLevels, + .maxArrayLayers = maxArraySize, + .sampleCounts = sampleCounts, + + /* FINISHME: Accurately calculate + * VkImageFormatProperties::maxResourceSize. + */ + .maxResourceSize = UINT32_MAX, + }; + + return VK_SUCCESS; + +unsupported: + *pImageFormatProperties = (VkImageFormatProperties) { + .maxExtent = { 0, 0, 0 }, + .maxMipLevels = 0, + .maxArrayLayers = 0, + .sampleCounts = 0, + .maxResourceSize = 0, + }; + + return VK_SUCCESS; +} + +void anv_GetPhysicalDeviceSparseImageFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + uint32_t samples, + VkImageUsageFlags usage, + VkImageTiling tiling, + uint32_t* pNumProperties, + VkSparseImageFormatProperties* pProperties) +{ + /* Sparse images are not yet supported. */ + *pNumProperties = 0; +} diff --git a/src/vulkan/anv_gem.c b/src/vulkan/anv_gem.c new file mode 100644 index 00000000000..0a7be353327 --- /dev/null +++ b/src/vulkan/anv_gem.c @@ -0,0 +1,358 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define _DEFAULT_SOURCE + +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s))) + +static int +anv_ioctl(int fd, unsigned long request, void *arg) +{ + int ret; + + do { + ret = ioctl(fd, request, arg); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + return ret; +} + +/** + * Wrapper around DRM_IOCTL_I915_GEM_CREATE. + * + * Return gem handle, or 0 on failure. Gem handles are never 0. + */ +uint32_t +anv_gem_create(struct anv_device *device, size_t size) +{ + struct drm_i915_gem_create gem_create; + int ret; + + VG_CLEAR(gem_create); + gem_create.size = size; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create); + if (ret != 0) { + /* FIXME: What do we do if this fails? */ + return 0; + } + + return gem_create.handle; +} + +void +anv_gem_close(struct anv_device *device, uint32_t gem_handle) +{ + struct drm_gem_close close; + + VG_CLEAR(close); + close.handle = gem_handle; + anv_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close); +} + +/** + * Wrapper around DRM_IOCTL_I915_GEM_MMAP. + */ +void* +anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, + uint64_t offset, uint64_t size, uint32_t flags) +{ + struct drm_i915_gem_mmap gem_mmap; + int ret; + + gem_mmap.handle = gem_handle; + VG_CLEAR(gem_mmap.pad); + gem_mmap.offset = offset; + gem_mmap.size = size; + VG_CLEAR(gem_mmap.addr_ptr); + gem_mmap.flags = flags; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap); + if (ret != 0) { + /* FIXME: Is NULL the right error return? Cf MAP_INVALID */ + return NULL; + } + + VG(VALGRIND_MALLOCLIKE_BLOCK(gem_mmap.addr_ptr, gem_mmap.size, 0, 1)); + return (void *)(uintptr_t) gem_mmap.addr_ptr; +} + +/* This is just a wrapper around munmap, but it also notifies valgrind that + * this map is no longer valid. Pair this with anv_gem_mmap(). + */ +void +anv_gem_munmap(void *p, uint64_t size) +{ + VG(VALGRIND_FREELIKE_BLOCK(p, 0)); + munmap(p, size); +} + +uint32_t +anv_gem_userptr(struct anv_device *device, void *mem, size_t size) +{ + struct drm_i915_gem_userptr userptr; + int ret; + + VG_CLEAR(userptr); + userptr.user_ptr = (__u64)((unsigned long) mem); + userptr.user_size = size; + userptr.flags = 0; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); + if (ret == -1) + return 0; + + return userptr.handle; +} + +int +anv_gem_set_caching(struct anv_device *device, + uint32_t gem_handle, uint32_t caching) +{ + struct drm_i915_gem_caching gem_caching; + + VG_CLEAR(gem_caching); + gem_caching.handle = gem_handle; + gem_caching.caching = caching; + + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &gem_caching); +} + +int +anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, + uint32_t read_domains, uint32_t write_domain) +{ + struct drm_i915_gem_set_domain gem_set_domain; + + VG_CLEAR(gem_set_domain); + gem_set_domain.handle = gem_handle; + gem_set_domain.read_domains = read_domains; + gem_set_domain.write_domain = write_domain; + + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &gem_set_domain); +} + +/** + * On error, \a timeout_ns holds the remaining time. + */ +int +anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns) +{ + struct drm_i915_gem_wait wait; + int ret; + + VG_CLEAR(wait); + wait.bo_handle = gem_handle; + wait.timeout_ns = *timeout_ns; + wait.flags = 0; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); + *timeout_ns = wait.timeout_ns; + + return ret; +} + +int +anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf) +{ + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); +} + +int +anv_gem_set_tiling(struct anv_device *device, + uint32_t gem_handle, uint32_t stride, uint32_t tiling) +{ + struct drm_i915_gem_set_tiling set_tiling; + int ret; + + /* set_tiling overwrites the input on the error path, so we have to open + * code anv_ioctl. + */ + + do { + VG_CLEAR(set_tiling); + set_tiling.handle = gem_handle; + set_tiling.tiling_mode = tiling; + set_tiling.stride = stride; + + ret = ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + return ret; +} + +int +anv_gem_get_param(int fd, uint32_t param) +{ + drm_i915_getparam_t gp; + int ret, tmp; + + VG_CLEAR(gp); + gp.param = param; + gp.value = &tmp; + ret = anv_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret == 0) + return tmp; + + return 0; +} + +bool +anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) +{ + struct drm_gem_close close; + int ret; + + struct drm_i915_gem_create gem_create; + VG_CLEAR(gem_create); + gem_create.size = 4096; + + if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create)) { + assert(!"Failed to create GEM BO"); + return false; + } + + bool swizzled = false; + + /* set_tiling overwrites the input on the error path, so we have to open + * code anv_ioctl. + */ + struct drm_i915_gem_set_tiling set_tiling; + do { + VG_CLEAR(set_tiling); + set_tiling.handle = gem_create.handle; + set_tiling.tiling_mode = tiling; + set_tiling.stride = tiling == I915_TILING_X ? 512 : 128; + + ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + if (ret != 0) { + assert(!"Failed to set BO tiling"); + goto close_and_return; + } + + struct drm_i915_gem_get_tiling get_tiling; + VG_CLEAR(get_tiling); + get_tiling.handle = gem_create.handle; + + if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling)) { + assert(!"Failed to get BO tiling"); + goto close_and_return; + } + + swizzled = get_tiling.swizzle_mode != I915_BIT_6_SWIZZLE_NONE; + +close_and_return: + + VG_CLEAR(close); + close.handle = gem_create.handle; + anv_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close); + + return swizzled; +} + +int +anv_gem_create_context(struct anv_device *device) +{ + struct drm_i915_gem_context_create create; + int ret; + + VG_CLEAR(create); + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); + if (ret == -1) + return -1; + + return create.ctx_id; +} + +int +anv_gem_destroy_context(struct anv_device *device, int context) +{ + struct drm_i915_gem_context_destroy destroy; + + VG_CLEAR(destroy); + destroy.ctx_id = context; + + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy); +} + +int +anv_gem_get_aperture(int fd, uint64_t *size) +{ + struct drm_i915_gem_get_aperture aperture; + int ret; + + VG_CLEAR(aperture); + ret = anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + if (ret == -1) + return -1; + + *size = aperture.aper_available_size; + + return 0; +} + +int +anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle) +{ + struct drm_prime_handle args; + int ret; + + VG_CLEAR(args); + args.handle = gem_handle; + args.flags = DRM_CLOEXEC; + + ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); + if (ret == -1) + return -1; + + return args.fd; +} + +uint32_t +anv_gem_fd_to_handle(struct anv_device *device, int fd) +{ + struct drm_prime_handle args; + int ret; + + VG_CLEAR(args); + args.fd = fd; + + ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args); + if (ret == -1) + return 0; + + return args.handle; +} diff --git a/src/vulkan/anv_gem_stubs.c b/src/vulkan/anv_gem_stubs.c new file mode 100644 index 00000000000..3204fefb28e --- /dev/null +++ b/src/vulkan/anv_gem_stubs.c @@ -0,0 +1,159 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define _DEFAULT_SOURCE + +#include <linux/memfd.h> +#include <sys/mman.h> +#include <sys/syscall.h> + +#include "anv_private.h" + +static inline int +memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +uint32_t +anv_gem_create(struct anv_device *device, size_t size) +{ + int fd = memfd_create("fake bo", MFD_CLOEXEC); + if (fd == -1) + return 0; + + assert(fd != 0); + + if (ftruncate(fd, size) == -1) + return 0; + + return fd; +} + +void +anv_gem_close(struct anv_device *device, uint32_t gem_handle) +{ + close(gem_handle); +} + +void* +anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, + uint64_t offset, uint64_t size, uint32_t flags) +{ + /* Ignore flags, as they're specific to I915_GEM_MMAP. */ + (void) flags; + + return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + gem_handle, offset); +} + +/* This is just a wrapper around munmap, but it also notifies valgrind that + * this map is no longer valid. Pair this with anv_gem_mmap(). + */ +void +anv_gem_munmap(void *p, uint64_t size) +{ + munmap(p, size); +} + +uint32_t +anv_gem_userptr(struct anv_device *device, void *mem, size_t size) +{ + return -1; +} + +int +anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns) +{ + return 0; +} + +int +anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf) +{ + return 0; +} + +int +anv_gem_set_tiling(struct anv_device *device, + uint32_t gem_handle, uint32_t stride, uint32_t tiling) +{ + return 0; +} + +int +anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, + uint32_t caching) +{ + return 0; +} + +int +anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, + uint32_t read_domains, uint32_t write_domain) +{ + return 0; +} + +int +anv_gem_get_param(int fd, uint32_t param) +{ + unreachable("Unused"); +} + +bool +anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) +{ + unreachable("Unused"); +} + +int +anv_gem_create_context(struct anv_device *device) +{ + unreachable("Unused"); +} + +int +anv_gem_destroy_context(struct anv_device *device, int context) +{ + unreachable("Unused"); +} + +int +anv_gem_get_aperture(int fd, uint64_t *size) +{ + unreachable("Unused"); +} + +int +anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle) +{ + unreachable("Unused"); +} + +uint32_t +anv_gem_fd_to_handle(struct anv_device *device, int fd) +{ + unreachable("Unused"); +} diff --git a/src/vulkan/anv_gen_macros.h b/src/vulkan/anv_gen_macros.h new file mode 100644 index 00000000000..ef2ecd55a9b --- /dev/null +++ b/src/vulkan/anv_gen_macros.h @@ -0,0 +1,146 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +/* Macros for handling per-gen compilation. + * + * The prefixing macros GENX() and genX() automatically prefix whatever you + * give them by GENX_ or genX_ where X is the gen number. + * + * You can declare a function to be used on some range of gens like this: + * + * GENX_FUNC(GEN7, GEN75) void + * genX(my_function_name)(args...) + * { + * // Do stuff + * } + * + * If the file is compiled for any set of gens containing gen7 and gen75, + * the function will effectively only get compiled twice as + * gen7_my_function_nmae and gen75_my_function_name. The function has to + * be compilable on all gens, but it will become a static inline that gets + * discarded by the compiler on all gens not in range. + * + * You can do pseudo-runtime checks in your function such as + * + * if (ANV_GEN > 8 || ANV_IS_HASWELL) { + * // Do something + * } + * + * The contents of the if statement must be valid regardless of gen, but + * the if will get compiled away on everything except haswell. + * + * For places where you really do have a compile-time conflict, you can + * use preprocessor logic: + * + * #if (ANV_GEN > 8 || ANV_IS_HASWELL) + * // Do something + * #endif + * + * However, it is strongly recommended that the former be used whenever + * possible. + */ + +/* Base macro defined on the command line. If we don't have this, we can't + * do anything. + */ +#ifdef ANV_GENx10 + +/* Gen checking macros */ +#define ANV_GEN ((ANV_GENx10) / 10) +#define ANV_IS_HASWELL ((ANV_GENx10) == 75) + +/* Prefixing macros */ +#if (ANV_GENx10 == 70) +# define GENX(X) GEN7_##X +# define genX(x) gen7_##x +#elif (ANV_GENx10 == 75) +# define GENX(X) GEN75_##X +# define genX(x) gen75_##x +#elif (ANV_GENx10 == 80) +# define GENX(X) GEN8_##X +# define genX(x) gen8_##x +#elif (ANV_GENx10 == 90) +# define GENX(X) GEN9_##X +# define genX(x) gen9_##x +#else +# error "Need to add prefixing macros for your gen" +#endif + +/* Macros for comparing gens */ +#if (ANV_GENx10 >= 70) +#define __ANV_GEN_GE_GEN7(T, F) T +#else +#define __ANV_GEN_GE_GEN7(T, F) F +#endif + +#if (ANV_GENx10 <= 70) +#define __ANV_GEN_LE_GEN7(T, F) T +#else +#define __ANV_GEN_LE_GEN7(T, F) F +#endif + +#if (ANV_GENx10 >= 75) +#define __ANV_GEN_GE_GEN75(T, F) T +#else +#define __ANV_GEN_GE_GEN75(T, F) F +#endif + +#if (ANV_GENx10 <= 75) +#define __ANV_GEN_LE_GEN75(T, F) T +#else +#define __ANV_GEN_LE_GEN75(T, F) F +#endif + +#if (ANV_GENx10 >= 80) +#define __ANV_GEN_GE_GEN8(T, F) T +#else +#define __ANV_GEN_GE_GEN8(T, F) F +#endif + +#if (ANV_GENx10 <= 80) +#define __ANV_GEN_LE_GEN8(T, F) T +#else +#define __ANV_GEN_LE_GEN8(T, F) F +#endif + +#if (ANV_GENx10 >= 90) +#define __ANV_GEN_GE_GEN9(T, F) T +#else +#define __ANV_GEN_GE_GEN9(T, F) F +#endif + +#if (ANV_GENx10 <= 90) +#define __ANV_GEN_LE_GEN9(T, F) T +#else +#define __ANV_GEN_LE_GEN9(T, F) F +#endif + +#define __ANV_GEN_IN_RANGE(start, end, T, F) \ + __ANV_GEN_GE_##start(__ANV_GEN_LE_##end(T, F), F) + +/* Declares a function as static inlind if it's not in range */ +#define GENX_FUNC(start, end) __ANV_GEN_IN_RANGE(start, end, , static inline) + +#endif /* ANV_GENx10 */ diff --git a/src/vulkan/anv_icd.json.in b/src/vulkan/anv_icd.json.in new file mode 100644 index 00000000000..ad069b3e2ff --- /dev/null +++ b/src/vulkan/anv_icd.json.in @@ -0,0 +1,8 @@ +{ + "file_format_version": "1.0.0", + "ICD": { + "library_path": "@abs_top_builddir@/@LIB_DIR@/libvulkan.so.0.0.0", + "abi_versions": "0.210.1" + } +} + diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c new file mode 100644 index 00000000000..da71406cf27 --- /dev/null +++ b/src/vulkan/anv_image.c @@ -0,0 +1,880 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +/** + * The \a format argument is required and overrides any format found in struct + * anv_image_create_info. Exactly one bit must be set in \a aspect. + */ +static isl_surf_usage_flags_t +choose_isl_surf_usage(const struct anv_image_create_info *info, + VkImageAspectFlags aspect) +{ + const VkImageCreateInfo *vk_info = info->vk_info; + isl_surf_usage_flags_t isl_flags = 0; + + /* FINISHME: Support aux surfaces */ + isl_flags |= ISL_SURF_USAGE_DISABLE_AUX_BIT; + + if (vk_info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) + isl_flags |= ISL_SURF_USAGE_TEXTURE_BIT; + + if (vk_info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) + isl_flags |= ISL_SURF_USAGE_TEXTURE_BIT; + + if (vk_info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) + isl_flags |= ISL_SURF_USAGE_RENDER_TARGET_BIT; + + if (vk_info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) + isl_flags |= ISL_SURF_USAGE_CUBE_BIT; + + if (vk_info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + switch (aspect) { + default: + unreachable("bad VkImageAspect"); + case VK_IMAGE_ASPECT_DEPTH_BIT: + isl_flags |= ISL_SURF_USAGE_DEPTH_BIT; + break; + case VK_IMAGE_ASPECT_STENCIL_BIT: + isl_flags |= ISL_SURF_USAGE_STENCIL_BIT; + break; + } + } + + if (vk_info->usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { + /* Meta implements transfers by sampling from the source image. */ + isl_flags |= ISL_SURF_USAGE_TEXTURE_BIT; + } + + if (vk_info->usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { + /* Meta implements transfers by rendering into the destination image. */ + isl_flags |= ISL_SURF_USAGE_RENDER_TARGET_BIT; + } + + return isl_flags; +} + +/** + * Exactly one bit must be set in \a aspect. + */ +static struct anv_surface * +get_surface(struct anv_image *image, VkImageAspectFlags aspect) +{ + switch (aspect) { + default: + unreachable("bad VkImageAspect"); + case VK_IMAGE_ASPECT_COLOR_BIT: + return &image->color_surface; + case VK_IMAGE_ASPECT_DEPTH_BIT: + return &image->depth_surface; + case VK_IMAGE_ASPECT_STENCIL_BIT: + return &image->stencil_surface; + } +} + +/** + * Initialize the anv_image::*_surface selected by \a aspect. Then update the + * image's memory requirements (that is, the image's size and alignment). + * + * Exactly one bit must be set in \a aspect. + */ +static VkResult +make_surface(const struct anv_device *dev, + struct anv_image *image, + const struct anv_image_create_info *anv_info, + VkImageAspectFlags aspect) +{ + const VkImageCreateInfo *vk_info = anv_info->vk_info; + bool ok UNUSED; + + static const enum isl_surf_dim vk_to_isl_surf_dim[] = { + [VK_IMAGE_TYPE_1D] = ISL_SURF_DIM_1D, + [VK_IMAGE_TYPE_2D] = ISL_SURF_DIM_2D, + [VK_IMAGE_TYPE_3D] = ISL_SURF_DIM_3D, + }; + + isl_tiling_flags_t tiling_flags = anv_info->isl_tiling_flags; + if (vk_info->tiling == VK_IMAGE_TILING_LINEAR) + tiling_flags &= ISL_TILING_LINEAR_BIT; + + struct anv_surface *anv_surf = get_surface(image, aspect); + + ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl, + .dim = vk_to_isl_surf_dim[vk_info->imageType], + .format = anv_get_isl_format(vk_info->format, aspect, + vk_info->tiling, NULL), + .width = vk_info->extent.width, + .height = vk_info->extent.height, + .depth = vk_info->extent.depth, + .levels = vk_info->mipLevels, + .array_len = vk_info->arrayLayers, + .samples = vk_info->samples, + .min_alignment = 0, + .min_pitch = 0, + .usage = choose_isl_surf_usage(anv_info, aspect), + .tiling_flags = tiling_flags); + + /* isl_surf_init() will fail only if provided invalid input. Invalid input + * is illegal in Vulkan. + */ + assert(ok); + + anv_surf->offset = align_u32(image->size, anv_surf->isl.alignment); + image->size = anv_surf->offset + anv_surf->isl.size; + image->alignment = MAX(image->alignment, anv_surf->isl.alignment); + + return VK_SUCCESS; +} + +static VkImageUsageFlags +anv_image_get_full_usage(const VkImageCreateInfo *info) +{ + VkImageUsageFlags usage = info->usage; + + if (info->samples > 1 && + (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) { + /* Meta will resolve the image by binding it as a texture. */ + usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + } + + if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { + /* Meta will transfer from the image by binding it as a texture. */ + usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + } + + if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { + /* Meta will transfer to the image by binding it as a color attachment, + * even if the image format is not a color format. + */ + usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + } + + return usage; +} + +VkResult +anv_image_create(VkDevice _device, + const struct anv_image_create_info *create_info, + const VkAllocationCallbacks* alloc, + VkImage *pImage) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + const VkImageCreateInfo *pCreateInfo = create_info->vk_info; + struct anv_image *image = NULL; + VkResult r; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); + + anv_assert(pCreateInfo->mipLevels > 0); + anv_assert(pCreateInfo->arrayLayers > 0); + anv_assert(pCreateInfo->samples > 0); + anv_assert(pCreateInfo->extent.width > 0); + anv_assert(pCreateInfo->extent.height > 0); + anv_assert(pCreateInfo->extent.depth > 0); + + image = anv_alloc2(&device->alloc, alloc, sizeof(*image), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!image) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + memset(image, 0, sizeof(*image)); + image->type = pCreateInfo->imageType; + image->extent = pCreateInfo->extent; + image->vk_format = pCreateInfo->format; + image->format = anv_format_for_vk_format(pCreateInfo->format); + image->levels = pCreateInfo->mipLevels; + image->array_size = pCreateInfo->arrayLayers; + image->samples = pCreateInfo->samples; + image->usage = anv_image_get_full_usage(pCreateInfo); + image->tiling = pCreateInfo->tiling; + + if (likely(anv_format_is_color(image->format))) { + r = make_surface(device, image, create_info, + VK_IMAGE_ASPECT_COLOR_BIT); + if (r != VK_SUCCESS) + goto fail; + } else { + if (image->format->depth_format) { + r = make_surface(device, image, create_info, + VK_IMAGE_ASPECT_DEPTH_BIT); + if (r != VK_SUCCESS) + goto fail; + } + + if (image->format->has_stencil) { + r = make_surface(device, image, create_info, + VK_IMAGE_ASPECT_STENCIL_BIT); + if (r != VK_SUCCESS) + goto fail; + } + } + + *pImage = anv_image_to_handle(image); + + return VK_SUCCESS; + +fail: + if (image) + anv_free2(&device->alloc, alloc, image); + + return r; +} + +VkResult +anv_CreateImage(VkDevice device, + const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImage *pImage) +{ + return anv_image_create(device, + &(struct anv_image_create_info) { + .vk_info = pCreateInfo, + .isl_tiling_flags = ISL_TILING_ANY_MASK, + }, + pAllocator, + pImage); +} + +void +anv_DestroyImage(VkDevice _device, VkImage _image, + const VkAllocationCallbacks *pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + anv_free2(&device->alloc, pAllocator, anv_image_from_handle(_image)); +} + +static void +anv_surface_get_subresource_layout(struct anv_image *image, + struct anv_surface *surface, + const VkImageSubresource *subresource, + VkSubresourceLayout *layout) +{ + /* If we are on a non-zero mip level or array slice, we need to + * calculate a real offset. + */ + anv_assert(subresource->mipLevel == 0); + anv_assert(subresource->arrayLayer == 0); + + layout->offset = surface->offset; + layout->rowPitch = surface->isl.row_pitch; + layout->depthPitch = isl_surf_get_array_pitch(&surface->isl); + layout->arrayPitch = isl_surf_get_array_pitch(&surface->isl); + layout->size = surface->isl.size; +} + +void anv_GetImageSubresourceLayout( + VkDevice device, + VkImage _image, + const VkImageSubresource* pSubresource, + VkSubresourceLayout* pLayout) +{ + ANV_FROM_HANDLE(anv_image, image, _image); + + assert(__builtin_popcount(pSubresource->aspectMask) == 1); + + switch (pSubresource->aspectMask) { + case VK_IMAGE_ASPECT_COLOR_BIT: + anv_surface_get_subresource_layout(image, &image->color_surface, + pSubresource, pLayout); + break; + case VK_IMAGE_ASPECT_DEPTH_BIT: + anv_surface_get_subresource_layout(image, &image->depth_surface, + pSubresource, pLayout); + break; + case VK_IMAGE_ASPECT_STENCIL_BIT: + anv_surface_get_subresource_layout(image, &image->stencil_surface, + pSubresource, pLayout); + break; + default: + assert(!"Invalid image aspect"); + } +} + +VkResult +anv_validate_CreateImageView(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImageView *pView) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *subresource; + const struct anv_format *view_format_info; + + /* Validate structure type before dereferencing it. */ + assert(pCreateInfo); + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO); + subresource = &pCreateInfo->subresourceRange; + + /* Validate viewType is in range before using it. */ + assert(pCreateInfo->viewType >= VK_IMAGE_VIEW_TYPE_BEGIN_RANGE); + assert(pCreateInfo->viewType <= VK_IMAGE_VIEW_TYPE_END_RANGE); + + /* Validate format is in range before using it. */ + assert(pCreateInfo->format >= VK_FORMAT_BEGIN_RANGE); + assert(pCreateInfo->format <= VK_FORMAT_END_RANGE); + view_format_info = anv_format_for_vk_format(pCreateInfo->format); + + /* Validate channel swizzles. */ + assert(pCreateInfo->components.r >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.r <= VK_COMPONENT_SWIZZLE_END_RANGE); + assert(pCreateInfo->components.g >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.g <= VK_COMPONENT_SWIZZLE_END_RANGE); + assert(pCreateInfo->components.b >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.b <= VK_COMPONENT_SWIZZLE_END_RANGE); + assert(pCreateInfo->components.a >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.a <= VK_COMPONENT_SWIZZLE_END_RANGE); + + /* Validate subresource. */ + assert(subresource->aspectMask != 0); + assert(subresource->levelCount > 0); + assert(subresource->layerCount > 0); + assert(subresource->baseMipLevel < image->levels); + assert(subresource->baseMipLevel + subresource->levelCount <= image->levels); + assert(subresource->baseArrayLayer < image->array_size); + assert(subresource->baseArrayLayer + subresource->layerCount <= image->array_size); + assert(pView); + + const VkImageAspectFlags ds_flags = VK_IMAGE_ASPECT_DEPTH_BIT + | VK_IMAGE_ASPECT_STENCIL_BIT; + + /* Validate format. */ + if (subresource->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + assert(subresource->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + assert(!image->format->depth_format); + assert(!image->format->has_stencil); + assert(!view_format_info->depth_format); + assert(!view_format_info->has_stencil); + assert(view_format_info->isl_layout->bs == + image->format->isl_layout->bs); + } else if (subresource->aspectMask & ds_flags) { + assert((subresource->aspectMask & ~ds_flags) == 0); + + if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { + assert(image->format->depth_format); + assert(view_format_info->depth_format); + assert(view_format_info->isl_layout->bs == + image->format->isl_layout->bs); + } + + if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { + /* FINISHME: Is it legal to have an R8 view of S8? */ + assert(image->format->has_stencil); + assert(view_format_info->has_stencil); + } + } else { + assert(!"bad VkImageSubresourceRange::aspectFlags"); + } + + return anv_CreateImageView(_device, pCreateInfo, pAllocator, pView); +} + +void +anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage) +{ + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + gen75_fill_image_surface_state(device, state.map, iview, + pCreateInfo, usage); + else + gen7_fill_image_surface_state(device, state.map, iview, + pCreateInfo, usage); + break; + case 8: + gen8_fill_image_surface_state(device, state.map, iview, + pCreateInfo, usage); + break; + case 9: + gen9_fill_image_surface_state(device, state.map, iview, + pCreateInfo, usage); + break; + default: + unreachable("unsupported gen\n"); + } + + if (!device->info.has_llc) + anv_state_clflush(state); +} + +static struct anv_state +alloc_surface_state(struct anv_device *device, + struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer) { + return anv_cmd_buffer_alloc_surface_state(cmd_buffer); + } else { + return anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } +} + +static bool +has_matching_storage_typed_format(const struct anv_device *device, + enum isl_format format) +{ + return (isl_format_get_layout(format)->bs <= 4 || + (isl_format_get_layout(format)->bs <= 8 && + (device->info.gen >= 8 || device->info.is_haswell)) || + device->info.gen >= 9); +} + +static VkComponentSwizzle +remap_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component, + struct anv_format_swizzle format_swizzle) +{ + if (swizzle == VK_COMPONENT_SWIZZLE_IDENTITY) + swizzle = component; + + switch (swizzle) { + case VK_COMPONENT_SWIZZLE_ZERO: + return VK_COMPONENT_SWIZZLE_ZERO; + case VK_COMPONENT_SWIZZLE_ONE: + return VK_COMPONENT_SWIZZLE_ONE; + case VK_COMPONENT_SWIZZLE_R: + return VK_COMPONENT_SWIZZLE_R + format_swizzle.r; + case VK_COMPONENT_SWIZZLE_G: + return VK_COMPONENT_SWIZZLE_R + format_swizzle.g; + case VK_COMPONENT_SWIZZLE_B: + return VK_COMPONENT_SWIZZLE_R + format_swizzle.b; + case VK_COMPONENT_SWIZZLE_A: + return VK_COMPONENT_SWIZZLE_R + format_swizzle.a; + default: + unreachable("Invalid swizzle"); + } +} + +void +anv_image_view_init(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer, + uint32_t offset) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + VkImageViewCreateInfo mCreateInfo; + memcpy(&mCreateInfo, pCreateInfo, sizeof(VkImageViewCreateInfo)); + + assert(range->layerCount > 0); + assert(range->baseMipLevel < image->levels); + assert(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)); + + switch (image->type) { + default: + unreachable("bad VkImageType"); + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + assert(range->baseArrayLayer + range->layerCount - 1 <= image->array_size); + break; + case VK_IMAGE_TYPE_3D: + assert(range->baseArrayLayer + range->layerCount - 1 + <= anv_minify(image->extent.depth, range->baseMipLevel)); + break; + } + + struct anv_surface *surface = + anv_image_get_surface_for_aspect_mask(image, range->aspectMask); + + iview->image = image; + iview->bo = image->bo; + iview->offset = image->offset + surface->offset + offset; + + iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; + iview->vk_format = pCreateInfo->format; + + struct anv_format_swizzle swizzle; + iview->format = anv_get_isl_format(pCreateInfo->format, iview->aspect_mask, + image->tiling, &swizzle); + iview->swizzle.r = remap_swizzle(pCreateInfo->components.r, + VK_COMPONENT_SWIZZLE_R, swizzle); + iview->swizzle.g = remap_swizzle(pCreateInfo->components.g, + VK_COMPONENT_SWIZZLE_G, swizzle); + iview->swizzle.b = remap_swizzle(pCreateInfo->components.b, + VK_COMPONENT_SWIZZLE_B, swizzle); + iview->swizzle.a = remap_swizzle(pCreateInfo->components.a, + VK_COMPONENT_SWIZZLE_A, swizzle); + + iview->base_layer = range->baseArrayLayer; + iview->base_mip = range->baseMipLevel; + + if (!isl_format_is_compressed(iview->format) && + isl_format_is_compressed(image->format->surface_format)) { + /* Scale the ImageView extent by the backing Image. This is used + * internally when an uncompressed ImageView is created on a + * compressed Image. The ImageView can therefore be used for copying + * data from a source Image to a destination Image. + */ + const struct isl_format_layout * isl_layout = image->format->isl_layout; + + iview->level_0_extent.depth = anv_minify(image->extent.depth, range->baseMipLevel); + iview->level_0_extent.depth = DIV_ROUND_UP(iview->level_0_extent.depth, isl_layout->bd); + + iview->level_0_extent.height = isl_surf_get_array_pitch_el_rows(&surface->isl) * image->array_size; + iview->level_0_extent.width = isl_surf_get_row_pitch_el(&surface->isl); + mCreateInfo.subresourceRange.baseMipLevel = 0; + mCreateInfo.subresourceRange.baseArrayLayer = 0; + } else { + iview->level_0_extent.width = image->extent.width; + iview->level_0_extent.height = image->extent.height; + iview->level_0_extent.depth = image->extent.depth; + } + + iview->extent = (VkExtent3D) { + .width = anv_minify(iview->level_0_extent.width , range->baseMipLevel), + .height = anv_minify(iview->level_0_extent.height, range->baseMipLevel), + .depth = anv_minify(iview->level_0_extent.depth , range->baseMipLevel), + }; + + if (image->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { + iview->sampler_surface_state = alloc_surface_state(device, cmd_buffer); + + anv_fill_image_surface_state(device, iview->sampler_surface_state, + iview, &mCreateInfo, + VK_IMAGE_USAGE_SAMPLED_BIT); + } else { + iview->sampler_surface_state.alloc_size = 0; + } + + if (image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); + + anv_fill_image_surface_state(device, iview->color_rt_surface_state, + iview, &mCreateInfo, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + } else { + iview->color_rt_surface_state.alloc_size = 0; + } + + if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) { + iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); + + if (has_matching_storage_typed_format(device, iview->format)) + anv_fill_image_surface_state(device, iview->storage_surface_state, + iview, &mCreateInfo, + VK_IMAGE_USAGE_STORAGE_BIT); + else + anv_fill_buffer_surface_state(device, iview->storage_surface_state, + ISL_FORMAT_RAW, + iview->offset, + iview->bo->size - iview->offset, 1); + + } else { + iview->storage_surface_state.alloc_size = 0; + } +} + +VkResult +anv_CreateImageView(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImageView *pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_image_view *view; + + view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_image_view_init(view, device, pCreateInfo, NULL, 0); + + *pView = anv_image_view_to_handle(view); + + return VK_SUCCESS; +} + +void +anv_DestroyImageView(VkDevice _device, VkImageView _iview, + const VkAllocationCallbacks *pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_image_view, iview, _iview); + + if (iview->color_rt_surface_state.alloc_size > 0) { + anv_state_pool_free(&device->surface_state_pool, + iview->color_rt_surface_state); + } + + if (iview->sampler_surface_state.alloc_size > 0) { + anv_state_pool_free(&device->surface_state_pool, + iview->sampler_surface_state); + } + + if (iview->storage_surface_state.alloc_size > 0) { + anv_state_pool_free(&device->surface_state_pool, + iview->storage_surface_state); + } + + anv_free2(&device->alloc, pAllocator, iview); +} + +VkResult +anv_CreateBufferView(VkDevice _device, + const VkBufferViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBufferView *pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer); + struct anv_buffer_view *view; + + view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!view) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + const struct anv_format *format = + anv_format_for_vk_format(pCreateInfo->format); + + view->format = format->surface_format; + view->bo = buffer->bo; + view->offset = buffer->offset + pCreateInfo->offset; + view->range = pCreateInfo->range == VK_WHOLE_SIZE ? + buffer->size - view->offset : pCreateInfo->range; + + if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) { + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + + anv_fill_buffer_surface_state(device, view->surface_state, + view->format, + view->offset, view->range, + format->isl_layout->bs); + } else { + view->surface_state = (struct anv_state){ 0 }; + } + + if (buffer->usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) { + view->storage_surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + + enum isl_format storage_format = + has_matching_storage_typed_format(device, view->format) ? + isl_lower_storage_image_format(&device->isl_dev, view->format) : + ISL_FORMAT_RAW; + + anv_fill_buffer_surface_state(device, view->storage_surface_state, + storage_format, + view->offset, view->range, + (storage_format == ISL_FORMAT_RAW ? 1 : + format->isl_layout->bs)); + + } else { + view->storage_surface_state = (struct anv_state){ 0 }; + } + + *pView = anv_buffer_view_to_handle(view); + + return VK_SUCCESS; +} + +void +anv_DestroyBufferView(VkDevice _device, VkBufferView bufferView, + const VkAllocationCallbacks *pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer_view, view, bufferView); + + if (view->surface_state.alloc_size > 0) + anv_state_pool_free(&device->surface_state_pool, + view->surface_state); + + if (view->storage_surface_state.alloc_size > 0) + anv_state_pool_free(&device->surface_state_pool, + view->storage_surface_state); + + anv_free2(&device->alloc, pAllocator, view); +} + +struct anv_surface * +anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlags aspect_mask) +{ + switch (aspect_mask) { + case VK_IMAGE_ASPECT_COLOR_BIT: + /* Dragons will eat you. + * + * Meta attaches all destination surfaces as color render targets. Guess + * what surface the Meta Dragons really want. + */ + if (image->format->depth_format && image->format->has_stencil) { + return &image->depth_surface; + } else if (image->format->depth_format) { + return &image->depth_surface; + } else if (image->format->has_stencil) { + return &image->stencil_surface; + } else { + return &image->color_surface; + } + break; + case VK_IMAGE_ASPECT_DEPTH_BIT: + assert(image->format->depth_format); + return &image->depth_surface; + case VK_IMAGE_ASPECT_STENCIL_BIT: + assert(image->format->has_stencil); + return &image->stencil_surface; + case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: + if (image->format->depth_format && image->format->has_stencil) { + /* FINISHME: The Vulkan spec (git a511ba2) requires support for + * combined depth stencil formats. Specifically, it states: + * + * At least one of ename:VK_FORMAT_D24_UNORM_S8_UINT or + * ename:VK_FORMAT_D32_SFLOAT_S8_UINT must be supported. + * + * Image views with both depth and stencil aspects are only valid for + * render target attachments, in which case + * cmd_buffer_emit_depth_stencil() will pick out both the depth and + * stencil surfaces from the underlying surface. + */ + return &image->depth_surface; + } else if (image->format->depth_format) { + return &image->depth_surface; + } else if (image->format->has_stencil) { + return &image->stencil_surface; + } + /* fallthrough */ + default: + unreachable("image does not have aspect"); + return NULL; + } +} + +static void +image_param_defaults(struct brw_image_param *param) +{ + memset(param, 0, sizeof *param); + /* Set the swizzling shifts to all-ones to effectively disable swizzling -- + * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more + * detailed explanation of these parameters. + */ + param->swizzling[0] = 0xff; + param->swizzling[1] = 0xff; +} + +void +anv_image_view_fill_image_param(struct anv_device *device, + struct anv_image_view *view, + struct brw_image_param *param) +{ + image_param_defaults(param); + + const struct isl_surf *surf = &view->image->color_surface.isl; + const int cpp = isl_format_get_layout(surf->format)->bs; + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(surf); + + param->size[0] = view->extent.width; + param->size[1] = view->extent.height; + if (surf->dim == ISL_SURF_DIM_3D) { + param->size[2] = view->extent.depth; + } else { + param->size[2] = surf->logical_level0_px.array_len - view->base_layer; + } + + isl_surf_get_image_offset_el(surf, view->base_mip, view->base_layer, 0, + ¶m->offset[0], ¶m->offset[1]); + + param->stride[0] = cpp; + param->stride[1] = surf->row_pitch / cpp; + + if (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D) { + param->stride[2] = util_align_npot(param->size[0], image_align_sa.w); + param->stride[3] = util_align_npot(param->size[1], image_align_sa.h); + } else { + param->stride[2] = 0; + param->stride[3] = isl_surf_get_array_pitch_el_rows(surf); + } + + switch (surf->tiling) { + case ISL_TILING_LINEAR: + /* image_param_defaults is good enough */ + break; + + case ISL_TILING_X: + /* An X tile is a rectangular block of 512x8 bytes. */ + param->tiling[0] = util_logbase2(512 / cpp); + param->tiling[1] = util_logbase2(8); + + if (device->isl_dev.has_bit6_swizzling) { + /* Right shifts required to swizzle bits 9 and 10 of the memory + * address with bit 6. + */ + param->swizzling[0] = 3; + param->swizzling[1] = 4; + } + break; + + case ISL_TILING_Y0: + /* The layout of a Y-tiled surface in memory isn't really fundamentally + * different to the layout of an X-tiled surface, we simply pretend that + * the surface is broken up in a number of smaller 16Bx32 tiles, each + * one arranged in X-major order just like is the case for X-tiling. + */ + param->tiling[0] = util_logbase2(16 / cpp); + param->tiling[1] = util_logbase2(32); + + if (device->isl_dev.has_bit6_swizzling) { + /* Right shift required to swizzle bit 9 of the memory address with + * bit 6. + */ + param->swizzling[0] = 3; + param->swizzling[1] = 0xff; + } + break; + + default: + assert(!"Unhandled storage image tiling"); + } + + /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The + * address calculation algorithm (emit_address_calculation() in + * brw_fs_surface_builder.cpp) handles this as a sort of tiling with + * modulus equal to the LOD. + */ + param->tiling[2] = (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D ? + view->base_mip : 0); +} + +void +anv_buffer_view_fill_image_param(struct anv_device *device, + struct anv_buffer_view *view, + struct brw_image_param *param) +{ + image_param_defaults(param); + + param->stride[0] = isl_format_layouts[view->format].bs; + param->size[0] = view->range / param->stride[0]; +} diff --git a/src/vulkan/anv_intel.c b/src/vulkan/anv_intel.c new file mode 100644 index 00000000000..d95d9afe8cf --- /dev/null +++ b/src/vulkan/anv_intel.c @@ -0,0 +1,100 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +VkResult anv_CreateDmaBufImageINTEL( + VkDevice _device, + const VkDmaBufImageCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDeviceMemory* pMem, + VkImage* pImage) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_device_memory *mem; + struct anv_image *image; + VkResult result; + VkImage image_h; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL); + + mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (mem == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + mem->bo.gem_handle = anv_gem_fd_to_handle(device, pCreateInfo->fd); + if (!mem->bo.gem_handle) { + result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail; + } + + mem->bo.map = NULL; + mem->bo.index = 0; + mem->bo.offset = 0; + mem->bo.size = pCreateInfo->strideInBytes * pCreateInfo->extent.height; + + anv_image_create(_device, + &(struct anv_image_create_info) { + .isl_tiling_flags = ISL_TILING_X_BIT, + .stride = pCreateInfo->strideInBytes, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = pCreateInfo->format, + .extent = pCreateInfo->extent, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + pAllocator, &image_h); + + image = anv_image_from_handle(image_h); + image->bo = &mem->bo; + image->offset = 0; + + assert(image->extent.width > 0); + assert(image->extent.height > 0); + assert(image->extent.depth == 1); + + *pMem = anv_device_memory_to_handle(mem); + *pImage = anv_image_to_handle(image); + + return VK_SUCCESS; + + fail: + anv_free2(&device->alloc, pAllocator, mem); + + return result; +} diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c new file mode 100644 index 00000000000..72a927a08ee --- /dev/null +++ b/src/vulkan/anv_meta.c @@ -0,0 +1,1596 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_meta.h" +#include "anv_private.h" +#include "nir/nir_builder.h" + +struct anv_render_pass anv_meta_dummy_renderpass = {0}; + +static nir_shader * +build_nir_vertex_shader(bool attr_flat) +{ + nir_builder b; + + const struct glsl_type *vertex_type = glsl_vec4_type(); + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs"); + + nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vertex_type, "a_pos"); + pos_in->data.location = VERT_ATTRIB_GENERIC0; + nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, + vertex_type, "gl_Position"); + pos_out->data.location = VARYING_SLOT_POS; + nir_copy_var(&b, pos_out, pos_in); + + /* Add one more pass-through attribute. For clear shaders, this is used + * to store the color and for blit shaders it's the texture coordinate. + */ + const struct glsl_type *attr_type = glsl_vec4_type(); + nir_variable *attr_in = nir_variable_create(b.shader, nir_var_shader_in, + attr_type, "a_attr"); + attr_in->data.location = VERT_ATTRIB_GENERIC1; + nir_variable *attr_out = nir_variable_create(b.shader, nir_var_shader_out, + attr_type, "v_attr"); + attr_out->data.location = VARYING_SLOT_VAR0; + attr_out->data.interpolation = attr_flat ? INTERP_QUALIFIER_FLAT : + INTERP_QUALIFIER_SMOOTH; + nir_copy_var(&b, attr_out, attr_in); + + return b.shader; +} + +static nir_shader * +build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) +{ + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs"); + + const struct glsl_type *color_type = glsl_vec4_type(); + + nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, + glsl_vec4_type(), "v_attr"); + tex_pos_in->data.location = VARYING_SLOT_VAR0; + + /* Swizzle the array index which comes in as Z coordinate into the right + * position. + */ + unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 }; + nir_ssa_def *const tex_pos = + nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, + (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false); + + const struct glsl_type *sampler_type = + glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, + glsl_get_base_type(color_type)); + nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, + sampler_type, "s_tex"); + sampler->data.descriptor_set = 0; + sampler->data.binding = 0; + + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); + tex->sampler_dim = tex_dim; + tex->op = nir_texop_tex; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(tex_pos); + tex->dest_type = nir_type_float; /* TODO */ + tex->is_array = glsl_sampler_type_is_array(sampler_type); + tex->coord_components = tex_pos->num_components; + tex->sampler = nir_deref_var_create(tex, sampler); + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + + nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, + color_type, "f_color"); + color_out->data.location = FRAG_RESULT_DATA0; + nir_store_var(&b, color_out, &tex->dest.ssa, 4); + + return b.shader; +} + +void +anv_meta_save(struct anv_meta_saved_state *state, + const struct anv_cmd_buffer *cmd_buffer, + uint32_t dynamic_mask) +{ + state->old_pipeline = cmd_buffer->state.pipeline; + state->old_descriptor_set0 = cmd_buffer->state.descriptors[0]; + memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings, + sizeof(state->old_vertex_bindings)); + + state->dynamic_mask = dynamic_mask; + anv_dynamic_state_copy(&state->dynamic, &cmd_buffer->state.dynamic, + dynamic_mask); +} + +void +anv_meta_restore(const struct anv_meta_saved_state *state, + struct anv_cmd_buffer *cmd_buffer) +{ + cmd_buffer->state.pipeline = state->old_pipeline; + cmd_buffer->state.descriptors[0] = state->old_descriptor_set0; + memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings, + sizeof(state->old_vertex_bindings)); + + cmd_buffer->state.vb_dirty |= (1 << ANV_META_VERTEX_BINDING_COUNT) - 1; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE; + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + + anv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic, + state->dynamic_mask); + cmd_buffer->state.dirty |= state->dynamic_mask; + + /* Since we've used the pipeline with the VS disabled, set + * need_query_wa. See CmdBeginQuery. + */ + cmd_buffer->state.need_query_wa = true; +} + +VkImageViewType +anv_meta_get_view_type(const struct anv_image *image) +{ + switch (image->type) { + case VK_IMAGE_TYPE_1D: return VK_IMAGE_VIEW_TYPE_1D; + case VK_IMAGE_TYPE_2D: return VK_IMAGE_VIEW_TYPE_2D; + case VK_IMAGE_TYPE_3D: return VK_IMAGE_VIEW_TYPE_3D; + default: + unreachable("bad VkImageViewType"); + } +} + +/** + * When creating a destination VkImageView, this function provides the needed + * VkImageViewCreateInfo::subresourceRange::baseArrayLayer. + */ +uint32_t +anv_meta_get_iview_layer(const struct anv_image *dest_image, + const VkImageSubresourceLayers *dest_subresource, + const VkOffset3D *dest_offset) +{ + switch (dest_image->type) { + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + return dest_subresource->baseArrayLayer; + case VK_IMAGE_TYPE_3D: + /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer, + * but meta does it anyway. When doing so, we translate the + * destination's z offset into an array offset. + */ + return dest_offset->z; + default: + assert(!"bad VkImageType"); + return 0; + } +} + +static VkResult +anv_device_init_meta_blit_state(struct anv_device *device) +{ + VkResult result; + + result = anv_CreateRenderPass(anv_device_to_handle(device), + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveAttachmentCount = 1, + .pPreserveAttachments = (uint32_t[]) { 0 }, + }, + .dependencyCount = 0, + }, &device->meta_state.alloc, &device->meta_state.blit.render_pass); + if (result != VK_SUCCESS) + goto fail; + + /* We don't use a vertex shader for clearing, but instead build and pass + * the VUEs directly to the rasterization backend. However, we do need + * to provide GLSL source for the vertex shader so that the compiler + * does not dead-code our inputs. + */ + struct anv_shader_module vs = { + .nir = build_nir_vertex_shader(false), + }; + + struct anv_shader_module fs_1d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D), + }; + + struct anv_shader_module fs_2d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D), + }; + + struct anv_shader_module fs_3d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D), + }; + + VkPipelineVertexInputStateCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = 0, + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + { + .binding = 1, + .stride = 5 * sizeof(float), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = 0 + }, + { + /* Position */ + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = 0 + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 1, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = 8 + } + } + }; + + VkDescriptorSetLayoutCreateInfo ds_layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }; + result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), + &ds_layout_info, + &device->meta_state.alloc, + &device->meta_state.blit.ds_layout); + if (result != VK_SUCCESS) + goto fail_render_pass; + + result = anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout, + }, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout); + if (result != VK_SUCCESS) + goto fail_descriptor_set_layout; + + VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = anv_shader_module_to_handle(&vs), + .pName = "main", + .pSpecializationInfo = NULL + }, { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */ + .pName = "main", + .pSpecializationInfo = NULL + }, + }; + + const VkGraphicsPipelineCreateInfo vk_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = ARRAY_SIZE(pipeline_shader_stages), + .pStages = pipeline_shader_stages, + .pVertexInputState = &vi_create_info, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = 1, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { .colorWriteMask = + VK_COLOR_COMPONENT_A_BIT | + VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT }, + } + }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 9, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }, + }, + .flags = 0, + .layout = device->meta_state.blit.pipeline_layout, + .renderPass = device->meta_state.blit.render_pass, + .subpass = 0, + }; + + const struct anv_graphics_pipeline_create_info anv_pipeline_info = { + .color_attachment_count = -1, + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_layout; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_1d; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_2d; + + ralloc_free(vs.nir); + ralloc_free(fs_1d.nir); + ralloc_free(fs_2d.nir); + ralloc_free(fs_3d.nir); + + return VK_SUCCESS; + + fail_pipeline_2d: + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_2d_src, + &device->meta_state.alloc); + + fail_pipeline_1d: + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_1d_src, + &device->meta_state.alloc); + + fail_pipeline_layout: + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit.pipeline_layout, + &device->meta_state.alloc); + fail_descriptor_set_layout: + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout, + &device->meta_state.alloc); + fail_render_pass: + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit.render_pass, + &device->meta_state.alloc); + + ralloc_free(vs.nir); + ralloc_free(fs_1d.nir); + ralloc_free(fs_2d.nir); + ralloc_free(fs_3d.nir); + fail: + return result; +} + +static void +meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *saved_state) +{ + anv_meta_save(saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT)); +} + +struct blit_region { + VkOffset3D src_offset; + VkExtent3D src_extent; + VkOffset3D dest_offset; + VkExtent3D dest_extent; +}; + +/* Returns the user-provided VkBufferImageCopy::imageOffset in units of + * elements rather than texels. One element equals one texel or one block + * if Image is uncompressed or compressed, respectively. + */ +static struct VkOffset3D +meta_region_offset_el(const struct anv_image * image, + const struct VkOffset3D * offset) +{ + const struct isl_format_layout * isl_layout = image->format->isl_layout; + return (VkOffset3D) { + .x = offset->x / isl_layout->bw, + .y = offset->y / isl_layout->bh, + .z = offset->z / isl_layout->bd, + }; +} + +/* Returns the user-provided VkBufferImageCopy::imageExtent in units of + * elements rather than texels. One element equals one texel or one block + * if Image is uncompressed or compressed, respectively. + */ +static struct VkExtent3D +meta_region_extent_el(const VkFormat format, + const struct VkExtent3D * extent) +{ + const struct isl_format_layout * isl_layout = + anv_format_for_vk_format(format)->isl_layout; + return (VkExtent3D) { + .width = DIV_ROUND_UP(extent->width , isl_layout->bw), + .height = DIV_ROUND_UP(extent->height, isl_layout->bh), + .depth = DIV_ROUND_UP(extent->depth , isl_layout->bd), + }; +} + +static void +meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *src_image, + struct anv_image_view *src_iview, + VkOffset3D src_offset, + VkExtent3D src_extent, + struct anv_image *dest_image, + struct anv_image_view *dest_iview, + VkOffset3D dest_offset, + VkExtent3D dest_extent, + VkFilter blit_filter) +{ + struct anv_device *device = cmd_buffer->device; + VkDescriptorPool dummy_desc_pool = (VkDescriptorPool)1; + + struct blit_vb_data { + float pos[2]; + float tex_coord[3]; + } *vb_data; + + assert(src_image->samples == dest_image->samples); + + unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); + memset(vb_state.map, 0, sizeof(struct anv_vue_header)); + vb_data = vb_state.map + sizeof(struct anv_vue_header); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + dest_offset.x + dest_extent.width, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + vb_data[1] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)src_offset.x / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + vb_data[2] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y, + }, + .tex_coord = { + (float)src_offset.x / (float)src_iview->extent.width, + (float)src_offset.y / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + anv_state_clflush(vb_state); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->dynamic_state_block_pool.bo, + .offset = vb_state.offset, + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(struct anv_vue_header), + }); + + VkSampler sampler; + ANV_CALL(CreateSampler)(anv_device_to_handle(device), + &(VkSamplerCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = blit_filter, + .minFilter = blit_filter, + }, &cmd_buffer->pool->alloc, &sampler); + + VkDescriptorSet set; + anv_AllocateDescriptorSets(anv_device_to_handle(device), + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = dummy_desc_pool, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout + }, &set); + anv_UpdateDescriptorSets(anv_device_to_handle(device), + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = sampler, + .imageView = anv_image_view_to_handle(src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }, 0, NULL); + + VkFramebuffer fb; + anv_CreateFramebuffer(anv_device_to_handle(device), + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(dest_iview), + }, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, + .layers = 1 + }, &cmd_buffer->pool->alloc, &fb); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.blit.render_pass, + .framebuffer = fb, + .renderArea = { + .offset = { dest_offset.x, dest_offset.y }, + .extent = { dest_extent.width, dest_extent.height }, + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, VK_SUBPASS_CONTENTS_INLINE); + + VkPipeline pipeline; + + switch (src_image->type) { + case VK_IMAGE_TYPE_1D: + pipeline = device->meta_state.blit.pipeline_1d_src; + break; + case VK_IMAGE_TYPE_2D: + pipeline = device->meta_state.blit.pipeline_2d_src; + break; + case VK_IMAGE_TYPE_3D: + pipeline = device->meta_state.blit.pipeline_3d_src; + break; + default: + unreachable(!"bad VkImageType"); + } + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + } + + anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, + &(VkViewport) { + .x = 0.0f, + .y = 0.0f, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); + + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit.pipeline_layout, 0, 1, + &set, 0, NULL); + + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. + */ + anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set)); + anv_DestroySampler(anv_device_to_handle(device), sampler, + &cmd_buffer->pool->alloc); + anv_DestroyFramebuffer(anv_device_to_handle(device), fb, + &cmd_buffer->pool->alloc); +} + +static void +meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, + const struct anv_meta_saved_state *saved_state) +{ + anv_meta_restore(saved_state, cmd_buffer); +} + +static VkFormat +vk_format_for_size(int bs) +{ + /* Note: We intentionally use the 4-channel formats whenever we can. + * This is so that, when we do a RGB <-> RGBX copy, the two formats will + * line up even though one of them is 3/4 the size of the other. + */ + switch (bs) { + case 1: return VK_FORMAT_R8_UINT; + case 2: return VK_FORMAT_R8G8_UINT; + case 3: return VK_FORMAT_R8G8B8_UINT; + case 4: return VK_FORMAT_R8G8B8A8_UINT; + case 6: return VK_FORMAT_R16G16B16_UINT; + case 8: return VK_FORMAT_R16G16B16A16_UINT; + case 12: return VK_FORMAT_R32G32B32_UINT; + case 16: return VK_FORMAT_R32G32B32A32_UINT; + default: + unreachable("Invalid format block size"); + } +} + +static void +do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *src, uint64_t src_offset, + struct anv_bo *dest, uint64_t dest_offset, + int width, int height, VkFormat copy_format) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = copy_format, + .extent = { + .width = width, + .height = height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = 0, + .flags = 0, + }; + + VkImage src_image; + image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &src_image); + + VkImage dest_image; + image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &dest_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. + */ + anv_image_from_handle(src_image)->bo = src; + anv_image_from_handle(src_image)->offset = src_offset; + anv_image_from_handle(dest_image)->bo = dest; + anv_image_from_handle(dest_image)->offset = dest_offset; + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = src_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = copy_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = dest_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = copy_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + anv_image_from_handle(src_image), + &src_iview, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }, + anv_image_from_handle(dest_image), + &dest_iview, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }, + VK_FILTER_NEAREST); + + anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); + anv_DestroyImage(vk_device, dest_image, &cmd_buffer->pool->alloc); +} + +void anv_CmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); + ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); + + struct anv_meta_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; + uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset; + uint64_t copy_size = pRegions[r].size; + + /* First, we compute the biggest format that can be used with the + * given offsets and size. + */ + int bs = 16; + + int fs = ffs(src_offset) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(src_offset % bs == 0); + + fs = ffs(dest_offset) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(dest_offset % bs == 0); + + fs = ffs(pRegions[r].size) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(pRegions[r].size % bs == 0); + + VkFormat copy_format = vk_format_for_size(bs); + + /* This is maximum possible width/height our HW can handle */ + uint64_t max_surface_dim = 1 << 14; + + /* First, we make a bunch of max-sized copies */ + uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs; + while (copy_size >= max_copy_size) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, max_surface_dim, copy_format); + copy_size -= max_copy_size; + src_offset += max_copy_size; + dest_offset += max_copy_size; + } + + uint64_t height = copy_size / (max_surface_dim * bs); + assert(height < max_surface_dim); + if (height != 0) { + uint64_t rect_copy_size = height * max_surface_dim * bs; + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, height, copy_format); + copy_size -= rect_copy_size; + src_offset += rect_copy_size; + dest_offset += rect_copy_size; + } + + if (copy_size != 0) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + copy_size / bs, 1, copy_format); + } + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdUpdateBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize dataSize, + const uint32_t* pData) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); + struct anv_meta_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + /* We can't quite grab a full block because the state stream needs a + * little data at the top to build its linked list. + */ + const uint32_t max_update_size = + cmd_buffer->device->dynamic_state_block_pool.block_size - 64; + + assert(max_update_size < (1 << 14) * 4); + + while (dataSize) { + const uint32_t copy_size = MIN2(dataSize, max_update_size); + + struct anv_state tmp_data = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64); + + memcpy(tmp_data.map, pData, copy_size); + + VkFormat format; + int bs; + if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) { + format = VK_FORMAT_R32G32B32A32_UINT; + bs = 16; + } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) { + format = VK_FORMAT_R32G32_UINT; + bs = 8; + } else { + assert((copy_size & 3) == 0 && (dstOffset & 3) == 0); + format = VK_FORMAT_R32_UINT; + bs = 4; + } + + do_buffer_copy(cmd_buffer, + &cmd_buffer->device->dynamic_state_block_pool.bo, + tmp_data.offset, + dst_buffer->bo, dst_buffer->offset + dstOffset, + copy_size / bs, 1, format); + + dataSize -= copy_size; + dstOffset += copy_size; + pData = (void *)pData + copy_size; + } +} + +static VkFormat +choose_iview_format(struct anv_image *image, VkImageAspectFlagBits aspect) +{ + assert(__builtin_popcount(aspect) == 1); + + struct isl_surf *surf = + &anv_image_get_surface_for_aspect_mask(image, aspect)->isl; + + /* vkCmdCopyImage behaves like memcpy. Therefore we choose identical UINT + * formats for the source and destination image views. + * + * From the Vulkan spec (2015-12-30): + * + * vkCmdCopyImage performs image copies in a similar manner to a host + * memcpy. It does not perform general-purpose conversions such as + * scaling, resizing, blending, color-space conversion, or format + * conversions. Rather, it simply copies raw image data. vkCmdCopyImage + * can copy between images with different formats, provided the formats + * are compatible as defined below. + * + * [The spec later defines compatibility as having the same number of + * bytes per block]. + */ + return vk_format_for_size(isl_format_layouts[surf->format].bs); +} + +static VkFormat +choose_buffer_format(VkFormat format, VkImageAspectFlagBits aspect) +{ + assert(__builtin_popcount(aspect) == 1); + + /* vkCmdCopy* commands behave like memcpy. Therefore we choose + * compatable UINT formats for the source and destination image views. + * + * For the buffer, we go back to the original image format and get a + * the format as if it were linear. This way, for RGB formats, we get + * an RGB format here even if the tiled image is RGBA. XXX: This doesn't + * work if the buffer is the destination. + */ + enum isl_format linear_format = anv_get_isl_format(format, aspect, + VK_IMAGE_TILING_LINEAR, + NULL); + + return vk_format_for_size(isl_format_layouts[linear_format].bs); +} + +void anv_CmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + struct anv_meta_saved_state saved_state; + + /* From the Vulkan 1.0 spec: + * + * vkCmdCopyImage can be used to copy image data between multisample + * images, but both images must have the same number of samples. + */ + assert(src_image->samples == dest_image->samples); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + assert(pRegions[r].srcSubresource.aspectMask == + pRegions[r].dstSubresource.aspectMask); + + VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask; + + VkFormat src_format = choose_iview_format(src_image, aspect); + VkFormat dst_format = choose_iview_format(dest_image, aspect); + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = anv_meta_get_view_type(src_image), + .format = src_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, + .layerCount = pRegions[r].dstSubresource.layerCount, + }, + }, + cmd_buffer, 0); + + const VkOffset3D dest_offset = { + .x = pRegions[r].dstOffset.x, + .y = pRegions[r].dstOffset.y, + .z = 0, + }; + + unsigned num_slices; + if (src_image->type == VK_IMAGE_TYPE_3D) { + assert(pRegions[r].srcSubresource.layerCount == 1 && + pRegions[r].dstSubresource.layerCount == 1); + num_slices = pRegions[r].extent.depth; + } else { + assert(pRegions[r].srcSubresource.layerCount == + pRegions[r].dstSubresource.layerCount); + assert(pRegions[r].extent.depth == 1); + num_slices = pRegions[r].dstSubresource.layerCount; + } + + const uint32_t dest_base_array_slice = + anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, + &pRegions[r].dstOffset); + + for (unsigned slice = 0; slice < num_slices; slice++) { + VkOffset3D src_offset = pRegions[r].srcOffset; + src_offset.z += slice; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = destImage, + .viewType = anv_meta_get_view_type(dest_image), + .format = dst_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].dstSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_base_array_slice + slice, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + src_image, &src_iview, + src_offset, + pRegions[r].extent, + dest_image, &dest_iview, + dest_offset, + pRegions[r].extent, + VK_FILTER_NEAREST); + } + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdBlitImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageBlit* pRegions, + VkFilter filter) + +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + struct anv_meta_saved_state saved_state; + + /* From the Vulkan 1.0 spec: + * + * vkCmdBlitImage must not be used for multisampled source or + * destination images. Use vkCmdResolveImage for this purpose. + */ + assert(src_image->samples == 1); + assert(dest_image->samples == 1); + + anv_finishme("respect VkFilter"); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = anv_meta_get_view_type(src_image), + .format = src_image->vk_format, + .subresourceRange = { + .aspectMask = pRegions[r].srcSubresource.aspectMask, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + const VkOffset3D dest_offset = { + .x = pRegions[r].dstOffsets[0].x, + .y = pRegions[r].dstOffsets[0].y, + .z = 0, + }; + + if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x || + pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y || + pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x || + pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y) + anv_finishme("FINISHME: Allow flipping in blits"); + + const VkExtent3D dest_extent = { + .width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x, + .height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y, + }; + + const VkExtent3D src_extent = { + .width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x, + .height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y, + }; + + const uint32_t dest_array_slice = + anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, + &pRegions[r].dstOffsets[0]); + + if (pRegions[r].srcSubresource.layerCount > 1) + anv_finishme("FINISHME: copy multiple array layers"); + + if (pRegions[r].srcOffsets[0].z + 1 != pRegions[r].srcOffsets[1].z || + pRegions[r].dstOffsets[0].z + 1 != pRegions[r].dstOffsets[1].z) + anv_finishme("FINISHME: copy multiple depth layers"); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = destImage, + .viewType = anv_meta_get_view_type(dest_image), + .format = dest_image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].dstSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_array_slice, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + src_image, &src_iview, + pRegions[r].srcOffsets[0], src_extent, + dest_image, &dest_iview, + dest_offset, dest_extent, + filter); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +static struct anv_image * +make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, + VkImageUsageFlags usage, + VkImageType image_type, + const VkAllocationCallbacks *alloc, + const VkBufferImageCopy *copy) +{ + ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer); + + VkExtent3D extent = copy->imageExtent; + if (copy->bufferRowLength) + extent.width = copy->bufferRowLength; + if (copy->bufferImageHeight) + extent.height = copy->bufferImageHeight; + extent.depth = 1; + extent = meta_region_extent_el(format, &extent); + + VkImageAspectFlags aspect = copy->imageSubresource.aspectMask; + VkFormat buffer_format = choose_buffer_format(format, aspect); + + VkImage vk_image; + VkResult result = anv_CreateImage(vk_device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = buffer_format, + .extent = extent, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = usage, + .flags = 0, + }, alloc, &vk_image); + assert(result == VK_SUCCESS); + + ANV_FROM_HANDLE(anv_image, image, vk_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. + */ + image->bo = buffer->bo; + image->offset = buffer->offset + copy->bufferOffset; + + return image; +} + +void anv_CmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + struct anv_meta_saved_state saved_state; + + /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to + * VK_SAMPLE_COUNT_1_BIT." + */ + assert(dest_image->samples == 1); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; + + VkFormat image_format = choose_iview_format(dest_image, aspect); + + struct anv_image *src_image = + make_image_for_buffer(vk_device, srcBuffer, dest_image->vk_format, + VK_IMAGE_USAGE_SAMPLED_BIT, + dest_image->type, &cmd_buffer->pool->alloc, + &pRegions[r]); + + const uint32_t dest_base_array_slice = + anv_meta_get_iview_layer(dest_image, &pRegions[r].imageSubresource, + &pRegions[r].imageOffset); + + unsigned num_slices_3d = pRegions[r].imageExtent.depth; + unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; + unsigned slice_3d = 0; + unsigned slice_array = 0; + while (slice_3d < num_slices_3d && slice_array < num_slices_array) { + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(src_image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = src_image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + uint32_t img_x = 0; + uint32_t img_y = 0; + uint32_t img_o = 0; + if (isl_format_is_compressed(dest_image->format->surface_format)) + isl_surf_get_image_intratile_offset_el(&cmd_buffer->device->isl_dev, + &dest_image->color_surface.isl, + pRegions[r].imageSubresource.mipLevel, + pRegions[r].imageSubresource.baseArrayLayer + slice_array, + pRegions[r].imageOffset.z + slice_3d, + &img_o, &img_x, &img_y); + + VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, & pRegions[r].imageOffset); + dest_offset_el.x += img_x; + dest_offset_el.y += img_y; + dest_offset_el.z = 0; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(dest_image), + .viewType = anv_meta_get_view_type(dest_image), + .format = image_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_base_array_slice + + slice_array + slice_3d, + .layerCount = 1 + }, + }, + cmd_buffer, img_o); + + const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, + &pRegions[r].imageExtent); + + meta_emit_blit(cmd_buffer, + src_image, + &src_iview, + (VkOffset3D){0, 0, 0}, + img_extent_el, + dest_image, + &dest_iview, + dest_offset_el, + img_extent_el, + VK_FILTER_NEAREST); + + /* Once we've done the blit, all of the actual information about + * the image is embedded in the command buffer so we can just + * increment the offset directly in the image effectively + * re-binding it to different backing memory. + */ + src_image->offset += src_image->extent.width * + src_image->extent.height * + src_image->format->isl_layout->bs; + + if (dest_image->type == VK_IMAGE_TYPE_3D) + slice_3d++; + else + slice_array++; + } + + anv_DestroyImage(vk_device, anv_image_to_handle(src_image), + &cmd_buffer->pool->alloc); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdCopyImageToBuffer( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + struct anv_meta_saved_state saved_state; + + + /* The Vulkan 1.0 spec says "srcImage must have a sample count equal to + * VK_SAMPLE_COUNT_1_BIT." + */ + assert(src_image->samples == 1); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; + + VkFormat image_format = choose_iview_format(src_image, aspect); + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = anv_meta_get_view_type(src_image), + .format = image_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer, + .layerCount = pRegions[r].imageSubresource.layerCount, + }, + }, + cmd_buffer, 0); + + struct anv_image *dest_image = + make_image_for_buffer(vk_device, destBuffer, src_image->vk_format, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + src_image->type, &cmd_buffer->pool->alloc, + &pRegions[r]); + + unsigned num_slices; + if (src_image->type == VK_IMAGE_TYPE_3D) { + assert(pRegions[r].imageSubresource.layerCount == 1); + num_slices = pRegions[r].imageExtent.depth; + } else { + assert(pRegions[r].imageExtent.depth == 1); + num_slices = pRegions[r].imageSubresource.layerCount; + } + + for (unsigned slice = 0; slice < num_slices; slice++) { + VkOffset3D src_offset = pRegions[r].imageOffset; + src_offset.z += slice; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(dest_image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = dest_image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + anv_image_from_handle(srcImage), + &src_iview, + src_offset, + pRegions[r].imageExtent, + dest_image, + &dest_iview, + (VkOffset3D) { 0, 0, 0 }, + pRegions[r].imageExtent, + VK_FILTER_NEAREST); + + /* Once we've done the blit, all of the actual information about + * the image is embedded in the command buffer so we can just + * increment the offset directly in the image effectively + * re-binding it to different backing memory. + */ + dest_image->offset += dest_image->extent.width * + dest_image->extent.height * + src_image->format->isl_layout->bs; + } + + anv_DestroyImage(vk_device, anv_image_to_handle(dest_image), + &cmd_buffer->pool->alloc); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +static void * +meta_alloc(void* _device, size_t size, size_t alignment, + VkSystemAllocationScope allocationScope) +{ + struct anv_device *device = _device; + return device->alloc.pfnAllocation(device->alloc.pUserData, size, alignment, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); +} + +static void * +meta_realloc(void* _device, void *original, size_t size, size_t alignment, + VkSystemAllocationScope allocationScope) +{ + struct anv_device *device = _device; + return device->alloc.pfnReallocation(device->alloc.pUserData, original, + size, alignment, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); +} + +static void +meta_free(void* _device, void *data) +{ + struct anv_device *device = _device; + return device->alloc.pfnFree(device->alloc.pUserData, data); +} + +VkResult +anv_device_init_meta(struct anv_device *device) +{ + VkResult result; + + device->meta_state.alloc = (VkAllocationCallbacks) { + .pUserData = device, + .pfnAllocation = meta_alloc, + .pfnReallocation = meta_realloc, + .pfnFree = meta_free, + }; + + result = anv_device_init_meta_clear_state(device); + if (result != VK_SUCCESS) + goto fail_clear; + + result = anv_device_init_meta_resolve_state(device); + if (result != VK_SUCCESS) + goto fail_resolve; + + result = anv_device_init_meta_blit_state(device); + if (result != VK_SUCCESS) + goto fail_blit; + + return VK_SUCCESS; + +fail_blit: + anv_device_finish_meta_resolve_state(device); +fail_resolve: + anv_device_finish_meta_clear_state(device); +fail_clear: + return result; +} + +void +anv_device_finish_meta(struct anv_device *device) +{ + anv_device_finish_meta_resolve_state(device); + anv_device_finish_meta_clear_state(device); + + /* Blit */ + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit.render_pass, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_1d_src, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_2d_src, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_3d_src, + &device->meta_state.alloc); + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit.pipeline_layout, + &device->meta_state.alloc); + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout, + &device->meta_state.alloc); +} diff --git a/src/vulkan/anv_meta.h b/src/vulkan/anv_meta.h new file mode 100644 index 00000000000..91c3c7d21ca --- /dev/null +++ b/src/vulkan/anv_meta.h @@ -0,0 +1,72 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include "anv_private.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ANV_META_VERTEX_BINDING_COUNT 2 + +struct anv_meta_saved_state { + struct anv_vertex_binding old_vertex_bindings[ANV_META_VERTEX_BINDING_COUNT]; + struct anv_descriptor_set *old_descriptor_set0; + struct anv_pipeline *old_pipeline; + + /** + * Bitmask of (1 << VK_DYNAMIC_STATE_*). Defines the set of saved dynamic + * state. + */ + uint32_t dynamic_mask; + struct anv_dynamic_state dynamic; +}; + +VkResult anv_device_init_meta_clear_state(struct anv_device *device); +void anv_device_finish_meta_clear_state(struct anv_device *device); + +VkResult anv_device_init_meta_resolve_state(struct anv_device *device); +void anv_device_finish_meta_resolve_state(struct anv_device *device); + +void +anv_meta_save(struct anv_meta_saved_state *state, + const struct anv_cmd_buffer *cmd_buffer, + uint32_t dynamic_mask); + +void +anv_meta_restore(const struct anv_meta_saved_state *state, + struct anv_cmd_buffer *cmd_buffer); + +VkImageViewType +anv_meta_get_view_type(const struct anv_image *image); + +uint32_t +anv_meta_get_iview_layer(const struct anv_image *dest_image, + const VkImageSubresourceLayers *dest_subresource, + const VkOffset3D *dest_offset); + +#ifdef __cplusplus +} +#endif diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c new file mode 100644 index 00000000000..15e24a32a75 --- /dev/null +++ b/src/vulkan/anv_meta_clear.c @@ -0,0 +1,1097 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_meta.h" +#include "anv_private.h" +#include "nir/nir_builder.h" + +/** Vertex attributes for color clears. */ +struct color_clear_vattrs { + struct anv_vue_header vue_header; + float position[2]; /**< 3DPRIM_RECTLIST */ + VkClearColorValue color; +}; + +/** Vertex attributes for depthstencil clears. */ +struct depthstencil_clear_vattrs { + struct anv_vue_header vue_header; + float position[2]; /*<< 3DPRIM_RECTLIST */ +}; + +static void +meta_clear_begin(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_save(saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT) | + (1 << VK_DYNAMIC_STATE_SCISSOR) | + (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)); + + cmd_buffer->state.dynamic.viewport.count = 0; + cmd_buffer->state.dynamic.scissor.count = 0; +} + +static void +meta_clear_end(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_restore(saved_state, cmd_buffer); +} + +static void +build_color_shaders(struct nir_shader **out_vs, + struct nir_shader **out_fs, + uint32_t frag_output) +{ + nir_builder vs_b; + nir_builder fs_b; + + nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); + nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL); + + vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs"); + fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs"); + + const struct glsl_type *position_type = glsl_vec4_type(); + const struct glsl_type *color_type = glsl_vec4_type(); + + nir_variable *vs_in_pos = + nir_variable_create(vs_b.shader, nir_var_shader_in, position_type, + "a_position"); + vs_in_pos->data.location = VERT_ATTRIB_GENERIC0; + + nir_variable *vs_out_pos = + nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, + "gl_Position"); + vs_out_pos->data.location = VARYING_SLOT_POS; + + nir_variable *vs_in_color = + nir_variable_create(vs_b.shader, nir_var_shader_in, color_type, + "a_color"); + vs_in_color->data.location = VERT_ATTRIB_GENERIC1; + + nir_variable *vs_out_color = + nir_variable_create(vs_b.shader, nir_var_shader_out, color_type, + "v_color"); + vs_out_color->data.location = VARYING_SLOT_VAR0; + vs_out_color->data.interpolation = INTERP_QUALIFIER_FLAT; + + nir_variable *fs_in_color = + nir_variable_create(fs_b.shader, nir_var_shader_in, color_type, + "v_color"); + fs_in_color->data.location = vs_out_color->data.location; + fs_in_color->data.interpolation = vs_out_color->data.interpolation; + + nir_variable *fs_out_color = + nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, + "f_color"); + fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output; + + nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); + nir_copy_var(&vs_b, vs_out_color, vs_in_color); + nir_copy_var(&fs_b, fs_out_color, fs_in_color); + + *out_vs = vs_b.shader; + *out_fs = fs_b.shader; +} + +static VkResult +create_pipeline(struct anv_device *device, + uint32_t samples, + struct nir_shader *vs_nir, + struct nir_shader *fs_nir, + const VkPipelineVertexInputStateCreateInfo *vi_state, + const VkPipelineDepthStencilStateCreateInfo *ds_state, + const VkPipelineColorBlendStateCreateInfo *cb_state, + const VkAllocationCallbacks *alloc, + bool use_repclear, + struct anv_pipeline **pipeline) +{ + VkDevice device_h = anv_device_to_handle(device); + VkResult result; + + struct anv_shader_module vs_m = { .nir = vs_nir }; + struct anv_shader_module fs_m = { .nir = fs_nir }; + + VkPipeline pipeline_h = VK_NULL_HANDLE; + result = anv_graphics_pipeline_create(device_h, + VK_NULL_HANDLE, + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = fs_nir ? 2 : 1, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = anv_shader_module_to_handle(&vs_m), + .pName = "main", + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = anv_shader_module_to_handle(&fs_m), + .pName = "main", + }, + }, + .pVertexInputState = vi_state, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .pViewports = NULL, /* dynamic */ + .scissorCount = 1, + .pScissors = NULL, /* dynamic */ + }, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, + .depthBiasEnable = false, + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = samples, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { ~0 }, + .alphaToCoverageEnable = false, + .alphaToOneEnable = false, + }, + .pDepthStencilState = ds_state, + .pColorBlendState = cb_state, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + /* The meta clear pipeline declares all state as dynamic. + * As a consequence, vkCmdBindPipeline writes no dynamic state + * to the cmd buffer. Therefore, at the end of the meta clear, + * we need only restore dynamic state was vkCmdSet. + */ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 9, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }, + }, + .flags = 0, + .renderPass = anv_render_pass_to_handle(&anv_meta_dummy_renderpass), + .subpass = 0, + }, + &(struct anv_graphics_pipeline_create_info) { + .color_attachment_count = MAX_RTS, + .use_repclear = use_repclear, + .disable_viewport = true, + .disable_vs = true, + .use_rectlist = true + }, + alloc, + &pipeline_h); + + ralloc_free(vs_nir); + ralloc_free(fs_nir); + + *pipeline = anv_pipeline_from_handle(pipeline_h); + + return result; +} + +static VkResult +create_color_pipeline(struct anv_device *device, + uint32_t samples, + uint32_t frag_output, + struct anv_pipeline **pipeline) +{ + struct nir_shader *vs_nir; + struct nir_shader *fs_nir; + build_color_shaders(&vs_nir, &fs_nir, frag_output); + + const VkPipelineVertexInputStateCreateInfo vi_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = sizeof(struct color_clear_vattrs), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = offsetof(struct color_clear_vattrs, vue_header), + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct color_clear_vattrs, position), + }, + { + /* Color */ + .location = 2, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offset = offsetof(struct color_clear_vattrs, color), + }, + }, + }; + + const VkPipelineDepthStencilStateCreateInfo ds_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = false, + .depthWriteEnable = false, + .depthBoundsTestEnable = false, + .stencilTestEnable = false, + }; + + const VkPipelineColorBlendStateCreateInfo cb_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = false, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { + .blendEnable = false, + .colorWriteMask = VK_COLOR_COMPONENT_A_BIT | + VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT, + }, + }, + }; + + /* Disable repclear because we do not want the compiler to replace the + * shader. We need the shader to write to the specified color attachment, + * but the repclear shader writes to all color attachments. + */ + return + create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, + &cb_state, &device->meta_state.alloc, + /*use_repclear*/ false, pipeline); +} + +static void +destroy_pipeline(struct anv_device *device, struct anv_pipeline *pipeline) +{ + if (!pipeline) + return; + + ANV_CALL(DestroyPipeline)(anv_device_to_handle(device), + anv_pipeline_to_handle(pipeline), + &device->meta_state.alloc); +} + +void +anv_device_finish_meta_clear_state(struct anv_device *device) +{ + struct anv_meta_state *state = &device->meta_state; + + for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { + for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) { + destroy_pipeline(device, state->clear[i].color_pipelines[j]); + } + + destroy_pipeline(device, state->clear[i].depth_only_pipeline); + destroy_pipeline(device, state->clear[i].stencil_only_pipeline); + destroy_pipeline(device, state->clear[i].depthstencil_pipeline); + } +} + +static void +emit_color_clear(struct anv_cmd_buffer *cmd_buffer, + const VkClearAttachment *clear_att, + const VkClearRect *clear_rect) +{ + struct anv_device *device = cmd_buffer->device; + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const uint32_t subpass_att = clear_att->colorAttachment; + const uint32_t pass_att = subpass->color_attachments[subpass_att]; + const struct anv_image_view *iview = fb->attachments[pass_att]; + const uint32_t samples = iview->image->samples; + const uint32_t samples_log2 = ffs(samples) - 1; + struct anv_pipeline *pipeline = + device->meta_state.clear[samples_log2].color_pipelines[subpass_att]; + VkClearColorValue clear_value = clear_att->clearValue.color; + + VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + VkPipeline pipeline_h = anv_pipeline_to_handle(pipeline); + + assert(samples_log2 < ARRAY_SIZE(device->meta_state.clear)); + assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + assert(clear_att->colorAttachment < subpass->color_count); + + const struct color_clear_vattrs vertex_data[3] = { + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x, + clear_rect->rect.offset.y, + }, + .color = clear_value, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y, + }, + .color = clear_value, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y + clear_rect->rect.extent.height, + }, + .color = clear_value, + }, + }; + + struct anv_state state = + anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = sizeof(vertex_data), + .bo = &device->dynamic_state_block_pool.bo, + .offset = state.offset, + }; + + ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, + (VkViewport[]) { + { + .x = 0, + .y = 0, + .width = fb->width, + .height = fb->height, + .minDepth = 0.0, + .maxDepth = 1.0, + }, + }); + + ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, + (VkRect2D[]) { + { + .offset = { 0, 0 }, + .extent = { fb->width, fb->height }, + } + }); + + ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1, + (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, + (VkDeviceSize[]) { 0 }); + + if (cmd_buffer->state.pipeline != pipeline) { + ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_h); + } + + ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); +} + + +static void +build_depthstencil_shader(struct nir_shader **out_vs) +{ + nir_builder vs_b; + + nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); + + vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs"); + + const struct glsl_type *position_type = glsl_vec4_type(); + + nir_variable *vs_in_pos = + nir_variable_create(vs_b.shader, nir_var_shader_in, position_type, + "a_position"); + vs_in_pos->data.location = VERT_ATTRIB_GENERIC0; + + nir_variable *vs_out_pos = + nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, + "gl_Position"); + vs_out_pos->data.location = VARYING_SLOT_POS; + + nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); + + *out_vs = vs_b.shader; +} + +static VkResult +create_depthstencil_pipeline(struct anv_device *device, + VkImageAspectFlags aspects, + uint32_t samples, + struct anv_pipeline **pipeline) +{ + struct nir_shader *vs_nir; + + build_depthstencil_shader(&vs_nir); + + const VkPipelineVertexInputStateCreateInfo vi_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = sizeof(struct depthstencil_clear_vattrs), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 2, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = offsetof(struct depthstencil_clear_vattrs, vue_header), + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct depthstencil_clear_vattrs, position), + }, + }, + }; + + const VkPipelineDepthStencilStateCreateInfo ds_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT), + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + .depthWriteEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT), + .depthBoundsTestEnable = false, + .stencilTestEnable = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT), + .front = { + .passOp = VK_STENCIL_OP_REPLACE, + .compareOp = VK_COMPARE_OP_ALWAYS, + .writeMask = UINT32_MAX, + .reference = 0, /* dynamic */ + }, + .back = { 0 /* dont care */ }, + }; + + const VkPipelineColorBlendStateCreateInfo cb_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = false, + .attachmentCount = 0, + .pAttachments = NULL, + }; + + return create_pipeline(device, samples, vs_nir, NULL, &vi_state, &ds_state, + &cb_state, &device->meta_state.alloc, + /*use_repclear*/ true, pipeline); +} + +static void +emit_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, + const VkClearAttachment *clear_att, + const VkClearRect *clear_rect) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_meta_state *meta_state = &device->meta_state; + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const uint32_t pass_att = subpass->depth_stencil_attachment; + const struct anv_image_view *iview = fb->attachments[pass_att]; + const uint32_t samples = iview->image->samples; + const uint32_t samples_log2 = ffs(samples) - 1; + VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; + VkImageAspectFlags aspects = clear_att->aspectMask; + + VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + + assert(samples_log2 < ARRAY_SIZE(meta_state->clear)); + assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT || + aspects == VK_IMAGE_ASPECT_STENCIL_BIT || + aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + assert(pass_att != VK_ATTACHMENT_UNUSED); + + const struct depthstencil_clear_vattrs vertex_data[3] = { + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x, + clear_rect->rect.offset.y, + }, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y, + }, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y + clear_rect->rect.extent.height, + }, + }, + }; + + struct anv_state state = + anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = sizeof(vertex_data), + .bo = &device->dynamic_state_block_pool.bo, + .offset = state.offset, + }; + + ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, + (VkViewport[]) { + { + .x = 0, + .y = 0, + .width = fb->width, + .height = fb->height, + + /* Ignored when clearing only stencil. */ + .minDepth = clear_value.depth, + .maxDepth = clear_value.depth, + }, + }); + + ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, + (VkRect2D[]) { + { + .offset = { 0, 0 }, + .extent = { fb->width, fb->height }, + } + }); + + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + ANV_CALL(CmdSetStencilReference)(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, + clear_value.stencil); + } + + ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1, + (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, + (VkDeviceSize[]) { 0 }); + + struct anv_pipeline *pipeline; + switch (aspects) { + case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: + pipeline = meta_state->clear[samples_log2].depthstencil_pipeline; + break; + case VK_IMAGE_ASPECT_DEPTH_BIT: + pipeline = meta_state->clear[samples_log2].depth_only_pipeline; + break; + case VK_IMAGE_ASPECT_STENCIL_BIT: + pipeline = meta_state->clear[samples_log2].stencil_only_pipeline; + break; + default: + unreachable("expected depth or stencil aspect"); + } + + if (cmd_buffer->state.pipeline != pipeline) { + ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + anv_pipeline_to_handle(pipeline)); + } + + ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); +} + +VkResult +anv_device_init_meta_clear_state(struct anv_device *device) +{ + VkResult res; + struct anv_meta_state *state = &device->meta_state; + + zero(device->meta_state.clear); + + for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { + uint32_t samples = 1 << i; + + for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) { + res = create_color_pipeline(device, samples, /* frag_output */ j, + &state->clear[i].color_pipelines[j]); + if (res != VK_SUCCESS) + goto fail; + } + + res = create_depthstencil_pipeline(device, + VK_IMAGE_ASPECT_DEPTH_BIT, samples, + &state->clear[i].depth_only_pipeline); + if (res != VK_SUCCESS) + goto fail; + + res = create_depthstencil_pipeline(device, + VK_IMAGE_ASPECT_STENCIL_BIT, samples, + &state->clear[i].stencil_only_pipeline); + if (res != VK_SUCCESS) + goto fail; + + res = create_depthstencil_pipeline(device, + VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT, samples, + &state->clear[i].depthstencil_pipeline); + if (res != VK_SUCCESS) + goto fail; + } + + return VK_SUCCESS; + +fail: + anv_device_finish_meta_clear_state(device); + return res; +} + +/** + * The parameters mean that same as those in vkCmdClearAttachments. + */ +static void +emit_clear(struct anv_cmd_buffer *cmd_buffer, + const VkClearAttachment *clear_att, + const VkClearRect *clear_rect) +{ + if (clear_att->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + emit_color_clear(cmd_buffer, clear_att, clear_rect); + } else { + assert(clear_att->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect); + } +} + +static bool +subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer) +{ + const struct anv_cmd_state *cmd_state = &cmd_buffer->state; + uint32_t ds = cmd_state->subpass->depth_stencil_attachment; + + for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { + uint32_t a = cmd_state->subpass->color_attachments[i]; + if (cmd_state->attachments[a].pending_clear_aspects) { + return true; + } + } + + if (ds != VK_ATTACHMENT_UNUSED && + cmd_state->attachments[ds].pending_clear_aspects) { + return true; + } + + return false; +} + +/** + * Emit any pending attachment clears for the current subpass. + * + * @see anv_attachment_state::pending_clear_aspects + */ +void +anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_cmd_state *cmd_state = &cmd_buffer->state; + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_meta_saved_state saved_state; + + if (!subpass_needs_clear(cmd_buffer)) + return; + + meta_clear_begin(&saved_state, cmd_buffer); + + if (cmd_state->framebuffer->layers > 1) + anv_finishme("clearing multi-layer framebuffer"); + + VkClearRect clear_rect = { + .rect = { + .offset = { 0, 0 }, + .extent = { fb->width, fb->height }, + }, + .baseArrayLayer = 0, + .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ + }; + + for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { + uint32_t a = cmd_state->subpass->color_attachments[i]; + + if (!cmd_state->attachments[a].pending_clear_aspects) + continue; + + assert(cmd_state->attachments[a].pending_clear_aspects == + VK_IMAGE_ASPECT_COLOR_BIT); + + VkClearAttachment clear_att = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .colorAttachment = i, /* Use attachment index relative to subpass */ + .clearValue = cmd_state->attachments[a].clear_value, + }; + + emit_clear(cmd_buffer, &clear_att, &clear_rect); + cmd_state->attachments[a].pending_clear_aspects = 0; + } + + uint32_t ds = cmd_state->subpass->depth_stencil_attachment; + + if (ds != VK_ATTACHMENT_UNUSED && + cmd_state->attachments[ds].pending_clear_aspects) { + + VkClearAttachment clear_att = { + .aspectMask = cmd_state->attachments[ds].pending_clear_aspects, + .clearValue = cmd_state->attachments[ds].clear_value, + }; + + emit_clear(cmd_buffer, &clear_att, &clear_rect); + cmd_state->attachments[ds].pending_clear_aspects = 0; + } + + meta_clear_end(&saved_state, cmd_buffer); +} + +static void +anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *image, + VkImageLayout image_layout, + const VkClearValue *clear_value, + uint32_t range_count, + const VkImageSubresourceRange *ranges) +{ + VkDevice device_h = anv_device_to_handle(cmd_buffer->device); + + for (uint32_t r = 0; r < range_count; r++) { + const VkImageSubresourceRange *range = &ranges[r]; + + for (uint32_t l = 0; l < range->levelCount; ++l) { + for (uint32_t s = 0; s < range->layerCount; ++s) { + struct anv_image_view iview; + anv_image_view_init(&iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(image), + .viewType = anv_meta_get_view_type(image), + .format = image->vk_format, + .subresourceRange = { + .aspectMask = range->aspectMask, + .baseMipLevel = range->baseMipLevel + l, + .levelCount = 1, + .baseArrayLayer = range->baseArrayLayer + s, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + VkFramebuffer fb; + anv_CreateFramebuffer(device_h, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(&iview), + }, + .width = iview.extent.width, + .height = iview.extent.height, + .layers = 1 + }, + &cmd_buffer->pool->alloc, + &fb); + + VkAttachmentDescription att_desc = { + .format = iview.vk_format, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = image_layout, + .finalLayout = image_layout, + }; + + VkSubpassDescription subpass_desc = { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 0, + .pColorAttachments = NULL, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = NULL, + .preserveAttachmentCount = 0, + .pPreserveAttachments = NULL, + }; + + const VkAttachmentReference att_ref = { + .attachment = 0, + .layout = image_layout, + }; + + if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + subpass_desc.colorAttachmentCount = 1; + subpass_desc.pColorAttachments = &att_ref; + } else { + subpass_desc.pDepthStencilAttachment = &att_ref; + } + + VkRenderPass pass; + anv_CreateRenderPass(device_h, + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &att_desc, + .subpassCount = 1, + .pSubpasses = &subpass_desc, + }, + &cmd_buffer->pool->alloc, + &pass); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderArea = { + .offset = { 0, 0, }, + .extent = { + .width = iview.extent.width, + .height = iview.extent.height, + }, + }, + .renderPass = pass, + .framebuffer = fb, + .clearValueCount = 0, + .pClearValues = NULL, + }, + VK_SUBPASS_CONTENTS_INLINE); + + VkClearAttachment clear_att = { + .aspectMask = range->aspectMask, + .colorAttachment = 0, + .clearValue = *clear_value, + }; + + VkClearRect clear_rect = { + .rect = { + .offset = { 0, 0 }, + .extent = { iview.extent.width, iview.extent.height }, + }, + .baseArrayLayer = range->baseArrayLayer, + .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ + }; + + emit_clear(cmd_buffer, &clear_att, &clear_rect); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + ANV_CALL(DestroyRenderPass)(device_h, pass, + &cmd_buffer->pool->alloc); + ANV_CALL(DestroyFramebuffer)(device_h, fb, + &cmd_buffer->pool->alloc); + } + } + } +} + +void anv_CmdClearColorImage( + VkCommandBuffer commandBuffer, + VkImage image_h, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, image, image_h); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + anv_cmd_clear_image(cmd_buffer, image, imageLayout, + (const VkClearValue *) pColor, + rangeCount, pRanges); + + meta_clear_end(&saved_state, cmd_buffer); +} + +void anv_CmdClearDepthStencilImage( + VkCommandBuffer commandBuffer, + VkImage image_h, + VkImageLayout imageLayout, + const VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, image, image_h); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + anv_cmd_clear_image(cmd_buffer, image, imageLayout, + (const VkClearValue *) pDepthStencil, + rangeCount, pRanges); + + meta_clear_end(&saved_state, cmd_buffer); +} + +void anv_CmdClearAttachments( + VkCommandBuffer commandBuffer, + uint32_t attachmentCount, + const VkClearAttachment* pAttachments, + uint32_t rectCount, + const VkClearRect* pRects) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + /* FINISHME: We can do better than this dumb loop. It thrashes too much + * state. + */ + for (uint32_t a = 0; a < attachmentCount; ++a) { + for (uint32_t r = 0; r < rectCount; ++r) { + emit_clear(cmd_buffer, &pAttachments[a], &pRects[r]); + } + } + + meta_clear_end(&saved_state, cmd_buffer); +} + +static void +do_buffer_fill(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *dest, uint64_t dest_offset, + int width, int height, VkFormat fill_format, uint32_t data) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = fill_format, + .extent = { + .width = width, + .height = height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }; + + VkImage dest_image; + image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &dest_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. + */ + anv_image_from_handle(dest_image)->bo = dest; + anv_image_from_handle(dest_image)->offset = dest_offset; + + const VkClearValue clear_value = { + .color = { + .uint32 = { data, data, data, data } + } + }; + + const VkImageSubresourceRange range = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }; + + anv_cmd_clear_image(cmd_buffer, anv_image_from_handle(dest_image), + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + &clear_value, 1, &range); +} + +void anv_CmdFillBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize fillSize, + uint32_t data) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + VkFormat format; + int bs; + if ((fillSize & 15) == 0 && (dstOffset & 15) == 0) { + format = VK_FORMAT_R32G32B32A32_UINT; + bs = 16; + } else if ((fillSize & 7) == 0 && (dstOffset & 15) == 0) { + format = VK_FORMAT_R32G32_UINT; + bs = 8; + } else { + assert((fillSize & 3) == 0 && (dstOffset & 3) == 0); + format = VK_FORMAT_R32_UINT; + bs = 4; + } + + /* This is maximum possible width/height our HW can handle */ + const uint64_t max_surface_dim = 1 << 14; + + /* First, we make a bunch of max-sized copies */ + const uint64_t max_fill_size = max_surface_dim * max_surface_dim * bs; + while (fillSize > max_fill_size) { + do_buffer_fill(cmd_buffer, dst_buffer->bo, + dst_buffer->offset + dstOffset, + max_surface_dim, max_surface_dim, format, data); + fillSize -= max_fill_size; + dstOffset += max_fill_size; + } + + uint64_t height = fillSize / (max_surface_dim * bs); + assert(height < max_surface_dim); + if (height != 0) { + const uint64_t rect_fill_size = height * max_surface_dim * bs; + do_buffer_fill(cmd_buffer, dst_buffer->bo, + dst_buffer->offset + dstOffset, + max_surface_dim, height, format, data); + fillSize -= rect_fill_size; + dstOffset += rect_fill_size; + } + + if (fillSize != 0) { + do_buffer_fill(cmd_buffer, dst_buffer->bo, + dst_buffer->offset + dstOffset, + fillSize / bs, 1, format, data); + } + + meta_clear_end(&saved_state, cmd_buffer); +} diff --git a/src/vulkan/anv_meta_resolve.c b/src/vulkan/anv_meta_resolve.c new file mode 100644 index 00000000000..f1c985e04cf --- /dev/null +++ b/src/vulkan/anv_meta_resolve.c @@ -0,0 +1,865 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> + +#include "anv_meta.h" +#include "anv_private.h" +#include "nir/nir_builder.h" + +/** + * Vertex attributes used by all pipelines. + */ +struct vertex_attrs { + struct anv_vue_header vue_header; + float position[2]; /**< 3DPRIM_RECTLIST */ + float tex_position[2]; +}; + +static void +meta_resolve_save(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_save(saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT) | + (1 << VK_DYNAMIC_STATE_SCISSOR)); + + cmd_buffer->state.dynamic.viewport.count = 0; + cmd_buffer->state.dynamic.scissor.count = 0; +} + +static void +meta_resolve_restore(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_restore(saved_state, cmd_buffer); +} + +static VkPipeline * +get_pipeline_h(struct anv_device *device, uint32_t samples) +{ + uint32_t i = ffs(samples) - 2; /* log2(samples) - 1 */ + + assert(samples >= 2); + assert(i < ARRAY_SIZE(device->meta_state.resolve.pipelines)); + + return &device->meta_state.resolve.pipelines[i]; +} + +static nir_shader * +build_nir_vs(void) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + + nir_builder b; + nir_variable *a_position; + nir_variable *v_position; + nir_variable *a_tex_position; + nir_variable *v_tex_position; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs"); + + a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "a_position"); + a_position->data.location = VERT_ATTRIB_GENERIC0; + + v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, + "gl_Position"); + v_position->data.location = VARYING_SLOT_POS; + + a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "a_tex_position"); + a_tex_position->data.location = VERT_ATTRIB_GENERIC1; + + v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, + "v_tex_position"); + v_tex_position->data.location = VARYING_SLOT_VAR0; + + nir_copy_var(&b, v_position, a_position); + nir_copy_var(&b, v_tex_position, a_tex_position); + + return b.shader; +} + +static nir_shader * +build_nir_fs(uint32_t num_samples) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + + const struct glsl_type *sampler2DMS = + glsl_sampler_type(GLSL_SAMPLER_DIM_MS, + /*is_shadow*/ false, + /*is_array*/ false, + GLSL_TYPE_FLOAT); + + nir_builder b; + nir_variable *u_tex; /* uniform sampler */ + nir_variable *v_position; /* vec4, varying fragment position */ + nir_variable *v_tex_position; /* vec4, varying texture coordinate */ + nir_variable *f_color; /* vec4, fragment output color */ + nir_ssa_def *accum; /* vec4, accumulation of sample values */ + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_asprintf(b.shader, + "meta_resolve_fs_samples%02d", + num_samples); + + u_tex = nir_variable_create(b.shader, nir_var_uniform, sampler2DMS, + "u_tex"); + u_tex->data.descriptor_set = 0; + u_tex->data.binding = 0; + + v_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "v_position"); + v_position->data.location = VARYING_SLOT_POS; + + v_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "v_tex_position"); + v_tex_position->data.location = VARYING_SLOT_VAR0; + + f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4, + "f_color"); + f_color->data.location = FRAG_RESULT_DATA0; + + accum = nir_imm_vec4(&b, 0, 0, 0, 0); + + nir_ssa_def *tex_position_ivec = + nir_f2i(&b, nir_load_var(&b, v_tex_position)); + + for (uint32_t i = 0; i < num_samples; ++i) { + nir_tex_instr *tex; + + tex = nir_tex_instr_create(b.shader, /*num_srcs*/ 2); + tex->sampler = nir_deref_var_create(tex, u_tex); + tex->sampler_dim = GLSL_SAMPLER_DIM_MS; + tex->op = nir_texop_txf_ms; + tex->src[0].src = nir_src_for_ssa(tex_position_ivec); + tex->src[0].src_type = nir_tex_src_coord; + tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i)); + tex->src[1].src_type = nir_tex_src_ms_index; + tex->dest_type = nir_type_float; + tex->is_array = false; + tex->coord_components = 3; + nir_ssa_dest_init(&tex->instr, &tex->dest, /*num_components*/ 4, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + + accum = nir_fadd(&b, accum, &tex->dest.ssa); + } + + accum = nir_fdiv(&b, accum, nir_imm_float(&b, num_samples)); + nir_store_var(&b, f_color, accum, /*writemask*/ 4); + + return b.shader; +} + +static VkResult +create_pass(struct anv_device *device) +{ + VkResult result; + VkDevice device_h = anv_device_to_handle(device); + const VkAllocationCallbacks *alloc = &device->meta_state.alloc; + + result = anv_CreateRenderPass(device_h, + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ + .samples = 1, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + }, + .preserveAttachmentCount = 0, + .pPreserveAttachments = NULL, + }, + .dependencyCount = 0, + }, + alloc, + &device->meta_state.resolve.pass); + + return result; +} + +static VkResult +create_pipeline(struct anv_device *device, + uint32_t num_samples, + VkShaderModule vs_module_h) +{ + VkResult result; + VkDevice device_h = anv_device_to_handle(device); + + struct anv_shader_module fs_module = { + .nir = build_nir_fs(num_samples), + }; + + if (!fs_module.nir) { + /* XXX: Need more accurate error */ + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto cleanup; + } + + result = anv_graphics_pipeline_create(device_h, + VK_NULL_HANDLE, + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = 2, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = vs_module_h, + .pName = "main", + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = anv_shader_module_to_handle(&fs_module), + .pName = "main", + }, + }, + .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = sizeof(struct vertex_attrs), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = offsetof(struct vertex_attrs, vue_header), + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct vertex_attrs, position), + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct vertex_attrs, tex_position), + }, + }, + }, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .depthClampEnable = false, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = 1, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { 0x1 }, + .alphaToCoverageEnable = false, + .alphaToOneEnable = false, + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = false, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | + VK_COLOR_COMPONENT_A_BIT, + }, + }, + }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 2, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + }, + }, + .layout = device->meta_state.resolve.pipeline_layout, + .renderPass = device->meta_state.resolve.pass, + .subpass = 0, + }, + &(struct anv_graphics_pipeline_create_info) { + .color_attachment_count = -1, + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }, + &device->meta_state.alloc, + get_pipeline_h(device, num_samples)); + if (result != VK_SUCCESS) + goto cleanup; + + goto cleanup; + +cleanup: + ralloc_free(fs_module.nir); + return result; +} + +void +anv_device_finish_meta_resolve_state(struct anv_device *device) +{ + struct anv_meta_state *state = &device->meta_state; + VkDevice device_h = anv_device_to_handle(device); + VkRenderPass pass_h = device->meta_state.resolve.pass; + VkPipelineLayout pipeline_layout_h = device->meta_state.resolve.pipeline_layout; + VkDescriptorSetLayout ds_layout_h = device->meta_state.resolve.ds_layout; + const VkAllocationCallbacks *alloc = &device->meta_state.alloc; + + if (pass_h) + ANV_CALL(DestroyRenderPass)(device_h, pass_h, + &device->meta_state.alloc); + + if (pipeline_layout_h) + ANV_CALL(DestroyPipelineLayout)(device_h, pipeline_layout_h, alloc); + + if (ds_layout_h) + ANV_CALL(DestroyDescriptorSetLayout)(device_h, ds_layout_h, alloc); + + for (uint32_t i = 0; i < ARRAY_SIZE(state->resolve.pipelines); ++i) { + VkPipeline pipeline_h = state->resolve.pipelines[i]; + + if (pipeline_h) { + ANV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc); + } + } +} + +VkResult +anv_device_init_meta_resolve_state(struct anv_device *device) +{ + VkResult res = VK_SUCCESS; + VkDevice device_h = anv_device_to_handle(device); + const VkAllocationCallbacks *alloc = &device->meta_state.alloc; + + const isl_sample_count_mask_t sample_count_mask = + isl_device_get_sample_counts(&device->isl_dev); + + zero(device->meta_state.resolve); + + struct anv_shader_module vs_module = { .nir = build_nir_vs() }; + if (!vs_module.nir) { + /* XXX: Need more accurate error */ + res = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail; + } + + VkShaderModule vs_module_h = anv_shader_module_to_handle(&vs_module); + + res = anv_CreateDescriptorSetLayout(device_h, + &(VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + }, + }, + }, + alloc, + &device->meta_state.resolve.ds_layout); + if (res != VK_SUCCESS) + goto fail; + + res = anv_CreatePipelineLayout(device_h, + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = (VkDescriptorSetLayout[]) { + device->meta_state.resolve.ds_layout, + }, + }, + alloc, + &device->meta_state.resolve.pipeline_layout); + if (res != VK_SUCCESS) + goto fail; + + res = create_pass(device); + if (res != VK_SUCCESS) + goto fail; + + for (uint32_t i = 0; + i < ARRAY_SIZE(device->meta_state.resolve.pipelines); ++i) { + + uint32_t sample_count = 1 << (1 + i); + if (!(sample_count_mask & sample_count)) + continue; + + res = create_pipeline(device, sample_count, vs_module_h); + if (res != VK_SUCCESS) + goto fail; + } + + goto cleanup; + +fail: + anv_device_finish_meta_resolve_state(device); + +cleanup: + ralloc_free(vs_module.nir); + + return res; +} + +static void +emit_resolve(struct anv_cmd_buffer *cmd_buffer, + struct anv_image_view *src_iview, + const VkOffset2D *src_offset, + struct anv_image_view *dest_iview, + const VkOffset2D *dest_offset, + const VkExtent2D *resolve_extent) +{ + struct anv_device *device = cmd_buffer->device; + VkDevice device_h = anv_device_to_handle(device); + VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct anv_image *src_image = src_iview->image; + VkDescriptorPool dummy_desc_pool_h = (VkDescriptorPool) 1; + + const struct vertex_attrs vertex_data[3] = { + { + .vue_header = {0}, + .position = { + dest_offset->x + resolve_extent->width, + dest_offset->y + resolve_extent->height, + }, + .tex_position = { + src_offset->x + resolve_extent->width, + src_offset->y + resolve_extent->height, + }, + }, + { + .vue_header = {0}, + .position = { + dest_offset->x, + dest_offset->y + resolve_extent->height, + }, + .tex_position = { + src_offset->x, + src_offset->y + resolve_extent->height, + }, + }, + { + .vue_header = {0}, + .position = { + dest_offset->x, + dest_offset->y, + }, + .tex_position = { + src_offset->x, + src_offset->y, + }, + }, + }; + + struct anv_state vertex_mem = + anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, + sizeof(vertex_data), 16); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = sizeof(vertex_data), + .bo = &cmd_buffer->dynamic_state_stream.block_pool->bo, + .offset = vertex_mem.offset, + }; + + VkBuffer vertex_buffer_h = anv_buffer_to_handle(&vertex_buffer); + + anv_CmdBindVertexBuffers(cmd_buffer_h, + /*firstBinding*/ 0, + /*bindingCount*/ 1, + (VkBuffer[]) { vertex_buffer_h }, + (VkDeviceSize[]) { 0 }); + + VkSampler sampler_h; + ANV_CALL(CreateSampler)(device_h, + &(VkSamplerCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = VK_FILTER_NEAREST, + .minFilter = VK_FILTER_NEAREST, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .mipLodBias = 0.0, + .anisotropyEnable = false, + .compareEnable = false, + .minLod = 0.0, + .maxLod = 0.0, + .unnormalizedCoordinates = false, + }, + &cmd_buffer->pool->alloc, + &sampler_h); + + VkDescriptorSet desc_set_h; + anv_AllocateDescriptorSets(device_h, + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = dummy_desc_pool_h, + .descriptorSetCount = 1, + .pSetLayouts = (VkDescriptorSetLayout[]) { + device->meta_state.blit.ds_layout, + }, + }, + &desc_set_h); + + ANV_FROM_HANDLE(anv_descriptor_set, desc_set, desc_set_h); + + anv_UpdateDescriptorSets(device_h, + /*writeCount*/ 1, + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = desc_set_h, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = sampler_h, + .imageView = anv_image_view_to_handle(src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + }, + }, + }, + /*copyCount*/ 0, + /*copies */ NULL); + + ANV_CALL(CmdSetViewport)(cmd_buffer_h, + /*firstViewport*/ 0, + /*viewportCount*/ 1, + (VkViewport[]) { + { + .x = 0, + .y = 0, + .width = fb->width, + .height = fb->height, + .minDepth = 0.0, + .maxDepth = 1.0, + }, + }); + + ANV_CALL(CmdSetScissor)(cmd_buffer_h, + /*firstScissor*/ 0, + /*scissorCount*/ 1, + (VkRect2D[]) { + { + .offset = { 0, 0 }, + .extent = (VkExtent2D) { fb->width, fb->height }, + }, + }); + + VkPipeline pipeline_h = *get_pipeline_h(device, src_image->samples); + ANV_FROM_HANDLE(anv_pipeline, pipeline, pipeline_h); + + if (cmd_buffer->state.pipeline != pipeline) { + anv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_h); + } + + anv_CmdBindDescriptorSets(cmd_buffer_h, + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.resolve.pipeline_layout, + /*firstSet*/ 0, + /* setCount */ 1, + (VkDescriptorSet[]) { + desc_set_h, + }, + /*copyCount*/ 0, + /*copies */ NULL); + + ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); + + /* All objects below are consumed by the draw call. We may safely destroy + * them. + */ + anv_descriptor_set_destroy(device, desc_set); + anv_DestroySampler(device_h, sampler_h, + &cmd_buffer->pool->alloc); +} + +void anv_CmdResolveImage( + VkCommandBuffer cmd_buffer_h, + VkImage src_image_h, + VkImageLayout src_image_layout, + VkImage dest_image_h, + VkImageLayout dest_image_layout, + uint32_t region_count, + const VkImageResolve* regions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmd_buffer_h); + ANV_FROM_HANDLE(anv_image, src_image, src_image_h); + ANV_FROM_HANDLE(anv_image, dest_image, dest_image_h); + struct anv_device *device = cmd_buffer->device; + struct anv_meta_saved_state state; + VkDevice device_h = anv_device_to_handle(device); + + meta_resolve_save(&state, cmd_buffer); + + assert(src_image->samples > 1); + assert(dest_image->samples == 1); + + if (src_image->samples >= 16) { + /* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the + * glBlitFramebuffer workaround for samples >= 16. + */ + anv_finishme("vkCmdResolveImage: need interpolation workaround when " + "samples >= 16"); + } + + if (src_image->array_size > 1) + anv_finishme("vkCmdResolveImage: multisample array images"); + + for (uint32_t r = 0; r < region_count; ++r) { + const VkImageResolve *region = ®ions[r]; + + /* From the Vulkan 1.0 spec: + * + * - The aspectMask member of srcSubresource and dstSubresource must + * only contain VK_IMAGE_ASPECT_COLOR_BIT + * + * - The layerCount member of srcSubresource and dstSubresource must + * match + */ + assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + assert(region->srcSubresource.layerCount == + region->dstSubresource.layerCount); + + const uint32_t src_base_layer = + anv_meta_get_iview_layer(src_image, ®ion->srcSubresource, + ®ion->srcOffset); + + const uint32_t dest_base_layer = + anv_meta_get_iview_layer(dest_image, ®ion->dstSubresource, + ®ion->dstOffset); + + for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; + ++layer) { + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = src_image_h, + .viewType = anv_meta_get_view_type(src_image), + .format = src_image->format->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = region->srcSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = src_base_layer + layer, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = dest_image_h, + .viewType = anv_meta_get_view_type(dest_image), + .format = dest_image->format->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = region->dstSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_base_layer + layer, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + VkFramebuffer fb_h; + anv_CreateFramebuffer(device_h, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(&dest_iview), + }, + .width = anv_minify(dest_image->extent.width, + region->dstSubresource.mipLevel), + .height = anv_minify(dest_image->extent.height, + region->dstSubresource.mipLevel), + .layers = 1 + }, + &cmd_buffer->pool->alloc, + &fb_h); + + ANV_CALL(CmdBeginRenderPass)(cmd_buffer_h, + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.resolve.pass, + .framebuffer = fb_h, + .renderArea = { + .offset = { + region->dstOffset.x, + region->dstOffset.y, + }, + .extent = { + region->extent.width, + region->extent.height, + } + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, + VK_SUBPASS_CONTENTS_INLINE); + + emit_resolve(cmd_buffer, + &src_iview, + &(VkOffset2D) { + .x = region->srcOffset.x, + .y = region->srcOffset.y, + }, + &dest_iview, + &(VkOffset2D) { + .x = region->dstOffset.x, + .y = region->dstOffset.y, + }, + &(VkExtent2D) { + .width = region->extent.width, + .height = region->extent.height, + }); + + ANV_CALL(CmdEndRenderPass)(cmd_buffer_h); + + anv_DestroyFramebuffer(device_h, fb_h, + &cmd_buffer->pool->alloc); + } + } + + meta_resolve_restore(&state, cmd_buffer); +} + +/** + * Emit any needed resolves for the current subpass. + */ +void +anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_subpass *subpass = cmd_buffer->state.subpass; + struct anv_meta_saved_state saved_state; + + /* FINISHME(perf): Skip clears for resolve attachments. + * + * From the Vulkan 1.0 spec: + * + * If the first use of an attachment in a render pass is as a resolve + * attachment, then the loadOp is effectively ignored as the resolve is + * guaranteed to overwrite all pixels in the render area. + */ + + if (!subpass->has_resolve) + return; + + meta_resolve_save(&saved_state, cmd_buffer); + + for (uint32_t i = 0; i < subpass->color_count; ++i) { + uint32_t src_att = subpass->color_attachments[i]; + uint32_t dest_att = subpass->resolve_attachments[i]; + + if (dest_att == VK_ATTACHMENT_UNUSED) + continue; + + struct anv_image_view *src_iview = fb->attachments[src_att]; + struct anv_image_view *dest_iview = fb->attachments[dest_att]; + + struct anv_subpass resolve_subpass = { + .color_count = 1, + .color_attachments = (uint32_t[]) { dest_att }, + .depth_stencil_attachment = VK_ATTACHMENT_UNUSED, + }; + + anv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass); + + /* Subpass resolves must respect the render area. We can ignore the + * render area here because vkCmdBeginRenderPass set the render area + * with 3DSTATE_DRAWING_RECTANGLE. + * + * XXX(chadv): Does the hardware really respect + * 3DSTATE_DRAWING_RECTANGLE when draing a 3DPRIM_RECTLIST? + */ + emit_resolve(cmd_buffer, + src_iview, + &(VkOffset2D) { 0, 0 }, + dest_iview, + &(VkOffset2D) { 0, 0 }, + &(VkExtent2D) { fb->width, fb->height }); + } + + cmd_buffer->state.subpass = subpass; + meta_resolve_restore(&saved_state, cmd_buffer); +} diff --git a/src/vulkan/anv_nir.h b/src/vulkan/anv_nir.h new file mode 100644 index 00000000000..c76314d9df6 --- /dev/null +++ b/src/vulkan/anv_nir.h @@ -0,0 +1,44 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include "nir/nir.h" +#include "anv_private.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar); + +void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data); +bool anv_nir_apply_pipeline_layout(nir_shader *shader, + struct brw_stage_prog_data *prog_data, + const struct anv_pipeline_layout *layout); + +#ifdef __cplusplus +} +#endif diff --git a/src/vulkan/anv_nir_apply_dynamic_offsets.c b/src/vulkan/anv_nir_apply_dynamic_offsets.c new file mode 100644 index 00000000000..a5e3238a36a --- /dev/null +++ b/src/vulkan/anv_nir_apply_dynamic_offsets.c @@ -0,0 +1,171 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_nir.h" +#include "nir/nir_builder.h" + +struct apply_dynamic_offsets_state { + nir_shader *shader; + nir_builder builder; + + struct anv_pipeline_layout *layout; + + uint32_t indices_start; +}; + +static bool +apply_dynamic_offsets_block(nir_block *block, void *void_state) +{ + struct apply_dynamic_offsets_state *state = void_state; + struct anv_descriptor_set_layout *set_layout; + + nir_builder *b = &state->builder; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + unsigned block_idx_src; + switch (intrin->intrinsic) { + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ssbo: + block_idx_src = 0; + break; + case nir_intrinsic_store_ssbo: + block_idx_src = 1; + break; + default: + continue; /* the loop */ + } + + nir_instr *res_instr = intrin->src[block_idx_src].ssa->parent_instr; + assert(res_instr->type == nir_instr_type_intrinsic); + nir_intrinsic_instr *res_intrin = nir_instr_as_intrinsic(res_instr); + assert(res_intrin->intrinsic == nir_intrinsic_vulkan_resource_index); + + unsigned set = res_intrin->const_index[0]; + unsigned binding = res_intrin->const_index[1]; + + set_layout = state->layout->set[set].layout; + if (set_layout->binding[binding].dynamic_offset_index < 0) + continue; + + b->cursor = nir_before_instr(&intrin->instr); + + /* First, we need to generate the uniform load for the buffer offset */ + uint32_t index = state->layout->set[set].dynamic_offset_start + + set_layout->binding[binding].dynamic_offset_index; + + nir_intrinsic_instr *offset_load = + nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform); + offset_load->num_components = 2; + offset_load->const_index[0] = state->indices_start + index * 8; + offset_load->src[0] = nir_src_for_ssa(nir_imul(b, res_intrin->src[0].ssa, + nir_imm_int(b, 8))); + + nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, NULL); + nir_builder_instr_insert(b, &offset_load->instr); + + nir_src *offset_src = nir_get_io_offset_src(intrin); + nir_ssa_def *new_offset = nir_iadd(b, offset_src->ssa, + &offset_load->dest.ssa); + + /* In order to avoid out-of-bounds access, we predicate */ + nir_ssa_def *pred = nir_uge(b, nir_channel(b, &offset_load->dest.ssa, 1), + offset_src->ssa); + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(pred); + nir_cf_node_insert(b->cursor, &if_stmt->cf_node); + + nir_instr_remove(&intrin->instr); + *offset_src = nir_src_for_ssa(new_offset); + nir_instr_insert_after_cf_list(&if_stmt->then_list, &intrin->instr); + + if (intrin->intrinsic != nir_intrinsic_store_ssbo) { + /* It's a load, we need a phi node */ + nir_phi_instr *phi = nir_phi_instr_create(b->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, + intrin->num_components, NULL); + + nir_phi_src *src1 = ralloc(phi, nir_phi_src); + struct exec_node *tnode = exec_list_get_tail(&if_stmt->then_list); + src1->pred = exec_node_data(nir_block, tnode, cf_node.node); + src1->src = nir_src_for_ssa(&intrin->dest.ssa); + exec_list_push_tail(&phi->srcs, &src1->node); + + b->cursor = nir_after_cf_list(&if_stmt->else_list); + nir_ssa_def *zero = nir_build_imm(b, intrin->num_components, + (nir_const_value) { .u = { 0, 0, 0, 0 } }); + + nir_phi_src *src2 = ralloc(phi, nir_phi_src); + struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list); + src2->pred = exec_node_data(nir_block, enode, cf_node.node); + src2->src = nir_src_for_ssa(zero); + exec_list_push_tail(&phi->srcs, &src2->node); + + assert(intrin->dest.is_ssa); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&phi->dest.ssa)); + + nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr); + } + } + + return true; +} + +void +anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data) +{ + struct apply_dynamic_offsets_state state = { + .shader = shader, + .layout = pipeline->layout, + .indices_start = shader->num_uniforms, + }; + + if (!state.layout || !state.layout->stage[shader->stage].has_dynamic_offsets) + return; + + nir_foreach_function(shader, function) { + if (function->impl) { + nir_builder_init(&state.builder, function->impl); + nir_foreach_block(function->impl, apply_dynamic_offsets_block, &state); + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + } + + struct anv_push_constants *null_data = NULL; + for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) { + prog_data->param[i * 2 + shader->num_uniforms] = + (const union gl_constant_value *)&null_data->dynamic[i].offset; + prog_data->param[i * 2 + 1 + shader->num_uniforms] = + (const union gl_constant_value *)&null_data->dynamic[i].range; + } + + shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 8; +} diff --git a/src/vulkan/anv_nir_apply_pipeline_layout.c b/src/vulkan/anv_nir_apply_pipeline_layout.c new file mode 100644 index 00000000000..00ed7766acb --- /dev/null +++ b/src/vulkan/anv_nir_apply_pipeline_layout.c @@ -0,0 +1,322 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_nir.h" +#include "program/prog_parameter.h" +#include "nir/nir_builder.h" + +struct apply_pipeline_layout_state { + nir_shader *shader; + nir_builder builder; + + const struct anv_pipeline_layout *layout; + + bool progress; +}; + +static uint32_t +get_surface_index(unsigned set, unsigned binding, + struct apply_pipeline_layout_state *state) +{ + assert(set < state->layout->num_sets); + struct anv_descriptor_set_layout *set_layout = + state->layout->set[set].layout; + + gl_shader_stage stage = state->shader->stage; + + assert(binding < set_layout->binding_count); + + assert(set_layout->binding[binding].stage[stage].surface_index >= 0); + + uint32_t surface_index = + state->layout->set[set].stage[stage].surface_start + + set_layout->binding[binding].stage[stage].surface_index; + + assert(surface_index < state->layout->stage[stage].surface_count); + + return surface_index; +} + +static uint32_t +get_sampler_index(unsigned set, unsigned binding, nir_texop tex_op, + struct apply_pipeline_layout_state *state) +{ + assert(set < state->layout->num_sets); + struct anv_descriptor_set_layout *set_layout = + state->layout->set[set].layout; + + assert(binding < set_layout->binding_count); + + gl_shader_stage stage = state->shader->stage; + + if (set_layout->binding[binding].stage[stage].sampler_index < 0) { + assert(tex_op == nir_texop_txf); + return 0; + } + + uint32_t sampler_index = + state->layout->set[set].stage[stage].sampler_start + + set_layout->binding[binding].stage[stage].sampler_index; + + assert(sampler_index < state->layout->stage[stage].sampler_count); + + return sampler_index; +} + +static uint32_t +get_image_index(unsigned set, unsigned binding, + struct apply_pipeline_layout_state *state) +{ + assert(set < state->layout->num_sets); + struct anv_descriptor_set_layout *set_layout = + state->layout->set[set].layout; + + assert(binding < set_layout->binding_count); + + gl_shader_stage stage = state->shader->stage; + + assert(set_layout->binding[binding].stage[stage].image_index >= 0); + + uint32_t image_index = + state->layout->set[set].stage[stage].image_start + + set_layout->binding[binding].stage[stage].image_index; + + assert(image_index < state->layout->stage[stage].image_count); + + return image_index; +} + +static void +lower_res_index_intrinsic(nir_intrinsic_instr *intrin, + struct apply_pipeline_layout_state *state) +{ + nir_builder *b = &state->builder; + + b->cursor = nir_before_instr(&intrin->instr); + + uint32_t set = intrin->const_index[0]; + uint32_t binding = intrin->const_index[1]; + + uint32_t surface_index = get_surface_index(set, binding, state); + + nir_const_value *const_block_idx = + nir_src_as_const_value(intrin->src[0]); + + nir_ssa_def *block_index; + if (const_block_idx) { + block_index = nir_imm_int(b, surface_index + const_block_idx->u[0]); + } else { + block_index = nir_iadd(b, nir_imm_int(b, surface_index), + nir_ssa_for_src(b, intrin->src[0], 1)); + } + + assert(intrin->dest.is_ssa); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index)); + nir_instr_remove(&intrin->instr); +} + +static void +lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref, + unsigned *const_index, nir_tex_src_type src_type, + struct apply_pipeline_layout_state *state) +{ + if (deref->deref.child) { + assert(deref->deref.child->deref_type == nir_deref_type_array); + nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child); + + *const_index += deref_array->base_offset; + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src, + tex->num_srcs + 1); + + for (unsigned i = 0; i < tex->num_srcs; i++) { + new_srcs[i].src_type = tex->src[i].src_type; + nir_instr_move_src(&tex->instr, &new_srcs[i].src, &tex->src[i].src); + } + + ralloc_free(tex->src); + tex->src = new_srcs; + + /* Now we can go ahead and move the source over to being a + * first-class texture source. + */ + tex->src[tex->num_srcs].src_type = src_type; + tex->num_srcs++; + assert(deref_array->indirect.is_ssa); + nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs - 1].src, + deref_array->indirect); + } + } +} + +static void +cleanup_tex_deref(nir_tex_instr *tex, nir_deref_var *deref) +{ + if (deref->deref.child == NULL) + return; + + nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child); + + if (deref_array->deref_array_type != nir_deref_array_type_indirect) + return; + + nir_instr_rewrite_src(&tex->instr, &deref_array->indirect, NIR_SRC_INIT); +} + +static void +lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) +{ + /* No one should have come by and lowered it already */ + assert(tex->sampler); + + nir_deref_var *tex_deref = tex->texture ? tex->texture : tex->sampler; + tex->texture_index = + get_surface_index(tex_deref->var->data.descriptor_set, + tex_deref->var->data.binding, state); + lower_tex_deref(tex, tex_deref, &tex->texture_index, + nir_tex_src_texture_offset, state); + + tex->sampler_index = + get_sampler_index(tex->sampler->var->data.descriptor_set, + tex->sampler->var->data.binding, tex->op, state); + lower_tex_deref(tex, tex->sampler, &tex->sampler_index, + nir_tex_src_sampler_offset, state); + + /* The backend only ever uses this to mark used surfaces. We don't care + * about that little optimization so it just needs to be non-zero. + */ + tex->texture_array_size = 1; + + if (tex->texture) + cleanup_tex_deref(tex, tex->texture); + cleanup_tex_deref(tex, tex->sampler); + tex->texture = NULL; + tex->sampler = NULL; +} + +static bool +apply_pipeline_layout_block(nir_block *block, void *void_state) +{ + struct apply_pipeline_layout_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) { + lower_res_index_intrinsic(intrin, state); + state->progress = true; + } + break; + } + case nir_instr_type_tex: + lower_tex(nir_instr_as_tex(instr), state); + /* All texture instructions need lowering */ + state->progress = true; + break; + default: + continue; + } + } + + return true; +} + +static void +setup_vec4_uniform_value(const union gl_constant_value **params, + const union gl_constant_value *values, + unsigned n) +{ + static const gl_constant_value zero = { 0 }; + + for (unsigned i = 0; i < n; ++i) + params[i] = &values[i]; + + for (unsigned i = n; i < 4; ++i) + params[i] = &zero; +} + +bool +anv_nir_apply_pipeline_layout(nir_shader *shader, + struct brw_stage_prog_data *prog_data, + const struct anv_pipeline_layout *layout) +{ + struct apply_pipeline_layout_state state = { + .shader = shader, + .layout = layout, + }; + + nir_foreach_function(shader, function) { + if (function->impl) { + nir_builder_init(&state.builder, function->impl); + nir_foreach_block(function->impl, apply_pipeline_layout_block, &state); + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + } + + if (layout->stage[shader->stage].image_count > 0) { + nir_foreach_variable(var, &shader->uniforms) { + if (glsl_type_is_image(var->type) || + (glsl_type_is_array(var->type) && + glsl_type_is_image(glsl_get_array_element(var->type)))) { + /* Images are represented as uniform push constants and the actual + * information required for reading/writing to/from the image is + * storred in the uniform. + */ + unsigned image_index = get_image_index(var->data.descriptor_set, + var->data.binding, &state); + + var->data.driver_location = shader->num_uniforms + + image_index * BRW_IMAGE_PARAM_SIZE * 4; + } + } + + struct anv_push_constants *null_data = NULL; + const gl_constant_value **param = prog_data->param + shader->num_uniforms; + const struct brw_image_param *image_param = null_data->images; + for (uint32_t i = 0; i < layout->stage[shader->stage].image_count; i++) { + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, + (const union gl_constant_value *)&image_param->surface_idx, 1); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, + (const union gl_constant_value *)image_param->offset, 2); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET, + (const union gl_constant_value *)image_param->size, 3); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, + (const union gl_constant_value *)image_param->stride, 4); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET, + (const union gl_constant_value *)image_param->tiling, 3); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, + (const union gl_constant_value *)image_param->swizzling, 2); + + param += BRW_IMAGE_PARAM_SIZE; + image_param ++; + } + + shader->num_uniforms += layout->stage[shader->stage].image_count * + BRW_IMAGE_PARAM_SIZE * 4; + } + + return state.progress; +} diff --git a/src/vulkan/anv_nir_lower_push_constants.c b/src/vulkan/anv_nir_lower_push_constants.c new file mode 100644 index 00000000000..53cd3d73793 --- /dev/null +++ b/src/vulkan/anv_nir_lower_push_constants.c @@ -0,0 +1,77 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_nir.h" + +struct lower_push_constants_state { + nir_shader *shader; + bool is_scalar; +}; + +static bool +lower_push_constants_block(nir_block *block, void *void_state) +{ + struct lower_push_constants_state *state = void_state; + + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + /* TODO: Handle indirect push constants */ + if (intrin->intrinsic != nir_intrinsic_load_push_constant) + continue; + + /* This wont work for vec4 stages. */ + assert(state->is_scalar); + + assert(intrin->const_index[0] % 4 == 0); + assert(intrin->const_index[1] == 128); + + /* We just turn them into uniform loads with the appropreate offset */ + intrin->intrinsic = nir_intrinsic_load_uniform; + } + + return true; +} + +void +anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar) +{ + struct lower_push_constants_state state = { + .shader = shader, + .is_scalar = is_scalar, + }; + + nir_foreach_function(shader, function) { + if (function->impl) + nir_foreach_block(function->impl, lower_push_constants_block, &state); + } + + assert(shader->num_uniforms % 4 == 0); + if (is_scalar) + shader->num_uniforms /= 4; + else + shader->num_uniforms = DIV_ROUND_UP(shader->num_uniforms, 16); +} diff --git a/src/vulkan/anv_pass.c b/src/vulkan/anv_pass.c new file mode 100644 index 00000000000..d07e9fec6cc --- /dev/null +++ b/src/vulkan/anv_pass.c @@ -0,0 +1,160 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +VkResult anv_CreateRenderPass( + VkDevice _device, + const VkRenderPassCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_render_pass *pass; + size_t size; + size_t attachments_offset; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); + + size = sizeof(*pass); + size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); + attachments_offset = size; + size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); + + pass = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pass == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* Clear the subpasses along with the parent pass. This required because + * each array member of anv_subpass must be a valid pointer if not NULL. + */ + memset(pass, 0, size); + pass->attachment_count = pCreateInfo->attachmentCount; + pass->subpass_count = pCreateInfo->subpassCount; + pass->attachments = (void *) pass + attachments_offset; + + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + struct anv_render_pass_attachment *att = &pass->attachments[i]; + + att->format = anv_format_for_vk_format(pCreateInfo->pAttachments[i].format); + att->samples = pCreateInfo->pAttachments[i].samples; + att->load_op = pCreateInfo->pAttachments[i].loadOp; + att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; + // att->store_op = pCreateInfo->pAttachments[i].storeOp; + // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; + } + + uint32_t subpass_attachment_count = 0, *p; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; + + subpass_attachment_count += + desc->inputAttachmentCount + + desc->colorAttachmentCount + + /* Count colorAttachmentCount again for resolve_attachments */ + desc->colorAttachmentCount; + } + + pass->subpass_attachments = + anv_alloc2(&device->alloc, pAllocator, + subpass_attachment_count * sizeof(uint32_t), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pass->subpass_attachments == NULL) { + anv_free2(&device->alloc, pAllocator, pass); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + p = pass->subpass_attachments; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; + struct anv_subpass *subpass = &pass->subpasses[i]; + + subpass->input_count = desc->inputAttachmentCount; + subpass->color_count = desc->colorAttachmentCount; + + if (desc->inputAttachmentCount > 0) { + subpass->input_attachments = p; + p += desc->inputAttachmentCount; + + for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { + subpass->input_attachments[j] + = desc->pInputAttachments[j].attachment; + } + } + + if (desc->colorAttachmentCount > 0) { + subpass->color_attachments = p; + p += desc->colorAttachmentCount; + + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { + subpass->color_attachments[j] + = desc->pColorAttachments[j].attachment; + } + } + + subpass->has_resolve = false; + if (desc->pResolveAttachments) { + subpass->resolve_attachments = p; + p += desc->colorAttachmentCount; + + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { + uint32_t a = desc->pResolveAttachments[j].attachment; + subpass->resolve_attachments[j] = a; + if (a != VK_ATTACHMENT_UNUSED) + subpass->has_resolve = true; + } + } + + if (desc->pDepthStencilAttachment) { + subpass->depth_stencil_attachment = + desc->pDepthStencilAttachment->attachment; + } else { + subpass->depth_stencil_attachment = VK_ATTACHMENT_UNUSED; + } + } + + *pRenderPass = anv_render_pass_to_handle(pass); + + return VK_SUCCESS; +} + +void anv_DestroyRenderPass( + VkDevice _device, + VkRenderPass _pass, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_render_pass, pass, _pass); + + anv_free2(&device->alloc, pAllocator, pass->subpass_attachments); + anv_free2(&device->alloc, pAllocator, pass); +} + +void anv_GetRenderAreaGranularity( + VkDevice device, + VkRenderPass renderPass, + VkExtent2D* pGranularity) +{ + *pGranularity = (VkExtent2D) { 1, 1 }; +} diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c new file mode 100644 index 00000000000..3c5072ba924 --- /dev/null +++ b/src/vulkan/anv_pipeline.c @@ -0,0 +1,1300 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" +#include "brw_nir.h" +#include "anv_nir.h" +#include "nir/spirv/nir_spirv.h" + +/* Needed for SWIZZLE macros */ +#include "program/prog_instruction.h" + +// Shader functions + +VkResult anv_CreateShaderModule( + VkDevice _device, + const VkShaderModuleCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkShaderModule* pShaderModule) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_shader_module *module; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + module = anv_alloc2(&device->alloc, pAllocator, + sizeof(*module) + pCreateInfo->codeSize, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (module == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + module->nir = NULL; + module->size = pCreateInfo->codeSize; + memcpy(module->data, pCreateInfo->pCode, module->size); + + *pShaderModule = anv_shader_module_to_handle(module); + + return VK_SUCCESS; +} + +void anv_DestroyShaderModule( + VkDevice _device, + VkShaderModule _module, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_shader_module, module, _module); + + anv_free2(&device->alloc, pAllocator, module); +} + +#define SPIR_V_MAGIC_NUMBER 0x07230203 + +/* Eventually, this will become part of anv_CreateShader. Unfortunately, + * we can't do that yet because we don't have the ability to copy nir. + */ +static nir_shader * +anv_shader_compile_to_nir(struct anv_device *device, + struct anv_shader_module *module, + const char *entrypoint_name, + gl_shader_stage stage, + const VkSpecializationInfo *spec_info) +{ + if (strcmp(entrypoint_name, "main") != 0) { + anv_finishme("Multiple shaders per module not really supported"); + } + + const struct brw_compiler *compiler = + device->instance->physicalDevice.compiler; + const nir_shader_compiler_options *nir_options = + compiler->glsl_compiler_options[stage].NirOptions; + + nir_shader *nir; + nir_function *entry_point; + if (module->nir) { + /* Some things such as our meta clear/blit code will give us a NIR + * shader directly. In that case, we just ignore the SPIR-V entirely + * and just use the NIR shader */ + nir = module->nir; + nir->options = nir_options; + nir_validate_shader(nir); + + assert(exec_list_length(&nir->functions) == 1); + struct exec_node *node = exec_list_get_head(&nir->functions); + entry_point = exec_node_data(nir_function, node, node); + } else { + uint32_t *spirv = (uint32_t *) module->data; + assert(spirv[0] == SPIR_V_MAGIC_NUMBER); + assert(module->size % 4 == 0); + + uint32_t num_spec_entries = 0; + struct nir_spirv_specialization *spec_entries = NULL; + if (spec_info && spec_info->mapEntryCount > 0) { + num_spec_entries = spec_info->mapEntryCount; + spec_entries = malloc(num_spec_entries * sizeof(*spec_entries)); + for (uint32_t i = 0; i < num_spec_entries; i++) { + const uint32_t *data = + spec_info->pData + spec_info->pMapEntries[i].offset; + assert((const void *)(data + 1) <= + spec_info->pData + spec_info->dataSize); + + spec_entries[i].id = spec_info->pMapEntries[i].constantID; + spec_entries[i].data = *data; + } + } + + entry_point = spirv_to_nir(spirv, module->size / 4, + spec_entries, num_spec_entries, + stage, entrypoint_name, nir_options); + nir = entry_point->shader; + assert(nir->stage == stage); + nir_validate_shader(nir); + + free(spec_entries); + + nir_lower_returns(nir); + nir_validate_shader(nir); + + nir_inline_functions(nir); + nir_validate_shader(nir); + + /* Pick off the single entrypoint that we want */ + foreach_list_typed_safe(nir_function, func, node, &nir->functions) { + if (func != entry_point) + exec_node_remove(&func->node); + } + assert(exec_list_length(&nir->functions) == 1); + entry_point->name = ralloc_strdup(entry_point, "main"); + + nir_remove_dead_variables(nir, nir_var_shader_in); + nir_remove_dead_variables(nir, nir_var_shader_out); + nir_remove_dead_variables(nir, nir_var_system_value); + nir_validate_shader(nir); + + nir_lower_outputs_to_temporaries(entry_point->shader, entry_point); + + nir_lower_system_values(nir); + nir_validate_shader(nir); + } + + /* Vulkan uses the separate-shader linking model */ + nir->info.separate_shader = true; + + nir = brw_preprocess_nir(nir, compiler->scalar_stage[stage]); + + nir_shader_gather_info(nir, entry_point->impl); + + uint32_t indirect_mask = 0; + if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput) + indirect_mask |= (1 << nir_var_shader_in); + if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp) + indirect_mask |= 1 << nir_var_local; + + nir_lower_indirect_derefs(nir, indirect_mask); + + return nir; +} + +void +anv_pipeline_cache_init(struct anv_pipeline_cache *cache, + struct anv_device *device) +{ + cache->device = device; + anv_state_stream_init(&cache->program_stream, + &device->instruction_block_pool); + pthread_mutex_init(&cache->mutex, NULL); +} + +void +anv_pipeline_cache_finish(struct anv_pipeline_cache *cache) +{ + anv_state_stream_finish(&cache->program_stream); + pthread_mutex_destroy(&cache->mutex); +} + +static uint32_t +anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, + const void *data, size_t size) +{ + pthread_mutex_lock(&cache->mutex); + + struct anv_state state = + anv_state_stream_alloc(&cache->program_stream, size, 64); + + pthread_mutex_unlock(&cache->mutex); + + assert(size < cache->program_stream.block_pool->block_size); + + memcpy(state.map, data, size); + + if (!cache->device->info.has_llc) + anv_state_clflush(state); + + return state.offset; +} + +VkResult anv_CreatePipelineCache( + VkDevice _device, + const VkPipelineCacheCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipelineCache* pPipelineCache) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline_cache *cache; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + cache = anv_alloc2(&device->alloc, pAllocator, + sizeof(*cache), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (cache == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_pipeline_cache_init(cache, device); + + *pPipelineCache = anv_pipeline_cache_to_handle(cache); + + return VK_SUCCESS; +} + +void anv_DestroyPipelineCache( + VkDevice _device, + VkPipelineCache _cache, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + anv_pipeline_cache_finish(cache); + + anv_free2(&device->alloc, pAllocator, cache); +} + +VkResult anv_GetPipelineCacheData( + VkDevice device, + VkPipelineCache pipelineCache, + size_t* pDataSize, + void* pData) +{ + *pDataSize = 0; + + return VK_SUCCESS; +} + +VkResult anv_MergePipelineCaches( + VkDevice device, + VkPipelineCache destCache, + uint32_t srcCacheCount, + const VkPipelineCache* pSrcCaches) +{ + stub_return(VK_SUCCESS); +} + +void anv_DestroyPipeline( + VkDevice _device, + VkPipeline _pipeline, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + + anv_reloc_list_finish(&pipeline->batch_relocs, + pAllocator ? pAllocator : &device->alloc); + if (pipeline->blend_state.map) + anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); + anv_free2(&device->alloc, pAllocator, pipeline); +} + +static const uint32_t vk_to_gen_primitive_type[] = { + [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, +/* [VK_PRIMITIVE_TOPOLOGY_PATCH_LIST] = _3DPRIM_PATCHLIST_1 */ +}; + +static void +populate_sampler_prog_key(const struct brw_device_info *devinfo, + struct brw_sampler_prog_key_data *key) +{ + /* XXX: Handle texture swizzle on HSW- */ + for (int i = 0; i < MAX_SAMPLERS; i++) { + /* Assume color sampler, no swizzling. (Works for BDW+) */ + key->swizzles[i] = SWIZZLE_XYZW; + } +} + +static void +populate_vs_prog_key(const struct brw_device_info *devinfo, + struct brw_vs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); + + /* XXX: Handle vertex input work-arounds */ + + /* XXX: Handle sampler_prog_key */ +} + +static void +populate_gs_prog_key(const struct brw_device_info *devinfo, + struct brw_gs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); +} + +static void +populate_wm_prog_key(const struct brw_device_info *devinfo, + const VkGraphicsPipelineCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra, + struct brw_wm_prog_key *key) +{ + ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass); + + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); + + /* TODO: Fill out key->input_slots_valid */ + + /* Vulkan doesn't specify a default */ + key->high_quality_derivatives = false; + + /* XXX Vulkan doesn't appear to specify */ + key->clamp_fragment_color = false; + + /* Vulkan always specifies upper-left coordinates */ + key->drawable_height = 0; + key->render_to_fbo = false; + + if (extra && extra->color_attachment_count >= 0) { + key->nr_color_regions = extra->color_attachment_count; + } else { + key->nr_color_regions = + render_pass->subpasses[info->subpass].color_count; + } + + key->replicate_alpha = key->nr_color_regions > 1 && + info->pMultisampleState && + info->pMultisampleState->alphaToCoverageEnable; + + if (info->pMultisampleState && info->pMultisampleState->rasterizationSamples > 1) { + /* We should probably pull this out of the shader, but it's fairly + * harmless to compute it and then let dead-code take care of it. + */ + key->persample_shading = info->pMultisampleState->sampleShadingEnable; + if (key->persample_shading) + key->persample_2x = info->pMultisampleState->rasterizationSamples == 2; + + key->compute_pos_offset = info->pMultisampleState->sampleShadingEnable; + key->compute_sample_id = info->pMultisampleState->sampleShadingEnable; + } +} + +static void +populate_cs_prog_key(const struct brw_device_info *devinfo, + struct brw_cs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); +} + +static nir_shader * +anv_pipeline_compile(struct anv_pipeline *pipeline, + struct anv_shader_module *module, + const char *entrypoint, + gl_shader_stage stage, + const VkSpecializationInfo *spec_info, + struct brw_stage_prog_data *prog_data) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + + nir_shader *nir = anv_shader_compile_to_nir(pipeline->device, + module, entrypoint, stage, + spec_info); + if (nir == NULL) + return NULL; + + anv_nir_lower_push_constants(nir, compiler->scalar_stage[stage]); + + /* Figure out the number of parameters */ + prog_data->nr_params = 0; + + if (nir->num_uniforms > 0) { + /* If the shader uses any push constants at all, we'll just give + * them the maximum possible number + */ + prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float); + } + + if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets) + prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2; + + if (pipeline->layout && pipeline->layout->stage[stage].image_count > 0) + prog_data->nr_params += pipeline->layout->stage[stage].image_count * + BRW_IMAGE_PARAM_SIZE; + + if (prog_data->nr_params > 0) { + /* XXX: I think we're leaking this */ + prog_data->param = (const union gl_constant_value **) + malloc(prog_data->nr_params * sizeof(union gl_constant_value *)); + + /* We now set the param values to be offsets into a + * anv_push_constant_data structure. Since the compiler doesn't + * actually dereference any of the gl_constant_value pointers in the + * params array, it doesn't really matter what we put here. + */ + struct anv_push_constants *null_data = NULL; + if (nir->num_uniforms > 0) { + /* Fill out the push constants section of the param array */ + for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) + prog_data->param[i] = (const union gl_constant_value *) + &null_data->client_data[i * sizeof(float)]; + } + } + + /* Set up dynamic offsets */ + anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data); + + /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ + if (pipeline->layout) + anv_nir_apply_pipeline_layout(nir, prog_data, pipeline->layout); + + /* All binding table offsets provided by apply_pipeline_layout() are + * relative to the start of the bindint table (plus MAX_RTS for VS). + */ + unsigned bias; + switch (stage) { + case MESA_SHADER_FRAGMENT: + bias = MAX_RTS; + break; + case MESA_SHADER_COMPUTE: + bias = 1; + break; + default: + bias = 0; + break; + } + prog_data->binding_table.size_bytes = 0; + prog_data->binding_table.texture_start = bias; + prog_data->binding_table.ubo_start = bias; + prog_data->binding_table.ssbo_start = bias; + prog_data->binding_table.image_start = bias; + + /* Finish the optimization and compilation process */ + nir = brw_nir_lower_io(nir, &pipeline->device->info, + compiler->scalar_stage[stage]); + + /* nir_lower_io will only handle the push constants; we need to set this + * to the full number of possible uniforms. + */ + nir->num_uniforms = prog_data->nr_params * 4; + + return nir; +} + +static void +anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, + gl_shader_stage stage, + struct brw_stage_prog_data *prog_data) +{ + struct brw_device_info *devinfo = &pipeline->device->info; + uint32_t max_threads[] = { + [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, + [MESA_SHADER_TESS_CTRL] = 0, + [MESA_SHADER_TESS_EVAL] = 0, + [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, + [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, + [MESA_SHADER_COMPUTE] = devinfo->max_cs_threads, + }; + + pipeline->prog_data[stage] = prog_data; + pipeline->active_stages |= mesa_to_vk_shader_stage(stage); + pipeline->scratch_start[stage] = pipeline->total_scratch; + pipeline->total_scratch = + align_u32(pipeline->total_scratch, 1024) + + prog_data->total_scratch * max_threads[stage]; +} + +static VkResult +anv_pipeline_compile_vs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + struct brw_vs_prog_key key; + + populate_vs_prog_key(&pipeline->device->info, &key); + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_VERTEX, spec_info, + &prog_data->base.base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + prog_data->inputs_read = nir->info.inputs_read; + if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) + pipeline->writes_point_size = true; + + brw_compute_vue_map(&pipeline->device->info, + &prog_data->base.vue_map, + nir->info.outputs_written, + nir->info.separate_shader); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, false, -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + const uint32_t offset = + anv_pipeline_cache_upload_kernel(cache, shader_code, code_size); + if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { + pipeline->vs_simd8 = offset; + pipeline->vs_vec4 = NO_KERNEL; + } else { + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = offset; + } + + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, + &prog_data->base.base); + + return VK_SUCCESS; +} + +static VkResult +anv_pipeline_compile_gs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_gs_prog_data *prog_data = &pipeline->gs_prog_data; + struct brw_gs_prog_key key; + + populate_gs_prog_key(&pipeline->device->info, &key); + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_GEOMETRY, spec_info, + &prog_data->base.base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) + pipeline->writes_point_size = true; + + brw_compute_vue_map(&pipeline->device->info, + &prog_data->base.vue_map, + nir->info.outputs_written, + nir->info.separate_shader); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + /* TODO: SIMD8 GS */ + pipeline->gs_kernel = + anv_pipeline_cache_upload_kernel(cache, shader_code, code_size); + pipeline->gs_vertex_count = nir->info.gs.vertices_in; + + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, + &prog_data->base.base); + + return VK_SUCCESS; +} + +static VkResult +anv_pipeline_compile_fs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; + struct brw_wm_prog_key key; + + populate_wm_prog_key(&pipeline->device->info, info, extra, &key); + + if (pipeline->use_repclear) + key.nr_color_regions = 1; + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + prog_data->binding_table.render_target_start = 0; + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_FRAGMENT, spec_info, + &prog_data->base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, -1, -1, pipeline->use_repclear, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + uint32_t offset = + anv_pipeline_cache_upload_kernel(cache, shader_code, code_size); + if (prog_data->no_8) + pipeline->ps_simd8 = NO_KERNEL; + else + pipeline->ps_simd8 = offset; + + if (prog_data->no_8 || prog_data->prog_offset_16) { + pipeline->ps_simd16 = offset + prog_data->prog_offset_16; + } else { + pipeline->ps_simd16 = NO_KERNEL; + } + + pipeline->ps_ksp2 = 0; + pipeline->ps_grf_start2 = 0; + if (pipeline->ps_simd8 != NO_KERNEL) { + pipeline->ps_ksp0 = pipeline->ps_simd8; + pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg; + if (pipeline->ps_simd16 != NO_KERNEL) { + pipeline->ps_ksp2 = pipeline->ps_simd16; + pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16; + } + } else if (pipeline->ps_simd16 != NO_KERNEL) { + pipeline->ps_ksp0 = pipeline->ps_simd16; + pipeline->ps_grf_start0 = prog_data->dispatch_grf_start_reg_16; + } + + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, + &prog_data->base); + + return VK_SUCCESS; +} + +VkResult +anv_pipeline_compile_cs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct brw_cs_prog_key key; + + populate_cs_prog_key(&pipeline->device->info, &key); + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + prog_data->binding_table.work_groups_start = 0; + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_COMPUTE, spec_info, + &prog_data->base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + prog_data->base.total_shared = nir->num_shared; + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir, + -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + pipeline->cs_simd = + anv_pipeline_cache_upload_kernel(cache, shader_code, code_size); + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, + &prog_data->base); + + return VK_SUCCESS; +} + +static const int gen8_push_size = 32 * 1024; + +static void +gen7_compute_urb_partition(struct anv_pipeline *pipeline) +{ + const struct brw_device_info *devinfo = &pipeline->device->info; + bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT; + unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; + unsigned vs_entry_size_bytes = vs_size * 64; + bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT; + unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; + unsigned gs_entry_size_bytes = gs_size * 64; + + /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): + * + * VS Number of URB Entries must be divisible by 8 if the VS URB Entry + * Allocation Size is less than 9 512-bit URB entries. + * + * Similar text exists for GS. + */ + unsigned vs_granularity = (vs_size < 9) ? 8 : 1; + unsigned gs_granularity = (gs_size < 9) ? 8 : 1; + + /* URB allocations must be done in 8k chunks. */ + unsigned chunk_size_bytes = 8192; + + /* Determine the size of the URB in chunks. */ + unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; + + /* Reserve space for push constants */ + unsigned push_constant_bytes = gen8_push_size; + unsigned push_constant_chunks = + push_constant_bytes / chunk_size_bytes; + + /* Initially, assign each stage the minimum amount of URB space it needs, + * and make a note of how much additional space it "wants" (the amount of + * additional space it could actually make use of). + */ + + /* VS has a lower limit on the number of URB entries */ + unsigned vs_chunks = + ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + unsigned vs_wants = + ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - vs_chunks; + + unsigned gs_chunks = 0; + unsigned gs_wants = 0; + if (gs_present) { + /* There are two constraints on the minimum amount of URB space we can + * allocate: + * + * (1) We need room for at least 2 URB entries, since we always operate + * the GS in DUAL_OBJECT mode. + * + * (2) We can't allocate less than nr_gs_entries_granularity. + */ + gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + gs_wants = + ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - gs_chunks; + } + + /* There should always be enough URB space to satisfy the minimum + * requirements of each stage. + */ + unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; + assert(total_needs <= urb_chunks); + + /* Mete out remaining space (if any) in proportion to "wants". */ + unsigned total_wants = vs_wants + gs_wants; + unsigned remaining_space = urb_chunks - total_needs; + if (remaining_space > total_wants) + remaining_space = total_wants; + if (remaining_space > 0) { + unsigned vs_additional = (unsigned) + round(vs_wants * (((double) remaining_space) / total_wants)); + vs_chunks += vs_additional; + remaining_space -= vs_additional; + gs_chunks += remaining_space; + } + + /* Sanity check that we haven't over-allocated. */ + assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); + + /* Finally, compute the number of entries that can fit in the space + * allocated to each stage. + */ + unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; + unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; + + /* Since we rounded up when computing *_wants, this may be slightly more + * than the maximum allowed amount, so correct for that. + */ + nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries); + nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries); + + /* Ensure that we program a multiple of the granularity. */ + nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); + nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); + + /* Finally, sanity check to make sure we have at least the minimum number + * of entries needed for each stage. + */ + assert(nr_vs_entries >= devinfo->urb.min_vs_entries); + if (gs_present) + assert(nr_gs_entries >= 2); + + /* Lay out the URB in the following order: + * - push constants + * - VS + * - GS + */ + pipeline->urb.vs_start = push_constant_chunks; + pipeline->urb.vs_size = vs_size; + pipeline->urb.nr_vs_entries = nr_vs_entries; + + pipeline->urb.gs_start = push_constant_chunks + vs_chunks; + pipeline->urb.gs_size = gs_size; + pipeline->urb.nr_gs_entries = nr_gs_entries; +} + +static void +anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *pCreateInfo) +{ + anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL; + ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass); + struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; + + pipeline->dynamic_state = default_dynamic_state; + + if (pCreateInfo->pDynamicState) { + /* Remove all of the states that are marked as dynamic */ + uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount; + for (uint32_t s = 0; s < count; s++) + states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]); + } + + struct anv_dynamic_state *dynamic = &pipeline->dynamic_state; + + dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount; + if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { + typed_memcpy(dynamic->viewport.viewports, + pCreateInfo->pViewportState->pViewports, + pCreateInfo->pViewportState->viewportCount); + } + + dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount; + if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) { + typed_memcpy(dynamic->scissor.scissors, + pCreateInfo->pViewportState->pScissors, + pCreateInfo->pViewportState->scissorCount); + } + + if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) { + assert(pCreateInfo->pRasterizationState); + dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth; + } + + if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) { + assert(pCreateInfo->pRasterizationState); + dynamic->depth_bias.bias = + pCreateInfo->pRasterizationState->depthBiasConstantFactor; + dynamic->depth_bias.clamp = + pCreateInfo->pRasterizationState->depthBiasClamp; + dynamic->depth_bias.slope = + pCreateInfo->pRasterizationState->depthBiasSlopeFactor; + } + + if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) { + assert(pCreateInfo->pColorBlendState); + typed_memcpy(dynamic->blend_constants, + pCreateInfo->pColorBlendState->blendConstants, 4); + } + + /* If there is no depthstencil attachment, then don't read + * pDepthStencilState. The Vulkan spec states that pDepthStencilState may + * be NULL in this case. Even if pDepthStencilState is non-NULL, there is + * no need to override the depthstencil defaults in + * anv_pipeline::dynamic_state when there is no depthstencil attachment. + * + * From the Vulkan spec (20 Oct 2015, git-aa308cb): + * + * pDepthStencilState [...] may only be NULL if renderPass and subpass + * specify a subpass that has no depth/stencil attachment. + */ + if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { + if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->depth_bounds.min = + pCreateInfo->pDepthStencilState->minDepthBounds; + dynamic->depth_bounds.max = + pCreateInfo->pDepthStencilState->maxDepthBounds; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_compare_mask.front = + pCreateInfo->pDepthStencilState->front.compareMask; + dynamic->stencil_compare_mask.back = + pCreateInfo->pDepthStencilState->back.compareMask; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_write_mask.front = + pCreateInfo->pDepthStencilState->front.writeMask; + dynamic->stencil_write_mask.back = + pCreateInfo->pDepthStencilState->back.writeMask; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_reference.front = + pCreateInfo->pDepthStencilState->front.reference; + dynamic->stencil_reference.back = + pCreateInfo->pDepthStencilState->back.reference; + } + } + + pipeline->dynamic_state_mask = states; +} + +static void +anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info) +{ + struct anv_render_pass *renderpass = NULL; + struct anv_subpass *subpass = NULL; + + /* Assert that all required members of VkGraphicsPipelineCreateInfo are + * present, as explained by the Vulkan (20 Oct 2015, git-aa308cb), Section + * 4.2 Graphics Pipeline. + */ + assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + renderpass = anv_render_pass_from_handle(info->renderPass); + assert(renderpass); + + if (renderpass != &anv_meta_dummy_renderpass) { + assert(info->subpass < renderpass->subpass_count); + subpass = &renderpass->subpasses[info->subpass]; + } + + assert(info->stageCount >= 1); + assert(info->pVertexInputState); + assert(info->pInputAssemblyState); + assert(info->pViewportState); + assert(info->pRasterizationState); + + if (subpass && subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) + assert(info->pDepthStencilState); + + if (subpass && subpass->color_count > 0) + assert(info->pColorBlendState); + + for (uint32_t i = 0; i < info->stageCount; ++i) { + switch (info->pStages[i].stage) { + case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: + case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: + assert(info->pTessellationState); + break; + default: + break; + } + } +} + +VkResult +anv_pipeline_init(struct anv_pipeline *pipeline, + struct anv_device *device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc) +{ + VkResult result; + + anv_validate { + anv_pipeline_validate_create_info(pCreateInfo); + } + + if (alloc == NULL) + alloc = &device->alloc; + + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + + result = anv_reloc_list_init(&pipeline->batch_relocs, alloc); + if (result != VK_SUCCESS) + return result; + + pipeline->batch.alloc = alloc; + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; + + anv_pipeline_init_dynamic_state(pipeline, pCreateInfo); + + if (pCreateInfo->pTessellationState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); + + pipeline->use_repclear = extra && extra->use_repclear; + pipeline->writes_point_size = false; + + /* When we free the pipeline, we detect stages based on the NULL status + * of various prog_data pointers. Make them NULL by default. + */ + memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); + + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = NO_KERNEL; + pipeline->gs_kernel = NO_KERNEL; + pipeline->ps_ksp0 = NO_KERNEL; + + pipeline->active_stages = 0; + pipeline->total_scratch = 0; + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + ANV_FROM_HANDLE(anv_shader_module, module, + pCreateInfo->pStages[i].module); + + switch (pCreateInfo->pStages[i].stage) { + case VK_SHADER_STAGE_VERTEX_BIT: + anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, module, + pCreateInfo->pStages[i].pName, + pCreateInfo->pStages[i].pSpecializationInfo); + break; + case VK_SHADER_STAGE_GEOMETRY_BIT: + anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, module, + pCreateInfo->pStages[i].pName, + pCreateInfo->pStages[i].pSpecializationInfo); + break; + case VK_SHADER_STAGE_FRAGMENT_BIT: + anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra, module, + pCreateInfo->pStages[i].pName, + pCreateInfo->pStages[i].pSpecializationInfo); + break; + default: + anv_finishme("Unsupported shader stage"); + } + } + + if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) { + /* Vertex is only optional if disable_vs is set */ + assert(extra->disable_vs); + memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); + } + + gen7_compute_urb_partition(pipeline); + + const VkPipelineVertexInputStateCreateInfo *vi_info = + pCreateInfo->pVertexInputState; + + uint64_t inputs_read; + if (extra && extra->disable_vs) { + /* If the VS is disabled, just assume the user knows what they're + * doing and apply the layout blindly. This can only come from + * meta, so this *should* be safe. + */ + inputs_read = ~0ull; + } else { + inputs_read = pipeline->vs_prog_data.inputs_read; + } + + pipeline->vb_used = 0; + for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { + const VkVertexInputAttributeDescription *desc = + &vi_info->pVertexAttributeDescriptions[i]; + + if (inputs_read & (1 << (VERT_ATTRIB_GENERIC0 + desc->location))) + pipeline->vb_used |= 1 << desc->binding; + } + + for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { + const VkVertexInputBindingDescription *desc = + &vi_info->pVertexBindingDescriptions[i]; + + pipeline->binding_stride[desc->binding] = desc->stride; + + /* Step rate is programmed per vertex element (attribute), not + * binding. Set up a map of which bindings step per instance, for + * reference by vertex element setup. */ + switch (desc->inputRate) { + default: + case VK_VERTEX_INPUT_RATE_VERTEX: + pipeline->instancing_enable[desc->binding] = false; + break; + case VK_VERTEX_INPUT_RATE_INSTANCE: + pipeline->instancing_enable[desc->binding] = true; + break; + } + } + + const VkPipelineInputAssemblyStateCreateInfo *ia_info = + pCreateInfo->pInputAssemblyState; + pipeline->primitive_restart = ia_info->primitiveRestartEnable; + pipeline->topology = vk_to_gen_primitive_type[ia_info->topology]; + + if (extra && extra->use_rectlist) + pipeline->topology = _3DPRIM_RECTLIST; + + while (anv_block_pool_size(&device->scratch_block_pool) < + pipeline->total_scratch) + anv_block_pool_alloc(&device->scratch_block_pool); + + return VK_SUCCESS; +} + +VkResult +anv_graphics_pipeline_create( + VkDevice _device, + VkPipelineCache _cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + if (cache == NULL) + cache = &device->default_pipeline_cache; + + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + return gen75_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); + else + return gen7_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); + case 8: + return gen8_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); + case 9: + return gen9_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); + default: + unreachable("unsupported gen\n"); + } +} + +VkResult anv_CreateGraphicsPipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkGraphicsPipelineCreateInfo* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipelines) +{ + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + result = anv_graphics_pipeline_create(_device, + pipelineCache, + &pCreateInfos[i], + NULL, pAllocator, &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + anv_DestroyPipeline(_device, pPipelines[j], pAllocator); + } + + return result; + } + } + + return VK_SUCCESS; +} + +static VkResult anv_compute_pipeline_create( + VkDevice _device, + VkPipelineCache _cache, + const VkComputePipelineCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + if (cache == NULL) + cache = &device->default_pipeline_cache; + + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + return gen75_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); + else + return gen7_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); + case 8: + return gen8_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); + case 9: + return gen9_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); + default: + unreachable("unsupported gen\n"); + } +} + +VkResult anv_CreateComputePipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkComputePipelineCreateInfo* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipelines) +{ + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + result = anv_compute_pipeline_create(_device, pipelineCache, + &pCreateInfos[i], + pAllocator, &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + anv_DestroyPipeline(_device, pPipelines[j], pAllocator); + } + + return result; + } + } + + return VK_SUCCESS; +} diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h new file mode 100644 index 00000000000..8e12792456b --- /dev/null +++ b/src/vulkan/anv_private.h @@ -0,0 +1,1823 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include <stdlib.h> +#include <stdio.h> +#include <stdbool.h> +#include <pthread.h> +#include <assert.h> +#include <stdint.h> +#include <i915_drm.h> + +#ifdef HAVE_VALGRIND +#include <valgrind.h> +#include <memcheck.h> +#define VG(x) x +#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x)) +#else +#define VG(x) +#endif + +#include "brw_device_info.h" +#include "util/macros.h" +#include "util/list.h" + +/* Pre-declarations needed for WSI entrypoints */ +struct wl_surface; +struct wl_display; +typedef struct xcb_connection_t xcb_connection_t; +typedef uint32_t xcb_visualid_t; +typedef uint32_t xcb_window_t; + +#define VK_USE_PLATFORM_XCB_KHR +#define VK_USE_PLATFORM_WAYLAND_KHR + +#define VK_PROTOTYPES +#include <vulkan/vulkan.h> +#include <vulkan/vulkan_intel.h> +#include <vulkan/vk_icd.h> + +#include "anv_entrypoints.h" +#include "anv_gen_macros.h" +#include "brw_context.h" +#include "isl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_VBS 32 +#define MAX_SETS 8 +#define MAX_RTS 8 +#define MAX_VIEWPORTS 16 +#define MAX_SCISSORS 16 +#define MAX_PUSH_CONSTANTS_SIZE 128 +#define MAX_DYNAMIC_BUFFERS 16 +#define MAX_IMAGES 8 +#define MAX_SAMPLES_LOG2 4 /* SKL supports 16 samples */ + +#define anv_noreturn __attribute__((__noreturn__)) +#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +static inline uint32_t +align_u32(uint32_t v, uint32_t a) +{ + assert(a != 0 && a == (a & -a)); + return (v + a - 1) & ~(a - 1); +} + +static inline uint64_t +align_u64(uint64_t v, uint64_t a) +{ + assert(a != 0 && a == (a & -a)); + return (v + a - 1) & ~(a - 1); +} + +static inline int32_t +align_i32(int32_t v, int32_t a) +{ + assert(a != 0 && a == (a & -a)); + return (v + a - 1) & ~(a - 1); +} + +/** Alignment must be a power of 2. */ +static inline bool +anv_is_aligned(uintmax_t n, uintmax_t a) +{ + assert(a == (a & -a)); + return (n & (a - 1)) == 0; +} + +static inline uint32_t +anv_minify(uint32_t n, uint32_t levels) +{ + if (unlikely(n == 0)) + return 0; + else + return MAX(n >> levels, 1); +} + +static inline float +anv_clamp_f(float f, float min, float max) +{ + assert(min < max); + + if (f > max) + return max; + else if (f < min) + return min; + else + return f; +} + +static inline bool +anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask) +{ + if (*inout_mask & clear_mask) { + *inout_mask &= ~clear_mask; + return true; + } else { + return false; + } +} + +#define for_each_bit(b, dword) \ + for (uint32_t __dword = (dword); \ + (b) = __builtin_ffs(__dword) - 1, __dword; \ + __dword &= ~(1 << (b))) + +#define typed_memcpy(dest, src, count) ({ \ + static_assert(sizeof(*src) == sizeof(*dest), ""); \ + memcpy((dest), (src), (count) * sizeof(*(src))); \ +}) + +#define zero(x) (memset(&(x), 0, sizeof(x))) + +/* Define no kernel as 1, since that's an illegal offset for a kernel */ +#define NO_KERNEL 1 + +struct anv_common { + VkStructureType sType; + const void* pNext; +}; + +/* Whenever we generate an error, pass it through this function. Useful for + * debugging, where we can break on it. Only call at error site, not when + * propagating errors. Might be useful to plug in a stack trace here. + */ + +VkResult __vk_errorf(VkResult error, const char *file, int line, const char *format, ...); + +#ifdef DEBUG +#define vk_error(error) __vk_errorf(error, __FILE__, __LINE__, NULL); +#define vk_errorf(error, format, ...) __vk_errorf(error, __FILE__, __LINE__, format, ## __VA_ARGS__); +#else +#define vk_error(error) error +#define vk_errorf(error, format, ...) error +#endif + +void __anv_finishme(const char *file, int line, const char *format, ...) + anv_printflike(3, 4); +void anv_loge(const char *format, ...) anv_printflike(1, 2); +void anv_loge_v(const char *format, va_list va); + +/** + * Print a FINISHME message, including its source location. + */ +#define anv_finishme(format, ...) \ + __anv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); + +/* A non-fatal assert. Useful for debugging. */ +#ifdef DEBUG +#define anv_assert(x) ({ \ + if (unlikely(!(x))) \ + fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \ +}) +#else +#define anv_assert(x) +#endif + +/** + * If a block of code is annotated with anv_validate, then the block runs only + * in debug builds. + */ +#ifdef DEBUG +#define anv_validate if (1) +#else +#define anv_validate if (0) +#endif + +void anv_abortf(const char *format, ...) anv_noreturn anv_printflike(1, 2); +void anv_abortfv(const char *format, va_list va) anv_noreturn; + +#define stub_return(v) \ + do { \ + anv_finishme("stub %s", __func__); \ + return (v); \ + } while (0) + +#define stub() \ + do { \ + anv_finishme("stub %s", __func__); \ + return; \ + } while (0) + +/** + * A dynamically growable, circular buffer. Elements are added at head and + * removed from tail. head and tail are free-running uint32_t indices and we + * only compute the modulo with size when accessing the array. This way, + * number of bytes in the queue is always head - tail, even in case of + * wraparound. + */ + +struct anv_vector { + uint32_t head; + uint32_t tail; + uint32_t element_size; + uint32_t size; + void *data; +}; + +int anv_vector_init(struct anv_vector *queue, uint32_t element_size, uint32_t size); +void *anv_vector_add(struct anv_vector *queue); +void *anv_vector_remove(struct anv_vector *queue); + +static inline int +anv_vector_length(struct anv_vector *queue) +{ + return (queue->head - queue->tail) / queue->element_size; +} + +static inline void * +anv_vector_head(struct anv_vector *vector) +{ + assert(vector->tail < vector->head); + return (void *)((char *)vector->data + + ((vector->head - vector->element_size) & + (vector->size - 1))); +} + +static inline void * +anv_vector_tail(struct anv_vector *vector) +{ + return (void *)((char *)vector->data + (vector->tail & (vector->size - 1))); +} + +static inline void +anv_vector_finish(struct anv_vector *queue) +{ + free(queue->data); +} + +#define anv_vector_foreach(elem, queue) \ + static_assert(__builtin_types_compatible_p(__typeof__(queue), struct anv_vector *), ""); \ + for (uint32_t __anv_vector_offset = (queue)->tail; \ + elem = (queue)->data + (__anv_vector_offset & ((queue)->size - 1)), __anv_vector_offset < (queue)->head; \ + __anv_vector_offset += (queue)->element_size) + +struct anv_bo { + uint32_t gem_handle; + + /* Index into the current validation list. This is used by the + * validation list building alrogithm to track which buffers are already + * in the validation list so that we can ensure uniqueness. + */ + uint32_t index; + + /* Last known offset. This value is provided by the kernel when we + * execbuf and is used as the presumed offset for the next bunch of + * relocations. + */ + uint64_t offset; + + uint64_t size; + void *map; +}; + +/* Represents a lock-free linked list of "free" things. This is used by + * both the block pool and the state pools. Unfortunately, in order to + * solve the ABA problem, we can't use a single uint32_t head. + */ +union anv_free_list { + struct { + int32_t offset; + + /* A simple count that is incremented every time the head changes. */ + uint32_t count; + }; + uint64_t u64; +}; + +#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } }) + +struct anv_block_state { + union { + struct { + uint32_t next; + uint32_t end; + }; + uint64_t u64; + }; +}; + +struct anv_block_pool { + struct anv_device *device; + + struct anv_bo bo; + + /* The offset from the start of the bo to the "center" of the block + * pool. Pointers to allocated blocks are given by + * bo.map + center_bo_offset + offsets. + */ + uint32_t center_bo_offset; + + /* Current memory map of the block pool. This pointer may or may not + * point to the actual beginning of the block pool memory. If + * anv_block_pool_alloc_back has ever been called, then this pointer + * will point to the "center" position of the buffer and all offsets + * (negative or positive) given out by the block pool alloc functions + * will be valid relative to this pointer. + * + * In particular, map == bo.map + center_offset + */ + void *map; + int fd; + + /** + * Array of mmaps and gem handles owned by the block pool, reclaimed when + * the block pool is destroyed. + */ + struct anv_vector mmap_cleanups; + + uint32_t block_size; + + union anv_free_list free_list; + struct anv_block_state state; + + union anv_free_list back_free_list; + struct anv_block_state back_state; +}; + +/* Block pools are backed by a fixed-size 2GB memfd */ +#define BLOCK_POOL_MEMFD_SIZE (1ull << 32) + +/* The center of the block pool is also the middle of the memfd. This may + * change in the future if we decide differently for some reason. + */ +#define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2) + +static inline uint32_t +anv_block_pool_size(struct anv_block_pool *pool) +{ + return pool->state.end + pool->back_state.end; +} + +struct anv_state { + int32_t offset; + uint32_t alloc_size; + void *map; +}; + +struct anv_fixed_size_state_pool { + size_t state_size; + union anv_free_list free_list; + struct anv_block_state block; +}; + +#define ANV_MIN_STATE_SIZE_LOG2 6 +#define ANV_MAX_STATE_SIZE_LOG2 10 + +#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2) + +struct anv_state_pool { + struct anv_block_pool *block_pool; + struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS]; +}; + +struct anv_state_stream_block; + +struct anv_state_stream { + struct anv_block_pool *block_pool; + + /* The current working block */ + struct anv_state_stream_block *block; + + /* Offset at which the current block starts */ + uint32_t start; + /* Offset at which to allocate the next state */ + uint32_t next; + /* Offset at which the current block ends */ + uint32_t end; +}; + +#define CACHELINE_SIZE 64 +#define CACHELINE_MASK 63 + +static void inline +anv_state_clflush(struct anv_state state) +{ + /* state.map may not be cacheline aligned, so round down the start pointer + * to a cacheline boundary so we flush all pages that contain the state. + */ + void *end = state.map + state.alloc_size; + void *p = (void *) (((uintptr_t) state.map) & ~CACHELINE_MASK); + + __builtin_ia32_mfence(); + while (p < end) { + __builtin_ia32_clflush(p); + p += CACHELINE_SIZE; + } +} + +void anv_block_pool_init(struct anv_block_pool *pool, + struct anv_device *device, uint32_t block_size); +void anv_block_pool_finish(struct anv_block_pool *pool); +int32_t anv_block_pool_alloc(struct anv_block_pool *pool); +int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool); +void anv_block_pool_free(struct anv_block_pool *pool, int32_t offset); +void anv_state_pool_init(struct anv_state_pool *pool, + struct anv_block_pool *block_pool); +void anv_state_pool_finish(struct anv_state_pool *pool); +struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool, + size_t state_size, size_t alignment); +void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state); +void anv_state_stream_init(struct anv_state_stream *stream, + struct anv_block_pool *block_pool); +void anv_state_stream_finish(struct anv_state_stream *stream); +struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, + uint32_t size, uint32_t alignment); + +/** + * Implements a pool of re-usable BOs. The interface is identical to that + * of block_pool except that each block is its own BO. + */ +struct anv_bo_pool { + struct anv_device *device; + + uint32_t bo_size; + + void *free_list; +}; + +void anv_bo_pool_init(struct anv_bo_pool *pool, + struct anv_device *device, uint32_t block_size); +void anv_bo_pool_finish(struct anv_bo_pool *pool); +VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo); +void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo); + + +void *anv_resolve_entrypoint(uint32_t index); + +extern struct anv_dispatch_table dtable; + +#define ANV_CALL(func) ({ \ + if (dtable.func == NULL) { \ + size_t idx = offsetof(struct anv_dispatch_table, func) / sizeof(void *); \ + dtable.entrypoints[idx] = anv_resolve_entrypoint(idx); \ + } \ + dtable.func; \ +}) + +static inline void * +anv_alloc(const VkAllocationCallbacks *alloc, + size_t size, size_t align, + VkSystemAllocationScope scope) +{ + return alloc->pfnAllocation(alloc->pUserData, size, align, scope); +} + +static inline void * +anv_realloc(const VkAllocationCallbacks *alloc, + void *ptr, size_t size, size_t align, + VkSystemAllocationScope scope) +{ + return alloc->pfnReallocation(alloc->pUserData, ptr, size, align, scope); +} + +static inline void +anv_free(const VkAllocationCallbacks *alloc, void *data) +{ + alloc->pfnFree(alloc->pUserData, data); +} + +static inline void * +anv_alloc2(const VkAllocationCallbacks *parent_alloc, + const VkAllocationCallbacks *alloc, + size_t size, size_t align, + VkSystemAllocationScope scope) +{ + if (alloc) + return anv_alloc(alloc, size, align, scope); + else + return anv_alloc(parent_alloc, size, align, scope); +} + +static inline void +anv_free2(const VkAllocationCallbacks *parent_alloc, + const VkAllocationCallbacks *alloc, + void *data) +{ + if (alloc) + anv_free(alloc, data); + else + anv_free(parent_alloc, data); +} + +struct anv_physical_device { + VK_LOADER_DATA _loader_data; + + struct anv_instance * instance; + uint32_t chipset_id; + const char * path; + const char * name; + const struct brw_device_info * info; + uint64_t aperture_size; + struct brw_compiler * compiler; + struct isl_device isl_dev; +}; + +struct anv_wsi_interaface; + +#define VK_ICD_WSI_PLATFORM_MAX 5 + +struct anv_instance { + VK_LOADER_DATA _loader_data; + + VkAllocationCallbacks alloc; + + uint32_t apiVersion; + int physicalDeviceCount; + struct anv_physical_device physicalDevice; + + struct anv_wsi_interface * wsi[VK_ICD_WSI_PLATFORM_MAX]; +}; + +VkResult anv_init_wsi(struct anv_instance *instance); +void anv_finish_wsi(struct anv_instance *instance); + +struct anv_meta_state { + VkAllocationCallbacks alloc; + + /** + * Use array element `i` for images with `2^i` samples. + */ + struct { + /** + * Pipeline N is used to clear color attachment N of the current + * subpass. + * + * HACK: We use one pipeline per color attachment to work around the + * compiler's inability to dynamically set the render target index of + * the render target write message. + */ + struct anv_pipeline *color_pipelines[MAX_RTS]; + + struct anv_pipeline *depth_only_pipeline; + struct anv_pipeline *stencil_only_pipeline; + struct anv_pipeline *depthstencil_pipeline; + } clear[1 + MAX_SAMPLES_LOG2]; + + struct { + VkRenderPass render_pass; + + /** Pipeline that blits from a 1D image. */ + VkPipeline pipeline_1d_src; + + /** Pipeline that blits from a 2D image. */ + VkPipeline pipeline_2d_src; + + /** Pipeline that blits from a 3D image. */ + VkPipeline pipeline_3d_src; + + VkPipelineLayout pipeline_layout; + VkDescriptorSetLayout ds_layout; + } blit; + + struct { + /** Pipeline [i] resolves an image with 2^(i+1) samples. */ + VkPipeline pipelines[MAX_SAMPLES_LOG2]; + + VkRenderPass pass; + VkPipelineLayout pipeline_layout; + VkDescriptorSetLayout ds_layout; + } resolve; +}; + +struct anv_queue { + VK_LOADER_DATA _loader_data; + + struct anv_device * device; + + struct anv_state_pool * pool; +}; + +struct anv_pipeline_cache { + struct anv_device * device; + struct anv_state_stream program_stream; + pthread_mutex_t mutex; +}; + +void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, + struct anv_device *device); +void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache); + +struct anv_device { + VK_LOADER_DATA _loader_data; + + VkAllocationCallbacks alloc; + + struct anv_instance * instance; + uint32_t chipset_id; + struct brw_device_info info; + struct isl_device isl_dev; + int context_id; + int fd; + + struct anv_bo_pool batch_bo_pool; + + struct anv_block_pool dynamic_state_block_pool; + struct anv_state_pool dynamic_state_pool; + + struct anv_block_pool instruction_block_pool; + struct anv_pipeline_cache default_pipeline_cache; + + struct anv_block_pool surface_state_block_pool; + struct anv_state_pool surface_state_pool; + + struct anv_bo workaround_bo; + + struct anv_meta_state meta_state; + + struct anv_state border_colors; + + struct anv_queue queue; + + struct anv_block_pool scratch_block_pool; + + pthread_mutex_t mutex; +}; + +void* anv_gem_mmap(struct anv_device *device, + uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags); +void anv_gem_munmap(void *p, uint64_t size); +uint32_t anv_gem_create(struct anv_device *device, size_t size); +void anv_gem_close(struct anv_device *device, uint32_t gem_handle); +uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size); +int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns); +int anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf); +int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle, + uint32_t stride, uint32_t tiling); +int anv_gem_create_context(struct anv_device *device); +int anv_gem_destroy_context(struct anv_device *device, int context); +int anv_gem_get_param(int fd, uint32_t param); +bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling); +int anv_gem_get_aperture(int fd, uint64_t *size); +int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle); +uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd); +int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching); +int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, + uint32_t read_domains, uint32_t write_domain); + +VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size); + +struct anv_reloc_list { + size_t num_relocs; + size_t array_length; + struct drm_i915_gem_relocation_entry * relocs; + struct anv_bo ** reloc_bos; +}; + +VkResult anv_reloc_list_init(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc); +void anv_reloc_list_finish(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc); + +uint64_t anv_reloc_list_add(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, + uint32_t offset, struct anv_bo *target_bo, + uint32_t delta); + +struct anv_batch_bo { + /* Link in the anv_cmd_buffer.owned_batch_bos list */ + struct list_head link; + + struct anv_bo bo; + + /* Bytes actually consumed in this batch BO */ + size_t length; + + /* Last seen surface state block pool bo offset */ + uint32_t last_ss_pool_bo_offset; + + struct anv_reloc_list relocs; +}; + +struct anv_batch { + const VkAllocationCallbacks * alloc; + + void * start; + void * end; + void * next; + + struct anv_reloc_list * relocs; + + /* This callback is called (with the associated user data) in the event + * that the batch runs out of space. + */ + VkResult (*extend_cb)(struct anv_batch *, void *); + void * user_data; +}; + +void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords); +void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other); +uint64_t anv_batch_emit_reloc(struct anv_batch *batch, + void *location, struct anv_bo *bo, uint32_t offset); + +struct anv_address { + struct anv_bo *bo; + uint32_t offset; +}; + +#define __gen_address_type struct anv_address +#define __gen_user_data struct anv_batch + +static inline uint64_t +__gen_combine_address(struct anv_batch *batch, void *location, + const struct anv_address address, uint32_t delta) +{ + if (address.bo == NULL) { + return address.offset + delta; + } else { + assert(batch->start <= location && location < batch->end); + + return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta); + } +} + +/* Wrapper macros needed to work around preprocessor argument issues. In + * particular, arguments don't get pre-evaluated if they are concatenated. + * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the + * GENX macro won't get evaluated if the emit macro contains "cmd ## foo". + * We can work around this easily enough with these helpers. + */ +#define __anv_cmd_length(cmd) cmd ## _length +#define __anv_cmd_length_bias(cmd) cmd ## _length_bias +#define __anv_cmd_header(cmd) cmd ## _header +#define __anv_cmd_pack(cmd) cmd ## _pack + +#define anv_batch_emit(batch, cmd, ...) do { \ + void *__dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \ + struct cmd __template = { \ + __anv_cmd_header(cmd), \ + __VA_ARGS__ \ + }; \ + __anv_cmd_pack(cmd)(batch, __dst, &__template); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(__dst, __anv_cmd_length(cmd) * 4)); \ + } while (0) + +#define anv_batch_emitn(batch, n, cmd, ...) ({ \ + void *__dst = anv_batch_emit_dwords(batch, n); \ + struct cmd __template = { \ + __anv_cmd_header(cmd), \ + .DwordLength = n - __anv_cmd_length_bias(cmd), \ + __VA_ARGS__ \ + }; \ + __anv_cmd_pack(cmd)(batch, __dst, &__template); \ + __dst; \ + }) + +#define anv_batch_emit_merge(batch, dwords0, dwords1) \ + do { \ + uint32_t *dw; \ + \ + static_assert(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1), "mismatch merge"); \ + dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0)); \ + for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++) \ + dw[i] = (dwords0)[i] | (dwords1)[i]; \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\ + } while (0) + +#define anv_state_pool_emit(pool, cmd, align, ...) ({ \ + const uint32_t __size = __anv_cmd_length(cmd) * 4; \ + struct anv_state __state = \ + anv_state_pool_alloc((pool), __size, align); \ + struct cmd __template = { \ + __VA_ARGS__ \ + }; \ + __anv_cmd_pack(cmd)(NULL, __state.map, &__template); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(__state.map, __anv_cmd_length(cmd) * 4)); \ + if (!(pool)->block_pool->device->info.has_llc) \ + anv_state_clflush(__state); \ + __state; \ + }) + +#define GEN7_MOCS (struct GEN7_MEMORY_OBJECT_CONTROL_STATE) { \ + .GraphicsDataTypeGFDT = 0, \ + .LLCCacheabilityControlLLCCC = 0, \ + .L3CacheabilityControlL3CC = 1, \ +} + +#define GEN75_MOCS (struct GEN75_MEMORY_OBJECT_CONTROL_STATE) { \ + .LLCeLLCCacheabilityControlLLCCC = 0, \ + .L3CacheabilityControlL3CC = 1, \ +} + +#define GEN8_MOCS { \ + .MemoryTypeLLCeLLCCacheabilityControl = WB, \ + .TargetCache = L3DefertoPATforLLCeLLCselection, \ + .AgeforQUADLRU = 0 \ + } + +/* Skylake: MOCS is now an index into an array of 62 different caching + * configurations programmed by the kernel. + */ + +#define GEN9_MOCS { \ + /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ + .IndextoMOCSTables = 2 \ + } + +#define GEN9_MOCS_PTE { \ + /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ + .IndextoMOCSTables = 1 \ + } + +struct anv_device_memory { + struct anv_bo bo; + uint32_t type_index; + VkDeviceSize map_size; + void * map; +}; + +/** + * Header for Vertex URB Entry (VUE) + */ +struct anv_vue_header { + uint32_t Reserved; + uint32_t RTAIndex; /* RenderTargetArrayIndex */ + uint32_t ViewportIndex; + float PointWidth; +}; + +struct anv_descriptor_set_binding_layout { + /* Number of array elements in this binding */ + uint16_t array_size; + + /* Index into the flattend descriptor set */ + uint16_t descriptor_index; + + /* Index into the dynamic state array for a dynamic buffer */ + int16_t dynamic_offset_index; + + /* Index into the descriptor set buffer views */ + int16_t buffer_index; + + struct { + /* Index into the binding table for the associated surface */ + int16_t surface_index; + + /* Index into the sampler table for the associated sampler */ + int16_t sampler_index; + + /* Index into the image table for the associated image */ + int16_t image_index; + } stage[MESA_SHADER_STAGES]; + + /* Immutable samplers (or NULL if no immutable samplers) */ + struct anv_sampler **immutable_samplers; +}; + +struct anv_descriptor_set_layout { + /* Number of bindings in this descriptor set */ + uint16_t binding_count; + + /* Total size of the descriptor set with room for all array entries */ + uint16_t size; + + /* Shader stages affected by this descriptor set */ + uint16_t shader_stages; + + /* Number of buffers in this descriptor set */ + uint16_t buffer_count; + + /* Number of dynamic offsets used by this descriptor set */ + uint16_t dynamic_offset_count; + + /* Bindings in this descriptor set */ + struct anv_descriptor_set_binding_layout binding[0]; +}; + +struct anv_descriptor { + VkDescriptorType type; + + union { + struct { + struct anv_image_view *image_view; + struct anv_sampler *sampler; + }; + + struct anv_buffer_view *buffer_view; + }; +}; + +struct anv_descriptor_set { + const struct anv_descriptor_set_layout *layout; + uint32_t buffer_count; + struct anv_buffer_view *buffer_views; + struct anv_descriptor descriptors[0]; +}; + +VkResult +anv_descriptor_set_create(struct anv_device *device, + const struct anv_descriptor_set_layout *layout, + struct anv_descriptor_set **out_set); + +void +anv_descriptor_set_destroy(struct anv_device *device, + struct anv_descriptor_set *set); + +struct anv_pipeline_binding { + /* The descriptor set this surface corresponds to */ + uint16_t set; + + /* Offset into the descriptor set */ + uint16_t offset; +}; + +struct anv_pipeline_layout { + struct { + struct anv_descriptor_set_layout *layout; + uint32_t dynamic_offset_start; + struct { + uint32_t surface_start; + uint32_t sampler_start; + uint32_t image_start; + } stage[MESA_SHADER_STAGES]; + } set[MAX_SETS]; + + uint32_t num_sets; + + struct { + bool has_dynamic_offsets; + uint32_t surface_count; + struct anv_pipeline_binding *surface_to_descriptor; + uint32_t sampler_count; + struct anv_pipeline_binding *sampler_to_descriptor; + uint32_t image_count; + } stage[MESA_SHADER_STAGES]; + + struct anv_pipeline_binding entries[0]; +}; + +struct anv_buffer { + struct anv_device * device; + VkDeviceSize size; + + VkBufferUsageFlags usage; + + /* Set when bound */ + struct anv_bo * bo; + VkDeviceSize offset; +}; + +enum anv_cmd_dirty_bits { + ANV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1 << 0, /* VK_DYNAMIC_STATE_VIEWPORT */ + ANV_CMD_DIRTY_DYNAMIC_SCISSOR = 1 << 1, /* VK_DYNAMIC_STATE_SCISSOR */ + ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 2, /* VK_DYNAMIC_STATE_LINE_WIDTH */ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1 << 3, /* VK_DYNAMIC_STATE_DEPTH_BIAS */ + ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1 << 4, /* VK_DYNAMIC_STATE_BLEND_CONSTANTS */ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1 << 5, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS */ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */ + ANV_CMD_DIRTY_DYNAMIC_ALL = (1 << 9) - 1, + ANV_CMD_DIRTY_PIPELINE = 1 << 9, + ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 10, + ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 11, +}; +typedef uint32_t anv_cmd_dirty_mask_t; + +struct anv_vertex_binding { + struct anv_buffer * buffer; + VkDeviceSize offset; +}; + +struct anv_push_constants { + /* Current allocated size of this push constants data structure. + * Because a decent chunk of it may not be used (images on SKL, for + * instance), we won't actually allocate the entire structure up-front. + */ + uint32_t size; + + /* Push constant data provided by the client through vkPushConstants */ + uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE]; + + /* Our hardware only provides zero-based vertex and instance id so, in + * order to satisfy the vulkan requirements, we may have to push one or + * both of these into the shader. + */ + uint32_t base_vertex; + uint32_t base_instance; + + /* Offsets and ranges for dynamically bound buffers */ + struct { + uint32_t offset; + uint32_t range; + } dynamic[MAX_DYNAMIC_BUFFERS]; + + /* Image data for image_load_store on pre-SKL */ + struct brw_image_param images[MAX_IMAGES]; +}; + +struct anv_dynamic_state { + struct { + uint32_t count; + VkViewport viewports[MAX_VIEWPORTS]; + } viewport; + + struct { + uint32_t count; + VkRect2D scissors[MAX_SCISSORS]; + } scissor; + + float line_width; + + struct { + float bias; + float clamp; + float slope; + } depth_bias; + + float blend_constants[4]; + + struct { + float min; + float max; + } depth_bounds; + + struct { + uint32_t front; + uint32_t back; + } stencil_compare_mask; + + struct { + uint32_t front; + uint32_t back; + } stencil_write_mask; + + struct { + uint32_t front; + uint32_t back; + } stencil_reference; +}; + +extern const struct anv_dynamic_state default_dynamic_state; + +void anv_dynamic_state_copy(struct anv_dynamic_state *dest, + const struct anv_dynamic_state *src, + uint32_t copy_mask); + +/** + * Attachment state when recording a renderpass instance. + * + * The clear value is valid only if there exists a pending clear. + */ +struct anv_attachment_state { + VkImageAspectFlags pending_clear_aspects; + VkClearValue clear_value; +}; + +/** State required while building cmd buffer */ +struct anv_cmd_state { + /* PIPELINE_SELECT.PipelineSelection */ + uint32_t current_pipeline; + uint32_t current_l3_config; + uint32_t vb_dirty; + anv_cmd_dirty_mask_t dirty; + anv_cmd_dirty_mask_t compute_dirty; + uint32_t num_workgroups_offset; + struct anv_bo *num_workgroups_bo; + VkShaderStageFlags descriptors_dirty; + VkShaderStageFlags push_constants_dirty; + uint32_t scratch_size; + struct anv_pipeline * pipeline; + struct anv_pipeline * compute_pipeline; + struct anv_framebuffer * framebuffer; + struct anv_render_pass * pass; + struct anv_subpass * subpass; + uint32_t restart_index; + struct anv_vertex_binding vertex_bindings[MAX_VBS]; + struct anv_descriptor_set * descriptors[MAX_SETS]; + struct anv_push_constants * push_constants[MESA_SHADER_STAGES]; + struct anv_state binding_tables[MESA_SHADER_STAGES]; + struct anv_state samplers[MESA_SHADER_STAGES]; + struct anv_dynamic_state dynamic; + bool need_query_wa; + + /** + * Array length is anv_cmd_state::pass::attachment_count. Array content is + * valid only when recording a render pass instance. + */ + struct anv_attachment_state * attachments; + + struct { + struct anv_buffer * index_buffer; + uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */ + uint32_t index_offset; + } gen7; +}; + +struct anv_cmd_pool { + VkAllocationCallbacks alloc; + struct list_head cmd_buffers; +}; + +#define ANV_CMD_BUFFER_BATCH_SIZE 8192 + +enum anv_cmd_buffer_exec_mode { + ANV_CMD_BUFFER_EXEC_MODE_PRIMARY, + ANV_CMD_BUFFER_EXEC_MODE_EMIT, + ANV_CMD_BUFFER_EXEC_MODE_CHAIN, + ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN, +}; + +struct anv_cmd_buffer { + VK_LOADER_DATA _loader_data; + + struct anv_device * device; + + struct anv_cmd_pool * pool; + struct list_head pool_link; + + struct anv_batch batch; + + /* Fields required for the actual chain of anv_batch_bo's. + * + * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain(). + */ + struct list_head batch_bos; + enum anv_cmd_buffer_exec_mode exec_mode; + + /* A vector of anv_batch_bo pointers for every batch or surface buffer + * referenced by this command buffer + * + * initialized by anv_cmd_buffer_init_batch_bo_chain() + */ + struct anv_vector seen_bbos; + + /* A vector of int32_t's for every block of binding tables. + * + * initialized by anv_cmd_buffer_init_batch_bo_chain() + */ + struct anv_vector bt_blocks; + uint32_t bt_next; + struct anv_reloc_list surface_relocs; + + /* Information needed for execbuf + * + * These fields are generated by anv_cmd_buffer_prepare_execbuf(). + */ + struct { + struct drm_i915_gem_execbuffer2 execbuf; + + struct drm_i915_gem_exec_object2 * objects; + uint32_t bo_count; + struct anv_bo ** bos; + + /* Allocated length of the 'objects' and 'bos' arrays */ + uint32_t array_length; + + bool need_reloc; + } execbuf2; + + /* Serial for tracking buffer completion */ + uint32_t serial; + + /* Stream objects for storing temporary data */ + struct anv_state_stream surface_state_stream; + struct anv_state_stream dynamic_state_stream; + + VkCommandBufferUsageFlags usage_flags; + VkCommandBufferLevel level; + + struct anv_cmd_state state; +}; + +VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, + struct anv_cmd_buffer *secondary); +void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer); + +VkResult anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *bt_state); +VkResult anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *state); +uint32_t gen7_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer); +void gen7_cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer, + uint32_t stages); + +struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, + const void *data, uint32_t size, uint32_t alignment); +struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t *b, + uint32_t dwords, uint32_t alignment); + +struct anv_address +anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer); +struct anv_state +anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, + uint32_t entries, uint32_t *state_offset); +struct anv_state +anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer); +struct anv_state +anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment); + +VkResult +anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer); + +void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer); +void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer); + +void gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); +void gen75_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); +void gen9_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, + const VkRenderPassBeginInfo *info); + +void gen7_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); +void gen8_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); +void gen9_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); +void anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); + +struct anv_state +anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage); +struct anv_state +anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer); + +const struct anv_image_view * +anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); + +struct anv_fence { + struct anv_bo bo; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec2_objects[1]; + bool ready; +}; + +struct anv_event { + uint32_t semaphore; + struct anv_state state; +}; + +struct nir_shader; + +struct anv_shader_module { + struct nir_shader * nir; + + uint32_t size; + char data[0]; +}; + +static inline gl_shader_stage +vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage) +{ + assert(__builtin_popcount(vk_stage) == 1); + return ffs(vk_stage) - 1; +} + +static inline VkShaderStageFlagBits +mesa_to_vk_shader_stage(gl_shader_stage mesa_stage) +{ + return (1 << mesa_stage); +} + +#define ANV_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1) + +#define anv_foreach_stage(stage, stage_bits) \ + for (gl_shader_stage stage, \ + __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK); \ + stage = __builtin_ffs(__tmp) - 1, __tmp; \ + __tmp &= ~(1 << (stage))) + +struct anv_pipeline { + struct anv_device * device; + struct anv_batch batch; + uint32_t batch_data[512]; + struct anv_reloc_list batch_relocs; + uint32_t dynamic_state_mask; + struct anv_dynamic_state dynamic_state; + + struct anv_pipeline_layout * layout; + bool use_repclear; + + struct brw_vs_prog_data vs_prog_data; + struct brw_wm_prog_data wm_prog_data; + struct brw_gs_prog_data gs_prog_data; + struct brw_cs_prog_data cs_prog_data; + bool writes_point_size; + struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES]; + uint32_t scratch_start[MESA_SHADER_STAGES]; + uint32_t total_scratch; + struct { + uint32_t vs_start; + uint32_t vs_size; + uint32_t nr_vs_entries; + uint32_t gs_start; + uint32_t gs_size; + uint32_t nr_gs_entries; + } urb; + + VkShaderStageFlags active_stages; + struct anv_state blend_state; + uint32_t vs_simd8; + uint32_t vs_vec4; + uint32_t ps_simd8; + uint32_t ps_simd16; + uint32_t ps_ksp0; + uint32_t ps_ksp2; + uint32_t ps_grf_start0; + uint32_t ps_grf_start2; + uint32_t gs_kernel; + uint32_t gs_vertex_count; + uint32_t cs_simd; + + uint32_t vb_used; + uint32_t binding_stride[MAX_VBS]; + bool instancing_enable[MAX_VBS]; + bool primitive_restart; + uint32_t topology; + + uint32_t cs_thread_width_max; + uint32_t cs_right_mask; + + struct { + uint32_t sf[7]; + uint32_t depth_stencil_state[3]; + } gen7; + + struct { + uint32_t sf[4]; + uint32_t raster[5]; + uint32_t wm_depth_stencil[3]; + } gen8; + + struct { + uint32_t wm_depth_stencil[4]; + } gen9; +}; + +struct anv_graphics_pipeline_create_info { + /** + * If non-negative, overrides the color attachment count of the pipeline's + * subpass. + */ + int8_t color_attachment_count; + + bool use_repclear; + bool disable_viewport; + bool disable_scissor; + bool disable_vs; + bool use_rectlist; +}; + +VkResult +anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc); + +VkResult +anv_pipeline_compile_cs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info); + +VkResult +anv_graphics_pipeline_create(VkDevice device, + VkPipelineCache cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +VkResult +gen7_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +VkResult +gen75_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +VkResult +gen8_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); +VkResult +gen9_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); +VkResult +gen7_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); +VkResult +gen75_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +VkResult +gen8_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); +VkResult +gen9_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +struct anv_format_swizzle { + unsigned r:2; + unsigned g:2; + unsigned b:2; + unsigned a:2; +}; + +struct anv_format { + const VkFormat vk_format; + const char *name; + enum isl_format surface_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ + const struct isl_format_layout *isl_layout; + uint16_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ + struct anv_format_swizzle swizzle; + bool has_stencil; +}; + +const struct anv_format * +anv_format_for_vk_format(VkFormat format); + +enum isl_format +anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, + VkImageTiling tiling, struct anv_format_swizzle *swizzle); + +static inline bool +anv_format_is_color(const struct anv_format *format) +{ + return !format->depth_format && !format->has_stencil; +} + +static inline bool +anv_format_is_depth_or_stencil(const struct anv_format *format) +{ + return format->depth_format || format->has_stencil; +} + +/** + * Subsurface of an anv_image. + */ +struct anv_surface { + struct isl_surf isl; + + /** + * Offset from VkImage's base address, as bound by vkBindImageMemory(). + */ + uint32_t offset; +}; + +struct anv_image { + VkImageType type; + /* The original VkFormat provided by the client. This may not match any + * of the actual surface formats. + */ + VkFormat vk_format; + const struct anv_format *format; + VkExtent3D extent; + uint32_t levels; + uint32_t array_size; + uint32_t samples; /**< VkImageCreateInfo::samples */ + VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */ + VkImageTiling tiling; /** VkImageCreateInfo::tiling */ + + VkDeviceSize size; + uint32_t alignment; + + /* Set when bound */ + struct anv_bo *bo; + VkDeviceSize offset; + + /** + * Image subsurfaces + * + * For each foo, anv_image::foo_surface is valid if and only if + * anv_image::format has a foo aspect. + * + * The hardware requires that the depth buffer and stencil buffer be + * separate surfaces. From Vulkan's perspective, though, depth and stencil + * reside in the same VkImage. To satisfy both the hardware and Vulkan, we + * allocate the depth and stencil buffers as separate surfaces in the same + * bo. + */ + union { + struct anv_surface color_surface; + + struct { + struct anv_surface depth_surface; + struct anv_surface stencil_surface; + }; + }; +}; + +struct anv_image_view { + const struct anv_image *image; /**< VkImageViewCreateInfo::image */ + struct anv_bo *bo; + uint32_t offset; /**< Offset into bo. */ + + VkImageAspectFlags aspect_mask; + VkFormat vk_format; + VkComponentMapping swizzle; + enum isl_format format; + uint32_t base_layer; + uint32_t base_mip; + VkExtent3D level_0_extent; /**< Extent of ::image's level 0 adjusted for ::vk_format. */ + VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */ + + /** RENDER_SURFACE_STATE when using image as a color render target. */ + struct anv_state color_rt_surface_state; + + /** RENDER_SURFACE_STATE when using image as a sampler surface. */ + struct anv_state sampler_surface_state; + + /** RENDER_SURFACE_STATE when using image as a storage image. */ + struct anv_state storage_surface_state; +}; + +struct anv_image_create_info { + const VkImageCreateInfo *vk_info; + isl_tiling_flags_t isl_tiling_flags; + uint32_t stride; +}; + +VkResult anv_image_create(VkDevice _device, + const struct anv_image_create_info *info, + const VkAllocationCallbacks* alloc, + VkImage *pImage); + +struct anv_surface * +anv_image_get_surface_for_aspect_mask(struct anv_image *image, + VkImageAspectFlags aspect_mask); + +void anv_image_view_init(struct anv_image_view *view, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer, + uint32_t offset); + +void +anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); +void +gen7_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); +void +gen75_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); +void +gen8_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); +void +gen9_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); + +struct anv_buffer_view { + enum isl_format format; /**< VkBufferViewCreateInfo::format */ + struct anv_bo *bo; + uint32_t offset; /**< Offset into bo. */ + uint64_t range; /**< VkBufferViewCreateInfo::range */ + + struct anv_state surface_state; + struct anv_state storage_surface_state; +}; + +const struct anv_format * +anv_format_for_descriptor_type(VkDescriptorType type); + +void anv_fill_buffer_surface_state(struct anv_device *device, + struct anv_state state, + enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); + +void gen7_fill_buffer_surface_state(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); +void gen75_fill_buffer_surface_state(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); +void gen8_fill_buffer_surface_state(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); +void gen9_fill_buffer_surface_state(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); + +void anv_image_view_fill_image_param(struct anv_device *device, + struct anv_image_view *view, + struct brw_image_param *param); +void anv_buffer_view_fill_image_param(struct anv_device *device, + struct anv_buffer_view *view, + struct brw_image_param *param); + +struct anv_sampler { + uint32_t state[4]; +}; + +struct anv_framebuffer { + uint32_t width; + uint32_t height; + uint32_t layers; + + uint32_t attachment_count; + struct anv_image_view * attachments[0]; +}; + +struct anv_subpass { + uint32_t input_count; + uint32_t * input_attachments; + uint32_t color_count; + uint32_t * color_attachments; + uint32_t * resolve_attachments; + uint32_t depth_stencil_attachment; + + /** Subpass has at least one resolve attachment */ + bool has_resolve; +}; + +struct anv_render_pass_attachment { + const struct anv_format *format; + uint32_t samples; + VkAttachmentLoadOp load_op; + VkAttachmentLoadOp stencil_load_op; +}; + +struct anv_render_pass { + uint32_t attachment_count; + uint32_t subpass_count; + uint32_t * subpass_attachments; + struct anv_render_pass_attachment * attachments; + struct anv_subpass subpasses[0]; +}; + +extern struct anv_render_pass anv_meta_dummy_renderpass; + +struct anv_query_pool_slot { + uint64_t begin; + uint64_t end; + uint64_t available; +}; + +struct anv_query_pool { + VkQueryType type; + uint32_t slots; + struct anv_bo bo; +}; + +VkResult anv_device_init_meta(struct anv_device *device); +void anv_device_finish_meta(struct anv_device *device); + +void *anv_lookup_entrypoint(const char *name); + +void anv_dump_image_to_ppm(struct anv_device *device, + struct anv_image *image, unsigned miplevel, + unsigned array_layer, const char *filename); + +#define ANV_DEFINE_HANDLE_CASTS(__anv_type, __VkType) \ + \ + static inline struct __anv_type * \ + __anv_type ## _from_handle(__VkType _handle) \ + { \ + return (struct __anv_type *) _handle; \ + } \ + \ + static inline __VkType \ + __anv_type ## _to_handle(struct __anv_type *_obj) \ + { \ + return (__VkType) _obj; \ + } + +#define ANV_DEFINE_NONDISP_HANDLE_CASTS(__anv_type, __VkType) \ + \ + static inline struct __anv_type * \ + __anv_type ## _from_handle(__VkType _handle) \ + { \ + return (struct __anv_type *)(uintptr_t) _handle; \ + } \ + \ + static inline __VkType \ + __anv_type ## _to_handle(struct __anv_type *_obj) \ + { \ + return (__VkType)(uintptr_t) _obj; \ + } + +#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ + struct __anv_type *__name = __anv_type ## _from_handle(__handle) + +ANV_DEFINE_HANDLE_CASTS(anv_cmd_buffer, VkCommandBuffer) +ANV_DEFINE_HANDLE_CASTS(anv_device, VkDevice) +ANV_DEFINE_HANDLE_CASTS(anv_instance, VkInstance) +ANV_DEFINE_HANDLE_CASTS(anv_physical_device, VkPhysicalDevice) +ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) + +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, VkCommandPool) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, VkBufferView) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, VkFence) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_event, VkEvent) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, VkFramebuffer) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image, VkImage) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, VkImageView); +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_cache, VkPipelineCache) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, VkPipeline) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, VkPipelineLayout) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, VkQueryPool) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, VkRenderPass) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, VkSampler) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader_module, VkShaderModule) + +#define ANV_DEFINE_STRUCT_CASTS(__anv_type, __VkType) \ + \ + static inline const __VkType * \ + __anv_type ## _to_ ## __VkType(const struct __anv_type *__anv_obj) \ + { \ + return (const __VkType *) __anv_obj; \ + } + +#define ANV_COMMON_TO_STRUCT(__VkType, __vk_name, __common_name) \ + const __VkType *__vk_name = anv_common_to_ ## __VkType(__common_name) + +ANV_DEFINE_STRUCT_CASTS(anv_common, VkMemoryBarrier) +ANV_DEFINE_STRUCT_CASTS(anv_common, VkBufferMemoryBarrier) +ANV_DEFINE_STRUCT_CASTS(anv_common, VkImageMemoryBarrier) + +#ifdef __cplusplus +} +#endif diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c new file mode 100644 index 00000000000..5b052341e0c --- /dev/null +++ b/src/vulkan/anv_query.c @@ -0,0 +1,188 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +VkResult anv_CreateQueryPool( + VkDevice _device, + const VkQueryPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkQueryPool* pQueryPool) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_query_pool *pool; + VkResult result; + uint32_t slot_size; + uint64_t size; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO); + + switch (pCreateInfo->queryType) { + case VK_QUERY_TYPE_OCCLUSION: + case VK_QUERY_TYPE_TIMESTAMP: + break; + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + return VK_ERROR_INCOMPATIBLE_DRIVER; + default: + assert(!"Invalid query type"); + } + + slot_size = sizeof(struct anv_query_pool_slot); + pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pool == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pool->type = pCreateInfo->queryType; + pool->slots = pCreateInfo->queryCount; + + size = pCreateInfo->queryCount * slot_size; + result = anv_bo_init_new(&pool->bo, device, size); + if (result != VK_SUCCESS) + goto fail; + + pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size, 0); + + *pQueryPool = anv_query_pool_to_handle(pool); + + return VK_SUCCESS; + + fail: + anv_free2(&device->alloc, pAllocator, pool); + + return result; +} + +void anv_DestroyQueryPool( + VkDevice _device, + VkQueryPool _pool, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_query_pool, pool, _pool); + + anv_gem_munmap(pool->bo.map, pool->bo.size); + anv_gem_close(device, pool->bo.gem_handle); + anv_free2(&device->alloc, pAllocator, pool); +} + +VkResult anv_GetQueryPoolResults( + VkDevice _device, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + size_t dataSize, + void* pData, + VkDeviceSize stride, + VkQueryResultFlags flags) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + int64_t timeout = INT64_MAX; + uint64_t result; + int ret; + + assert(pool->type == VK_QUERY_TYPE_OCCLUSION || + pool->type == VK_QUERY_TYPE_TIMESTAMP); + + if (pData == NULL) + return VK_SUCCESS; + + if (flags & VK_QUERY_RESULT_WAIT_BIT) { + ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout); + if (ret == -1) { + /* We don't know the real error. */ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "gem_wait failed %m"); + } + } + + void *data_end = pData + dataSize; + struct anv_query_pool_slot *slot = pool->bo.map; + + for (uint32_t i = 0; i < queryCount; i++) { + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: { + result = slot[firstQuery + i].end - slot[firstQuery + i].begin; + break; + } + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + /* Not yet implemented */ + break; + case VK_QUERY_TYPE_TIMESTAMP: { + result = slot[firstQuery + i].begin; + break; + } + default: + assert(!"Invalid query type"); + } + + if (flags & VK_QUERY_RESULT_64_BIT) { + uint64_t *dst = pData; + dst[0] = result; + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) + dst[1] = slot[firstQuery + i].available; + } else { + uint32_t *dst = pData; + if (result > UINT32_MAX) + result = UINT32_MAX; + dst[0] = result; + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) + dst[1] = slot[firstQuery + i].available; + } + + pData += stride; + if (pData >= data_end) + break; + } + + return VK_SUCCESS; +} + +void anv_CmdResetQueryPool( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount) +{ + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + for (uint32_t i = 0; i < queryCount; i++) { + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + case VK_QUERY_TYPE_TIMESTAMP: { + struct anv_query_pool_slot *slot = pool->bo.map; + slot[firstQuery + i].available = 0; + break; + } + default: + assert(!"Invalid query type"); + } + } +} diff --git a/src/vulkan/anv_util.c b/src/vulkan/anv_util.c new file mode 100644 index 00000000000..22fd01c9495 --- /dev/null +++ b/src/vulkan/anv_util.c @@ -0,0 +1,195 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <assert.h> + +#include "anv_private.h" + +/** Log an error message. */ +void anv_printflike(1, 2) +anv_loge(const char *format, ...) +{ + va_list va; + + va_start(va, format); + anv_loge_v(format, va); + va_end(va); +} + +/** \see anv_loge() */ +void +anv_loge_v(const char *format, va_list va) +{ + fprintf(stderr, "vk: error: "); + vfprintf(stderr, format, va); + fprintf(stderr, "\n"); +} + +void anv_printflike(3, 4) +__anv_finishme(const char *file, int line, const char *format, ...) +{ + va_list ap; + char buffer[256]; + + va_start(ap, format); + vsnprintf(buffer, sizeof(buffer), format, ap); + va_end(ap); + + fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer); +} + +void anv_noreturn anv_printflike(1, 2) +anv_abortf(const char *format, ...) +{ + va_list va; + + va_start(va, format); + anv_abortfv(format, va); + va_end(va); +} + +void anv_noreturn +anv_abortfv(const char *format, va_list va) +{ + fprintf(stderr, "vk: error: "); + vfprintf(stderr, format, va); + fprintf(stderr, "\n"); + abort(); +} + +VkResult +__vk_errorf(VkResult error, const char *file, int line, const char *format, ...) +{ + va_list ap; + char buffer[256]; + +#define ERROR_CASE(error) case error: error_str = #error; break; + + const char *error_str; + switch ((int32_t)error) { + + /* Core errors */ + ERROR_CASE(VK_ERROR_OUT_OF_HOST_MEMORY) + ERROR_CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY) + ERROR_CASE(VK_ERROR_INITIALIZATION_FAILED) + ERROR_CASE(VK_ERROR_DEVICE_LOST) + ERROR_CASE(VK_ERROR_MEMORY_MAP_FAILED) + ERROR_CASE(VK_ERROR_LAYER_NOT_PRESENT) + ERROR_CASE(VK_ERROR_EXTENSION_NOT_PRESENT) + ERROR_CASE(VK_ERROR_INCOMPATIBLE_DRIVER) + + /* Extension errors */ + ERROR_CASE(VK_ERROR_OUT_OF_DATE_KHR) + + default: + assert(!"Unknown error"); + error_str = "unknown error"; + } + +#undef ERROR_CASE + + if (format) { + va_start(ap, format); + vsnprintf(buffer, sizeof(buffer), format, ap); + va_end(ap); + + fprintf(stderr, "%s:%d: %s (%s)\n", file, line, buffer, error_str); + } else { + fprintf(stderr, "%s:%d: %s\n", file, line, error_str); + } + + return error; +} + +int +anv_vector_init(struct anv_vector *vector, uint32_t element_size, uint32_t size) +{ + assert(util_is_power_of_two(size)); + assert(element_size < size && util_is_power_of_two(element_size)); + + vector->head = 0; + vector->tail = 0; + vector->element_size = element_size; + vector->size = size; + vector->data = malloc(size); + + return vector->data != NULL; +} + +void * +anv_vector_add(struct anv_vector *vector) +{ + uint32_t offset, size, split, tail; + void *data; + + if (vector->head - vector->tail == vector->size) { + size = vector->size * 2; + data = malloc(size); + if (data == NULL) + return NULL; + split = align_u32(vector->tail, vector->size); + tail = vector->tail & (vector->size - 1); + if (vector->head - split < vector->size) { + memcpy(data + tail, + vector->data + tail, + split - vector->tail); + memcpy(data + vector->size, + vector->data, vector->head - split); + } else { + memcpy(data + tail, + vector->data + tail, + vector->head - vector->tail); + } + free(vector->data); + vector->data = data; + vector->size = size; + } + + assert(vector->head - vector->tail < vector->size); + + offset = vector->head & (vector->size - 1); + vector->head += vector->element_size; + + return vector->data + offset; +} + +void * +anv_vector_remove(struct anv_vector *vector) +{ + uint32_t offset; + + if (vector->head == vector->tail) + return NULL; + + assert(vector->head - vector->tail <= vector->size); + + offset = vector->tail & (vector->size - 1); + vector->tail += vector->element_size; + + return vector->data + offset; +} diff --git a/src/vulkan/anv_wsi.c b/src/vulkan/anv_wsi.c new file mode 100644 index 00000000000..c5911a3635b --- /dev/null +++ b/src/vulkan/anv_wsi.c @@ -0,0 +1,196 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_wsi.h" + +VkResult +anv_init_wsi(struct anv_instance *instance) +{ + VkResult result; + + result = anv_x11_init_wsi(instance); + if (result != VK_SUCCESS) + return result; + +#ifdef HAVE_WAYLAND_PLATFORM + result = anv_wl_init_wsi(instance); + if (result != VK_SUCCESS) { + anv_x11_finish_wsi(instance); + return result; + } +#endif + + return VK_SUCCESS; +} + +void +anv_finish_wsi(struct anv_instance *instance) +{ +#ifdef HAVE_WAYLAND_PLATFORM + anv_wl_finish_wsi(instance); +#endif + anv_x11_finish_wsi(instance); +} + +void anv_DestroySurfaceKHR( + VkInstance _instance, + VkSurfaceKHR _surface, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + + anv_free2(&instance->alloc, pAllocator, surface); +} + +VkResult anv_GetPhysicalDeviceSurfaceSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + VkSurfaceKHR _surface, + VkBool32* pSupported) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + + return iface->get_support(surface, device, queueFamilyIndex, pSupported); +} + +VkResult anv_GetPhysicalDeviceSurfaceCapabilitiesKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR _surface, + VkSurfaceCapabilitiesKHR* pSurfaceCapabilities) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + + return iface->get_capabilities(surface, device, pSurfaceCapabilities); +} + +VkResult anv_GetPhysicalDeviceSurfaceFormatsKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR _surface, + uint32_t* pSurfaceFormatCount, + VkSurfaceFormatKHR* pSurfaceFormats) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + + return iface->get_formats(surface, device, pSurfaceFormatCount, + pSurfaceFormats); +} + +VkResult anv_GetPhysicalDeviceSurfacePresentModesKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR _surface, + uint32_t* pPresentModeCount, + VkPresentModeKHR* pPresentModes) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + + return iface->get_present_modes(surface, device, pPresentModeCount, + pPresentModes); +} + +VkResult anv_CreateSwapchainKHR( + VkDevice _device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSwapchainKHR* pSwapchain) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, pCreateInfo->surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + struct anv_swapchain *swapchain; + + VkResult result = iface->create_swapchain(surface, device, pCreateInfo, + pAllocator, &swapchain); + if (result != VK_SUCCESS) + return result; + + *pSwapchain = anv_swapchain_to_handle(swapchain); + + return VK_SUCCESS; +} + +void anv_DestroySwapchainKHR( + VkDevice device, + VkSwapchainKHR _swapchain, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); + + swapchain->destroy(swapchain, pAllocator); +} + +VkResult anv_GetSwapchainImagesKHR( + VkDevice device, + VkSwapchainKHR _swapchain, + uint32_t* pSwapchainImageCount, + VkImage* pSwapchainImages) +{ + ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); + + return swapchain->get_images(swapchain, pSwapchainImageCount, + pSwapchainImages); +} + +VkResult anv_AcquireNextImageKHR( + VkDevice device, + VkSwapchainKHR _swapchain, + uint64_t timeout, + VkSemaphore semaphore, + VkFence fence, + uint32_t* pImageIndex) +{ + ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); + + return swapchain->acquire_next_image(swapchain, timeout, semaphore, + pImageIndex); +} + +VkResult anv_QueuePresentKHR( + VkQueue _queue, + const VkPresentInfoKHR* pPresentInfo) +{ + ANV_FROM_HANDLE(anv_queue, queue, _queue); + VkResult result; + + for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { + ANV_FROM_HANDLE(anv_swapchain, swapchain, pPresentInfo->pSwapchains[i]); + + assert(swapchain->device == queue->device); + + result = swapchain->queue_present(swapchain, queue, + pPresentInfo->pImageIndices[i]); + /* TODO: What if one of them returns OUT_OF_DATE? */ + if (result != VK_SUCCESS) + return result; + } + + return VK_SUCCESS; +} diff --git a/src/vulkan/anv_wsi.h b/src/vulkan/anv_wsi.h new file mode 100644 index 00000000000..6e9ff9b8447 --- /dev/null +++ b/src/vulkan/anv_wsi.h @@ -0,0 +1,74 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include "anv_private.h" + +struct anv_swapchain; + +struct anv_wsi_interface { + VkResult (*get_support)(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t queueFamilyIndex, + VkBool32* pSupported); + VkResult (*get_capabilities)(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + VkSurfaceCapabilitiesKHR* pSurfaceCapabilities); + VkResult (*get_formats)(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t* pSurfaceFormatCount, + VkSurfaceFormatKHR* pSurfaceFormats); + VkResult (*get_present_modes)(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t* pPresentModeCount, + VkPresentModeKHR* pPresentModes); + VkResult (*create_swapchain)(VkIcdSurfaceBase *surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain); +}; + +struct anv_swapchain { + struct anv_device *device; + + VkResult (*destroy)(struct anv_swapchain *swapchain, + const VkAllocationCallbacks *pAllocator); + VkResult (*get_images)(struct anv_swapchain *swapchain, + uint32_t *pCount, VkImage *pSwapchainImages); + VkResult (*acquire_next_image)(struct anv_swapchain *swap_chain, + uint64_t timeout, VkSemaphore semaphore, + uint32_t *image_index); + VkResult (*queue_present)(struct anv_swapchain *swap_chain, + struct anv_queue *queue, + uint32_t image_index); +}; + +ANV_DEFINE_NONDISP_HANDLE_CASTS(_VkIcdSurfaceBase, VkSurfaceKHR) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_swapchain, VkSwapchainKHR) + +VkResult anv_x11_init_wsi(struct anv_instance *instance); +void anv_x11_finish_wsi(struct anv_instance *instance); +VkResult anv_wl_init_wsi(struct anv_instance *instance); +void anv_wl_finish_wsi(struct anv_instance *instance); diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c new file mode 100644 index 00000000000..31d5f836a9f --- /dev/null +++ b/src/vulkan/anv_wsi_wayland.c @@ -0,0 +1,870 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <wayland-client.h> +#include <wayland-drm-client-protocol.h> + +#include "anv_wsi.h" + +#include <util/hash_table.h> + +#define MIN_NUM_IMAGES 2 + +struct wsi_wl_display { + struct wl_display * display; + struct wl_drm * drm; + + /* Vector of VkFormats supported */ + struct anv_vector formats; + + uint32_t capabilities; +}; + +struct wsi_wayland { + struct anv_wsi_interface base; + + struct anv_instance * instance; + + pthread_mutex_t mutex; + /* Hash table of wl_display -> wsi_wl_display mappings */ + struct hash_table * displays; +}; + +static void +wsi_wl_display_add_vk_format(struct wsi_wl_display *display, VkFormat format) +{ + /* Don't add a format that's already in the list */ + VkFormat *f; + anv_vector_foreach(f, &display->formats) + if (*f == format) + return; + + /* Don't add formats which aren't supported by the driver */ + if (anv_format_for_vk_format(format)->surface_format == + ISL_FORMAT_UNSUPPORTED) { + return; + } + + f = anv_vector_add(&display->formats); + if (f) + *f = format; +} + +static void +drm_handle_device(void *data, struct wl_drm *drm, const char *name) +{ + fprintf(stderr, "wl_drm.device(%s)\n", name); +} + +static uint32_t +wl_drm_format_for_vk_format(VkFormat vk_format, bool alpha) +{ + switch (vk_format) { + /* TODO: Figure out what all the formats mean and make this table + * correct. + */ +#if 0 + case VK_FORMAT_R4G4B4A4_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR4444 : WL_DRM_FORMAT_XBGR4444; + case VK_FORMAT_R5G6B5_UNORM: + return WL_DRM_FORMAT_BGR565; + case VK_FORMAT_R5G5B5A1_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR1555 : WL_DRM_FORMAT_XBGR1555; + case VK_FORMAT_R8G8B8_UNORM: + return WL_DRM_FORMAT_XBGR8888; + case VK_FORMAT_R8G8B8A8_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR8888 : WL_DRM_FORMAT_XBGR8888; + case VK_FORMAT_R10G10B10A2_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR2101010 : WL_DRM_FORMAT_XBGR2101010; + case VK_FORMAT_B4G4R4A4_UNORM: + return alpha ? WL_DRM_FORMAT_ARGB4444 : WL_DRM_FORMAT_XRGB4444; + case VK_FORMAT_B5G6R5_UNORM: + return WL_DRM_FORMAT_RGB565; + case VK_FORMAT_B5G5R5A1_UNORM: + return alpha ? WL_DRM_FORMAT_XRGB1555 : WL_DRM_FORMAT_XRGB1555; +#endif + case VK_FORMAT_B8G8R8_SRGB: + return WL_DRM_FORMAT_BGRX8888; + case VK_FORMAT_B8G8R8A8_SRGB: + return alpha ? WL_DRM_FORMAT_ARGB8888 : WL_DRM_FORMAT_XRGB8888; +#if 0 + case VK_FORMAT_B10G10R10A2_UNORM: + return alpha ? WL_DRM_FORMAT_ARGB2101010 : WL_DRM_FORMAT_XRGB2101010; +#endif + + default: + assert("!Unsupported Vulkan format"); + return 0; + } +} + +static void +drm_handle_format(void *data, struct wl_drm *drm, uint32_t wl_format) +{ + struct wsi_wl_display *display = data; + + switch (wl_format) { +#if 0 + case WL_DRM_FORMAT_ABGR4444: + case WL_DRM_FORMAT_XBGR4444: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R4G4B4A4_UNORM); + break; + case WL_DRM_FORMAT_BGR565: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G6B5_UNORM); + break; + case WL_DRM_FORMAT_ABGR1555: + case WL_DRM_FORMAT_XBGR1555: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G5B5A1_UNORM); + break; + case WL_DRM_FORMAT_XBGR8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8_UNORM); + /* fallthrough */ + case WL_DRM_FORMAT_ABGR8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8A8_UNORM); + break; + case WL_DRM_FORMAT_ABGR2101010: + case WL_DRM_FORMAT_XBGR2101010: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R10G10B10A2_UNORM); + break; + case WL_DRM_FORMAT_ARGB4444: + case WL_DRM_FORMAT_XRGB4444: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B4G4R4A4_UNORM); + break; + case WL_DRM_FORMAT_RGB565: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G6R5_UNORM); + break; + case WL_DRM_FORMAT_ARGB1555: + case WL_DRM_FORMAT_XRGB1555: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G5R5A1_UNORM); + break; +#endif + case WL_DRM_FORMAT_XRGB8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8_SRGB); + /* fallthrough */ + case WL_DRM_FORMAT_ARGB8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8A8_SRGB); + break; +#if 0 + case WL_DRM_FORMAT_ARGB2101010: + case WL_DRM_FORMAT_XRGB2101010: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B10G10R10A2_UNORM); + break; +#endif + } +} + +static void +drm_handle_authenticated(void *data, struct wl_drm *drm) +{ +} + +static void +drm_handle_capabilities(void *data, struct wl_drm *drm, uint32_t capabilities) +{ + struct wsi_wl_display *display = data; + + display->capabilities = capabilities; +} + +static const struct wl_drm_listener drm_listener = { + drm_handle_device, + drm_handle_format, + drm_handle_authenticated, + drm_handle_capabilities, +}; + +static void +registry_handle_global(void *data, struct wl_registry *registry, + uint32_t name, const char *interface, uint32_t version) +{ + struct wsi_wl_display *display = data; + + if (strcmp(interface, "wl_drm") == 0) { + assert(display->drm == NULL); + + assert(version >= 2); + display->drm = wl_registry_bind(registry, name, &wl_drm_interface, 2); + + if (display->drm) + wl_drm_add_listener(display->drm, &drm_listener, display); + } +} + +static void +registry_handle_global_remove(void *data, struct wl_registry *registry, + uint32_t name) +{ /* No-op */ } + +static const struct wl_registry_listener registry_listener = { + registry_handle_global, + registry_handle_global_remove +}; + +static void +wsi_wl_display_destroy(struct wsi_wayland *wsi, struct wsi_wl_display *display) +{ + anv_vector_finish(&display->formats); + if (display->drm) + wl_drm_destroy(display->drm); + anv_free(&wsi->instance->alloc, display); +} + +static struct wsi_wl_display * +wsi_wl_display_create(struct wsi_wayland *wsi, struct wl_display *wl_display) +{ + struct wsi_wl_display *display = + anv_alloc(&wsi->instance->alloc, sizeof(*display), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!display) + return NULL; + + memset(display, 0, sizeof(*display)); + + display->display = wl_display; + + if (!anv_vector_init(&display->formats, sizeof(VkFormat), 8)) + goto fail; + + struct wl_registry *registry = wl_display_get_registry(wl_display); + if (!registry) + return NULL; + + wl_registry_add_listener(registry, ®istry_listener, display); + + /* Round-rip to get the wl_drm global */ + wl_display_roundtrip(wl_display); + + if (!display->drm) + goto fail; + + /* Round-rip to get wl_drm formats and capabilities */ + wl_display_roundtrip(wl_display); + + /* We need prime support */ + if (!(display->capabilities & WL_DRM_CAPABILITY_PRIME)) + goto fail; + + /* We don't need this anymore */ + wl_registry_destroy(registry); + + return display; + +fail: + if (registry) + wl_registry_destroy(registry); + + wsi_wl_display_destroy(wsi, display); + return NULL; +} + +static struct wsi_wl_display * +wsi_wl_get_display(struct anv_instance *instance, struct wl_display *wl_display) +{ + struct wsi_wayland *wsi = + (struct wsi_wayland *)instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; + + pthread_mutex_lock(&wsi->mutex); + + struct hash_entry *entry = _mesa_hash_table_search(wsi->displays, + wl_display); + if (!entry) { + /* We're about to make a bunch of blocking calls. Let's drop the + * mutex for now so we don't block up too badly. + */ + pthread_mutex_unlock(&wsi->mutex); + + struct wsi_wl_display *display = wsi_wl_display_create(wsi, wl_display); + + pthread_mutex_lock(&wsi->mutex); + + entry = _mesa_hash_table_search(wsi->displays, wl_display); + if (entry) { + /* Oops, someone raced us to it */ + wsi_wl_display_destroy(wsi, display); + } else { + entry = _mesa_hash_table_insert(wsi->displays, wl_display, display); + } + } + + pthread_mutex_unlock(&wsi->mutex); + + return entry->data; +} + +VkBool32 anv_GetPhysicalDeviceWaylandPresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + struct wl_display* display) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + + return wsi_wl_get_display(physical_device->instance, display) != NULL; +} + +static VkResult +wsi_wl_surface_get_support(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t queueFamilyIndex, + VkBool32* pSupported) +{ + *pSupported = true; + + return VK_SUCCESS; +} + +static const VkPresentModeKHR present_modes[] = { + VK_PRESENT_MODE_MAILBOX_KHR, + VK_PRESENT_MODE_FIFO_KHR, +}; + +static VkResult +wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + VkSurfaceCapabilitiesKHR* caps) +{ + caps->minImageCount = MIN_NUM_IMAGES; + caps->maxImageCount = 4; + caps->currentExtent = (VkExtent2D) { -1, -1 }; + caps->minImageExtent = (VkExtent2D) { 1, 1 }; + caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; + caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + caps->maxImageArrayLayers = 1; + + caps->supportedCompositeAlpha = + VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR | + VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR; + + caps->supportedUsageFlags = + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_surface_get_formats(VkIcdSurfaceBase *icd_surface, + struct anv_physical_device *device, + uint32_t* pSurfaceFormatCount, + VkSurfaceFormatKHR* pSurfaceFormats) +{ + VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; + struct wsi_wl_display *display = + wsi_wl_get_display(device->instance, surface->display); + + uint32_t count = anv_vector_length(&display->formats); + + if (pSurfaceFormats == NULL) { + *pSurfaceFormatCount = count; + return VK_SUCCESS; + } + + assert(*pSurfaceFormatCount >= count); + *pSurfaceFormatCount = count; + + VkFormat *f; + anv_vector_foreach(f, &display->formats) { + *(pSurfaceFormats++) = (VkSurfaceFormatKHR) { + .format = *f, + /* TODO: We should get this from the compositor somehow */ + .colorSpace = VK_COLORSPACE_SRGB_NONLINEAR_KHR, + }; + } + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_surface_get_present_modes(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t* pPresentModeCount, + VkPresentModeKHR* pPresentModes) +{ + if (pPresentModes == NULL) { + *pPresentModeCount = ARRAY_SIZE(present_modes); + return VK_SUCCESS; + } + + assert(*pPresentModeCount >= ARRAY_SIZE(present_modes)); + typed_memcpy(pPresentModes, present_modes, *pPresentModeCount); + *pPresentModeCount = ARRAY_SIZE(present_modes); + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain); + +VkResult anv_CreateWaylandSurfaceKHR( + VkInstance _instance, + const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR); + + VkIcdSurfaceWayland *surface; + + surface = anv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (surface == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + surface->base.platform = VK_ICD_WSI_PLATFORM_WAYLAND; + surface->display = pCreateInfo->display; + surface->surface = pCreateInfo->surface; + + *pSurface = _VkIcdSurfaceBase_to_handle(&surface->base); + + return VK_SUCCESS; +} + +struct wsi_wl_image { + struct anv_image * image; + struct anv_device_memory * memory; + struct wl_buffer * buffer; + bool busy; +}; + +struct wsi_wl_swapchain { + struct anv_swapchain base; + + struct wsi_wl_display * display; + struct wl_event_queue * queue; + struct wl_surface * surface; + + VkExtent2D extent; + VkFormat vk_format; + uint32_t drm_format; + + VkPresentModeKHR present_mode; + bool fifo_ready; + + uint32_t image_count; + struct wsi_wl_image images[0]; +}; + +static VkResult +wsi_wl_swapchain_get_images(struct anv_swapchain *anv_chain, + uint32_t *pCount, VkImage *pSwapchainImages) +{ + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; + + if (pSwapchainImages == NULL) { + *pCount = chain->image_count; + return VK_SUCCESS; + } + + assert(chain->image_count <= *pCount); + for (uint32_t i = 0; i < chain->image_count; i++) + pSwapchainImages[i] = anv_image_to_handle(chain->images[i].image); + + *pCount = chain->image_count; + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_swapchain_acquire_next_image(struct anv_swapchain *anv_chain, + uint64_t timeout, + VkSemaphore semaphore, + uint32_t *image_index) +{ + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; + + int ret = wl_display_dispatch_queue_pending(chain->display->display, + chain->queue); + /* XXX: I'm not sure if out-of-date is the right error here. If + * wl_display_dispatch_queue_pending fails it most likely means we got + * kicked by the server so this seems more-or-less correct. + */ + if (ret < 0) + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + + while (1) { + for (uint32_t i = 0; i < chain->image_count; i++) { + if (!chain->images[i].busy) { + /* We found a non-busy image */ + *image_index = i; + return VK_SUCCESS; + } + } + + /* This time we do a blocking dispatch because we can't go + * anywhere until we get an event. + */ + int ret = wl_display_roundtrip_queue(chain->display->display, + chain->queue); + if (ret < 0) + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + } +} + +static void +frame_handle_done(void *data, struct wl_callback *callback, uint32_t serial) +{ + struct wsi_wl_swapchain *chain = data; + + chain->fifo_ready = true; + + wl_callback_destroy(callback); +} + +static const struct wl_callback_listener frame_listener = { + frame_handle_done, +}; + +static VkResult +wsi_wl_swapchain_queue_present(struct anv_swapchain *anv_chain, + struct anv_queue *queue, + uint32_t image_index) +{ + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; + + if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) { + while (!chain->fifo_ready) { + int ret = wl_display_dispatch_queue(chain->display->display, + chain->queue); + if (ret < 0) + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + } + } + + assert(image_index < chain->image_count); + wl_surface_attach(chain->surface, chain->images[image_index].buffer, 0, 0); + wl_surface_damage(chain->surface, 0, 0, INT32_MAX, INT32_MAX); + + if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) { + struct wl_callback *frame = wl_surface_frame(chain->surface); + wl_proxy_set_queue((struct wl_proxy *)frame, chain->queue); + wl_callback_add_listener(frame, &frame_listener, chain); + chain->fifo_ready = false; + } + + chain->images[image_index].busy = true; + wl_surface_commit(chain->surface); + wl_display_flush(chain->display->display); + + return VK_SUCCESS; +} + +static void +wsi_wl_image_finish(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image, + const VkAllocationCallbacks* pAllocator) +{ + VkDevice vk_device = anv_device_to_handle(chain->base.device); + anv_FreeMemory(vk_device, anv_device_memory_to_handle(image->memory), + pAllocator); + anv_DestroyImage(vk_device, anv_image_to_handle(image->image), + pAllocator); +} + +static void +buffer_handle_release(void *data, struct wl_buffer *buffer) +{ + struct wsi_wl_image *image = data; + + assert(image->buffer == buffer); + + image->busy = false; +} + +static const struct wl_buffer_listener buffer_listener = { + buffer_handle_release, +}; + +static VkResult +wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image, + const VkAllocationCallbacks* pAllocator) +{ + VkDevice vk_device = anv_device_to_handle(chain->base.device); + VkResult result; + + VkImage vk_image; + result = anv_image_create(vk_device, + &(struct anv_image_create_info) { + .isl_tiling_flags = ISL_TILING_X_BIT, + .stride = 0, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = chain->vk_format, + .extent = { + .width = chain->extent.width, + .height = chain->extent.height, + .depth = 1 + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + pAllocator, + &vk_image); + + if (result != VK_SUCCESS) + return result; + + image->image = anv_image_from_handle(vk_image); + assert(anv_format_is_color(image->image->format)); + + struct anv_surface *surface = &image->image->color_surface; + + VkDeviceMemory vk_memory; + result = anv_AllocateMemory(vk_device, + &(VkMemoryAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = image->image->size, + .memoryTypeIndex = 0, + }, + pAllocator, + &vk_memory); + + if (result != VK_SUCCESS) + goto fail_image; + + image->memory = anv_device_memory_from_handle(vk_memory); + + result = anv_BindImageMemory(vk_device, vk_image, vk_memory, 0); + + if (result != VK_SUCCESS) + goto fail_mem; + + int ret = anv_gem_set_tiling(chain->base.device, + image->memory->bo.gem_handle, + surface->isl.row_pitch, I915_TILING_X); + if (ret) { + /* FINISHME: Choose a better error. */ + result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail_mem; + } + + int fd = anv_gem_handle_to_fd(chain->base.device, + image->memory->bo.gem_handle); + if (fd == -1) { + /* FINISHME: Choose a better error. */ + result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail_mem; + } + + image->buffer = wl_drm_create_prime_buffer(chain->display->drm, + fd, /* name */ + chain->extent.width, + chain->extent.height, + chain->drm_format, + surface->offset, + surface->isl.row_pitch, + 0, 0, 0, 0 /* unused */); + wl_display_roundtrip(chain->display->display); + close(fd); + + wl_proxy_set_queue((struct wl_proxy *)image->buffer, chain->queue); + wl_buffer_add_listener(image->buffer, &buffer_listener, image); + + return VK_SUCCESS; + +fail_mem: + anv_FreeMemory(vk_device, vk_memory, pAllocator); +fail_image: + anv_DestroyImage(vk_device, vk_image, pAllocator); + + return result; +} + +static VkResult +wsi_wl_swapchain_destroy(struct anv_swapchain *anv_chain, + const VkAllocationCallbacks *pAllocator) +{ + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; + + for (uint32_t i = 0; i < chain->image_count; i++) { + if (chain->images[i].buffer) + wsi_wl_image_finish(chain, &chain->images[i], pAllocator); + } + + anv_free2(&chain->base.device->alloc, pAllocator, chain); + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain_out) +{ + VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; + struct wsi_wl_swapchain *chain; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); + + int num_images = pCreateInfo->minImageCount; + + assert(num_images >= MIN_NUM_IMAGES); + + /* For true mailbox mode, we need at least 4 images: + * 1) One to scan out from + * 2) One to have queued for scan-out + * 3) One to be currently held by the Wayland compositor + * 4) One to render to + */ + if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_KHR) + num_images = MAX2(num_images, 4); + + size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); + chain = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (chain == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + chain->base.device = device; + chain->base.destroy = wsi_wl_swapchain_destroy; + chain->base.get_images = wsi_wl_swapchain_get_images; + chain->base.acquire_next_image = wsi_wl_swapchain_acquire_next_image; + chain->base.queue_present = wsi_wl_swapchain_queue_present; + + chain->surface = surface->surface; + chain->extent = pCreateInfo->imageExtent; + chain->vk_format = pCreateInfo->imageFormat; + chain->drm_format = wl_drm_format_for_vk_format(chain->vk_format, false); + + chain->present_mode = pCreateInfo->presentMode; + chain->fifo_ready = true; + + chain->image_count = num_images; + + /* Mark a bunch of stuff as NULL. This way we can just call + * destroy_swapchain for cleanup. + */ + for (uint32_t i = 0; i < chain->image_count; i++) + chain->images[i].buffer = NULL; + chain->queue = NULL; + + chain->display = wsi_wl_get_display(device->instance, surface->display); + if (!chain->display) + goto fail; + + chain->queue = wl_display_create_queue(chain->display->display); + if (!chain->queue) + goto fail; + + for (uint32_t i = 0; i < chain->image_count; i++) { + result = wsi_wl_image_init(chain, &chain->images[i], pAllocator); + if (result != VK_SUCCESS) + goto fail; + chain->images[i].busy = false; + } + + *swapchain_out = &chain->base; + + return VK_SUCCESS; + +fail: + wsi_wl_swapchain_destroy(&chain->base, pAllocator); + + return result; +} + +VkResult +anv_wl_init_wsi(struct anv_instance *instance) +{ + struct wsi_wayland *wsi; + VkResult result; + + wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!wsi) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + + wsi->instance = instance; + + int ret = pthread_mutex_init(&wsi->mutex, NULL); + if (ret != 0) { + if (ret == ENOMEM) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } else { + /* FINISHME: Choose a better error. */ + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + goto fail_alloc; + } + + wsi->displays = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + if (!wsi->displays) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_mutex; + } + + wsi->base.get_support = wsi_wl_surface_get_support; + wsi->base.get_capabilities = wsi_wl_surface_get_capabilities; + wsi->base.get_formats = wsi_wl_surface_get_formats; + wsi->base.get_present_modes = wsi_wl_surface_get_present_modes; + wsi->base.create_swapchain = wsi_wl_surface_create_swapchain; + + instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = &wsi->base; + + return VK_SUCCESS; + +fail_mutex: + pthread_mutex_destroy(&wsi->mutex); + +fail_alloc: + anv_free(&instance->alloc, wsi); +fail: + instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = NULL; + + return result; +} + +void +anv_wl_finish_wsi(struct anv_instance *instance) +{ + struct wsi_wayland *wsi = + (struct wsi_wayland *)instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; + + if (wsi) { + _mesa_hash_table_destroy(wsi->displays, NULL); + + pthread_mutex_destroy(&wsi->mutex); + + anv_free(&instance->alloc, wsi); + } +} diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c new file mode 100644 index 00000000000..acb4a60e65f --- /dev/null +++ b/src/vulkan/anv_wsi_x11.c @@ -0,0 +1,757 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <xcb/xcb.h> +#include <xcb/dri3.h> +#include <xcb/present.h> + +#include "anv_wsi.h" + +#include "util/hash_table.h" + +struct wsi_x11_connection { + bool has_dri3; + bool has_present; +}; + +struct wsi_x11 { + struct anv_wsi_interface base; + + pthread_mutex_t mutex; + /* Hash table of xcb_connection -> wsi_x11_connection mappings */ + struct hash_table *connections; +}; + +static struct wsi_x11_connection * +wsi_x11_connection_create(struct anv_instance *instance, xcb_connection_t *conn) +{ + xcb_query_extension_cookie_t dri3_cookie, pres_cookie; + xcb_query_extension_reply_t *dri3_reply, *pres_reply; + + struct wsi_x11_connection *wsi_conn = + anv_alloc(&instance->alloc, sizeof(*wsi_conn), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!wsi_conn) + return NULL; + + dri3_cookie = xcb_query_extension(conn, 4, "DRI3"); + pres_cookie = xcb_query_extension(conn, 7, "PRESENT"); + + dri3_reply = xcb_query_extension_reply(conn, dri3_cookie, NULL); + pres_reply = xcb_query_extension_reply(conn, pres_cookie, NULL); + if (dri3_reply == NULL || pres_reply == NULL) { + free(dri3_reply); + free(pres_reply); + anv_free(&instance->alloc, wsi_conn); + return NULL; + } + + wsi_conn->has_dri3 = dri3_reply->present != 0; + wsi_conn->has_present = pres_reply->present != 0; + + free(dri3_reply); + free(pres_reply); + + return wsi_conn; +} + +static void +wsi_x11_connection_destroy(struct anv_instance *instance, + struct wsi_x11_connection *conn) +{ + anv_free(&instance->alloc, conn); +} + +static struct wsi_x11_connection * +wsi_x11_get_connection(struct anv_instance *instance, xcb_connection_t *conn) +{ + struct wsi_x11 *wsi = + (struct wsi_x11 *)instance->wsi[VK_ICD_WSI_PLATFORM_XCB]; + + pthread_mutex_lock(&wsi->mutex); + + struct hash_entry *entry = _mesa_hash_table_search(wsi->connections, conn); + if (!entry) { + /* We're about to make a bunch of blocking calls. Let's drop the + * mutex for now so we don't block up too badly. + */ + pthread_mutex_unlock(&wsi->mutex); + + struct wsi_x11_connection *wsi_conn = + wsi_x11_connection_create(instance, conn); + + pthread_mutex_lock(&wsi->mutex); + + entry = _mesa_hash_table_search(wsi->connections, conn); + if (entry) { + /* Oops, someone raced us to it */ + wsi_x11_connection_destroy(instance, wsi_conn); + } else { + entry = _mesa_hash_table_insert(wsi->connections, conn, wsi_conn); + } + } + + pthread_mutex_unlock(&wsi->mutex); + + return entry->data; +} + +static const VkSurfaceFormatKHR formats[] = { + { .format = VK_FORMAT_B8G8R8A8_SRGB, }, +}; + +static const VkPresentModeKHR present_modes[] = { + VK_PRESENT_MODE_MAILBOX_KHR, +}; + +static xcb_screen_t * +get_screen_for_root(xcb_connection_t *conn, xcb_window_t root) +{ + xcb_screen_iterator_t screen_iter = + xcb_setup_roots_iterator(xcb_get_setup(conn)); + + for (; screen_iter.rem; xcb_screen_next (&screen_iter)) { + if (screen_iter.data->root == root) + return screen_iter.data; + } + + return NULL; +} + +static xcb_visualtype_t * +screen_get_visualtype(xcb_screen_t *screen, xcb_visualid_t visual_id, + unsigned *depth) +{ + xcb_depth_iterator_t depth_iter = + xcb_screen_allowed_depths_iterator(screen); + + for (; depth_iter.rem; xcb_depth_next (&depth_iter)) { + xcb_visualtype_iterator_t visual_iter = + xcb_depth_visuals_iterator (depth_iter.data); + + for (; visual_iter.rem; xcb_visualtype_next (&visual_iter)) { + if (visual_iter.data->visual_id == visual_id) { + if (depth) + *depth = depth_iter.data->depth; + return visual_iter.data; + } + } + } + + return NULL; +} + +static xcb_visualtype_t * +connection_get_visualtype(xcb_connection_t *conn, xcb_visualid_t visual_id, + unsigned *depth) +{ + xcb_screen_iterator_t screen_iter = + xcb_setup_roots_iterator(xcb_get_setup(conn)); + + /* For this we have to iterate over all of the screens which is rather + * annoying. Fortunately, there is probably only 1. + */ + for (; screen_iter.rem; xcb_screen_next (&screen_iter)) { + xcb_visualtype_t *visual = screen_get_visualtype(screen_iter.data, + visual_id, depth); + if (visual) + return visual; + } + + return NULL; +} + +static xcb_visualtype_t * +get_visualtype_for_window(xcb_connection_t *conn, xcb_window_t window, + unsigned *depth) +{ + xcb_query_tree_cookie_t tree_cookie; + xcb_get_window_attributes_cookie_t attrib_cookie; + xcb_query_tree_reply_t *tree; + xcb_get_window_attributes_reply_t *attrib; + + tree_cookie = xcb_query_tree(conn, window); + attrib_cookie = xcb_get_window_attributes(conn, window); + + tree = xcb_query_tree_reply(conn, tree_cookie, NULL); + attrib = xcb_get_window_attributes_reply(conn, attrib_cookie, NULL); + if (attrib == NULL || tree == NULL) { + free(attrib); + free(tree); + return NULL; + } + + xcb_window_t root = tree->root; + xcb_visualid_t visual_id = attrib->visual; + free(attrib); + free(tree); + + xcb_screen_t *screen = get_screen_for_root(conn, root); + if (screen == NULL) + return NULL; + + return screen_get_visualtype(screen, visual_id, depth); +} + +static bool +visual_has_alpha(xcb_visualtype_t *visual, unsigned depth) +{ + uint32_t rgb_mask = visual->red_mask | + visual->green_mask | + visual->blue_mask; + + uint32_t all_mask = 0xffffffff >> (32 - depth); + + /* Do we have bits left over after RGB? */ + return (all_mask & ~rgb_mask) != 0; +} + +VkBool32 anv_GetPhysicalDeviceXcbPresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + xcb_connection_t* connection, + xcb_visualid_t visual_id) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + + struct wsi_x11_connection *wsi_conn = + wsi_x11_get_connection(device->instance, connection); + + if (!wsi_conn->has_dri3) { + fprintf(stderr, "vulkan: No DRI3 support\n"); + return false; + } + + unsigned visual_depth; + if (!connection_get_visualtype(connection, visual_id, &visual_depth)) + return false; + + if (visual_depth != 24 && visual_depth != 32) + return false; + + return true; +} + +static VkResult +x11_surface_get_support(VkIcdSurfaceBase *icd_surface, + struct anv_physical_device *device, + uint32_t queueFamilyIndex, + VkBool32* pSupported) +{ + VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; + + struct wsi_x11_connection *wsi_conn = + wsi_x11_get_connection(device->instance, surface->connection); + if (!wsi_conn) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + if (!wsi_conn->has_dri3) { + fprintf(stderr, "vulkan: No DRI3 support\n"); + *pSupported = false; + return VK_SUCCESS; + } + + unsigned visual_depth; + if (!get_visualtype_for_window(surface->connection, surface->window, + &visual_depth)) { + *pSupported = false; + return VK_SUCCESS; + } + + if (visual_depth != 24 && visual_depth != 32) { + *pSupported = false; + return VK_SUCCESS; + } + + *pSupported = true; + return VK_SUCCESS; +} + +static VkResult +x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface, + struct anv_physical_device *device, + VkSurfaceCapabilitiesKHR *caps) +{ + VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; + xcb_get_geometry_cookie_t geom_cookie; + xcb_generic_error_t *err; + xcb_get_geometry_reply_t *geom; + unsigned visual_depth; + + geom_cookie = xcb_get_geometry(surface->connection, surface->window); + + /* This does a round-trip. This is why we do get_geometry first and + * wait to read the reply until after we have a visual. + */ + xcb_visualtype_t *visual = + get_visualtype_for_window(surface->connection, surface->window, + &visual_depth); + + geom = xcb_get_geometry_reply(surface->connection, geom_cookie, &err); + if (geom) { + VkExtent2D extent = { geom->width, geom->height }; + caps->currentExtent = extent; + caps->minImageExtent = extent; + caps->maxImageExtent = extent; + } else { + /* This can happen if the client didn't wait for the configure event + * to come back from the compositor. In that case, we don't know the + * size of the window so we just return valid "I don't know" stuff. + */ + caps->currentExtent = (VkExtent2D) { -1, -1 }; + caps->minImageExtent = (VkExtent2D) { 1, 1 }; + caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; + } + free(err); + free(geom); + + if (visual_has_alpha(visual, visual_depth)) { + caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR | + VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR; + } else { + caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR | + VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + } + + caps->minImageCount = 2; + caps->maxImageCount = 4; + caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + caps->maxImageArrayLayers = 1; + caps->supportedUsageFlags = + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + return VK_SUCCESS; +} + +static VkResult +x11_surface_get_formats(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t *pSurfaceFormatCount, + VkSurfaceFormatKHR *pSurfaceFormats) +{ + if (pSurfaceFormats == NULL) { + *pSurfaceFormatCount = ARRAY_SIZE(formats); + return VK_SUCCESS; + } + + assert(*pSurfaceFormatCount >= ARRAY_SIZE(formats)); + typed_memcpy(pSurfaceFormats, formats, *pSurfaceFormatCount); + *pSurfaceFormatCount = ARRAY_SIZE(formats); + + return VK_SUCCESS; +} + +static VkResult +x11_surface_get_present_modes(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t *pPresentModeCount, + VkPresentModeKHR *pPresentModes) +{ + if (pPresentModes == NULL) { + *pPresentModeCount = ARRAY_SIZE(present_modes); + return VK_SUCCESS; + } + + assert(*pPresentModeCount >= ARRAY_SIZE(present_modes)); + typed_memcpy(pPresentModes, present_modes, *pPresentModeCount); + *pPresentModeCount = ARRAY_SIZE(present_modes); + + return VK_SUCCESS; +} + +static VkResult +x11_surface_create_swapchain(VkIcdSurfaceBase *surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain); + +VkResult anv_CreateXcbSurfaceKHR( + VkInstance _instance, + const VkXcbSurfaceCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR); + + VkIcdSurfaceXcb *surface; + + surface = anv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (surface == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + surface->base.platform = VK_ICD_WSI_PLATFORM_XCB; + surface->connection = pCreateInfo->connection; + surface->window = pCreateInfo->window; + + *pSurface = _VkIcdSurfaceBase_to_handle(&surface->base); + + return VK_SUCCESS; +} + +struct x11_image { + struct anv_image * image; + struct anv_device_memory * memory; + xcb_pixmap_t pixmap; + xcb_get_geometry_cookie_t geom_cookie; + bool busy; +}; + +struct x11_swapchain { + struct anv_swapchain base; + + xcb_connection_t * conn; + xcb_window_t window; + xcb_gc_t gc; + VkExtent2D extent; + uint32_t image_count; + uint32_t next_image; + struct x11_image images[0]; +}; + +static VkResult +x11_get_images(struct anv_swapchain *anv_chain, + uint32_t* pCount, VkImage *pSwapchainImages) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + + if (pSwapchainImages == NULL) { + *pCount = chain->image_count; + return VK_SUCCESS; + } + + assert(chain->image_count <= *pCount); + for (uint32_t i = 0; i < chain->image_count; i++) + pSwapchainImages[i] = anv_image_to_handle(chain->images[i].image); + + *pCount = chain->image_count; + + return VK_SUCCESS; +} + +static VkResult +x11_acquire_next_image(struct anv_swapchain *anv_chain, + uint64_t timeout, + VkSemaphore semaphore, + uint32_t *image_index) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + struct x11_image *image = &chain->images[chain->next_image]; + + if (image->busy) { + xcb_generic_error_t *err; + xcb_get_geometry_reply_t *geom = + xcb_get_geometry_reply(chain->conn, image->geom_cookie, &err); + if (!geom) { + free(err); + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + } + + if (geom->width != chain->extent.width || + geom->height != chain->extent.height) { + free(geom); + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + } + free(geom); + + image->busy = false; + } + + *image_index = chain->next_image; + chain->next_image = (chain->next_image + 1) % chain->image_count; + return VK_SUCCESS; +} + +static VkResult +x11_queue_present(struct anv_swapchain *anv_chain, + struct anv_queue *queue, + uint32_t image_index) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + struct x11_image *image = &chain->images[image_index]; + + assert(image_index < chain->image_count); + + xcb_void_cookie_t cookie; + + cookie = xcb_copy_area(chain->conn, + image->pixmap, + chain->window, + chain->gc, + 0, 0, + 0, 0, + chain->extent.width, + chain->extent.height); + xcb_discard_reply(chain->conn, cookie.sequence); + + image->geom_cookie = xcb_get_geometry(chain->conn, chain->window); + image->busy = true; + + xcb_flush(chain->conn); + + return VK_SUCCESS; +} + +static VkResult +x11_swapchain_destroy(struct anv_swapchain *anv_chain, + const VkAllocationCallbacks *pAllocator) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + xcb_void_cookie_t cookie; + + for (uint32_t i = 0; i < chain->image_count; i++) { + struct x11_image *image = &chain->images[i]; + + if (image->busy) + xcb_discard_reply(chain->conn, image->geom_cookie.sequence); + + cookie = xcb_free_pixmap(chain->conn, image->pixmap); + xcb_discard_reply(chain->conn, cookie.sequence); + + /* TODO: Delete images and free memory */ + } + + anv_free(NULL /* XXX: pAllocator */, chain); + + return VK_SUCCESS; +} + +static VkResult +x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain_out) +{ + VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; + struct x11_swapchain *chain; + xcb_void_cookie_t cookie; + VkResult result; + + int num_images = pCreateInfo->minImageCount; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); + + size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); + chain = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (chain == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + chain->base.device = device; + chain->base.destroy = x11_swapchain_destroy; + chain->base.get_images = x11_get_images; + chain->base.acquire_next_image = x11_acquire_next_image; + chain->base.queue_present = x11_queue_present; + + chain->conn = surface->connection; + chain->window = surface->window; + chain->extent = pCreateInfo->imageExtent; + chain->image_count = num_images; + chain->next_image = 0; + + for (uint32_t i = 0; i < chain->image_count; i++) { + VkDeviceMemory memory_h; + VkImage image_h; + struct anv_image *image; + struct anv_surface *surface; + struct anv_device_memory *memory; + + anv_image_create(anv_device_to_handle(device), + &(struct anv_image_create_info) { + .isl_tiling_flags = ISL_TILING_X_BIT, + .stride = 0, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = pCreateInfo->imageFormat, + .extent = { + .width = pCreateInfo->imageExtent.width, + .height = pCreateInfo->imageExtent.height, + .depth = 1 + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + NULL, + &image_h); + + image = anv_image_from_handle(image_h); + assert(anv_format_is_color(image->format)); + + surface = &image->color_surface; + + anv_AllocateMemory(anv_device_to_handle(device), + &(VkMemoryAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = image->size, + .memoryTypeIndex = 0, + }, + NULL /* XXX: pAllocator */, + &memory_h); + + memory = anv_device_memory_from_handle(memory_h); + + anv_BindImageMemory(VK_NULL_HANDLE, anv_image_to_handle(image), + memory_h, 0); + + int ret = anv_gem_set_tiling(device, memory->bo.gem_handle, + surface->isl.row_pitch, I915_TILING_X); + if (ret) { + /* FINISHME: Choose a better error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "set_tiling failed: %m"); + goto fail; + } + + int fd = anv_gem_handle_to_fd(device, memory->bo.gem_handle); + if (fd == -1) { + /* FINISHME: Choose a better error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "handle_to_fd failed: %m"); + goto fail; + } + + uint32_t bpp = 32; + uint32_t depth = 24; + xcb_pixmap_t pixmap = xcb_generate_id(chain->conn); + + cookie = + xcb_dri3_pixmap_from_buffer_checked(chain->conn, + pixmap, + chain->window, + image->size, + pCreateInfo->imageExtent.width, + pCreateInfo->imageExtent.height, + surface->isl.row_pitch, + depth, bpp, fd); + + chain->images[i].image = image; + chain->images[i].memory = memory; + chain->images[i].pixmap = pixmap; + chain->images[i].busy = false; + + xcb_discard_reply(chain->conn, cookie.sequence); + } + + chain->gc = xcb_generate_id(chain->conn); + if (!chain->gc) { + /* FINISHME: Choose a better error. */ + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + + cookie = xcb_create_gc(chain->conn, + chain->gc, + chain->window, + XCB_GC_GRAPHICS_EXPOSURES, + (uint32_t []) { 0 }); + xcb_discard_reply(chain->conn, cookie.sequence); + + *swapchain_out = &chain->base; + + return VK_SUCCESS; + + fail: + return result; +} + +VkResult +anv_x11_init_wsi(struct anv_instance *instance) +{ + struct wsi_x11 *wsi; + VkResult result; + + wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!wsi) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + + int ret = pthread_mutex_init(&wsi->mutex, NULL); + if (ret != 0) { + if (ret == ENOMEM) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } else { + /* FINISHME: Choose a better error. */ + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + goto fail_alloc; + } + + wsi->connections = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + if (!wsi->connections) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_mutex; + } + + wsi->base.get_support = x11_surface_get_support; + wsi->base.get_capabilities = x11_surface_get_capabilities; + wsi->base.get_formats = x11_surface_get_formats; + wsi->base.get_present_modes = x11_surface_get_present_modes; + wsi->base.create_swapchain = x11_surface_create_swapchain; + + instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = &wsi->base; + + return VK_SUCCESS; + +fail_mutex: + pthread_mutex_destroy(&wsi->mutex); +fail_alloc: + anv_free(&instance->alloc, wsi); +fail: + instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = NULL; + + return result; +} + +void +anv_x11_finish_wsi(struct anv_instance *instance) +{ + struct wsi_x11 *wsi = + (struct wsi_x11 *)instance->wsi[VK_ICD_WSI_PLATFORM_XCB]; + + if (wsi) { + _mesa_hash_table_destroy(wsi->connections, NULL); + + pthread_mutex_destroy(&wsi->mutex); + + anv_free(&instance->alloc, wsi); + } +} diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h new file mode 100644 index 00000000000..7b098894ee2 --- /dev/null +++ b/src/vulkan/gen75_pack.h @@ -0,0 +1,8421 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +/* Instructions, enums and structures for HSW. + * + * This file has been generated, do not hand edit. + */ + +#pragma once + +#include <stdio.h> +#include <assert.h> + +#ifndef __gen_validate_value +#define __gen_validate_value(x) +#endif + +#ifndef __gen_field_functions +#define __gen_field_functions + +union __gen_value { + float f; + uint32_t dw; +}; + +static inline uint64_t +__gen_mbo(uint32_t start, uint32_t end) +{ + return (~0ull >> (64 - (end - start + 1))) << start; +} + +static inline uint64_t +__gen_field(uint64_t v, uint32_t start, uint32_t end) +{ + __gen_validate_value(v); +#if DEBUG + if (end - start + 1 < 64) + assert(v < 1ull << (end - start + 1)); +#endif + + return v << start; +} + +static inline uint64_t +__gen_fixed(float v, uint32_t start, uint32_t end, + bool is_signed, uint32_t fract_bits) +{ + __gen_validate_value(v); + + const float factor = (1 << fract_bits); + + float max, min; + if (is_signed) { + max = ((1 << (end - start)) - 1) / factor; + min = -(1 << (end - start)) / factor; + } else { + max = ((1 << (end - start + 1)) - 1) / factor; + min = 0.0f; + } + + if (v > max) + v = max; + else if (v < min) + v = min; + + int32_t int_val = roundf(v * factor); + + if (is_signed) + int_val &= (1 << (end - start + 1)) - 1; + + return int_val << start; +} + +static inline uint64_t +__gen_offset(uint64_t v, uint32_t start, uint32_t end) +{ + __gen_validate_value(v); +#if DEBUG + uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start; + + assert((v & ~mask) == 0); +#endif + + return v; +} + +static inline uint32_t +__gen_float(float v) +{ + __gen_validate_value(v); + return ((union __gen_value) { .f = (v) }).dw; +} + +#ifndef __gen_address_type +#error #define __gen_address_type before including this file +#endif + +#ifndef __gen_user_data +#error #define __gen_combine_address before including this file +#endif + +#endif + +#define GEN75_3DSTATE_URB_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_URB_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 48, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_URB_VS_length 0x00000002 + +struct GEN75_3DSTATE_URB_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t VSURBStartingAddress; + uint32_t VSURBEntryAllocationSize; + uint32_t VSNumberofURBEntries; +}; + +static inline void +GEN75_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_URB_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->VSURBStartingAddress, 25, 30) | + __gen_field(values->VSURBEntryAllocationSize, 16, 24) | + __gen_field(values->VSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN75_GPGPU_CSR_BASE_ADDRESS_length_bias 0x00000002 +#define GEN75_GPGPU_CSR_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 0 + +#define GEN75_GPGPU_CSR_BASE_ADDRESS_length 0x00000002 + +struct GEN75_GPGPU_CSR_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GPGPUCSRBaseAddress; +}; + +static inline void +GEN75_GPGPU_CSR_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_GPGPU_CSR_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->GPGPUCSRBaseAddress, dw1); + +} + +#define GEN75_MI_STORE_REGISTER_MEM_length_bias 0x00000002 +#define GEN75_MI_STORE_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 36, \ + .DwordLength = 1 + +#define GEN75_MI_STORE_REGISTER_MEM_length 0x00000003 + +struct GEN75_MI_STORE_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t PredicateEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN75_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_STORE_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->PredicateEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN75_PIPELINE_SELECT_length_bias 0x00000001 +#define GEN75_PIPELINE_SELECT_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4 + +#define GEN75_PIPELINE_SELECT_length 0x00000001 + +struct GEN75_PIPELINE_SELECT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; +#define _3D 0 +#define Media 1 +#define GPGPU 2 + uint32_t PipelineSelection; +}; + +static inline void +GEN75_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_PIPELINE_SELECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->PipelineSelection, 0, 1) | + 0; + +} + +#define GEN75_STATE_BASE_ADDRESS_length_bias 0x00000002 +#define GEN75_STATE_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 1, \ + .DwordLength = 8 + +#define GEN75_STATE_BASE_ADDRESS_length 0x0000000a + +#define GEN75_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 + +struct GEN75_MEMORY_OBJECT_CONTROL_STATE { + uint32_t LLCeLLCCacheabilityControlLLCCC; + uint32_t L3CacheabilityControlL3CC; +}; + +static inline void +GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEMORY_OBJECT_CONTROL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->LLCeLLCCacheabilityControlLLCCC, 1, 2) | + __gen_field(values->L3CacheabilityControlL3CC, 0, 0) | + 0; + +} + +struct GEN75_STATE_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GeneralStateBaseAddress; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE GeneralStateMemoryObjectControlState; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE StatelessDataPortAccessMemoryObjectControlState; + bool GeneralStateBaseAddressModifyEnable; + __gen_address_type SurfaceStateBaseAddress; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE SurfaceStateMemoryObjectControlState; + bool SurfaceStateBaseAddressModifyEnable; + __gen_address_type DynamicStateBaseAddress; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE DynamicStateMemoryObjectControlState; + bool DynamicStateBaseAddressModifyEnable; + __gen_address_type IndirectObjectBaseAddress; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE IndirectObjectMemoryObjectControlState; + bool IndirectObjectBaseAddressModifyEnable; + __gen_address_type InstructionBaseAddress; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE InstructionMemoryObjectControlState; + bool InstructionBaseAddressModifyEnable; + __gen_address_type GeneralStateAccessUpperBound; + bool GeneralStateAccessUpperBoundModifyEnable; + __gen_address_type DynamicStateAccessUpperBound; + bool DynamicStateAccessUpperBoundModifyEnable; + __gen_address_type IndirectObjectAccessUpperBound; + bool IndirectObjectAccessUpperBoundModifyEnable; + __gen_address_type InstructionAccessUpperBound; + bool InstructionAccessUpperBoundModifyEnable; +}; + +static inline void +GEN75_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_STATE_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_GeneralStateMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_GeneralStateMemoryObjectControlState, &values->GeneralStateMemoryObjectControlState); + uint32_t dw_StatelessDataPortAccessMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StatelessDataPortAccessMemoryObjectControlState, &values->StatelessDataPortAccessMemoryObjectControlState); + uint32_t dw1 = + __gen_field(dw_GeneralStateMemoryObjectControlState, 8, 11) | + __gen_field(dw_StatelessDataPortAccessMemoryObjectControlState, 4, 7) | + __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1); + + uint32_t dw_SurfaceStateMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceStateMemoryObjectControlState, &values->SurfaceStateMemoryObjectControlState); + uint32_t dw2 = + __gen_field(dw_SurfaceStateMemoryObjectControlState, 8, 11) | + __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceStateBaseAddress, dw2); + + uint32_t dw_DynamicStateMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DynamicStateMemoryObjectControlState, &values->DynamicStateMemoryObjectControlState); + uint32_t dw3 = + __gen_field(dw_DynamicStateMemoryObjectControlState, 8, 11) | + __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->DynamicStateBaseAddress, dw3); + + uint32_t dw_IndirectObjectMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_IndirectObjectMemoryObjectControlState, &values->IndirectObjectMemoryObjectControlState); + uint32_t dw4 = + __gen_field(dw_IndirectObjectMemoryObjectControlState, 8, 11) | + __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | + 0; + + dw[4] = + __gen_combine_address(data, &dw[4], values->IndirectObjectBaseAddress, dw4); + + uint32_t dw_InstructionMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_InstructionMemoryObjectControlState, &values->InstructionMemoryObjectControlState); + uint32_t dw5 = + __gen_field(dw_InstructionMemoryObjectControlState, 8, 11) | + __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | + 0; + + dw[5] = + __gen_combine_address(data, &dw[5], values->InstructionBaseAddress, dw5); + + uint32_t dw6 = + __gen_field(values->GeneralStateAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[6] = + __gen_combine_address(data, &dw[6], values->GeneralStateAccessUpperBound, dw6); + + uint32_t dw7 = + __gen_field(values->DynamicStateAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[7] = + __gen_combine_address(data, &dw[7], values->DynamicStateAccessUpperBound, dw7); + + uint32_t dw8 = + __gen_field(values->IndirectObjectAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[8] = + __gen_combine_address(data, &dw[8], values->IndirectObjectAccessUpperBound, dw8); + + uint32_t dw9 = + __gen_field(values->InstructionAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[9] = + __gen_combine_address(data, &dw[9], values->InstructionAccessUpperBound, dw9); + +} + +#define GEN75_STATE_PREFETCH_length_bias 0x00000002 +#define GEN75_STATE_PREFETCH_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 3, \ + .DwordLength = 0 + +#define GEN75_STATE_PREFETCH_length 0x00000002 + +struct GEN75_STATE_PREFETCH { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type PrefetchPointer; + uint32_t PrefetchCount; +}; + +static inline void +GEN75_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_STATE_PREFETCH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->PrefetchCount, 0, 2) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->PrefetchPointer, dw1); + +} + +#define GEN75_STATE_SIP_length_bias 0x00000002 +#define GEN75_STATE_SIP_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2, \ + .DwordLength = 0 + +#define GEN75_STATE_SIP_length 0x00000002 + +struct GEN75_STATE_SIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SystemInstructionPointer; +}; + +static inline void +GEN75_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_STATE_SIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SystemInstructionPointer, 4, 31) | + 0; + +} + +#define GEN75_SWTESS_BASE_ADDRESS_length_bias 0x00000002 +#define GEN75_SWTESS_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 3, \ + .DwordLength = 0 + +#define GEN75_SWTESS_BASE_ADDRESS_length 0x00000002 + +struct GEN75_SWTESS_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type SWTessellationBaseAddress; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE SWTessellationMemoryObjectControlState; +}; + +static inline void +GEN75_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SWTESS_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_SWTessellationMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SWTessellationMemoryObjectControlState, &values->SWTessellationMemoryObjectControlState); + uint32_t dw1 = + __gen_field(dw_SWTessellationMemoryObjectControlState, 8, 11) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->SWTessellationBaseAddress, dw1); + +} + +#define GEN75_3DPRIMITIVE_length_bias 0x00000002 +#define GEN75_3DPRIMITIVE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 3, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 5 + +#define GEN75_3DPRIMITIVE_length 0x00000007 + +struct GEN75_3DPRIMITIVE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool IndirectParameterEnable; + uint32_t UAVCoherencyRequired; + bool PredicateEnable; + uint32_t DwordLength; + bool EndOffsetEnable; +#define SEQUENTIAL 0 +#define RANDOM 1 + uint32_t VertexAccessType; + uint32_t PrimitiveTopologyType; + uint32_t VertexCountPerInstance; + uint32_t StartVertexLocation; + uint32_t InstanceCount; + uint32_t StartInstanceLocation; + uint32_t BaseVertexLocation; +}; + +static inline void +GEN75_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DPRIMITIVE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->UAVCoherencyRequired, 9, 9) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->EndOffsetEnable, 9, 9) | + __gen_field(values->VertexAccessType, 8, 8) | + __gen_field(values->PrimitiveTopologyType, 0, 5) | + 0; + + dw[2] = + __gen_field(values->VertexCountPerInstance, 0, 31) | + 0; + + dw[3] = + __gen_field(values->StartVertexLocation, 0, 31) | + 0; + + dw[4] = + __gen_field(values->InstanceCount, 0, 31) | + 0; + + dw[5] = + __gen_field(values->StartInstanceLocation, 0, 31) | + 0; + + dw[6] = + __gen_field(values->BaseVertexLocation, 0, 31) | + 0; + +} + +#define GEN75_3DSTATE_AA_LINE_PARAMETERS_length_bias 0x00000002 +#define GEN75_3DSTATE_AA_LINE_PARAMETERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 1 + +#define GEN75_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 + +struct GEN75_3DSTATE_AA_LINE_PARAMETERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float AACoverageBias; + float AACoverageSlope; + float AACoverageEndCapBias; + float AACoverageEndCapSlope; +}; + +static inline void +GEN75_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_AA_LINE_PARAMETERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AACoverageBias * (1 << 8), 16, 23) | + __gen_field(values->AACoverageSlope * (1 << 8), 0, 7) | + 0; + + dw[2] = + __gen_field(values->AACoverageEndCapBias * (1 << 8), 16, 23) | + __gen_field(values->AACoverageEndCapSlope * (1 << 8), 0, 7) | + 0; + +} + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 70 + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_DS_length 0x00000000 + +#define GEN75_BINDING_TABLE_EDIT_ENTRY_length 0x00000001 + +struct GEN75_BINDING_TABLE_EDIT_ENTRY { + uint32_t BindingTableIndex; + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN75_BINDING_TABLE_EDIT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_BINDING_TABLE_EDIT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BindingTableIndex, 16, 23) | + __gen_offset(values->SurfaceStatePointer, 0, 15) | + 0; + +} + +struct GEN75_3DSTATE_BINDING_TABLE_EDIT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_EDIT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 68 + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_GS_length 0x00000000 + +struct GEN75_3DSTATE_BINDING_TABLE_EDIT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_EDIT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 69 + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_HS_length 0x00000000 + +struct GEN75_3DSTATE_BINDING_TABLE_EDIT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_EDIT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 71 + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_PS_length 0x00000000 + +struct GEN75_3DSTATE_BINDING_TABLE_EDIT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_EDIT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 67 + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_VS_length 0x00000000 + +struct GEN75_3DSTATE_BINDING_TABLE_EDIT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_EDIT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 40, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 + +struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSBindingTable; +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSBindingTable, 5, 15) | + 0; + +} + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 41, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 + +struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSBindingTable; +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSBindingTable, 5, 15) | + 0; + +} + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 39, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 + +struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSBindingTable; +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSBindingTable, 5, 15) | + 0; + +} + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 42, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 + +struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSBindingTable; +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSBindingTable, 5, 15) | + 0; + +} + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 38, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 + +struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSBindingTable; +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSBindingTable, 5, 15) | + 0; + +} + +#define GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 1 + +#define GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_length 0x00000003 + +struct GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type BindingTablePoolBaseAddress; + uint32_t BindingTablePoolEnable; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE SurfaceObjectControlState; + __gen_address_type BindingTablePoolUpperBound; +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_SurfaceObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); + uint32_t dw1 = + __gen_field(values->BindingTablePoolEnable, 11, 11) | + __gen_field(dw_SurfaceObjectControlState, 7, 10) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BindingTablePoolBaseAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->BindingTablePoolUpperBound, dw2); + +} + +#define GEN75_3DSTATE_BLEND_STATE_POINTERS_length_bias 0x00000002 +#define GEN75_3DSTATE_BLEND_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 36, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 + +struct GEN75_3DSTATE_BLEND_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BlendStatePointer; +}; + +static inline void +GEN75_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BLEND_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->BlendStatePointer, 6, 31) | + __gen_mbo(0, 0) | + 0; + +} + +#define GEN75_3DSTATE_CC_STATE_POINTERS_length_bias 0x00000002 +#define GEN75_3DSTATE_CC_STATE_POINTERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 14, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_CC_STATE_POINTERS_length 0x00000002 + +struct GEN75_3DSTATE_CC_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ColorCalcStatePointer; +}; + +static inline void +GEN75_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CC_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ColorCalcStatePointer, 6, 31) | + __gen_mbo(0, 0) | + 0; + +} + +#define GEN75_3DSTATE_CHROMA_KEY_length_bias 0x00000002 +#define GEN75_3DSTATE_CHROMA_KEY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 2 + +#define GEN75_3DSTATE_CHROMA_KEY_length 0x00000004 + +struct GEN75_3DSTATE_CHROMA_KEY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ChromaKeyTableIndex; + uint32_t ChromaKeyLowValue; + uint32_t ChromaKeyHighValue; +}; + +static inline void +GEN75_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CHROMA_KEY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ChromaKeyTableIndex, 30, 31) | + 0; + + dw[2] = + __gen_field(values->ChromaKeyLowValue, 0, 31) | + 0; + + dw[3] = + __gen_field(values->ChromaKeyHighValue, 0, 31) | + 0; + +} + +#define GEN75_3DSTATE_CLEAR_PARAMS_length_bias 0x00000002 +#define GEN75_3DSTATE_CLEAR_PARAMS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 1 + +#define GEN75_3DSTATE_CLEAR_PARAMS_length 0x00000003 + +struct GEN75_3DSTATE_CLEAR_PARAMS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t DepthClearValue; + bool DepthClearValueValid; +}; + +static inline void +GEN75_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CLEAR_PARAMS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DepthClearValue, 0, 31) | + 0; + + dw[2] = + __gen_field(values->DepthClearValueValid, 0, 0) | + 0; + +} + +#define GEN75_3DSTATE_CLIP_length_bias 0x00000002 +#define GEN75_3DSTATE_CLIP_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 2 + +#define GEN75_3DSTATE_CLIP_length 0x00000004 + +struct GEN75_3DSTATE_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t FrontWinding; + uint32_t VertexSubPixelPrecisionSelect; + bool EarlyCullEnable; +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_FRONT 2 +#define CULLMODE_BACK 3 + uint32_t CullMode; + bool ClipperStatisticsEnable; + uint32_t UserClipDistanceCullTestEnableBitmask; + bool ClipEnable; +#define APIMODE_OGL 0 + uint32_t APIMode; + bool ViewportXYClipTestEnable; + bool ViewportZClipTestEnable; + bool GuardbandClipTestEnable; + uint32_t UserClipDistanceClipTestEnableBitmask; +#define CLIPMODE_NORMAL 0 +#define CLIPMODE_REJECT_ALL 3 +#define CLIPMODE_ACCEPT_ALL 4 + uint32_t ClipMode; + bool PerspectiveDivideDisable; + bool NonPerspectiveBarycentricEnable; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleStripListProvokingVertexSelect; +#define Vertex0 0 +#define Vertex1 1 + uint32_t LineStripListProvokingVertexSelect; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleFanProvokingVertexSelect; + float MinimumPointWidth; + float MaximumPointWidth; + bool ForceZeroRTAIndexEnable; + uint32_t MaximumVPIndex; +}; + +static inline void +GEN75_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->FrontWinding, 20, 20) | + __gen_field(values->VertexSubPixelPrecisionSelect, 19, 19) | + __gen_field(values->EarlyCullEnable, 18, 18) | + __gen_field(values->CullMode, 16, 17) | + __gen_field(values->ClipperStatisticsEnable, 10, 10) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + + dw[2] = + __gen_field(values->ClipEnable, 31, 31) | + __gen_field(values->APIMode, 30, 30) | + __gen_field(values->ViewportXYClipTestEnable, 28, 28) | + __gen_field(values->ViewportZClipTestEnable, 27, 27) | + __gen_field(values->GuardbandClipTestEnable, 26, 26) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 16, 23) | + __gen_field(values->ClipMode, 13, 15) | + __gen_field(values->PerspectiveDivideDisable, 9, 9) | + __gen_field(values->NonPerspectiveBarycentricEnable, 8, 8) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 4, 5) | + __gen_field(values->LineStripListProvokingVertexSelect, 2, 3) | + __gen_field(values->TriangleFanProvokingVertexSelect, 0, 1) | + 0; + + dw[3] = + __gen_field(values->MinimumPointWidth * (1 << 3), 17, 27) | + __gen_field(values->MaximumPointWidth * (1 << 3), 6, 16) | + __gen_field(values->ForceZeroRTAIndexEnable, 5, 5) | + __gen_field(values->MaximumVPIndex, 0, 3) | + 0; + +} + +#define GEN75_3DSTATE_CONSTANT_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 5 + +#define GEN75_3DSTATE_CONSTANT_DS_length 0x00000007 + +#define GEN75_3DSTATE_CONSTANT_BODY_length 0x00000006 + +struct GEN75_3DSTATE_CONSTANT_BODY { + uint32_t ConstantBuffer1ReadLength; + uint32_t ConstantBuffer0ReadLength; + uint32_t ConstantBuffer3ReadLength; + uint32_t ConstantBuffer2ReadLength; + __gen_address_type PointerToConstantBuffer0; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + __gen_address_type PointerToConstantBuffer1; + __gen_address_type PointerToConstantBuffer2; + __gen_address_type PointerToConstantBuffer3; +}; + +static inline void +GEN75_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CONSTANT_BODY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | + 0; + + uint32_t dw_ConstantBufferObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + uint32_t dw2 = + __gen_field(dw_ConstantBufferObjectControlState, 0, 4) | + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->PointerToConstantBuffer1, dw3); + + uint32_t dw4 = + 0; + + dw[4] = + __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer2, dw4); + + uint32_t dw5 = + 0; + + dw[5] = + __gen_combine_address(data, &dw[5], values->PointerToConstantBuffer3, dw5); + +} + +struct GEN75_3DSTATE_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN75_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN75_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN75_3DSTATE_CONSTANT_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 5 + +#define GEN75_3DSTATE_CONSTANT_GS_length 0x00000007 + +struct GEN75_3DSTATE_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN75_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN75_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CONSTANT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN75_3DSTATE_CONSTANT_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 5 + +#define GEN75_3DSTATE_CONSTANT_HS_length 0x00000007 + +struct GEN75_3DSTATE_CONSTANT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN75_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN75_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CONSTANT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN75_3DSTATE_CONSTANT_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 23, \ + .DwordLength = 5 + +#define GEN75_3DSTATE_CONSTANT_PS_length 0x00000007 + +struct GEN75_3DSTATE_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN75_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN75_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN75_3DSTATE_CONSTANT_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 5 + +#define GEN75_3DSTATE_CONSTANT_VS_length 0x00000007 + +struct GEN75_3DSTATE_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN75_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN75_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN75_3DSTATE_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN75_3DSTATE_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 5, \ + .DwordLength = 5 + +#define GEN75_3DSTATE_DEPTH_BUFFER_length 0x00000007 + +struct GEN75_3DSTATE_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + bool DepthWriteEnable; + bool StencilWriteEnable; + bool HierarchicalDepthBufferEnable; +#define D32_FLOAT 1 +#define D24_UNORM_X8_UINT 3 +#define D16_UNORM 5 + uint32_t SurfaceFormat; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t Height; + uint32_t Width; + uint32_t LOD; +#define SURFTYPE_CUBEmustbezero 0 + uint32_t Depth; + uint32_t MinimumArrayElement; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE DepthBufferObjectControlState; + uint32_t DepthCoordinateOffsetY; + uint32_t DepthCoordinateOffsetX; + uint32_t RenderTargetViewExtent; +}; + +static inline void +GEN75_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->DepthWriteEnable, 28, 28) | + __gen_field(values->StencilWriteEnable, 27, 27) | + __gen_field(values->HierarchicalDepthBufferEnable, 22, 22) | + __gen_field(values->SurfaceFormat, 18, 20) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[3] = + __gen_field(values->Height, 18, 31) | + __gen_field(values->Width, 4, 17) | + __gen_field(values->LOD, 0, 3) | + 0; + + uint32_t dw_DepthBufferObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DepthBufferObjectControlState, &values->DepthBufferObjectControlState); + dw[4] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->MinimumArrayElement, 10, 20) | + __gen_field(dw_DepthBufferObjectControlState, 0, 3) | + 0; + + dw[5] = + __gen_field(values->DepthCoordinateOffsetY, 16, 31) | + __gen_field(values->DepthCoordinateOffsetX, 0, 15) | + 0; + + dw[6] = + __gen_field(values->RenderTargetViewExtent, 21, 31) | + 0; + +} + +#define GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length_bias 0x00000002 +#define GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 37, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length 0x00000002 + +struct GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDEPTH_STENCIL_STATE; +}; + +static inline void +GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDEPTH_STENCIL_STATE, 6, 31) | + __gen_mbo(0, 0) | + 0; + +} + +#define GEN75_3DSTATE_DRAWING_RECTANGLE_length_bias 0x00000002 +#define GEN75_3DSTATE_DRAWING_RECTANGLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 2 + +#define GEN75_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 + +struct GEN75_3DSTATE_DRAWING_RECTANGLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; +#define Legacy 0 +#define Core0Enabled 1 +#define Core1Enabled 2 + uint32_t CoreModeSelect; + uint32_t DwordLength; + uint32_t ClippedDrawingRectangleYMin; + uint32_t ClippedDrawingRectangleXMin; + uint32_t ClippedDrawingRectangleYMax; + uint32_t ClippedDrawingRectangleXMax; + uint32_t DrawingRectangleOriginY; + uint32_t DrawingRectangleOriginX; +}; + +static inline void +GEN75_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_DRAWING_RECTANGLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->CoreModeSelect, 14, 15) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ClippedDrawingRectangleYMin, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMin, 0, 15) | + 0; + + dw[2] = + __gen_field(values->ClippedDrawingRectangleYMax, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMax, 0, 15) | + 0; + + dw[3] = + __gen_field(values->DrawingRectangleOriginY, 16, 31) | + __gen_field(values->DrawingRectangleOriginX, 0, 15) | + 0; + +} + +#define GEN75_3DSTATE_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 29, \ + .DwordLength = 4 + +#define GEN75_3DSTATE_DS_length 0x00000006 + +struct GEN75_3DSTATE_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleDomainPointDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool AccessesUAV; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t PatchURBEntryReadLength; + uint32_t PatchURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + bool StatisticsEnable; + bool ComputeWCoordinateEnable; + bool DSCacheDisable; + bool DSFunctionEnable; +}; + +static inline void +GEN75_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleDomainPointDispatch, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->AccessesUAV, 14, 14) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | + __gen_field(values->PatchURBEntryReadLength, 11, 17) | + __gen_field(values->PatchURBEntryReadOffset, 4, 9) | + 0; + + dw[5] = + __gen_field(values->MaximumNumberofThreads, 21, 29) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->ComputeWCoordinateEnable, 2, 2) | + __gen_field(values->DSCacheDisable, 1, 1) | + __gen_field(values->DSFunctionEnable, 0, 0) | + 0; + +} + +#define GEN75_3DSTATE_GATHER_CONSTANT_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_GATHER_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 55 + +#define GEN75_3DSTATE_GATHER_CONSTANT_DS_length 0x00000000 + +#define GEN75_GATHER_CONSTANT_ENTRY_length 0x00000001 + +struct GEN75_GATHER_CONSTANT_ENTRY { + uint32_t ConstantBufferOffset; + uint32_t ChannelMask; + uint32_t BindingTableIndexOffset; +}; + +static inline void +GEN75_GATHER_CONSTANT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_GATHER_CONSTANT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->ConstantBufferOffset, 8, 15) | + __gen_field(values->ChannelMask, 4, 7) | + __gen_field(values->BindingTableIndexOffset, 0, 3) | + 0; + +} + +struct GEN75_3DSTATE_GATHER_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_GATHER_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GATHER_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_GATHER_CONSTANT_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_GATHER_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 53 + +#define GEN75_3DSTATE_GATHER_CONSTANT_GS_length 0x00000000 + +struct GEN75_3DSTATE_GATHER_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_GATHER_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GATHER_CONSTANT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_GATHER_CONSTANT_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_GATHER_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 54 + +#define GEN75_3DSTATE_GATHER_CONSTANT_HS_length 0x00000000 + +struct GEN75_3DSTATE_GATHER_CONSTANT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_GATHER_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GATHER_CONSTANT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_GATHER_CONSTANT_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_GATHER_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 56 + +#define GEN75_3DSTATE_GATHER_CONSTANT_PS_length 0x00000000 + +struct GEN75_3DSTATE_GATHER_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9Enable; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_GATHER_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GATHER_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9Enable, 4, 4) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_GATHER_CONSTANT_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_GATHER_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 52 + +#define GEN75_3DSTATE_GATHER_CONSTANT_VS_length 0x00000000 + +struct GEN75_3DSTATE_GATHER_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9Enable; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_GATHER_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GATHER_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9Enable, 4, 4) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_GATHER_POOL_ALLOC_length_bias 0x00000002 +#define GEN75_3DSTATE_GATHER_POOL_ALLOC_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 1 + +#define GEN75_3DSTATE_GATHER_POOL_ALLOC_length 0x00000003 + +struct GEN75_3DSTATE_GATHER_POOL_ALLOC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GatherPoolBaseAddress; + bool GatherPoolEnable; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; + __gen_address_type GatherPoolUpperBound; +}; + +static inline void +GEN75_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GATHER_POOL_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_MemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + uint32_t dw1 = + __gen_field(values->GatherPoolEnable, 11, 11) | + __gen_mbo(4, 5) | + __gen_field(dw_MemoryObjectControlState, 0, 3) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->GatherPoolBaseAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->GatherPoolUpperBound, dw2); + +} + +#define GEN75_3DSTATE_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 5 + +#define GEN75_3DSTATE_GS_length 0x00000007 + +struct GEN75_3DSTATE_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; + uint32_t SingleProgramFlowSPF; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnableVME; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + uint32_t GSaccessesUAV; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t OutputVertexSize; + uint32_t OutputTopology; + uint32_t VertexURBEntryReadLength; + bool IncludeVertexHandles; + uint32_t VertexURBEntryReadOffset; + uint32_t DispatchGRFStartRegisterforURBData; + uint32_t MaximumNumberofThreads; + uint32_t ControlDataHeaderSize; + uint32_t InstanceControl; + uint32_t DefaultStreamID; +#define SINGLE 0 +#define DUAL_INSTANCE 1 +#define DUAL_OBJECT 2 + uint32_t DispatchMode; + uint32_t GSStatisticsEnable; + uint32_t GSInvocationsIncrementValue; + bool IncludePrimitiveID; + uint32_t Hint; +#define REORDER_LEADING 0 +#define REORDER_TRAILING 1 + uint32_t ReorderMode; + bool DiscardAdjacency; + bool GSEnable; +#define GSCTL_CUT 0 +#define GSCTL_SID 1 + uint32_t ControlDataFormat; + uint32_t SemaphoreHandle; +}; + +static inline void +GEN75_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleProgramFlowSPF, 31, 31) | + __gen_field(values->VectorMaskEnableVME, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->GSaccessesUAV, 12, 12) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->OutputVertexSize, 23, 28) | + __gen_field(values->OutputTopology, 17, 22) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->IncludeVertexHandles, 10, 10) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + __gen_field(values->DispatchGRFStartRegisterforURBData, 0, 3) | + 0; + + dw[5] = + __gen_field(values->MaximumNumberofThreads, 24, 31) | + __gen_field(values->ControlDataHeaderSize, 20, 23) | + __gen_field(values->InstanceControl, 15, 19) | + __gen_field(values->DefaultStreamID, 13, 14) | + __gen_field(values->DispatchMode, 11, 12) | + __gen_field(values->GSStatisticsEnable, 10, 10) | + __gen_field(values->GSInvocationsIncrementValue, 5, 9) | + __gen_field(values->IncludePrimitiveID, 4, 4) | + __gen_field(values->Hint, 3, 3) | + __gen_field(values->ReorderMode, 2, 2) | + __gen_field(values->DiscardAdjacency, 1, 1) | + __gen_field(values->GSEnable, 0, 0) | + 0; + + dw[6] = + __gen_field(values->ControlDataFormat, 31, 31) | + __gen_offset(values->SemaphoreHandle, 0, 12) | + 0; + +} + +#define GEN75_3DSTATE_HIER_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN75_3DSTATE_HIER_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 1 + +#define GEN75_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000003 + +struct GEN75_3DSTATE_HIER_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE HierarchicalDepthBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; +}; + +static inline void +GEN75_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_HIER_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_HierarchicalDepthBufferObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_HierarchicalDepthBufferObjectControlState, &values->HierarchicalDepthBufferObjectControlState); + dw[1] = + __gen_field(dw_HierarchicalDepthBufferObjectControlState, 25, 28) | + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + +} + +#define GEN75_3DSTATE_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 27, \ + .DwordLength = 5 + +#define GEN75_3DSTATE_HS_length 0x00000007 + +struct GEN75_3DSTATE_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t MaximumNumberofThreads; + bool Enable; + bool StatisticsEnable; + uint32_t InstanceCount; + uint32_t KernelStartPointer; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; + bool HSaccessesUAV; + bool IncludeVertexHandles; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t SemaphoreHandle; +}; + +static inline void +GEN75_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 12, 12) | + __gen_field(values->MaximumNumberofThreads, 0, 7) | + 0; + + dw[2] = + __gen_field(values->Enable, 31, 31) | + __gen_field(values->StatisticsEnable, 29, 29) | + __gen_field(values->InstanceCount, 0, 3) | + 0; + + dw[3] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[4] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[5] = + __gen_field(values->SingleProgramFlow, 27, 27) | + __gen_field(values->VectorMaskEnable, 26, 26) | + __gen_field(values->HSaccessesUAV, 25, 25) | + __gen_field(values->IncludeVertexHandles, 24, 24) | + __gen_field(values->DispatchGRFStartRegisterForURBData, 19, 23) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[6] = + __gen_offset(values->SemaphoreHandle, 0, 12) | + 0; + +} + +#define GEN75_3DSTATE_INDEX_BUFFER_length_bias 0x00000002 +#define GEN75_3DSTATE_INDEX_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 1 + +#define GEN75_3DSTATE_INDEX_BUFFER_length 0x00000003 + +struct GEN75_3DSTATE_INDEX_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; +#define INDEX_BYTE 0 +#define INDEX_WORD 1 +#define INDEX_DWORD 2 + uint32_t IndexFormat; + uint32_t DwordLength; + __gen_address_type BufferStartingAddress; + __gen_address_type BufferEndingAddress; +}; + +static inline void +GEN75_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_INDEX_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_MemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_MemoryObjectControlState, 12, 15) | + __gen_field(values->IndexFormat, 8, 9) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->BufferEndingAddress, dw2); + +} + +#define GEN75_3DSTATE_LINE_STIPPLE_length_bias 0x00000002 +#define GEN75_3DSTATE_LINE_STIPPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 8, \ + .DwordLength = 1 + +#define GEN75_3DSTATE_LINE_STIPPLE_length 0x00000003 + +struct GEN75_3DSTATE_LINE_STIPPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool ModifyEnableCurrentRepeatCounterCurrentStippleIndex; + uint32_t CurrentRepeatCounter; + uint32_t CurrentStippleIndex; + uint32_t LineStipplePattern; + float LineStippleInverseRepeatCount; + uint32_t LineStippleRepeatCount; +}; + +static inline void +GEN75_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_LINE_STIPPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ModifyEnableCurrentRepeatCounterCurrentStippleIndex, 31, 31) | + __gen_field(values->CurrentRepeatCounter, 21, 29) | + __gen_field(values->CurrentStippleIndex, 16, 19) | + __gen_field(values->LineStipplePattern, 0, 15) | + 0; + + dw[2] = + __gen_field(values->LineStippleInverseRepeatCount * (1 << 16), 15, 31) | + __gen_field(values->LineStippleRepeatCount, 0, 8) | + 0; + +} + +#define GEN75_3DSTATE_MONOFILTER_SIZE_length_bias 0x00000002 +#define GEN75_3DSTATE_MONOFILTER_SIZE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_MONOFILTER_SIZE_length 0x00000002 + +struct GEN75_3DSTATE_MONOFILTER_SIZE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t MonochromeFilterWidth; + uint32_t MonochromeFilterHeight; +}; + +static inline void +GEN75_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_MONOFILTER_SIZE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->MonochromeFilterWidth, 3, 5) | + __gen_field(values->MonochromeFilterHeight, 0, 2) | + 0; + +} + +#define GEN75_3DSTATE_MULTISAMPLE_length_bias 0x00000002 +#define GEN75_3DSTATE_MULTISAMPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 13, \ + .DwordLength = 2 + +#define GEN75_3DSTATE_MULTISAMPLE_length 0x00000004 + +struct GEN75_3DSTATE_MULTISAMPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool MultiSampleEnable; +#define PIXLOC_CENTER 0 +#define PIXLOC_UL_CORNER 1 + uint32_t PixelLocation; +#define NUMSAMPLES_1 0 +#define NUMSAMPLES_4 2 +#define NUMSAMPLES_8 3 + uint32_t NumberofMultisamples; + float Sample3XOffset; + float Sample3YOffset; + float Sample2XOffset; + float Sample2YOffset; + float Sample1XOffset; + float Sample1YOffset; + float Sample0XOffset; + float Sample0YOffset; + float Sample7XOffset; + float Sample7YOffset; + float Sample6XOffset; + float Sample6YOffset; + float Sample5XOffset; + float Sample5YOffset; + float Sample4XOffset; + float Sample4YOffset; +}; + +static inline void +GEN75_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_MULTISAMPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->MultiSampleEnable, 5, 5) | + __gen_field(values->PixelLocation, 4, 4) | + __gen_field(values->NumberofMultisamples, 1, 3) | + 0; + + dw[2] = + __gen_field(values->Sample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample0YOffset * (1 << 4), 0, 3) | + 0; + + dw[3] = + __gen_field(values->Sample7XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample7YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample6XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample6YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample5XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample5YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample4XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample4YOffset * (1 << 4), 0, 3) | + 0; + +} + +#define GEN75_3DSTATE_POLY_STIPPLE_OFFSET_length_bias 0x00000002 +#define GEN75_3DSTATE_POLY_STIPPLE_OFFSET_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 + +struct GEN75_3DSTATE_POLY_STIPPLE_OFFSET { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PolygonStippleXOffset; + uint32_t PolygonStippleYOffset; +}; + +static inline void +GEN75_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PolygonStippleXOffset, 8, 12) | + __gen_field(values->PolygonStippleYOffset, 0, 4) | + 0; + +} + +#define GEN75_3DSTATE_POLY_STIPPLE_PATTERN_length_bias 0x00000002 +#define GEN75_3DSTATE_POLY_STIPPLE_PATTERN_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 31 + +#define GEN75_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 + +struct GEN75_3DSTATE_POLY_STIPPLE_PATTERN { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PatternRow[32]; +}; + +static inline void +GEN75_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + for (uint32_t i = 0, j = 1; i < 32; i += 1, j++) { + dw[j] = + __gen_field(values->PatternRow[i + 0], 0, 31) | + 0; + } + +} + +#define GEN75_3DSTATE_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 32, \ + .DwordLength = 6 + +#define GEN75_3DSTATE_PS_length 0x00000008 + +struct GEN75_3DSTATE_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer0; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlowSPF; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnableVME; + uint32_t SamplerCount; +#define FTZ 0 +#define RET 1 + uint32_t DenormalMode; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadPriority; +#define IEEE745 0 +#define Alt 1 + uint32_t FloatingPointMode; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t MaximumNumberofThreads; + uint32_t SampleMask; + bool PushConstantEnable; + bool AttributeEnable; + bool oMaskPresenttoRenderTarget; + bool RenderTargetFastClearEnable; + bool DualSourceBlendEnable; + bool RenderTargetResolveEnable; + bool PSAccessesUAV; +#define POSOFFSET_NONE 0 +#define POSOFFSET_CENTROID 2 +#define POSOFFSET_SAMPLE 3 + uint32_t PositionXYOffsetSelect; + bool _32PixelDispatchEnable; + bool _16PixelDispatchEnable; + bool _8PixelDispatchEnable; + uint32_t DispatchGRFStartRegisterforConstantSetupData0; + uint32_t DispatchGRFStartRegisterforConstantSetupData1; + uint32_t DispatchGRFStartRegisterforConstantSetupData2; + uint32_t KernelStartPointer1; + uint32_t KernelStartPointer2; +}; + +static inline void +GEN75_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer0, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleProgramFlowSPF, 31, 31) | + __gen_field(values->VectorMaskEnableVME, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->DenormalMode, 26, 26) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->RoundingMode, 14, 15) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->MaximumNumberofThreads, 23, 31) | + __gen_field(values->SampleMask, 12, 19) | + __gen_field(values->PushConstantEnable, 11, 11) | + __gen_field(values->AttributeEnable, 10, 10) | + __gen_field(values->oMaskPresenttoRenderTarget, 9, 9) | + __gen_field(values->RenderTargetFastClearEnable, 8, 8) | + __gen_field(values->DualSourceBlendEnable, 7, 7) | + __gen_field(values->RenderTargetResolveEnable, 6, 6) | + __gen_field(values->PSAccessesUAV, 5, 5) | + __gen_field(values->PositionXYOffsetSelect, 3, 4) | + __gen_field(values->_32PixelDispatchEnable, 2, 2) | + __gen_field(values->_16PixelDispatchEnable, 1, 1) | + __gen_field(values->_8PixelDispatchEnable, 0, 0) | + 0; + + dw[5] = + __gen_field(values->DispatchGRFStartRegisterforConstantSetupData0, 16, 22) | + __gen_field(values->DispatchGRFStartRegisterforConstantSetupData1, 8, 14) | + __gen_field(values->DispatchGRFStartRegisterforConstantSetupData2, 0, 6) | + 0; + + dw[6] = + __gen_offset(values->KernelStartPointer1, 6, 31) | + 0; + + dw[7] = + __gen_offset(values->KernelStartPointer2, 6, 31) | + 0; + +} + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 + +struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 + +struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 + +struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 + +struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 + +struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN75_3DSTATE_RAST_MULTISAMPLE_length_bias 0x00000002 +#define GEN75_3DSTATE_RAST_MULTISAMPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 14, \ + .DwordLength = 4 + +#define GEN75_3DSTATE_RAST_MULTISAMPLE_length 0x00000006 + +struct GEN75_3DSTATE_RAST_MULTISAMPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define NRM_NUMRASTSAMPLES_1 0 +#define NRM_NUMRASTSAMPLES_2 1 +#define NRM_NUMRASTSAMPLES_4 2 +#define NRM_NUMRASTSAMPLES_8 3 +#define NRM_NUMRASTSAMPLES_16 4 + uint32_t NumberofRasterizationMultisamples; + float Sample3XOffset; + float Sample3YOffset; + float Sample2XOffset; + float Sample2YOffset; + float Sample1XOffset; + float Sample1YOffset; + float Sample0XOffset; + float Sample0YOffset; + float Sample7XOffset; + float Sample7YOffset; + float Sample6XOffset; + float Sample6YOffset; + float Sample5XOffset; + float Sample5YOffset; + float Sample4XOffset; + float Sample4YOffset; + float Sample11XOffset; + float Sample11YOffset; + float Sample10XOffset; + float Sample10YOffset; + float Sample9XOffset; + float Sample9YOffset; + float Sample8XOffset; + float Sample8YOffset; + float Sample15XOffset; + float Sample15YOffset; + float Sample14XOffset; + float Sample14YOffset; + float Sample13XOffset; + float Sample13YOffset; + float Sample12XOffset; + float Sample12YOffset; +}; + +static inline void +GEN75_3DSTATE_RAST_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_RAST_MULTISAMPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->NumberofRasterizationMultisamples, 1, 3) | + 0; + + dw[2] = + __gen_field(values->Sample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample0YOffset * (1 << 4), 0, 3) | + 0; + + dw[3] = + __gen_field(values->Sample7XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample7YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample6XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample6YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample5XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample5YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample4XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample4YOffset * (1 << 4), 0, 3) | + 0; + + dw[4] = + __gen_field(values->Sample11XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample11YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample10XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample10YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample9XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample9YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample8XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample8YOffset * (1 << 4), 0, 3) | + 0; + + dw[5] = + __gen_field(values->Sample15XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample15YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample14XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample14YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample13XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample13YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample12XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample12YOffset * (1 << 4), 0, 3) | + 0; + +} + +#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2 + +#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0_length 0x00000000 + +#define GEN75_PALETTE_ENTRY_length 0x00000001 + +struct GEN75_PALETTE_ENTRY { + uint32_t Alpha; + uint32_t Red; + uint32_t Green; + uint32_t Blue; +}; + +static inline void +GEN75_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_PALETTE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->Alpha, 24, 31) | + __gen_field(values->Red, 16, 23) | + __gen_field(values->Green, 8, 15) | + __gen_field(values->Blue, 0, 7) | + 0; + +} + +struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 12 + +#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1_length 0x00000000 + +struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 45, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 + +struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSSamplerState; +}; + +static inline void +GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSSamplerState, 5, 31) | + 0; + +} + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 46, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 + +struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSSamplerState; +}; + +static inline void +GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSSamplerState, 5, 31) | + 0; + +} + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 44, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 + +struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSSamplerState; +}; + +static inline void +GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSSamplerState, 5, 31) | + 0; + +} + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 47, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 + +struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSSamplerState; +}; + +static inline void +GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSSamplerState, 5, 31) | + 0; + +} + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 43, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 + +struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSSamplerState; +}; + +static inline void +GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSSamplerState, 5, 31) | + 0; + +} + +#define GEN75_3DSTATE_SAMPLE_MASK_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLE_MASK_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_SAMPLE_MASK_length 0x00000002 + +struct GEN75_3DSTATE_SAMPLE_MASK { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SampleMask; +}; + +static inline void +GEN75_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLE_MASK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SampleMask, 0, 7) | + 0; + +} + +#define GEN75_3DSTATE_SBE_length_bias 0x00000002 +#define GEN75_3DSTATE_SBE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 31, \ + .DwordLength = 12 + +#define GEN75_3DSTATE_SBE_length 0x0000000e + +struct GEN75_3DSTATE_SBE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t AttributeSwizzleControlMode; + uint32_t NumberofSFOutputAttributes; + bool AttributeSwizzleEnable; +#define UPPERLEFT 0 +#define LOWERLEFT 1 + uint32_t PointSpriteTextureCoordinateOrigin; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + bool Attribute2n1ComponentOverrideW; + bool Attribute2n1ComponentOverrideZ; + bool Attribute2n1ComponentOverrideY; + bool Attribute2n1ComponentOverrideX; +#define CONST_0000 0 +#define CONST_0001_FLOAT 1 +#define CONST_1111_FLOAT 2 +#define PRIM_ID 3 + uint32_t Attribute2n1ConstantSource; +#define INPUTATTR 0 +#define INPUTATTR_FACING 1 +#define INPUTATTR_W 2 +#define INPUTATTR_FACING_W 3 + uint32_t Attribute2n1SwizzleSelect; + uint32_t Attribute2n1SourceAttribute; + bool Attribute2nComponentOverrideW; + bool Attribute2nComponentOverrideZ; + bool Attribute2nComponentOverrideY; + bool Attribute2nComponentOverrideX; +#define CONST_0000 0 +#define CONST_0001_FLOAT 1 +#define CONST_1111_FLOAT 2 +#define PRIM_ID 3 + uint32_t Attribute2nConstantSource; +#define INPUTATTR 0 +#define INPUTATTR_FACING 1 +#define INPUTATTR_W 2 +#define INPUTATTR_FACING_W 3 + uint32_t Attribute2nSwizzleSelect; + uint32_t Attribute2nSourceAttribute; + uint32_t PointSpriteTextureCoordinateEnable; + uint32_t ConstantInterpolationEnable310; + uint32_t Attribute7WrapShortestEnables; + uint32_t Attribute6WrapShortestEnables; + uint32_t Attribute5WrapShortestEnables; + uint32_t Attribute4WrapShortestEnables; + uint32_t Attribute3WrapShortestEnables; + uint32_t Attribute2WrapShortestEnables; + uint32_t Attribute1WrapShortestEnables; + uint32_t Attribute0WrapShortestEnables; + uint32_t Attribute15WrapShortestEnables; + uint32_t Attribute14WrapShortestEnables; + uint32_t Attribute13WrapShortestEnables; + uint32_t Attribute12WrapShortestEnables; + uint32_t Attribute11WrapShortestEnables; + uint32_t Attribute10WrapShortestEnables; + uint32_t Attribute9WrapShortestEnables; + uint32_t Attribute8WrapShortestEnables; +}; + +static inline void +GEN75_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SBE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AttributeSwizzleControlMode, 28, 28) | + __gen_field(values->NumberofSFOutputAttributes, 22, 27) | + __gen_field(values->AttributeSwizzleEnable, 21, 21) | + __gen_field(values->PointSpriteTextureCoordinateOrigin, 20, 20) | + __gen_field(values->VertexURBEntryReadLength, 11, 15) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[2] = + __gen_field(values->Attribute2n1ComponentOverrideW, 31, 31) | + __gen_field(values->Attribute2n1ComponentOverrideZ, 30, 30) | + __gen_field(values->Attribute2n1ComponentOverrideY, 29, 29) | + __gen_field(values->Attribute2n1ComponentOverrideX, 28, 28) | + __gen_field(values->Attribute2n1ConstantSource, 25, 26) | + __gen_field(values->Attribute2n1SwizzleSelect, 22, 23) | + __gen_field(values->Attribute2n1SourceAttribute, 16, 20) | + __gen_field(values->Attribute2nComponentOverrideW, 15, 15) | + __gen_field(values->Attribute2nComponentOverrideZ, 14, 14) | + __gen_field(values->Attribute2nComponentOverrideY, 13, 13) | + __gen_field(values->Attribute2nComponentOverrideX, 12, 12) | + __gen_field(values->Attribute2nConstantSource, 9, 10) | + __gen_field(values->Attribute2nSwizzleSelect, 6, 7) | + __gen_field(values->Attribute2nSourceAttribute, 0, 4) | + 0; + + dw[10] = + __gen_field(values->PointSpriteTextureCoordinateEnable, 0, 31) | + 0; + + dw[11] = + __gen_field(values->ConstantInterpolationEnable310, 0, 31) | + 0; + + dw[12] = + __gen_field(values->Attribute7WrapShortestEnables, 28, 31) | + __gen_field(values->Attribute6WrapShortestEnables, 24, 27) | + __gen_field(values->Attribute5WrapShortestEnables, 20, 23) | + __gen_field(values->Attribute4WrapShortestEnables, 16, 19) | + __gen_field(values->Attribute3WrapShortestEnables, 12, 15) | + __gen_field(values->Attribute2WrapShortestEnables, 8, 11) | + __gen_field(values->Attribute1WrapShortestEnables, 4, 7) | + __gen_field(values->Attribute0WrapShortestEnables, 0, 3) | + 0; + + dw[13] = + __gen_field(values->Attribute15WrapShortestEnables, 28, 31) | + __gen_field(values->Attribute14WrapShortestEnables, 24, 27) | + __gen_field(values->Attribute13WrapShortestEnables, 20, 23) | + __gen_field(values->Attribute12WrapShortestEnables, 16, 19) | + __gen_field(values->Attribute11WrapShortestEnables, 12, 15) | + __gen_field(values->Attribute10WrapShortestEnables, 8, 11) | + __gen_field(values->Attribute9WrapShortestEnables, 4, 7) | + __gen_field(values->Attribute8WrapShortestEnables, 0, 3) | + 0; + +} + +#define GEN75_3DSTATE_SCISSOR_STATE_POINTERS_length_bias 0x00000002 +#define GEN75_3DSTATE_SCISSOR_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 15, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 + +struct GEN75_3DSTATE_SCISSOR_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ScissorRectPointer; +}; + +static inline void +GEN75_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ScissorRectPointer, 5, 31) | + 0; + +} + +#define GEN75_3DSTATE_SF_length_bias 0x00000002 +#define GEN75_3DSTATE_SF_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 5 + +#define GEN75_3DSTATE_SF_length 0x00000007 + +struct GEN75_3DSTATE_SF { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define D32_FLOAT_S8X24_UINT 0 +#define D32_FLOAT 1 +#define D24_UNORM_S8_UINT 2 +#define D24_UNORM_X8_UINT 3 +#define D16_UNORM 5 + uint32_t DepthBufferSurfaceFormat; + bool LegacyGlobalDepthBiasEnable; + bool StatisticsEnable; + bool GlobalDepthOffsetEnableSolid; + bool GlobalDepthOffsetEnableWireframe; + bool GlobalDepthOffsetEnablePoint; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t FrontFaceFillMode; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t BackFaceFillMode; + bool ViewTransformEnable; + uint32_t FrontWinding; + bool AntiAliasingEnable; +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_FRONT 2 +#define CULLMODE_BACK 3 + uint32_t CullMode; + float LineWidth; + uint32_t LineEndCapAntialiasingRegionWidth; + bool LineStippleEnable; + bool ScissorRectangleEnable; + bool RTIndependentRasterizationEnable; + uint32_t MultisampleRasterizationMode; + bool LastPixelEnable; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleStripListProvokingVertexSelect; + uint32_t LineStripListProvokingVertexSelect; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleFanProvokingVertexSelect; +#define AALINEDISTANCE_TRUE 1 + uint32_t AALineDistanceMode; + uint32_t VertexSubPixelPrecisionSelect; + uint32_t UsePointWidthState; + float PointWidth; + float GlobalDepthOffsetConstant; + float GlobalDepthOffsetScale; + float GlobalDepthOffsetClamp; +}; + +static inline void +GEN75_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DepthBufferSurfaceFormat, 12, 14) | + __gen_field(values->LegacyGlobalDepthBiasEnable, 11, 11) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->GlobalDepthOffsetEnableSolid, 9, 9) | + __gen_field(values->GlobalDepthOffsetEnableWireframe, 8, 8) | + __gen_field(values->GlobalDepthOffsetEnablePoint, 7, 7) | + __gen_field(values->FrontFaceFillMode, 5, 6) | + __gen_field(values->BackFaceFillMode, 3, 4) | + __gen_field(values->ViewTransformEnable, 1, 1) | + __gen_field(values->FrontWinding, 0, 0) | + 0; + + dw[2] = + __gen_field(values->AntiAliasingEnable, 31, 31) | + __gen_field(values->CullMode, 29, 30) | + __gen_field(values->LineWidth * (1 << 7), 18, 27) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 16, 17) | + __gen_field(values->LineStippleEnable, 14, 14) | + __gen_field(values->ScissorRectangleEnable, 11, 11) | + __gen_field(values->RTIndependentRasterizationEnable, 10, 10) | + __gen_field(values->MultisampleRasterizationMode, 8, 9) | + 0; + + dw[3] = + __gen_field(values->LastPixelEnable, 31, 31) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 29, 30) | + __gen_field(values->LineStripListProvokingVertexSelect, 27, 28) | + __gen_field(values->TriangleFanProvokingVertexSelect, 25, 26) | + __gen_field(values->AALineDistanceMode, 14, 14) | + __gen_field(values->VertexSubPixelPrecisionSelect, 12, 12) | + __gen_field(values->UsePointWidthState, 11, 11) | + __gen_field(values->PointWidth * (1 << 3), 0, 10) | + 0; + + dw[4] = + __gen_float(values->GlobalDepthOffsetConstant) | + 0; + + dw[5] = + __gen_float(values->GlobalDepthOffsetScale) | + 0; + + dw[6] = + __gen_float(values->GlobalDepthOffsetClamp) | + 0; + +} + +#define GEN75_3DSTATE_SO_BUFFER_length_bias 0x00000002 +#define GEN75_3DSTATE_SO_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 2 + +#define GEN75_3DSTATE_SO_BUFFER_length 0x00000004 + +struct GEN75_3DSTATE_SO_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SOBufferIndex; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE SOBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + __gen_address_type SurfaceEndAddress; +}; + +static inline void +GEN75_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SO_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_SOBufferObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SOBufferObjectControlState, &values->SOBufferObjectControlState); + dw[1] = + __gen_field(values->SOBufferIndex, 29, 30) | + __gen_field(dw_SOBufferObjectControlState, 25, 28) | + __gen_field(values->SurfacePitch, 0, 11) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->SurfaceEndAddress, dw3); + +} + +#define GEN75_3DSTATE_SO_DECL_LIST_length_bias 0x00000002 +#define GEN75_3DSTATE_SO_DECL_LIST_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 23 + +#define GEN75_3DSTATE_SO_DECL_LIST_length 0x00000000 + +#define GEN75_SO_DECL_ENTRY_length 0x00000002 + +#define GEN75_SO_DECL_length 0x00000001 + +struct GEN75_SO_DECL { + uint32_t OutputBufferSlot; + uint32_t HoleFlag; + uint32_t RegisterIndex; + uint32_t ComponentMask; +}; + +static inline void +GEN75_SO_DECL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SO_DECL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->OutputBufferSlot, 12, 13) | + __gen_field(values->HoleFlag, 11, 11) | + __gen_field(values->RegisterIndex, 4, 9) | + __gen_field(values->ComponentMask, 0, 3) | + 0; + +} + +struct GEN75_SO_DECL_ENTRY { + struct GEN75_SO_DECL Stream3Decl; + struct GEN75_SO_DECL Stream2Decl; + struct GEN75_SO_DECL Stream1Decl; + struct GEN75_SO_DECL Stream0Decl; +}; + +static inline void +GEN75_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SO_DECL_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_Stream3Decl; + GEN75_SO_DECL_pack(data, &dw_Stream3Decl, &values->Stream3Decl); + uint32_t dw_Stream2Decl; + GEN75_SO_DECL_pack(data, &dw_Stream2Decl, &values->Stream2Decl); + uint32_t dw_Stream1Decl; + GEN75_SO_DECL_pack(data, &dw_Stream1Decl, &values->Stream1Decl); + uint32_t dw_Stream0Decl; + GEN75_SO_DECL_pack(data, &dw_Stream0Decl, &values->Stream0Decl); + uint64_t qw0 = + __gen_field(dw_Stream3Decl, 48, 63) | + __gen_field(dw_Stream2Decl, 32, 47) | + __gen_field(dw_Stream1Decl, 16, 31) | + __gen_field(dw_Stream0Decl, 0, 15) | + 0; + + dw[0] = qw0; + dw[1] = qw0 >> 32; + +} + +struct GEN75_3DSTATE_SO_DECL_LIST { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StreamtoBufferSelects3; + uint32_t StreamtoBufferSelects2; + uint32_t StreamtoBufferSelects1; + uint32_t StreamtoBufferSelects0; + uint32_t NumEntries3; + uint32_t NumEntries2; + uint32_t NumEntries1; + uint32_t NumEntries0; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SO_DECL_LIST * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->StreamtoBufferSelects3, 12, 15) | + __gen_field(values->StreamtoBufferSelects2, 8, 11) | + __gen_field(values->StreamtoBufferSelects1, 4, 7) | + __gen_field(values->StreamtoBufferSelects0, 0, 3) | + 0; + + dw[2] = + __gen_field(values->NumEntries3, 24, 31) | + __gen_field(values->NumEntries2, 16, 23) | + __gen_field(values->NumEntries1, 8, 15) | + __gen_field(values->NumEntries0, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_STENCIL_BUFFER_length_bias 0x00000002 +#define GEN75_3DSTATE_STENCIL_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 1 + +#define GEN75_3DSTATE_STENCIL_BUFFER_length 0x00000003 + +struct GEN75_3DSTATE_STENCIL_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StencilBufferEnable; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE StencilBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; +}; + +static inline void +GEN75_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_STENCIL_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_StencilBufferObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StencilBufferObjectControlState, &values->StencilBufferObjectControlState); + dw[1] = + __gen_field(values->StencilBufferEnable, 31, 31) | + __gen_field(dw_StencilBufferObjectControlState, 25, 28) | + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + +} + +#define GEN75_3DSTATE_STREAMOUT_length_bias 0x00000002 +#define GEN75_3DSTATE_STREAMOUT_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 30, \ + .DwordLength = 1 + +#define GEN75_3DSTATE_STREAMOUT_length 0x00000003 + +struct GEN75_3DSTATE_STREAMOUT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SOFunctionEnable; + uint32_t RenderingDisable; + uint32_t RenderStreamSelect; +#define LEADING 0 +#define TRAILING 1 + uint32_t ReorderMode; + bool SOStatisticsEnable; + uint32_t SOBufferEnable3; + uint32_t SOBufferEnable2; + uint32_t SOBufferEnable1; + uint32_t SOBufferEnable0; + uint32_t Stream3VertexReadOffset; + uint32_t Stream3VertexReadLength; + uint32_t Stream2VertexReadOffset; + uint32_t Stream2VertexReadLength; + uint32_t Stream1VertexReadOffset; + uint32_t Stream1VertexReadLength; + uint32_t Stream0VertexReadOffset; + uint32_t Stream0VertexReadLength; +}; + +static inline void +GEN75_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_STREAMOUT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SOFunctionEnable, 31, 31) | + __gen_field(values->RenderingDisable, 30, 30) | + __gen_field(values->RenderStreamSelect, 27, 28) | + __gen_field(values->ReorderMode, 26, 26) | + __gen_field(values->SOStatisticsEnable, 25, 25) | + __gen_field(values->SOBufferEnable3, 11, 11) | + __gen_field(values->SOBufferEnable2, 10, 10) | + __gen_field(values->SOBufferEnable1, 9, 9) | + __gen_field(values->SOBufferEnable0, 8, 8) | + 0; + + dw[2] = + __gen_field(values->Stream3VertexReadOffset, 29, 29) | + __gen_field(values->Stream3VertexReadLength, 24, 28) | + __gen_field(values->Stream2VertexReadOffset, 21, 21) | + __gen_field(values->Stream2VertexReadLength, 16, 20) | + __gen_field(values->Stream1VertexReadOffset, 13, 13) | + __gen_field(values->Stream1VertexReadLength, 8, 12) | + __gen_field(values->Stream0VertexReadOffset, 5, 5) | + __gen_field(values->Stream0VertexReadLength, 0, 4) | + 0; + +} + +#define GEN75_3DSTATE_TE_length_bias 0x00000002 +#define GEN75_3DSTATE_TE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 28, \ + .DwordLength = 2 + +#define GEN75_3DSTATE_TE_length 0x00000004 + +struct GEN75_3DSTATE_TE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define INTEGER 0 +#define ODD_FRACTIONAL 1 +#define EVEN_FRACTIONAL 2 + uint32_t Partitioning; +#define POINT 0 +#define OUTPUT_LINE 1 +#define OUTPUT_TRI_CW 2 +#define OUTPUT_TRI_CCW 3 + uint32_t OutputTopology; +#define QUAD 0 +#define TRI 1 +#define ISOLINE 2 + uint32_t TEDomain; +#define HW_TESS 0 +#define SW_TESS 1 + uint32_t TEMode; + bool TEEnable; + float MaximumTessellationFactorOdd; + float MaximumTessellationFactorNotOdd; +}; + +static inline void +GEN75_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_TE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Partitioning, 12, 13) | + __gen_field(values->OutputTopology, 8, 9) | + __gen_field(values->TEDomain, 4, 5) | + __gen_field(values->TEMode, 1, 2) | + __gen_field(values->TEEnable, 0, 0) | + 0; + + dw[2] = + __gen_float(values->MaximumTessellationFactorOdd) | + 0; + + dw[3] = + __gen_float(values->MaximumTessellationFactorNotOdd) | + 0; + +} + +#define GEN75_3DSTATE_URB_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_URB_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 50, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_URB_DS_length 0x00000002 + +struct GEN75_3DSTATE_URB_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t DSURBStartingAddress; + uint32_t DSURBEntryAllocationSize; + uint32_t DSNumberofURBEntries; +}; + +static inline void +GEN75_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_URB_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DSURBStartingAddress, 25, 30) | + __gen_field(values->DSURBEntryAllocationSize, 16, 24) | + __gen_field(values->DSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN75_3DSTATE_URB_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_URB_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 51, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_URB_GS_length 0x00000002 + +struct GEN75_3DSTATE_URB_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t GSURBStartingAddress; + uint32_t GSURBEntryAllocationSize; + uint32_t GSNumberofURBEntries; +}; + +static inline void +GEN75_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_URB_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->GSURBStartingAddress, 25, 30) | + __gen_field(values->GSURBEntryAllocationSize, 16, 24) | + __gen_field(values->GSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN75_3DSTATE_URB_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_URB_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 49, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_URB_HS_length 0x00000002 + +struct GEN75_3DSTATE_URB_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t HSURBStartingAddress; + uint32_t HSURBEntryAllocationSize; + uint32_t HSNumberofURBEntries; +}; + +static inline void +GEN75_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_URB_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->HSURBStartingAddress, 25, 30) | + __gen_field(values->HSURBEntryAllocationSize, 16, 24) | + __gen_field(values->HSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN75_3DSTATE_VERTEX_BUFFERS_length_bias 0x00000002 +#define GEN75_3DSTATE_VERTEX_BUFFERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 8 + +#define GEN75_3DSTATE_VERTEX_BUFFERS_length 0x00000000 + +#define GEN75_VERTEX_BUFFER_STATE_length 0x00000004 + +struct GEN75_VERTEX_BUFFER_STATE { + uint32_t VertexBufferIndex; +#define VERTEXDATA 0 +#define INSTANCEDATA 1 + uint32_t BufferAccessType; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE VertexBufferMemoryObjectControlState; + uint32_t AddressModifyEnable; + bool NullVertexBuffer; + uint32_t VertexFetchInvalidate; + uint32_t BufferPitch; + __gen_address_type BufferStartingAddress; + __gen_address_type EndAddress; + uint32_t InstanceDataStepRate; +}; + +static inline void +GEN75_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_VERTEX_BUFFER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_VertexBufferMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_VertexBufferMemoryObjectControlState, &values->VertexBufferMemoryObjectControlState); + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->BufferAccessType, 20, 20) | + __gen_field(dw_VertexBufferMemoryObjectControlState, 16, 19) | + __gen_field(values->AddressModifyEnable, 14, 14) | + __gen_field(values->NullVertexBuffer, 13, 13) | + __gen_field(values->VertexFetchInvalidate, 12, 12) | + __gen_field(values->BufferPitch, 0, 11) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->EndAddress, dw2); + + dw[3] = + __gen_field(values->InstanceDataStepRate, 0, 31) | + 0; + +} + +struct GEN75_3DSTATE_VERTEX_BUFFERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VERTEX_BUFFERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_VERTEX_ELEMENTS_length_bias 0x00000002 +#define GEN75_3DSTATE_VERTEX_ELEMENTS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 9 + +#define GEN75_3DSTATE_VERTEX_ELEMENTS_length 0x00000000 + +#define GEN75_VERTEX_ELEMENT_STATE_length 0x00000002 + +struct GEN75_VERTEX_ELEMENT_STATE { + uint32_t VertexBufferIndex; + bool Valid; + uint32_t SourceElementFormat; + bool EdgeFlagEnable; + uint32_t SourceElementOffset; + uint32_t Component0Control; + uint32_t Component1Control; + uint32_t Component2Control; + uint32_t Component3Control; +}; + +static inline void +GEN75_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_VERTEX_ELEMENT_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->Valid, 25, 25) | + __gen_field(values->SourceElementFormat, 16, 24) | + __gen_field(values->EdgeFlagEnable, 15, 15) | + __gen_field(values->SourceElementOffset, 0, 11) | + 0; + + dw[1] = + __gen_field(values->Component0Control, 28, 30) | + __gen_field(values->Component1Control, 24, 26) | + __gen_field(values->Component2Control, 20, 22) | + __gen_field(values->Component3Control, 16, 18) | + 0; + +} + +struct GEN75_3DSTATE_VERTEX_ELEMENTS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VERTEX_ELEMENTS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_VF_length_bias 0x00000002 +#define GEN75_3DSTATE_VF_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 12, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_VF_length 0x00000002 + +struct GEN75_3DSTATE_VF { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool IndexedDrawCutIndexEnable; + uint32_t DwordLength; + uint32_t CutIndex; +}; + +static inline void +GEN75_3DSTATE_VF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->IndexedDrawCutIndexEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CutIndex, 0, 31) | + 0; + +} + +#define GEN75_3DSTATE_VF_STATISTICS_length_bias 0x00000001 +#define GEN75_3DSTATE_VF_STATISTICS_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 11 + +#define GEN75_3DSTATE_VF_STATISTICS_length 0x00000001 + +struct GEN75_3DSTATE_VF_STATISTICS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool StatisticsEnable; +}; + +static inline void +GEN75_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VF_STATISTICS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->StatisticsEnable, 0, 0) | + 0; + +} + +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length_bias 0x00000002 +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 35, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 + +struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t CCViewportPointer; +}; + +static inline void +GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->CCViewportPointer, 5, 31) | + 0; + +} + +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length_bias 0x00000002 +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 33, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 + +struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SFClipViewportPointer; +}; + +static inline void +GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SFClipViewportPointer, 6, 31) | + 0; + +} + +#define GEN75_3DSTATE_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 16, \ + .DwordLength = 4 + +#define GEN75_3DSTATE_VS_length 0x00000006 + +struct GEN75_3DSTATE_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleVertexDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnableVME; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool VSaccessesUAV; + bool SoftwareExceptionEnable; + uint32_t ScratchSpaceBaseOffset; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterforURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + bool StatisticsEnable; + bool VertexCacheDisable; + bool VSFunctionEnable; +}; + +static inline void +GEN75_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleVertexDispatch, 31, 31) | + __gen_field(values->VectorMaskEnableVME, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->VSaccessesUAV, 12, 12) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBaseOffset, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->DispatchGRFStartRegisterforURBData, 20, 24) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[5] = + __gen_field(values->MaximumNumberofThreads, 23, 31) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->VertexCacheDisable, 1, 1) | + __gen_field(values->VSFunctionEnable, 0, 0) | + 0; + +} + +#define GEN75_3DSTATE_WM_length_bias 0x00000002 +#define GEN75_3DSTATE_WM_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 1 + +#define GEN75_3DSTATE_WM_length 0x00000003 + +struct GEN75_3DSTATE_WM { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool StatisticsEnable; + bool DepthBufferClear; + bool ThreadDispatchEnable; + bool DepthBufferResolveEnable; + bool HierarchicalDepthBufferResolveEnable; + bool LegacyDiamondLineRasterization; + bool PixelShaderKillPixel; +#define PSCDEPTH_OFF 0 +#define PSCDEPTH_ON 1 +#define PSCDEPTH_ON_GE 2 +#define PSCDEPTH_ON_LE 3 + uint32_t PixelShaderComputedDepthMode; +#define EDSC_NORMAL 0 +#define EDSC_PSEXEC 1 +#define EDSC_PREPS 2 + uint32_t EarlyDepthStencilControl; + bool PixelShaderUsesSourceDepth; + bool PixelShaderUsesSourceW; +#define INTERP_PIXEL 0 +#define INTERP_CENTROID 2 +#define INTERP_SAMPLE 3 + uint32_t PositionZWInterpolationMode; + uint32_t BarycentricInterpolationMode; + bool PixelShaderUsesInputCoverageMask; + uint32_t LineEndCapAntialiasingRegionWidth; + uint32_t LineAntialiasingRegionWidth; + bool RTIndependentRasterizationEnable; + bool PolygonStippleEnable; + bool LineStippleEnable; +#define RASTRULE_UPPER_LEFT 0 +#define RASTRULE_UPPER_RIGHT 1 + uint32_t PointRasterizationRule; +#define MSRASTMODE_OFF_PIXEL 0 +#define MSRASTMODE_OFF_PATTERN 1 +#define MSRASTMODE_ON_PIXEL 2 +#define MSRASTMODE_ON_PATTERN 3 + uint32_t MultisampleRasterizationMode; +#define MSDISPMODE_PERSAMPLE 0 +#define MSDISPMODE_PERPIXEL 1 + uint32_t MultisampleDispatchMode; +#define OFF 0 +#define ON 1 + uint32_t PSUAVonly; +}; + +static inline void +GEN75_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_WM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StatisticsEnable, 31, 31) | + __gen_field(values->DepthBufferClear, 30, 30) | + __gen_field(values->ThreadDispatchEnable, 29, 29) | + __gen_field(values->DepthBufferResolveEnable, 28, 28) | + __gen_field(values->HierarchicalDepthBufferResolveEnable, 27, 27) | + __gen_field(values->LegacyDiamondLineRasterization, 26, 26) | + __gen_field(values->PixelShaderKillPixel, 25, 25) | + __gen_field(values->PixelShaderComputedDepthMode, 23, 24) | + __gen_field(values->EarlyDepthStencilControl, 21, 22) | + __gen_field(values->PixelShaderUsesSourceDepth, 20, 20) | + __gen_field(values->PixelShaderUsesSourceW, 19, 19) | + __gen_field(values->PositionZWInterpolationMode, 17, 18) | + __gen_field(values->BarycentricInterpolationMode, 11, 16) | + __gen_field(values->PixelShaderUsesInputCoverageMask, 10, 10) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 8, 9) | + __gen_field(values->LineAntialiasingRegionWidth, 6, 7) | + __gen_field(values->RTIndependentRasterizationEnable, 5, 5) | + __gen_field(values->PolygonStippleEnable, 4, 4) | + __gen_field(values->LineStippleEnable, 3, 3) | + __gen_field(values->PointRasterizationRule, 2, 2) | + __gen_field(values->MultisampleRasterizationMode, 0, 1) | + 0; + + dw[2] = + __gen_field(values->MultisampleDispatchMode, 31, 31) | + __gen_field(values->PSUAVonly, 30, 30) | + 0; + +} + +#define GEN75_GPGPU_OBJECT_length_bias 0x00000002 +#define GEN75_GPGPU_OBJECT_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 4, \ + .DwordLength = 6 + +#define GEN75_GPGPU_OBJECT_length 0x00000008 + +struct GEN75_GPGPU_OBJECT { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + bool PredicateEnable; + uint32_t DwordLength; + uint32_t SharedLocalMemoryFixedOffset; + uint32_t InterfaceDescriptorOffset; + uint32_t SharedLocalMemoryOffset; + uint32_t EndofThreadGroup; +#define Slice0 0 +#define Slice1 1 + uint32_t SliceDestinationSelect; +#define HalfSlice1 2 +#define HalfSlice0 1 +#define EitherHalfSlice 0 + uint32_t HalfSliceDestinationSelect; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; + uint32_t ThreadGroupIDX; + uint32_t ThreadGroupIDY; + uint32_t ThreadGroupIDZ; + uint32_t ExecutionMask; +}; + +static inline void +GEN75_GPGPU_OBJECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_GPGPU_OBJECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SharedLocalMemoryFixedOffset, 7, 7) | + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->SharedLocalMemoryOffset, 28, 31) | + __gen_field(values->EndofThreadGroup, 24, 24) | + __gen_field(values->SliceDestinationSelect, 19, 19) | + __gen_field(values->HalfSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 0, 31) | + 0; + + dw[4] = + __gen_field(values->ThreadGroupIDX, 0, 31) | + 0; + + dw[5] = + __gen_field(values->ThreadGroupIDY, 0, 31) | + 0; + + dw[6] = + __gen_field(values->ThreadGroupIDZ, 0, 31) | + 0; + + dw[7] = + __gen_field(values->ExecutionMask, 0, 31) | + 0; + +} + +#define GEN75_GPGPU_WALKER_length_bias 0x00000002 +#define GEN75_GPGPU_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcodeA = 5, \ + .DwordLength = 9 + +#define GEN75_GPGPU_WALKER_length 0x0000000b + +struct GEN75_GPGPU_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcodeA; + bool IndirectParameterEnable; + bool PredicateEnable; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; +#define SIMD8 0 +#define SIMD16 1 +#define SIMD32 2 + uint32_t SIMDSize; + uint32_t ThreadDepthCounterMaximum; + uint32_t ThreadHeightCounterMaximum; + uint32_t ThreadWidthCounterMaximum; + uint32_t ThreadGroupIDStartingX; + uint32_t ThreadGroupIDXDimension; + uint32_t ThreadGroupIDStartingY; + uint32_t ThreadGroupIDYDimension; + uint32_t ThreadGroupIDStartingZ; + uint32_t ThreadGroupIDZDimension; + uint32_t RightExecutionMask; + uint32_t BottomExecutionMask; +}; + +static inline void +GEN75_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_GPGPU_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcodeA, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->SIMDSize, 30, 31) | + __gen_field(values->ThreadDepthCounterMaximum, 16, 21) | + __gen_field(values->ThreadHeightCounterMaximum, 8, 13) | + __gen_field(values->ThreadWidthCounterMaximum, 0, 5) | + 0; + + dw[3] = + __gen_field(values->ThreadGroupIDStartingX, 0, 31) | + 0; + + dw[4] = + __gen_field(values->ThreadGroupIDXDimension, 0, 31) | + 0; + + dw[5] = + __gen_field(values->ThreadGroupIDStartingY, 0, 31) | + 0; + + dw[6] = + __gen_field(values->ThreadGroupIDYDimension, 0, 31) | + 0; + + dw[7] = + __gen_field(values->ThreadGroupIDStartingZ, 0, 31) | + 0; + + dw[8] = + __gen_field(values->ThreadGroupIDZDimension, 0, 31) | + 0; + + dw[9] = + __gen_field(values->RightExecutionMask, 0, 31) | + 0; + + dw[10] = + __gen_field(values->BottomExecutionMask, 0, 31) | + 0; + +} + +#define GEN75_MEDIA_CURBE_LOAD_length_bias 0x00000002 +#define GEN75_MEDIA_CURBE_LOAD_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 1, \ + .DwordLength = 2 + +#define GEN75_MEDIA_CURBE_LOAD_length 0x00000004 + +struct GEN75_MEDIA_CURBE_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t CURBETotalDataLength; + uint32_t CURBEDataStartAddress; +}; + +static inline void +GEN75_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_CURBE_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->CURBETotalDataLength, 0, 16) | + 0; + + dw[3] = + __gen_field(values->CURBEDataStartAddress, 0, 31) | + 0; + +} + +#define GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length_bias 0x00000002 +#define GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_header\ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 2, \ + .DwordLength = 2 + +#define GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 + +struct GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorTotalLength; + uint32_t InterfaceDescriptorDataStartAddress; +}; + +static inline void +GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->InterfaceDescriptorTotalLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->InterfaceDescriptorDataStartAddress, 0, 31) | + 0; + +} + +#define GEN75_MEDIA_OBJECT_length_bias 0x00000002 +#define GEN75_MEDIA_OBJECT_header \ + .CommandType = 3, \ + .MediaCommandPipeline = 2, \ + .MediaCommandOpcode = 1, \ + .MediaCommandSubOpcode = 0 + +#define GEN75_MEDIA_OBJECT_length 0x00000000 + +struct GEN75_MEDIA_OBJECT { + uint32_t CommandType; + uint32_t MediaCommandPipeline; + uint32_t MediaCommandOpcode; + uint32_t MediaCommandSubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; +#define Slice0 0 +#define Slice1 1 +#define EitherSlice 0 + uint32_t SliceDestinationSelect; +#define HalfSlice1 2 +#define HalfSlice0 1 +#define Eitherhalfslice 0 + uint32_t HalfSliceDestinationSelect; + uint32_t IndirectDataLength; + __gen_address_type IndirectDataStartAddress; + uint32_t ScoredboardY; + uint32_t ScoreboardX; + uint32_t ScoreboardColor; + bool ScoreboardMask; + /* variable length fields follow */ +}; + +static inline void +GEN75_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_OBJECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MediaCommandPipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->MediaCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->SliceDestinationSelect, 19, 19) | + __gen_field(values->HalfSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); + + dw[4] = + __gen_field(values->ScoredboardY, 16, 24) | + __gen_field(values->ScoreboardX, 0, 8) | + 0; + + dw[5] = + __gen_field(values->ScoreboardColor, 16, 19) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_MEDIA_OBJECT_PRT_length_bias 0x00000002 +#define GEN75_MEDIA_OBJECT_PRT_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 2, \ + .DwordLength = 14 + +#define GEN75_MEDIA_OBJECT_PRT_length 0x00000010 + +struct GEN75_MEDIA_OBJECT_PRT { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; + bool PRT_FenceNeeded; +#define Rootthreadqueue 0 +#define VFEstateflush 1 + uint32_t PRT_FenceType; + uint32_t InlineData[12]; +}; + +static inline void +GEN75_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_OBJECT_PRT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->PRT_FenceNeeded, 23, 23) | + __gen_field(values->PRT_FenceType, 22, 22) | + 0; + + dw[3] = + 0; + + for (uint32_t i = 0, j = 4; i < 12; i += 1, j++) { + dw[j] = + __gen_field(values->InlineData[i + 0], 0, 31) | + 0; + } + +} + +#define GEN75_MEDIA_OBJECT_WALKER_length_bias 0x00000002 +#define GEN75_MEDIA_OBJECT_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 3 + +#define GEN75_MEDIA_OBJECT_WALKER_length 0x00000000 + +struct GEN75_MEDIA_OBJECT_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; + bool ScoreboardMask; + bool DualMode; + bool Repel; + bool QuadMode; + uint32_t ColorCountMinusOne; + uint32_t MiddleLoopExtraSteps; + uint32_t LocalMidLoopUnitY; + uint32_t MidLoopUnitX; + uint32_t GlobalLoopExecCount; + uint32_t LocalLoopExecCount; + uint32_t BlockResolutionY; + uint32_t BlockResolutionX; + uint32_t LocalStartY; + uint32_t LocalStartX; + uint32_t LocalOuterLoopStrideY; + uint32_t LocalOuterLoopStrideX; + uint32_t LocalInnerLoopUnitY; + uint32_t LocalInnerLoopUnitX; + uint32_t GlobalResolutionY; + uint32_t GlobalResolutionX; + uint32_t GlobalStartY; + uint32_t GlobalStartX; + uint32_t GlobalOuterLoopStrideY; + uint32_t GlobalOuterLoopStrideX; + uint32_t GlobalInnerLoopUnitY; + uint32_t GlobalInnerLoopUnitX; + /* variable length fields follow */ +}; + +static inline void +GEN75_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_OBJECT_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 0, 31) | + 0; + + dw[4] = + 0; + + dw[5] = + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->DualMode, 31, 31) | + __gen_field(values->Repel, 30, 30) | + __gen_field(values->QuadMode, 29, 29) | + __gen_field(values->ColorCountMinusOne, 24, 27) | + __gen_field(values->MiddleLoopExtraSteps, 16, 20) | + __gen_field(values->LocalMidLoopUnitY, 12, 13) | + __gen_field(values->MidLoopUnitX, 8, 9) | + 0; + + dw[7] = + __gen_field(values->GlobalLoopExecCount, 16, 25) | + __gen_field(values->LocalLoopExecCount, 0, 9) | + 0; + + dw[8] = + __gen_field(values->BlockResolutionY, 16, 24) | + __gen_field(values->BlockResolutionX, 0, 8) | + 0; + + dw[9] = + __gen_field(values->LocalStartY, 16, 24) | + __gen_field(values->LocalStartX, 0, 8) | + 0; + + dw[10] = + 0; + + dw[11] = + __gen_field(values->LocalOuterLoopStrideY, 16, 25) | + __gen_field(values->LocalOuterLoopStrideX, 0, 9) | + 0; + + dw[12] = + __gen_field(values->LocalInnerLoopUnitY, 16, 25) | + __gen_field(values->LocalInnerLoopUnitX, 0, 9) | + 0; + + dw[13] = + __gen_field(values->GlobalResolutionY, 16, 24) | + __gen_field(values->GlobalResolutionX, 0, 8) | + 0; + + dw[14] = + __gen_field(values->GlobalStartY, 16, 25) | + __gen_field(values->GlobalStartX, 0, 9) | + 0; + + dw[15] = + __gen_field(values->GlobalOuterLoopStrideY, 16, 25) | + __gen_field(values->GlobalOuterLoopStrideX, 0, 9) | + 0; + + dw[16] = + __gen_field(values->GlobalInnerLoopUnitY, 16, 25) | + __gen_field(values->GlobalInnerLoopUnitX, 0, 9) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_MEDIA_STATE_FLUSH_length_bias 0x00000002 +#define GEN75_MEDIA_STATE_FLUSH_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 4, \ + .DwordLength = 0 + +#define GEN75_MEDIA_STATE_FLUSH_length 0x00000002 + +struct GEN75_MEDIA_STATE_FLUSH { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + bool DisablePreemption; + bool FlushtoGO; + uint32_t WatermarkRequired; + uint32_t InterfaceDescriptorOffset; +}; + +static inline void +GEN75_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_STATE_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->DisablePreemption, 8, 8) | + __gen_field(values->FlushtoGO, 7, 7) | + __gen_field(values->WatermarkRequired, 6, 6) | + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + +} + +#define GEN75_MEDIA_VFE_STATE_length_bias 0x00000002 +#define GEN75_MEDIA_VFE_STATE_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 0, \ + .DwordLength = 6 + +#define GEN75_MEDIA_VFE_STATE_length 0x00000008 + +struct GEN75_MEDIA_VFE_STATE { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t ScratchSpaceBasePointer; + uint32_t StackSize; + uint32_t PerThreadScratchSpace; + uint32_t MaximumNumberofThreads; + uint32_t NumberofURBEntries; +#define Maintainingtheexistingtimestampstate 0 +#define Resettingrelativetimerandlatchingtheglobaltimestamp 1 + uint32_t ResetGatewayTimer; +#define MaintainingOpenGatewayForwardMsgCloseGatewayprotocollegacymode 0 +#define BypassingOpenGatewayCloseGatewayprotocol 1 + uint32_t BypassGatewayControl; + uint32_t GPGPUMode; + uint32_t HalfSliceDisable; + uint32_t URBEntryAllocationSize; + uint32_t CURBEAllocationSize; +#define Scoreboarddisabled 0 +#define Scoreboardenabled 1 + uint32_t ScoreboardEnable; +#define StallingScoreboard 0 +#define NonStallingScoreboard 1 + uint32_t ScoreboardType; + uint32_t ScoreboardMask; + uint32_t Scoreboard3DeltaY; + uint32_t Scoreboard3DeltaX; + uint32_t Scoreboard2DeltaY; + uint32_t Scoreboard2DeltaX; + uint32_t Scoreboard1DeltaY; + uint32_t Scoreboard1DeltaX; + uint32_t Scoreboard0DeltaY; + uint32_t Scoreboard0DeltaX; + uint32_t Scoreboard7DeltaY; + uint32_t Scoreboard7DeltaX; + uint32_t Scoreboard6DeltaY; + uint32_t Scoreboard6DeltaX; + uint32_t Scoreboard5DeltaY; + uint32_t Scoreboard5DeltaX; + uint32_t Scoreboard4DeltaY; + uint32_t Scoreboard4DeltaX; +}; + +static inline void +GEN75_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_VFE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->StackSize, 4, 7) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[2] = + __gen_field(values->MaximumNumberofThreads, 16, 31) | + __gen_field(values->NumberofURBEntries, 8, 15) | + __gen_field(values->ResetGatewayTimer, 7, 7) | + __gen_field(values->BypassGatewayControl, 6, 6) | + __gen_field(values->GPGPUMode, 2, 2) | + 0; + + dw[3] = + __gen_field(values->HalfSliceDisable, 0, 1) | + 0; + + dw[4] = + __gen_field(values->URBEntryAllocationSize, 16, 31) | + __gen_field(values->CURBEAllocationSize, 0, 15) | + 0; + + dw[5] = + __gen_field(values->ScoreboardEnable, 31, 31) | + __gen_field(values->ScoreboardType, 30, 30) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->Scoreboard3DeltaY, 28, 31) | + __gen_field(values->Scoreboard3DeltaX, 24, 27) | + __gen_field(values->Scoreboard2DeltaY, 20, 23) | + __gen_field(values->Scoreboard2DeltaX, 16, 19) | + __gen_field(values->Scoreboard1DeltaY, 12, 15) | + __gen_field(values->Scoreboard1DeltaX, 8, 11) | + __gen_field(values->Scoreboard0DeltaY, 4, 7) | + __gen_field(values->Scoreboard0DeltaX, 0, 3) | + 0; + + dw[7] = + __gen_field(values->Scoreboard7DeltaY, 28, 31) | + __gen_field(values->Scoreboard7DeltaX, 24, 27) | + __gen_field(values->Scoreboard6DeltaY, 20, 23) | + __gen_field(values->Scoreboard6DeltaX, 16, 19) | + __gen_field(values->Scoreboard5DeltaY, 12, 15) | + __gen_field(values->Scoreboard5DeltaX, 8, 11) | + __gen_field(values->Scoreboard4DeltaY, 4, 7) | + __gen_field(values->Scoreboard4DeltaX, 0, 3) | + 0; + +} + +#define GEN75_MI_ARB_CHECK_length_bias 0x00000001 +#define GEN75_MI_ARB_CHECK_header \ + .CommandType = 0, \ + .MICommandOpcode = 5 + +#define GEN75_MI_ARB_CHECK_length 0x00000001 + +struct GEN75_MI_ARB_CHECK { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN75_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_ARB_CHECK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN75_MI_ARB_ON_OFF_length_bias 0x00000001 +#define GEN75_MI_ARB_ON_OFF_header \ + .CommandType = 0, \ + .MICommandOpcode = 8 + +#define GEN75_MI_ARB_ON_OFF_length 0x00000001 + +struct GEN75_MI_ARB_ON_OFF { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool ArbitrationEnable; +}; + +static inline void +GEN75_MI_ARB_ON_OFF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_ARB_ON_OFF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ArbitrationEnable, 0, 0) | + 0; + +} + +#define GEN75_MI_BATCH_BUFFER_END_length_bias 0x00000001 +#define GEN75_MI_BATCH_BUFFER_END_header \ + .CommandType = 0, \ + .MICommandOpcode = 10 + +#define GEN75_MI_BATCH_BUFFER_END_length 0x00000001 + +struct GEN75_MI_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN75_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN75_MI_BATCH_BUFFER_START_length_bias 0x00000002 +#define GEN75_MI_BATCH_BUFFER_START_header \ + .CommandType = 0, \ + .MICommandOpcode = 49, \ + .DwordLength = 0 + +#define GEN75_MI_BATCH_BUFFER_START_length 0x00000002 + +struct GEN75_MI_BATCH_BUFFER_START { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define _1stlevelbatch 0 +#define _2ndlevelbatch 1 + uint32_t _2ndLevelBatchBuffer; + bool AddOffsetEnable; + bool PredicationEnable; + uint32_t NonPrivileged; + bool ClearCommandBufferEnable; + bool ResourceStreamerEnable; +#define ASI_GGTT 0 +#define ASI_PPGTT 1 + uint32_t AddressSpaceIndicator; + uint32_t DwordLength; + __gen_address_type BatchBufferStartAddress; +}; + +static inline void +GEN75_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_BATCH_BUFFER_START * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->_2ndLevelBatchBuffer, 22, 22) | + __gen_field(values->AddOffsetEnable, 16, 16) | + __gen_field(values->PredicationEnable, 15, 15) | + __gen_field(values->NonPrivileged, 13, 13) | + __gen_field(values->ClearCommandBufferEnable, 11, 11) | + __gen_field(values->ResourceStreamerEnable, 10, 10) | + __gen_field(values->AddressSpaceIndicator, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BatchBufferStartAddress, dw1); + +} + +#define GEN75_MI_CLFLUSH_length_bias 0x00000002 +#define GEN75_MI_CLFLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 39 + +#define GEN75_MI_CLFLUSH_length 0x00000000 + +struct GEN75_MI_CLFLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTT; + uint32_t DwordLength; + __gen_address_type PageBaseAddress; + uint32_t StartingCachelineOffset; + __gen_address_type PageBaseAddressHigh; + /* variable length fields follow */ +}; + +static inline void +GEN75_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_CLFLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + __gen_field(values->StartingCachelineOffset, 6, 11) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->PageBaseAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->PageBaseAddressHigh, dw2); + + /* variable length fields follow */ +} + +#define GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_length_bias 0x00000002 +#define GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_header\ + .CommandType = 0, \ + .MICommandOpcode = 54, \ + .UseGlobalGTT = 0, \ + .CompareSemaphore = 0, \ + .DwordLength = 0 + +#define GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000002 + +struct GEN75_MI_CONDITIONAL_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t CompareSemaphore; + uint32_t DwordLength; + uint32_t CompareDataDword; + __gen_address_type CompareAddress; +}; + +static inline void +GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->CompareSemaphore, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CompareDataDword, 0, 31) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->CompareAddress, dw2); + +} + +#define GEN75_MI_FLUSH_length_bias 0x00000001 +#define GEN75_MI_FLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 4 + +#define GEN75_MI_FLUSH_length 0x00000001 + +struct GEN75_MI_FLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool IndirectStatePointersDisable; + bool GenericMediaStateClear; +#define DontReset 0 +#define Reset 1 + bool GlobalSnapshotCountReset; +#define Flush 0 +#define DontFlush 1 + bool RenderCacheFlushInhibit; +#define DontInvalidate 0 +#define Invalidate 1 + bool StateInstructionCacheInvalidate; +}; + +static inline void +GEN75_MI_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->IndirectStatePointersDisable, 5, 5) | + __gen_field(values->GenericMediaStateClear, 4, 4) | + __gen_field(values->GlobalSnapshotCountReset, 3, 3) | + __gen_field(values->RenderCacheFlushInhibit, 2, 2) | + __gen_field(values->StateInstructionCacheInvalidate, 1, 1) | + 0; + +} + +#define GEN75_MI_LOAD_REGISTER_IMM_length_bias 0x00000002 +#define GEN75_MI_LOAD_REGISTER_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 34, \ + .DwordLength = 1 + +#define GEN75_MI_LOAD_REGISTER_IMM_length 0x00000003 + +struct GEN75_MI_LOAD_REGISTER_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t ByteWriteDisables; + uint32_t DwordLength; + uint32_t RegisterOffset; + uint32_t DataDWord; +}; + +static inline void +GEN75_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_LOAD_REGISTER_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ByteWriteDisables, 8, 11) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterOffset, 2, 22) | + 0; + + dw[2] = + __gen_field(values->DataDWord, 0, 31) | + 0; + +} + +#define GEN75_MI_LOAD_REGISTER_MEM_length_bias 0x00000002 +#define GEN75_MI_LOAD_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 41, \ + .DwordLength = 1 + +#define GEN75_MI_LOAD_REGISTER_MEM_length 0x00000003 + +struct GEN75_MI_LOAD_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t AsyncModeEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN75_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_LOAD_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->AsyncModeEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN75_MI_LOAD_REGISTER_REG_length_bias 0x00000002 +#define GEN75_MI_LOAD_REGISTER_REG_header \ + .CommandType = 0, \ + .MICommandOpcode = 42, \ + .DwordLength = 1 + +#define GEN75_MI_LOAD_REGISTER_REG_length 0x00000003 + +struct GEN75_MI_LOAD_REGISTER_REG { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t SourceRegisterAddress; + uint32_t DestinationRegisterAddress; +}; + +static inline void +GEN75_MI_LOAD_REGISTER_REG_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_LOAD_REGISTER_REG * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SourceRegisterAddress, 2, 22) | + 0; + + dw[2] = + __gen_offset(values->DestinationRegisterAddress, 2, 22) | + 0; + +} + +#define GEN75_MI_LOAD_SCAN_LINES_EXCL_length_bias 0x00000002 +#define GEN75_MI_LOAD_SCAN_LINES_EXCL_header \ + .CommandType = 0, \ + .MICommandOpcode = 19, \ + .DwordLength = 0 + +#define GEN75_MI_LOAD_SCAN_LINES_EXCL_length 0x00000002 + +struct GEN75_MI_LOAD_SCAN_LINES_EXCL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define DisplayPlaneA 0 +#define DisplayPlaneB 1 +#define DisplayPlaneC 4 + uint32_t DisplayPlaneSelect; + uint32_t DwordLength; + uint32_t StartScanLineNumber; + uint32_t EndScanLineNumber; +}; + +static inline void +GEN75_MI_LOAD_SCAN_LINES_EXCL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_LOAD_SCAN_LINES_EXCL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlaneSelect, 19, 21) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->StartScanLineNumber, 16, 28) | + __gen_field(values->EndScanLineNumber, 0, 12) | + 0; + +} + +#define GEN75_MI_LOAD_SCAN_LINES_INCL_length_bias 0x00000002 +#define GEN75_MI_LOAD_SCAN_LINES_INCL_header \ + .CommandType = 0, \ + .MICommandOpcode = 18, \ + .DwordLength = 0 + +#define GEN75_MI_LOAD_SCAN_LINES_INCL_length 0x00000002 + +struct GEN75_MI_LOAD_SCAN_LINES_INCL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define DisplayPlaneA 0 +#define DisplayPlaneB 1 +#define DisplayPlaneC 4 + uint32_t DisplayPlaneSelect; + uint32_t DwordLength; + uint32_t StartScanLineNumber; + uint32_t EndScanLineNumber; +}; + +static inline void +GEN75_MI_LOAD_SCAN_LINES_INCL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_LOAD_SCAN_LINES_INCL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlaneSelect, 19, 21) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->StartScanLineNumber, 16, 28) | + __gen_field(values->EndScanLineNumber, 0, 12) | + 0; + +} + +#define GEN75_MI_LOAD_URB_MEM_length_bias 0x00000002 +#define GEN75_MI_LOAD_URB_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 44, \ + .DwordLength = 1 + +#define GEN75_MI_LOAD_URB_MEM_length 0x00000003 + +struct GEN75_MI_LOAD_URB_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN75_MI_LOAD_URB_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_LOAD_URB_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBAddress, 2, 14) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN75_MI_MATH_length_bias 0x00000002 +#define GEN75_MI_MATH_header \ + .CommandType = 0, \ + .MICommandOpcode = 26 + +#define GEN75_MI_MATH_length 0x00000000 + +struct GEN75_MI_MATH { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t ALUINSTRUCTION1; + uint32_t ALUINSTRUCTION2; + /* variable length fields follow */ +}; + +static inline void +GEN75_MI_MATH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_MATH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->ALUINSTRUCTION1, 0, 31) | + 0; + + dw[2] = + __gen_field(values->ALUINSTRUCTION2, 0, 31) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_MI_NOOP_length_bias 0x00000001 +#define GEN75_MI_NOOP_header \ + .CommandType = 0, \ + .MICommandOpcode = 0 + +#define GEN75_MI_NOOP_length 0x00000001 + +struct GEN75_MI_NOOP { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool IdentificationNumberRegisterWriteEnable; + uint32_t IdentificationNumber; +}; + +static inline void +GEN75_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_NOOP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->IdentificationNumberRegisterWriteEnable, 22, 22) | + __gen_field(values->IdentificationNumber, 0, 21) | + 0; + +} + +#define GEN75_MI_PREDICATE_length_bias 0x00000001 +#define GEN75_MI_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 12 + +#define GEN75_MI_PREDICATE_length 0x00000001 + +struct GEN75_MI_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define LOAD_KEEP 0 +#define LOAD_LOAD 2 +#define LOAD_LOADINV 3 + uint32_t LoadOperation; +#define COMBINE_SET 0 +#define COMBINE_AND 1 +#define COMBINE_OR 2 +#define COMBINE_XOR 3 + uint32_t CombineOperation; +#define COMPARE_SRCS_EQUAL 2 +#define COMPARE_DELTAS_EQUAL 3 + uint32_t CompareOperation; +}; + +static inline void +GEN75_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->LoadOperation, 6, 7) | + __gen_field(values->CombineOperation, 3, 4) | + __gen_field(values->CompareOperation, 0, 1) | + 0; + +} + +#define GEN75_MI_REPORT_HEAD_length_bias 0x00000001 +#define GEN75_MI_REPORT_HEAD_header \ + .CommandType = 0, \ + .MICommandOpcode = 7 + +#define GEN75_MI_REPORT_HEAD_length 0x00000001 + +struct GEN75_MI_REPORT_HEAD { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN75_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_REPORT_HEAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN75_MI_RS_CONTEXT_length_bias 0x00000001 +#define GEN75_MI_RS_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 15 + +#define GEN75_MI_RS_CONTEXT_length 0x00000001 + +struct GEN75_MI_RS_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define RS_RESTORE 0 +#define RS_SAVE 1 + uint32_t ResourceStreamerSave; +}; + +static inline void +GEN75_MI_RS_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_RS_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ResourceStreamerSave, 0, 0) | + 0; + +} + +#define GEN75_MI_RS_CONTROL_length_bias 0x00000001 +#define GEN75_MI_RS_CONTROL_header \ + .CommandType = 0, \ + .MICommandOpcode = 6 + +#define GEN75_MI_RS_CONTROL_length 0x00000001 + +struct GEN75_MI_RS_CONTROL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define RS_STOP 0 +#define RS_START 1 + uint32_t ResourceStreamerControl; +}; + +static inline void +GEN75_MI_RS_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_RS_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ResourceStreamerControl, 0, 0) | + 0; + +} + +#define GEN75_MI_RS_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN75_MI_RS_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 43, \ + .DwordLength = 2 + +#define GEN75_MI_RS_STORE_DATA_IMM_length 0x00000004 + +struct GEN75_MI_RS_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type DestinationAddress; + uint32_t CoreModeEnable; + uint32_t DataDWord0; +}; + +static inline void +GEN75_MI_RS_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_RS_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + 0; + + uint32_t dw2 = + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->DestinationAddress, dw2); + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + +} + +#define GEN75_MI_SEMAPHORE_MBOX_length_bias 0x00000002 +#define GEN75_MI_SEMAPHORE_MBOX_header \ + .CommandType = 0, \ + .MICommandOpcode = 22, \ + .DwordLength = 1 + +#define GEN75_MI_SEMAPHORE_MBOX_length 0x00000003 + +struct GEN75_MI_SEMAPHORE_MBOX { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define RVSYNC 0 +#define RVESYNC 1 +#define RBSYNC 2 +#define UseGeneralRegisterSelect 3 + uint32_t RegisterSelect; + uint32_t GeneralRegisterSelect; + uint32_t DwordLength; + uint32_t SemaphoreDataDword; +}; + +static inline void +GEN75_MI_SEMAPHORE_MBOX_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_SEMAPHORE_MBOX * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->RegisterSelect, 16, 17) | + __gen_field(values->GeneralRegisterSelect, 8, 13) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SemaphoreDataDword, 0, 31) | + 0; + + dw[2] = + 0; + +} + +#define GEN75_MI_SET_CONTEXT_length_bias 0x00000002 +#define GEN75_MI_SET_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 24, \ + .DwordLength = 0 + +#define GEN75_MI_SET_CONTEXT_length 0x00000002 + +struct GEN75_MI_SET_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type LogicalContextAddress; + uint32_t ReservedMustbe1; + bool CoreModeEnable; + bool ResourceStreamerStateSaveEnable; + bool ResourceStreamerStateRestoreEnable; + uint32_t ForceRestore; + uint32_t RestoreInhibit; +}; + +static inline void +GEN75_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_SET_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->ReservedMustbe1, 8, 8) | + __gen_field(values->CoreModeEnable, 4, 4) | + __gen_field(values->ResourceStreamerStateSaveEnable, 3, 3) | + __gen_field(values->ResourceStreamerStateRestoreEnable, 2, 2) | + __gen_field(values->ForceRestore, 1, 1) | + __gen_field(values->RestoreInhibit, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->LogicalContextAddress, dw1); + +} + +#define GEN75_MI_SET_PREDICATE_length_bias 0x00000001 +#define GEN75_MI_SET_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 1, \ + .PREDICATEENABLE = 6 + +#define GEN75_MI_SET_PREDICATE_length 0x00000001 + +struct GEN75_MI_SET_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PredicateAlways 0 +#define PredicateonClear 1 +#define PredicateonSet 2 +#define PredicateDisable 3 + bool PREDICATEENABLE; +}; + +static inline void +GEN75_MI_SET_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_SET_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->PREDICATEENABLE, 0, 1) | + 0; + +} + +#define GEN75_MI_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN75_MI_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 32, \ + .DwordLength = 2 + +#define GEN75_MI_STORE_DATA_IMM_length 0x00000004 + +struct GEN75_MI_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t DwordLength; + uint32_t Address; + uint32_t CoreModeEnable; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN75_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->Address, 2, 31) | + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[4] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN75_MI_STORE_DATA_INDEX_length_bias 0x00000002 +#define GEN75_MI_STORE_DATA_INDEX_header \ + .CommandType = 0, \ + .MICommandOpcode = 33, \ + .DwordLength = 1 + +#define GEN75_MI_STORE_DATA_INDEX_length 0x00000003 + +struct GEN75_MI_STORE_DATA_INDEX { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t Offset; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN75_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_STORE_DATA_INDEX * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Offset, 2, 11) | + 0; + + dw[2] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[3] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN75_MI_STORE_URB_MEM_length_bias 0x00000002 +#define GEN75_MI_STORE_URB_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 45, \ + .DwordLength = 1 + +#define GEN75_MI_STORE_URB_MEM_length 0x00000003 + +struct GEN75_MI_STORE_URB_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN75_MI_STORE_URB_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_STORE_URB_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBAddress, 2, 14) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN75_MI_SUSPEND_FLUSH_length_bias 0x00000001 +#define GEN75_MI_SUSPEND_FLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 11 + +#define GEN75_MI_SUSPEND_FLUSH_length 0x00000001 + +struct GEN75_MI_SUSPEND_FLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool SuspendFlush; +}; + +static inline void +GEN75_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_SUSPEND_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->SuspendFlush, 0, 0) | + 0; + +} + +#define GEN75_MI_TOPOLOGY_FILTER_length_bias 0x00000001 +#define GEN75_MI_TOPOLOGY_FILTER_header \ + .CommandType = 0, \ + .MICommandOpcode = 13 + +#define GEN75_MI_TOPOLOGY_FILTER_length 0x00000001 + +struct GEN75_MI_TOPOLOGY_FILTER { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t TopologyFilterValue; +}; + +static inline void +GEN75_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_TOPOLOGY_FILTER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->TopologyFilterValue, 0, 5) | + 0; + +} + +#define GEN75_MI_UPDATE_GTT_length_bias 0x00000002 +#define GEN75_MI_UPDATE_GTT_header \ + .CommandType = 0, \ + .MICommandOpcode = 35 + +#define GEN75_MI_UPDATE_GTT_length 0x00000000 + +struct GEN75_MI_UPDATE_GTT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTT; + uint32_t DwordLength; + __gen_address_type EntryAddress; + /* variable length fields follow */ +}; + +static inline void +GEN75_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_UPDATE_GTT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->EntryAddress, dw1); + + /* variable length fields follow */ +} + +#define GEN75_MI_URB_ATOMIC_ALLOC_length_bias 0x00000001 +#define GEN75_MI_URB_ATOMIC_ALLOC_header \ + .CommandType = 0, \ + .MICommandOpcode = 9 + +#define GEN75_MI_URB_ATOMIC_ALLOC_length 0x00000001 + +struct GEN75_MI_URB_ATOMIC_ALLOC { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t URBAtomicStorageOffset; + uint32_t URBAtomicStorageSize; +}; + +static inline void +GEN75_MI_URB_ATOMIC_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_URB_ATOMIC_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->URBAtomicStorageOffset, 12, 19) | + __gen_field(values->URBAtomicStorageSize, 0, 8) | + 0; + +} + +#define GEN75_MI_URB_CLEAR_length_bias 0x00000002 +#define GEN75_MI_URB_CLEAR_header \ + .CommandType = 0, \ + .MICommandOpcode = 25, \ + .DwordLength = 0 + +#define GEN75_MI_URB_CLEAR_length 0x00000002 + +struct GEN75_MI_URB_CLEAR { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBClearLength; + uint32_t URBAddress; +}; + +static inline void +GEN75_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_URB_CLEAR * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBClearLength, 16, 29) | + __gen_offset(values->URBAddress, 0, 14) | + 0; + +} + +#define GEN75_MI_USER_INTERRUPT_length_bias 0x00000001 +#define GEN75_MI_USER_INTERRUPT_header \ + .CommandType = 0, \ + .MICommandOpcode = 2 + +#define GEN75_MI_USER_INTERRUPT_length 0x00000001 + +struct GEN75_MI_USER_INTERRUPT { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN75_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_USER_INTERRUPT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN75_MI_WAIT_FOR_EVENT_length_bias 0x00000001 +#define GEN75_MI_WAIT_FOR_EVENT_header \ + .CommandType = 0, \ + .MICommandOpcode = 3 + +#define GEN75_MI_WAIT_FOR_EVENT_length 0x00000001 + +struct GEN75_MI_WAIT_FOR_EVENT { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool DisplayPipeCHorizontalBlankWaitEnable; + bool DisplayPipeCVerticalBlankWaitEnable; + bool DisplaySpriteCFlipPendingWaitEnable; +#define Notenabled 0 + uint32_t ConditionCodeWaitSelect; + bool DisplayPlaneCFlipPendingWaitEnable; + bool DisplayPipeCScanLineWaitEnable; + bool DisplayPipeBHorizontalBlankWaitEnable; + bool DisplayPipeBVerticalBlankWaitEnable; + bool DisplaySpriteBFlipPendingWaitEnable; + bool DisplayPlaneBFlipPendingWaitEnable; + bool DisplayPipeBScanLineWaitEnable; + bool DisplayPipeAHorizontalBlankWaitEnable; + bool DisplayPipeAVerticalBlankWaitEnable; + bool DisplaySpriteAFlipPendingWaitEnable; + bool DisplayPlaneAFlipPendingWaitEnable; + bool DisplayPipeAScanLineWaitEnable; +}; + +static inline void +GEN75_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_WAIT_FOR_EVENT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPipeCHorizontalBlankWaitEnable, 22, 22) | + __gen_field(values->DisplayPipeCVerticalBlankWaitEnable, 21, 21) | + __gen_field(values->DisplaySpriteCFlipPendingWaitEnable, 20, 20) | + __gen_field(values->ConditionCodeWaitSelect, 16, 19) | + __gen_field(values->DisplayPlaneCFlipPendingWaitEnable, 15, 15) | + __gen_field(values->DisplayPipeCScanLineWaitEnable, 14, 14) | + __gen_field(values->DisplayPipeBHorizontalBlankWaitEnable, 13, 13) | + __gen_field(values->DisplayPipeBVerticalBlankWaitEnable, 11, 11) | + __gen_field(values->DisplaySpriteBFlipPendingWaitEnable, 10, 10) | + __gen_field(values->DisplayPlaneBFlipPendingWaitEnable, 9, 9) | + __gen_field(values->DisplayPipeBScanLineWaitEnable, 8, 8) | + __gen_field(values->DisplayPipeAHorizontalBlankWaitEnable, 5, 5) | + __gen_field(values->DisplayPipeAVerticalBlankWaitEnable, 3, 3) | + __gen_field(values->DisplaySpriteAFlipPendingWaitEnable, 2, 2) | + __gen_field(values->DisplayPlaneAFlipPendingWaitEnable, 1, 1) | + __gen_field(values->DisplayPipeAScanLineWaitEnable, 0, 0) | + 0; + +} + +#define GEN75_PIPE_CONTROL_length_bias 0x00000002 +#define GEN75_PIPE_CONTROL_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 2, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 3 + +#define GEN75_PIPE_CONTROL_length 0x00000005 + +struct GEN75_PIPE_CONTROL { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define DAT_PPGTT 0 +#define DAT_GGTT 1 + uint32_t DestinationAddressType; +#define NoLRIOperation 0 +#define MMIOWriteImmediateData 1 + uint32_t LRIPostSyncOperation; + uint32_t StoreDataIndex; + uint32_t CommandStreamerStallEnable; +#define DontReset 0 +#define Reset 1 + uint32_t GlobalSnapshotCountReset; + uint32_t TLBInvalidate; + bool GenericMediaStateClear; +#define NoWrite 0 +#define WriteImmediateData 1 +#define WritePSDepthCount 2 +#define WriteTimestamp 3 + uint32_t PostSyncOperation; + bool DepthStallEnable; +#define DisableFlush 0 +#define EnableFlush 1 + bool RenderTargetCacheFlushEnable; + bool InstructionCacheInvalidateEnable; + bool TextureCacheInvalidationEnable; + bool IndirectStatePointersDisable; + bool NotifyEnable; + bool PipeControlFlushEnable; + bool DCFlushEnable; + bool VFCacheInvalidationEnable; + bool ConstantCacheInvalidationEnable; + bool StateCacheInvalidationEnable; + bool StallAtPixelScoreboard; +#define FlushDisabled 0 +#define FlushEnabled 1 + bool DepthCacheFlushEnable; + __gen_address_type Address; + uint32_t ImmediateData; + uint32_t ImmediateData0; +}; + +static inline void +GEN75_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_PIPE_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DestinationAddressType, 24, 24) | + __gen_field(values->LRIPostSyncOperation, 23, 23) | + __gen_field(values->StoreDataIndex, 21, 21) | + __gen_field(values->CommandStreamerStallEnable, 20, 20) | + __gen_field(values->GlobalSnapshotCountReset, 19, 19) | + __gen_field(values->TLBInvalidate, 18, 18) | + __gen_field(values->GenericMediaStateClear, 16, 16) | + __gen_field(values->PostSyncOperation, 14, 15) | + __gen_field(values->DepthStallEnable, 13, 13) | + __gen_field(values->RenderTargetCacheFlushEnable, 12, 12) | + __gen_field(values->InstructionCacheInvalidateEnable, 11, 11) | + __gen_field(values->TextureCacheInvalidationEnable, 10, 10) | + __gen_field(values->IndirectStatePointersDisable, 9, 9) | + __gen_field(values->NotifyEnable, 8, 8) | + __gen_field(values->PipeControlFlushEnable, 7, 7) | + __gen_field(values->DCFlushEnable, 5, 5) | + __gen_field(values->VFCacheInvalidationEnable, 4, 4) | + __gen_field(values->ConstantCacheInvalidationEnable, 3, 3) | + __gen_field(values->StateCacheInvalidationEnable, 2, 2) | + __gen_field(values->StallAtPixelScoreboard, 1, 1) | + __gen_field(values->DepthCacheFlushEnable, 0, 0) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->Address, dw2); + + dw[3] = + __gen_field(values->ImmediateData, 0, 31) | + 0; + + dw[4] = + __gen_field(values->ImmediateData, 0, 31) | + 0; + +} + +#define GEN75_SCISSOR_RECT_length 0x00000002 + +struct GEN75_SCISSOR_RECT { + uint32_t ScissorRectangleYMin; + uint32_t ScissorRectangleXMin; + uint32_t ScissorRectangleYMax; + uint32_t ScissorRectangleXMax; +}; + +static inline void +GEN75_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SCISSOR_RECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ScissorRectangleYMin, 16, 31) | + __gen_field(values->ScissorRectangleXMin, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ScissorRectangleYMax, 16, 31) | + __gen_field(values->ScissorRectangleXMax, 0, 15) | + 0; + +} + +#define GEN75_SF_CLIP_VIEWPORT_length 0x00000010 + +struct GEN75_SF_CLIP_VIEWPORT { + float ViewportMatrixElementm00; + float ViewportMatrixElementm11; + float ViewportMatrixElementm22; + float ViewportMatrixElementm30; + float ViewportMatrixElementm31; + float ViewportMatrixElementm32; + float XMinClipGuardband; + float XMaxClipGuardband; + float YMinClipGuardband; + float YMaxClipGuardband; +}; + +static inline void +GEN75_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SF_CLIP_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->ViewportMatrixElementm00) | + 0; + + dw[1] = + __gen_float(values->ViewportMatrixElementm11) | + 0; + + dw[2] = + __gen_float(values->ViewportMatrixElementm22) | + 0; + + dw[3] = + __gen_float(values->ViewportMatrixElementm30) | + 0; + + dw[4] = + __gen_float(values->ViewportMatrixElementm31) | + 0; + + dw[5] = + __gen_float(values->ViewportMatrixElementm32) | + 0; + + dw[6] = + 0; + + dw[7] = + 0; + + dw[8] = + __gen_float(values->XMinClipGuardband) | + 0; + + dw[9] = + __gen_float(values->XMaxClipGuardband) | + 0; + + dw[10] = + __gen_float(values->YMinClipGuardband) | + 0; + + dw[11] = + __gen_float(values->YMaxClipGuardband) | + 0; + + for (uint32_t i = 0, j = 12; i < 4; i += 1, j++) { + dw[j] = + 0; + } + +} + +#define GEN75_BLEND_STATE_length 0x00000002 + +struct GEN75_BLEND_STATE { + bool ColorBufferBlendEnable; + bool IndependentAlphaBlendEnable; +#define BLENDFUNCTION_ADD 0 +#define BLENDFUNCTION_SUBTRACT 1 +#define BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BLENDFUNCTION_MIN 3 +#define BLENDFUNCTION_MAX 4 + uint32_t AlphaBlendFunction; +#define BLENDFACTOR_ONE 1 +#define BLENDFACTOR_SRC_COLOR 2 +#define BLENDFACTOR_SRC_ALPHA 3 +#define BLENDFACTOR_DST_ALPHA 4 +#define BLENDFACTOR_DST_COLOR 5 +#define BLENDFACTOR_SRC_ALPHA_SATURATE 6 +#define BLENDFACTOR_CONST_COLOR 7 +#define BLENDFACTOR_CONST_ALPHA 8 +#define BLENDFACTOR_SRC1_COLOR 9 +#define BLENDFACTOR_SRC1_ALPHA 10 +#define BLENDFACTOR_ZERO 17 +#define BLENDFACTOR_INV_SRC_COLOR 18 +#define BLENDFACTOR_INV_SRC_ALPHA 19 +#define BLENDFACTOR_INV_DST_ALPHA 20 +#define BLENDFACTOR_INV_DST_COLOR 21 +#define BLENDFACTOR_INV_CONST_COLOR 23 +#define BLENDFACTOR_INV_CONST_ALPHA 24 +#define BLENDFACTOR_INV_SRC1_COLOR 25 +#define BLENDFACTOR_INV_SRC1_ALPHA 26 + uint32_t SourceAlphaBlendFactor; + uint32_t DestinationAlphaBlendFactor; +#define BLENDFUNCTION_ADD 0 +#define BLENDFUNCTION_SUBTRACT 1 +#define BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BLENDFUNCTION_MIN 3 +#define BLENDFUNCTION_MAX 4 + uint32_t ColorBlendFunction; + uint32_t SourceBlendFactor; + uint32_t DestinationBlendFactor; + bool AlphaToCoverageEnable; + bool AlphaToOneEnable; + bool AlphaToCoverageDitherEnable; + bool WriteDisableAlpha; + bool WriteDisableRed; + bool WriteDisableGreen; + bool WriteDisableBlue; + bool LogicOpEnable; +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 1 +#define LOGICOP_AND_INVERTED 2 +#define LOGICOP_COPY_INVERTED 3 +#define LOGICOP_AND_REVERSE 4 +#define LOGICOP_INVERT 5 +#define LOGICOP_XOR 6 +#define LOGICOP_NAND 7 +#define LOGICOP_AND 8 +#define LOGICOP_EQUIV 9 +#define LOGICOP_NOOP 10 +#define LOGICOP_OR_INVERTED 11 +#define LOGICOP_COPY 12 +#define LOGICOP_OR_REVERSE 13 +#define LOGICOP_OR 14 +#define LOGICOP_SET 15 + uint32_t LogicOpFunction; + bool AlphaTestEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t AlphaTestFunction; + bool ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; +#define COLORCLAMP_UNORM 0 +#define COLORCLAMP_SNORM 1 +#define COLORCLAMP_RTFORMAT 2 + uint32_t ColorClampRange; + bool PreBlendColorClampEnable; + bool PostBlendColorClampEnable; +}; + +static inline void +GEN75_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_BLEND_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ColorBufferBlendEnable, 31, 31) | + __gen_field(values->IndependentAlphaBlendEnable, 30, 30) | + __gen_field(values->AlphaBlendFunction, 26, 28) | + __gen_field(values->SourceAlphaBlendFactor, 20, 24) | + __gen_field(values->DestinationAlphaBlendFactor, 15, 19) | + __gen_field(values->ColorBlendFunction, 11, 13) | + __gen_field(values->SourceBlendFactor, 5, 9) | + __gen_field(values->DestinationBlendFactor, 0, 4) | + 0; + + dw[1] = + __gen_field(values->AlphaToCoverageEnable, 31, 31) | + __gen_field(values->AlphaToOneEnable, 30, 30) | + __gen_field(values->AlphaToCoverageDitherEnable, 29, 29) | + __gen_field(values->WriteDisableAlpha, 27, 27) | + __gen_field(values->WriteDisableRed, 26, 26) | + __gen_field(values->WriteDisableGreen, 25, 25) | + __gen_field(values->WriteDisableBlue, 24, 24) | + __gen_field(values->LogicOpEnable, 22, 22) | + __gen_field(values->LogicOpFunction, 18, 21) | + __gen_field(values->AlphaTestEnable, 16, 16) | + __gen_field(values->AlphaTestFunction, 13, 15) | + __gen_field(values->ColorDitherEnable, 12, 12) | + __gen_field(values->XDitherOffset, 10, 11) | + __gen_field(values->YDitherOffset, 8, 9) | + __gen_field(values->ColorClampRange, 2, 3) | + __gen_field(values->PreBlendColorClampEnable, 1, 1) | + __gen_field(values->PostBlendColorClampEnable, 0, 0) | + 0; + +} + +#define GEN75_CC_VIEWPORT_length 0x00000002 + +struct GEN75_CC_VIEWPORT { + float MinimumDepth; + float MaximumDepth; +}; + +static inline void +GEN75_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_CC_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->MinimumDepth) | + 0; + + dw[1] = + __gen_float(values->MaximumDepth) | + 0; + +} + +#define GEN75_COLOR_CALC_STATE_length 0x00000006 + +struct GEN75_COLOR_CALC_STATE { + uint32_t StencilReferenceValue; + uint32_t BackFaceStencilReferenceValue; +#define Cancelled 0 +#define NotCancelled 1 + uint32_t RoundDisableFunctionDisable; +#define ALPHATEST_UNORM8 0 +#define ALPHATEST_FLOAT32 1 + uint32_t AlphaTestFormat; + uint32_t AlphaReferenceValueAsUNORM8; + float AlphaReferenceValueAsFLOAT32; + float BlendConstantColorRed; + float BlendConstantColorGreen; + float BlendConstantColorBlue; + float BlendConstantColorAlpha; +}; + +static inline void +GEN75_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_COLOR_CALC_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->StencilReferenceValue, 24, 31) | + __gen_field(values->BackFaceStencilReferenceValue, 16, 23) | + __gen_field(values->RoundDisableFunctionDisable, 15, 15) | + __gen_field(values->AlphaTestFormat, 0, 0) | + 0; + + dw[1] = + __gen_field(values->AlphaReferenceValueAsUNORM8, 0, 31) | + __gen_float(values->AlphaReferenceValueAsFLOAT32) | + 0; + + dw[2] = + __gen_float(values->BlendConstantColorRed) | + 0; + + dw[3] = + __gen_float(values->BlendConstantColorGreen) | + 0; + + dw[4] = + __gen_float(values->BlendConstantColorBlue) | + 0; + + dw[5] = + __gen_float(values->BlendConstantColorAlpha) | + 0; + +} + +#define GEN75_DEPTH_STENCIL_STATE_length 0x00000003 + +struct GEN75_DEPTH_STENCIL_STATE { + bool StencilTestEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t StencilTestFunction; +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 1 +#define STENCILOP_REPLACE 2 +#define STENCILOP_INCRSAT 3 +#define STENCILOP_DECRSAT 4 +#define STENCILOP_INCR 5 +#define STENCILOP_DECR 6 +#define STENCILOP_INVERT 7 + uint32_t StencilFailOp; + uint32_t StencilPassDepthFailOp; + uint32_t StencilPassDepthPassOp; + bool StencilBufferWriteEnable; + bool DoubleSidedStencilEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t BackFaceStencilTestFunction; +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 1 +#define STENCILOP_REPLACE 2 +#define STENCILOP_INCRSAT 3 +#define STENCILOP_DECRSAT 4 +#define STENCILOP_INCR 5 +#define STENCILOP_DECR 6 +#define STENCILOP_INVERT 7 + uint32_t BackfaceStencilFailOp; + uint32_t BackfaceStencilPassDepthFailOp; + uint32_t BackfaceStencilPassDepthPassOp; + uint32_t StencilTestMask; + uint32_t StencilWriteMask; + uint32_t BackfaceStencilTestMask; + uint32_t BackfaceStencilWriteMask; + bool DepthTestEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t DepthTestFunction; + bool DepthBufferWriteEnable; +}; + +static inline void +GEN75_DEPTH_STENCIL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_DEPTH_STENCIL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->StencilTestEnable, 31, 31) | + __gen_field(values->StencilTestFunction, 28, 30) | + __gen_field(values->StencilFailOp, 25, 27) | + __gen_field(values->StencilPassDepthFailOp, 22, 24) | + __gen_field(values->StencilPassDepthPassOp, 19, 21) | + __gen_field(values->StencilBufferWriteEnable, 18, 18) | + __gen_field(values->DoubleSidedStencilEnable, 15, 15) | + __gen_field(values->BackFaceStencilTestFunction, 12, 14) | + __gen_field(values->BackfaceStencilFailOp, 9, 11) | + __gen_field(values->BackfaceStencilPassDepthFailOp, 6, 8) | + __gen_field(values->BackfaceStencilPassDepthPassOp, 3, 5) | + 0; + + dw[1] = + __gen_field(values->StencilTestMask, 24, 31) | + __gen_field(values->StencilWriteMask, 16, 23) | + __gen_field(values->BackfaceStencilTestMask, 8, 15) | + __gen_field(values->BackfaceStencilWriteMask, 0, 7) | + 0; + + dw[2] = + __gen_field(values->DepthTestEnable, 31, 31) | + __gen_field(values->DepthTestFunction, 27, 29) | + __gen_field(values->DepthBufferWriteEnable, 26, 26) | + 0; + +} + +#define GEN75_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 + +struct GEN75_INTERFACE_DESCRIPTOR_DATA { + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlow; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t SamplerStatePointer; +#define Nosamplersused 0 +#define Between1and4samplersused 1 +#define Between5and8samplersused 2 +#define Between9and12samplersused 3 +#define Between13and16samplersused 4 + uint32_t SamplerCount; + uint32_t BindingTablePointer; + uint32_t BindingTableEntryCount; + uint32_t ConstantURBEntryReadLength; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + bool BarrierEnable; + uint32_t SharedLocalMemorySize; + uint32_t NumberofThreadsinGPGPUThreadGroup; + uint32_t CrossThreadConstantDataReadLength; +}; + +static inline void +GEN75_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_INTERFACE_DESCRIPTOR_DATA * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[1] = + __gen_field(values->SingleProgramFlow, 18, 18) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[2] = + __gen_offset(values->SamplerStatePointer, 5, 31) | + __gen_field(values->SamplerCount, 2, 4) | + 0; + + dw[3] = + __gen_offset(values->BindingTablePointer, 5, 15) | + __gen_field(values->BindingTableEntryCount, 0, 4) | + 0; + + dw[4] = + __gen_field(values->ConstantURBEntryReadLength, 16, 31) | + 0; + + dw[5] = + __gen_field(values->RoundingMode, 22, 23) | + __gen_field(values->BarrierEnable, 21, 21) | + __gen_field(values->SharedLocalMemorySize, 16, 20) | + __gen_field(values->NumberofThreadsinGPGPUThreadGroup, 0, 7) | + 0; + + dw[6] = + __gen_field(values->CrossThreadConstantDataReadLength, 0, 7) | + 0; + + dw[7] = + 0; + +} + +#define GEN75_BINDING_TABLE_STATE_length 0x00000001 + +struct GEN75_BINDING_TABLE_STATE { + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN75_BINDING_TABLE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_BINDING_TABLE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->SurfaceStatePointer, 5, 31) | + 0; + +} + +#define GEN75_RENDER_SURFACE_STATE_length 0x00000008 + +struct GEN75_RENDER_SURFACE_STATE { +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_BUFFER 4 +#define SURFTYPE_STRBUF 5 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + bool SurfaceArray; + uint32_t SurfaceFormat; +#define VALIGN_2 0 +#define VALIGN_4 1 + uint32_t SurfaceVerticalAlignment; +#define HALIGN_4 0 +#define HALIGN_8 1 + uint32_t SurfaceHorizontalAlignment; + uint32_t TiledSurface; +#define TILEWALK_XMAJOR 0 +#define TILEWALK_YMAJOR 1 + uint32_t TileWalk; + uint32_t VerticalLineStride; + uint32_t VerticalLineStrideOffset; +#define ARYSPC_FULL 0 +#define ARYSPC_LOD0 1 + uint32_t SurfaceArraySpacing; + uint32_t RenderCacheReadWriteMode; +#define NORMAL_MODE 0 +#define PROGRESSIVE_FRAME 2 +#define INTERLACED_FRAME 3 + uint32_t MediaBoundaryPixelMode; + uint32_t CubeFaceEnables; + __gen_address_type SurfaceBaseAddress; + uint32_t Height; + uint32_t Width; + uint32_t Depth; + uint32_t IntegerSurfaceFormat; + uint32_t SurfacePitch; +#define RTROTATE_0DEG 0 +#define RTROTATE_90DEG 1 +#define RTROTATE_270DEG 3 + uint32_t RenderTargetRotation; + uint32_t MinimumArrayElement; + uint32_t RenderTargetViewExtent; +#define MSFMT_MSS 0 +#define MSFMT_DEPTH_STENCIL 1 + uint32_t MultisampledSurfaceStorageFormat; +#define MULTISAMPLECOUNT_1 0 +#define MULTISAMPLECOUNT_4 2 +#define MULTISAMPLECOUNT_8 3 + uint32_t NumberofMultisamples; + uint32_t MultisamplePositionPaletteIndex; + uint32_t MinimumArrayElement0; + uint32_t XOffset; + uint32_t YOffset; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE SurfaceObjectControlState; + uint32_t SurfaceMinLOD; + uint32_t MIPCountLOD; + __gen_address_type MCSBaseAddress; + uint32_t MCSSurfacePitch; + __gen_address_type AppendCounterAddress; + bool AppendCounterEnable; + bool MCSEnable; + uint32_t XOffsetforUVPlane; + uint32_t YOffsetforUVPlane; +#define SCS_ZERO 0 +#define SCS_ONE 1 +#define SCS_RED 4 +#define SCS_GREEN 5 +#define SCS_BLUE 6 +#define SCS_ALPHA 7 + uint32_t ShaderChannelSelectR; + uint32_t ShaderChannelSelectG; + uint32_t ShaderChannelSelectB; + uint32_t ShaderChannelSelectA; + float ResourceMinLOD; +}; + +static inline void +GEN75_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_RENDER_SURFACE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->SurfaceArray, 28, 28) | + __gen_field(values->SurfaceFormat, 18, 26) | + __gen_field(values->SurfaceVerticalAlignment, 16, 17) | + __gen_field(values->SurfaceHorizontalAlignment, 15, 15) | + __gen_field(values->TiledSurface, 14, 14) | + __gen_field(values->TileWalk, 13, 13) | + __gen_field(values->VerticalLineStride, 12, 12) | + __gen_field(values->VerticalLineStrideOffset, 11, 11) | + __gen_field(values->SurfaceArraySpacing, 10, 10) | + __gen_field(values->RenderCacheReadWriteMode, 8, 8) | + __gen_field(values->MediaBoundaryPixelMode, 6, 7) | + __gen_field(values->CubeFaceEnables, 0, 5) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->SurfaceBaseAddress, dw1); + + dw[2] = + __gen_field(values->Height, 16, 29) | + __gen_field(values->Width, 0, 13) | + 0; + + dw[3] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->IntegerSurfaceFormat, 18, 20) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + dw[4] = + __gen_field(values->RenderTargetRotation, 29, 30) | + __gen_field(values->MinimumArrayElement, 18, 28) | + __gen_field(values->RenderTargetViewExtent, 7, 17) | + __gen_field(values->MultisampledSurfaceStorageFormat, 6, 6) | + __gen_field(values->NumberofMultisamples, 3, 5) | + __gen_field(values->MultisamplePositionPaletteIndex, 0, 2) | + __gen_field(values->MinimumArrayElement, 0, 26) | + 0; + + uint32_t dw_SurfaceObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); + dw[5] = + __gen_offset(values->XOffset, 25, 31) | + __gen_offset(values->YOffset, 20, 23) | + __gen_field(dw_SurfaceObjectControlState, 16, 19) | + __gen_field(values->SurfaceMinLOD, 4, 7) | + __gen_field(values->MIPCountLOD, 0, 3) | + 0; + + uint32_t dw6 = + __gen_field(values->MCSSurfacePitch, 3, 11) | + __gen_field(values->AppendCounterEnable, 1, 1) | + __gen_field(values->MCSEnable, 0, 0) | + __gen_field(values->XOffsetforUVPlane, 16, 29) | + __gen_field(values->YOffsetforUVPlane, 0, 13) | + 0; + + dw[6] = + __gen_combine_address(data, &dw[6], values->AppendCounterAddress, dw6); + + dw[7] = + __gen_field(values->ShaderChannelSelectR, 25, 27) | + __gen_field(values->ShaderChannelSelectG, 22, 24) | + __gen_field(values->ShaderChannelSelectB, 19, 21) | + __gen_field(values->ShaderChannelSelectA, 16, 18) | + __gen_field(values->ResourceMinLOD * (1 << 8), 0, 11) | + 0; + +} + +#define GEN75_SAMPLER_BORDER_COLOR_STATE_length 0x00000014 + +#define GEN75_BORDER_COLOR_UINT32_SINT32_length 0x00000004 + +struct GEN75_BORDER_COLOR_UINT32_SINT32 { + uint32_t BorderColorRedui32integerunclamp; + uint32_t BorderColorRedsi32integerunclamp; + uint32_t BorderColorGreenui32integerunclamp; + uint32_t BorderColorGreensi32integerunclamp; + uint32_t BorderColorBlueui32integerunclamp; + uint32_t BorderColorBluesi32integerunclamp; + uint32_t BorderColorGreenui32integerunclamp0; + uint32_t BorderColorGreensi32integerunclamp0; + uint32_t BorderColorAlphaui32integerunclamp; + uint32_t BorderColorAlphasi32integerunclamp; +}; + +static inline void +GEN75_BORDER_COLOR_UINT32_SINT32_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_BORDER_COLOR_UINT32_SINT32 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BorderColorRedui32integerunclamp, 0, 31) | + __gen_field(values->BorderColorRedsi32integerunclamp, 0, 31) | + 0; + + dw[1] = + __gen_field(values->BorderColorGreenui32integerunclamp, 0, 31) | + __gen_field(values->BorderColorGreensi32integerunclamp, 0, 31) | + 0; + + dw[2] = + __gen_field(values->BorderColorBlueui32integerunclamp, 0, 31) | + __gen_field(values->BorderColorBluesi32integerunclamp, 0, 31) | + __gen_field(values->BorderColorGreenui32integerunclamp, 0, 31) | + __gen_field(values->BorderColorGreensi32integerunclamp, 0, 31) | + 0; + + dw[3] = + __gen_field(values->BorderColorAlphaui32integerunclamp, 0, 31) | + __gen_field(values->BorderColorAlphasi32integerunclamp, 0, 31) | + 0; + +} + +#define GEN75_BORDER_COLOR_UINT16_SINT16_length 0x00000004 + +struct GEN75_BORDER_COLOR_UINT16_SINT16 { + uint32_t BorderColorGreenclamptouint16; + uint32_t BorderColorGreenclamptosint16; + uint32_t BorderColorRedclamptouint16; + uint32_t BorderColorRedclamptosint16; + uint32_t BorderColorAlphaclamptouint16; + uint32_t BorderColorAlphaclamptosint16; + uint32_t BorderColorBlueclamptouint16; + uint32_t BorderColorBlueclamptosint16; +}; + +static inline void +GEN75_BORDER_COLOR_UINT16_SINT16_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_BORDER_COLOR_UINT16_SINT16 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BorderColorGreenclamptouint16, 16, 31) | + __gen_field(values->BorderColorGreenclamptosint16, 16, 31) | + __gen_field(values->BorderColorRedclamptouint16, 0, 15) | + __gen_field(values->BorderColorRedclamptosint16, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->BorderColorAlphaclamptouint16, 16, 31) | + __gen_field(values->BorderColorAlphaclamptosint16, 16, 31) | + __gen_field(values->BorderColorBlueclamptouint16, 0, 15) | + __gen_field(values->BorderColorBlueclamptosint16, 0, 15) | + 0; + + dw[3] = + 0; + +} + +#define GEN75_BORDER_COLOR_UINT8_SINT8_length 0x00000004 + +struct GEN75_BORDER_COLOR_UINT8_SINT8 { + uint32_t BorderColorAlphaclamptouint8; + uint32_t BorderColorAlphaclamptosint8; + uint32_t BorderColorBlueclamptouint8; + uint32_t BorderColorBlueclamptosint8; + uint32_t BorderColorGreenclamptouint8; + uint32_t BorderColorGreenclamptosint8; + uint32_t BorderRedAlphaclamptouint8; + uint32_t BorderRedAlphaclamptosint8; +}; + +static inline void +GEN75_BORDER_COLOR_UINT8_SINT8_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_BORDER_COLOR_UINT8_SINT8 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BorderColorAlphaclamptouint8, 24, 31) | + __gen_field(values->BorderColorAlphaclamptosint8, 24, 31) | + __gen_field(values->BorderColorBlueclamptouint8, 16, 23) | + __gen_field(values->BorderColorBlueclamptosint8, 16, 23) | + __gen_field(values->BorderColorGreenclamptouint8, 8, 15) | + __gen_field(values->BorderColorGreenclamptosint8, 8, 15) | + __gen_field(values->BorderRedAlphaclamptouint8, 0, 7) | + __gen_field(values->BorderRedAlphaclamptosint8, 0, 7) | + 0; + + dw[1] = + 0; + + dw[2] = + 0; + + dw[3] = + 0; + +} + +struct GEN75_SAMPLER_BORDER_COLOR_STATE { + float BorderColorRedDX100GL; + uint32_t BorderColorAlpha; + uint32_t BorderColorBlue; + uint32_t BorderColorGreen; + uint32_t BorderColorRedDX9; + float BorderColorGreen0; + float BorderColorBlue0; + float BorderColorAlpha0; + struct GEN75_BORDER_COLOR_UINT32_SINT32 BorderColor; + struct GEN75_BORDER_COLOR_UINT16_SINT16 BorderColor0; + struct GEN75_BORDER_COLOR_UINT8_SINT8 BorderColor1; +}; + +static inline void +GEN75_SAMPLER_BORDER_COLOR_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SAMPLER_BORDER_COLOR_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->BorderColorRedDX100GL) | + __gen_field(values->BorderColorAlpha, 24, 31) | + __gen_field(values->BorderColorBlue, 16, 23) | + __gen_field(values->BorderColorGreen, 8, 15) | + __gen_field(values->BorderColorRedDX9, 0, 7) | + 0; + + dw[1] = + __gen_float(values->BorderColorGreen) | + 0; + + dw[2] = + __gen_float(values->BorderColorBlue) | + 0; + + dw[3] = + __gen_float(values->BorderColorAlpha) | + 0; + + for (uint32_t i = 0, j = 4; i < 12; i += 1, j++) { + dw[j] = + 0; + } + + GEN75_BORDER_COLOR_UINT32_SINT32_pack(data, &dw[16], &values->BorderColor); +} + +#define GEN75_SAMPLER_STATE_length 0x00000004 + +struct GEN75_SAMPLER_STATE { + bool SamplerDisable; +#define DX10OGL 0 +#define DX9 1 + uint32_t TextureBorderColorMode; +#define OGL 1 + uint32_t LODPreClampEnable; + float BaseMipLevel; +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 + uint32_t MipModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MagModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MinModeFilter; + float TextureLODBias; +#define LEGACY 0 +#define EWAApproximation 1 + uint32_t AnisotropicAlgorithm; + float MinLOD; + float MaxLOD; +#define PREFILTEROPALWAYS 0 +#define PREFILTEROPNEVER 1 +#define PREFILTEROPLESS 2 +#define PREFILTEROPEQUAL 3 +#define PREFILTEROPLEQUAL 4 +#define PREFILTEROPGREATER 5 +#define PREFILTEROPNOTEQUAL 6 +#define PREFILTEROPGEQUAL 7 + uint32_t ShadowFunction; +#define PROGRAMMED 0 +#define OVERRIDE 1 + uint32_t CubeSurfaceControlMode; + uint32_t BorderColorPointer; + bool ChromaKeyEnable; + uint32_t ChromaKeyIndex; +#define KEYFILTER_KILL_ON_ANY_MATCH 0 +#define KEYFILTER_REPLACE_BLACK 1 + uint32_t ChromaKeyMode; +#define RATIO21 0 +#define RATIO41 1 +#define RATIO61 2 +#define RATIO81 3 +#define RATIO101 4 +#define RATIO121 5 +#define RATIO141 6 +#define RATIO161 7 + uint32_t MaximumAnisotropy; + bool RAddressMinFilterRoundingEnable; + bool RAddressMagFilterRoundingEnable; + bool VAddressMinFilterRoundingEnable; + bool VAddressMagFilterRoundingEnable; + bool UAddressMinFilterRoundingEnable; + bool UAddressMagFilterRoundingEnable; +#define FULL 0 +#define TRIQUAL_HIGHMAG_CLAMP_MIPFILTER 1 +#define MED 2 +#define LOW 3 + uint32_t TrilinearFilterQuality; + bool NonnormalizedCoordinateEnable; + uint32_t TCXAddressControlMode; + uint32_t TCYAddressControlMode; + uint32_t TCZAddressControlMode; +}; + +static inline void +GEN75_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SAMPLER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SamplerDisable, 31, 31) | + __gen_field(values->TextureBorderColorMode, 29, 29) | + __gen_field(values->LODPreClampEnable, 28, 28) | + __gen_field(values->BaseMipLevel * (1 << 1), 22, 26) | + __gen_field(values->MipModeFilter, 20, 21) | + __gen_field(values->MagModeFilter, 17, 19) | + __gen_field(values->MinModeFilter, 14, 16) | + __gen_fixed(values->TextureLODBias, 1, 13, true, 8) | + __gen_field(values->AnisotropicAlgorithm, 0, 0) | + 0; + + dw[1] = + __gen_field(values->MinLOD * (1 << 8), 20, 31) | + __gen_field(values->MaxLOD * (1 << 8), 8, 19) | + __gen_field(values->ShadowFunction, 1, 3) | + __gen_field(values->CubeSurfaceControlMode, 0, 0) | + 0; + + dw[2] = + __gen_offset(values->BorderColorPointer, 5, 31) | + 0; + + dw[3] = + __gen_field(values->ChromaKeyEnable, 25, 25) | + __gen_field(values->ChromaKeyIndex, 23, 24) | + __gen_field(values->ChromaKeyMode, 22, 22) | + __gen_field(values->MaximumAnisotropy, 19, 21) | + __gen_field(values->RAddressMinFilterRoundingEnable, 13, 13) | + __gen_field(values->RAddressMagFilterRoundingEnable, 14, 14) | + __gen_field(values->VAddressMinFilterRoundingEnable, 15, 15) | + __gen_field(values->VAddressMagFilterRoundingEnable, 16, 16) | + __gen_field(values->UAddressMinFilterRoundingEnable, 17, 17) | + __gen_field(values->UAddressMagFilterRoundingEnable, 18, 18) | + __gen_field(values->TrilinearFilterQuality, 11, 12) | + __gen_field(values->NonnormalizedCoordinateEnable, 10, 10) | + __gen_field(values->TCXAddressControlMode, 6, 8) | + __gen_field(values->TCYAddressControlMode, 3, 5) | + __gen_field(values->TCZAddressControlMode, 0, 2) | + 0; + +} + +/* Enum 3D_Prim_Topo_Type */ +#define _3DPRIM_POINTLIST 1 +#define _3DPRIM_LINELIST 2 +#define _3DPRIM_LINESTRIP 3 +#define _3DPRIM_TRILIST 4 +#define _3DPRIM_TRISTRIP 5 +#define _3DPRIM_TRIFAN 6 +#define _3DPRIM_QUADLIST 7 +#define _3DPRIM_QUADSTRIP 8 +#define _3DPRIM_LINELIST_ADJ 9 +#define _3DPRIM_LINESTRIP_ADJ 10 +#define _3DPRIM_TRILIST_ADJ 11 +#define _3DPRIM_TRISTRIP_ADJ 12 +#define _3DPRIM_TRISTRIP_REVERSE 13 +#define _3DPRIM_POLYGON 14 +#define _3DPRIM_RECTLIST 15 +#define _3DPRIM_LINELOOP 16 +#define _3DPRIM_POINTLIST_BF 17 +#define _3DPRIM_LINESTRIP_CONT 18 +#define _3DPRIM_LINESTRIP_BF 19 +#define _3DPRIM_LINESTRIP_CONT_BF 20 +#define _3DPRIM_TRIFAN_NOSTIPPLE 22 +#define _3DPRIM_PATCHLIST_1 32 +#define _3DPRIM_PATCHLIST_2 33 +#define _3DPRIM_PATCHLIST_3 34 +#define _3DPRIM_PATCHLIST_4 35 +#define _3DPRIM_PATCHLIST_5 36 +#define _3DPRIM_PATCHLIST_6 37 +#define _3DPRIM_PATCHLIST_7 38 +#define _3DPRIM_PATCHLIST_8 39 +#define _3DPRIM_PATCHLIST_9 40 +#define _3DPRIM_PATCHLIST_10 41 +#define _3DPRIM_PATCHLIST_11 42 +#define _3DPRIM_PATCHLIST_12 43 +#define _3DPRIM_PATCHLIST_13 44 +#define _3DPRIM_PATCHLIST_14 45 +#define _3DPRIM_PATCHLIST_15 46 +#define _3DPRIM_PATCHLIST_16 47 +#define _3DPRIM_PATCHLIST_17 48 +#define _3DPRIM_PATCHLIST_18 49 +#define _3DPRIM_PATCHLIST_19 50 +#define _3DPRIM_PATCHLIST_20 51 +#define _3DPRIM_PATCHLIST_21 52 +#define _3DPRIM_PATCHLIST_22 53 +#define _3DPRIM_PATCHLIST_23 54 +#define _3DPRIM_PATCHLIST_24 55 +#define _3DPRIM_PATCHLIST_25 56 +#define _3DPRIM_PATCHLIST_26 57 +#define _3DPRIM_PATCHLIST_27 58 +#define _3DPRIM_PATCHLIST_28 59 +#define _3DPRIM_PATCHLIST_29 60 +#define _3DPRIM_PATCHLIST_30 61 +#define _3DPRIM_PATCHLIST_31 62 +#define _3DPRIM_PATCHLIST_32 63 + +/* Enum 3D_Vertex_Component_Control */ +#define VFCOMP_NOSTORE 0 +#define VFCOMP_STORE_SRC 1 +#define VFCOMP_STORE_0 2 +#define VFCOMP_STORE_1_FP 3 +#define VFCOMP_STORE_1_INT 4 +#define VFCOMP_STORE_VID 5 +#define VFCOMP_STORE_IID 6 +#define VFCOMP_STORE_PID 7 + +/* Enum 3D_Compare_Function */ +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + +/* Enum SURFACE_FORMAT */ +#define R32G32B32A32_FLOAT 0 +#define R32G32B32A32_SINT 1 +#define R32G32B32A32_UINT 2 +#define R32G32B32A32_UNORM 3 +#define R32G32B32A32_SNORM 4 +#define R64G64_FLOAT 5 +#define R32G32B32X32_FLOAT 6 +#define R32G32B32A32_SSCALED 7 +#define R32G32B32A32_USCALED 8 +#define R32G32B32A32_SFIXED 32 +#define R64G64_PASSTHRU 33 +#define R32G32B32_FLOAT 64 +#define R32G32B32_SINT 65 +#define R32G32B32_UINT 66 +#define R32G32B32_UNORM 67 +#define R32G32B32_SNORM 68 +#define R32G32B32_SSCALED 69 +#define R32G32B32_USCALED 70 +#define R32G32B32_SFIXED 80 +#define R16G16B16A16_UNORM 128 +#define R16G16B16A16_SNORM 129 +#define R16G16B16A16_SINT 130 +#define R16G16B16A16_UINT 131 +#define R16G16B16A16_FLOAT 132 +#define R32G32_FLOAT 133 +#define R32G32_SINT 134 +#define R32G32_UINT 135 +#define R32_FLOAT_X8X24_TYPELESS 136 +#define X32_TYPELESS_G8X24_UINT 137 +#define L32A32_FLOAT 138 +#define R32G32_UNORM 139 +#define R32G32_SNORM 140 +#define R64_FLOAT 141 +#define R16G16B16X16_UNORM 142 +#define R16G16B16X16_FLOAT 143 +#define A32X32_FLOAT 144 +#define L32X32_FLOAT 145 +#define I32X32_FLOAT 146 +#define R16G16B16A16_SSCALED 147 +#define R16G16B16A16_USCALED 148 +#define R32G32_SSCALED 149 +#define R32G32_USCALED 150 +#define R32G32_SFIXED 160 +#define R64_PASSTHRU 161 +#define B8G8R8A8_UNORM 192 +#define B8G8R8A8_UNORM_SRGB 193 +#define R10G10B10A2_UNORM 194 +#define R10G10B10A2_UNORM_SRGB 195 +#define R10G10B10A2_UINT 196 +#define R10G10B10_SNORM_A2_UNORM 197 +#define R8G8B8A8_UNORM 199 +#define R8G8B8A8_UNORM_SRGB 200 +#define R8G8B8A8_SNORM 201 +#define R8G8B8A8_SINT 202 +#define R8G8B8A8_UINT 203 +#define R16G16_UNORM 204 +#define R16G16_SNORM 205 +#define R16G16_SINT 206 +#define R16G16_UINT 207 +#define R16G16_FLOAT 208 +#define B10G10R10A2_UNORM 209 +#define B10G10R10A2_UNORM_SRGB 210 +#define R11G11B10_FLOAT 211 +#define R32_SINT 214 +#define R32_UINT 215 +#define R32_FLOAT 216 +#define R24_UNORM_X8_TYPELESS 217 +#define X24_TYPELESS_G8_UINT 218 +#define L32_UNORM 221 +#define A32_UNORM 222 +#define L16A16_UNORM 223 +#define I24X8_UNORM 224 +#define L24X8_UNORM 225 +#define A24X8_UNORM 226 +#define I32_FLOAT 227 +#define L32_FLOAT 228 +#define A32_FLOAT 229 +#define X8B8_UNORM_G8R8_SNORM 230 +#define A8X8_UNORM_G8R8_SNORM 231 +#define B8X8_UNORM_G8R8_SNORM 232 +#define B8G8R8X8_UNORM 233 +#define B8G8R8X8_UNORM_SRGB 234 +#define R8G8B8X8_UNORM 235 +#define R8G8B8X8_UNORM_SRGB 236 +#define R9G9B9E5_SHAREDEXP 237 +#define B10G10R10X2_UNORM 238 +#define L16A16_FLOAT 240 +#define R32_UNORM 241 +#define R32_SNORM 242 +#define R10G10B10X2_USCALED 243 +#define R8G8B8A8_SSCALED 244 +#define R8G8B8A8_USCALED 245 +#define R16G16_SSCALED 246 +#define R16G16_USCALED 247 +#define R32_SSCALED 248 +#define R32_USCALED 249 +#define B5G6R5_UNORM 256 +#define B5G6R5_UNORM_SRGB 257 +#define B5G5R5A1_UNORM 258 +#define B5G5R5A1_UNORM_SRGB 259 +#define B4G4R4A4_UNORM 260 +#define B4G4R4A4_UNORM_SRGB 261 +#define R8G8_UNORM 262 +#define R8G8_SNORM 263 +#define R8G8_SINT 264 +#define R8G8_UINT 265 +#define R16_UNORM 266 +#define R16_SNORM 267 +#define R16_SINT 268 +#define R16_UINT 269 +#define R16_FLOAT 270 +#define A8P8_UNORM_PALETTE0 271 +#define A8P8_UNORM_PALETTE1 272 +#define I16_UNORM 273 +#define L16_UNORM 274 +#define A16_UNORM 275 +#define L8A8_UNORM 276 +#define I16_FLOAT 277 +#define L16_FLOAT 278 +#define A16_FLOAT 279 +#define L8A8_UNORM_SRGB 280 +#define R5G5_SNORM_B6_UNORM 281 +#define B5G5R5X1_UNORM 282 +#define B5G5R5X1_UNORM_SRGB 283 +#define R8G8_SSCALED 284 +#define R8G8_USCALED 285 +#define R16_SSCALED 286 +#define R16_USCALED 287 +#define P8A8_UNORM_PALETTE0 290 +#define P8A8_UNORM_PALETTE1 291 +#define A1B5G5R5_UNORM 292 +#define A4B4G4R4_UNORM 293 +#define L8A8_UINT 294 +#define L8A8_SINT 295 +#define R8_UNORM 320 +#define R8_SNORM 321 +#define R8_SINT 322 +#define R8_UINT 323 +#define A8_UNORM 324 +#define I8_UNORM 325 +#define L8_UNORM 326 +#define P4A4_UNORM_PALETTE0 327 +#define A4P4_UNORM_PALETTE0 328 +#define R8_SSCALED 329 +#define R8_USCALED 330 +#define P8_UNORM_PALETTE0 331 +#define L8_UNORM_SRGB 332 +#define P8_UNORM_PALETTE1 333 +#define P4A4_UNORM_PALETTE1 334 +#define A4P4_UNORM_PALETTE1 335 +#define Y8_UNORM 336 +#define L8_UINT 338 +#define L8_SINT 339 +#define I8_UINT 340 +#define I8_SINT 341 +#define DXT1_RGB_SRGB 384 +#define R1_UNORM 385 +#define YCRCB_NORMAL 386 +#define YCRCB_SWAPUVY 387 +#define P2_UNORM_PALETTE0 388 +#define P2_UNORM_PALETTE1 389 +#define BC1_UNORM 390 +#define BC2_UNORM 391 +#define BC3_UNORM 392 +#define BC4_UNORM 393 +#define BC5_UNORM 394 +#define BC1_UNORM_SRGB 395 +#define BC2_UNORM_SRGB 396 +#define BC3_UNORM_SRGB 397 +#define MONO8 398 +#define YCRCB_SWAPUV 399 +#define YCRCB_SWAPY 400 +#define DXT1_RGB 401 +#define FXT1 402 +#define R8G8B8_UNORM 403 +#define R8G8B8_SNORM 404 +#define R8G8B8_SSCALED 405 +#define R8G8B8_USCALED 406 +#define R64G64B64A64_FLOAT 407 +#define R64G64B64_FLOAT 408 +#define BC4_SNORM 409 +#define BC5_SNORM 410 +#define R16G16B16_FLOAT 411 +#define R16G16B16_UNORM 412 +#define R16G16B16_SNORM 413 +#define R16G16B16_SSCALED 414 +#define R16G16B16_USCALED 415 +#define BC6H_SF16 417 +#define BC7_UNORM 418 +#define BC7_UNORM_SRGB 419 +#define BC6H_UF16 420 +#define PLANAR_420_8 421 +#define R8G8B8_UNORM_SRGB 424 +#define ETC1_RGB8 425 +#define ETC2_RGB8 426 +#define EAC_R11 427 +#define EAC_RG11 428 +#define EAC_SIGNED_R11 429 +#define EAC_SIGNED_RG11 430 +#define ETC2_SRGB8 431 +#define R16G16B16_UINT 432 +#define R16G16B16_SINT 433 +#define R32_SFIXED 434 +#define R10G10B10A2_SNORM 435 +#define R10G10B10A2_USCALED 436 +#define R10G10B10A2_SSCALED 437 +#define R10G10B10A2_SINT 438 +#define B10G10R10A2_SNORM 439 +#define B10G10R10A2_USCALED 440 +#define B10G10R10A2_SSCALED 441 +#define B10G10R10A2_UINT 442 +#define B10G10R10A2_SINT 443 +#define R64G64B64A64_PASSTHRU 444 +#define R64G64B64_PASSTHRU 445 +#define ETC2_RGB8_PTA 448 +#define ETC2_SRGB8_PTA 449 +#define ETC2_EAC_RGBA8 450 +#define ETC2_EAC_SRGB8_A8 451 +#define R8G8B8_UINT 456 +#define R8G8B8_SINT 457 +#define RAW 511 + +/* Enum Texture Coordinate Mode */ +#define TCM_WRAP 0 +#define TCM_MIRROR 1 +#define TCM_CLAMP 2 +#define TCM_CUBE 3 +#define TCM_CLAMP_BORDER 4 +#define TCM_MIRROR_ONCE 5 + diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c new file mode 100644 index 00000000000..7108d74ba65 --- /dev/null +++ b/src/vulkan/gen7_cmd_buffer.c @@ -0,0 +1,1007 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +#include "gen7_pack.h" +#include "gen75_pack.h" + +static uint32_t +cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) +{ + static const uint32_t push_constant_opcodes[] = { + [MESA_SHADER_VERTEX] = 21, + [MESA_SHADER_TESS_CTRL] = 25, /* HS */ + [MESA_SHADER_TESS_EVAL] = 26, /* DS */ + [MESA_SHADER_GEOMETRY] = 22, + [MESA_SHADER_FRAGMENT] = 23, + [MESA_SHADER_COMPUTE] = 0, + }; + + VkShaderStageFlags flushed = 0; + + anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { + if (stage == MESA_SHADER_COMPUTE) + continue; + + struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); + + if (state.offset == 0) + continue; + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_CONSTANT_VS, + ._3DCommandSubOpcode = push_constant_opcodes[stage], + .ConstantBody = { + .PointerToConstantBuffer0 = { .offset = state.offset }, + .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), + }); + + flushed |= mesa_to_vk_shader_stage(stage); + } + + cmd_buffer->state.push_constants_dirty &= ~flushed; + + return flushed; +} + +GENX_FUNC(GEN7, GEN7) void +genX(cmd_buffer_emit_descriptor_pointers)(struct anv_cmd_buffer *cmd_buffer, + uint32_t stages) +{ + static const uint32_t sampler_state_opcodes[] = { + [MESA_SHADER_VERTEX] = 43, + [MESA_SHADER_TESS_CTRL] = 44, /* HS */ + [MESA_SHADER_TESS_EVAL] = 45, /* DS */ + [MESA_SHADER_GEOMETRY] = 46, + [MESA_SHADER_FRAGMENT] = 47, + [MESA_SHADER_COMPUTE] = 0, + }; + + static const uint32_t binding_table_opcodes[] = { + [MESA_SHADER_VERTEX] = 38, + [MESA_SHADER_TESS_CTRL] = 39, + [MESA_SHADER_TESS_EVAL] = 40, + [MESA_SHADER_GEOMETRY] = 41, + [MESA_SHADER_FRAGMENT] = 42, + [MESA_SHADER_COMPUTE] = 0, + }; + + anv_foreach_stage(s, stages) { + if (cmd_buffer->state.samplers[s].alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS, + ._3DCommandSubOpcode = sampler_state_opcodes[s], + .PointertoVSSamplerState = cmd_buffer->state.samplers[s].offset); + } + + /* Always emit binding table pointers if we're asked to, since on SKL + * this is what flushes push constants. */ + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS, + ._3DCommandSubOpcode = binding_table_opcodes[s], + .PointertoVSBindingTable = cmd_buffer->state.binding_tables[s].offset); + } +} + +GENX_FUNC(GEN7, GEN7) uint32_t +genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer) +{ + VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty & + cmd_buffer->state.pipeline->active_stages; + + VkResult result = VK_SUCCESS; + anv_foreach_stage(s, dirty) { + result = anv_cmd_buffer_emit_samplers(cmd_buffer, s, + &cmd_buffer->state.samplers[s]); + if (result != VK_SUCCESS) + break; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s, + &cmd_buffer->state.binding_tables[s]); + if (result != VK_SUCCESS) + break; + } + + if (result != VK_SUCCESS) { + assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY); + + result = anv_cmd_buffer_new_binding_table_block(cmd_buffer); + assert(result == VK_SUCCESS); + + /* Re-emit state base addresses so we get the new surface state base + * address before we start emitting binding tables etc. + */ + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + /* Re-emit all active binding tables */ + dirty |= cmd_buffer->state.pipeline->active_stages; + anv_foreach_stage(s, dirty) { + result = anv_cmd_buffer_emit_samplers(cmd_buffer, s, + &cmd_buffer->state.samplers[s]); + if (result != VK_SUCCESS) + return result; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s, + &cmd_buffer->state.binding_tables[s]); + if (result != VK_SUCCESS) + return result; + } + } + + cmd_buffer->state.descriptors_dirty &= ~dirty; + + return dirty; +} + +static inline int64_t +clamp_int64(int64_t x, int64_t min, int64_t max) +{ + if (x < min) + return min; + else if (x < max) + return x; + else + return max; +} + +static void +emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t count, const VkRect2D *scissors) +{ + struct anv_state scissor_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); + + for (uint32_t i = 0; i < count; i++) { + const VkRect2D *s = &scissors[i]; + + /* Since xmax and ymax are inclusive, we have to have xmax < xmin or + * ymax < ymin for empty clips. In case clip x, y, width height are all + * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't + * what we want. Just special case empty clips and produce a canonical + * empty clip. */ + static const struct GEN7_SCISSOR_RECT empty_scissor = { + .ScissorRectangleYMin = 1, + .ScissorRectangleXMin = 1, + .ScissorRectangleYMax = 0, + .ScissorRectangleXMax = 0 + }; + + const int max = 0xffff; + struct GEN7_SCISSOR_RECT scissor = { + /* Do this math using int64_t so overflow gets clamped correctly. */ + .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max), + .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max), + .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max), + .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max) + }; + + if (s->extent.width <= 0 || s->extent.height <= 0) { + GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8, + &empty_scissor); + } else { + GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8, &scissor); + } + } + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_SCISSOR_STATE_POINTERS, + .ScissorRectPointer = scissor_state.offset); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(scissor_state); +} + +GENX_FUNC(GEN7, GEN7) void +genX(cmd_buffer_emit_scissor)(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->state.dynamic.scissor.count > 0) { + emit_scissor_state(cmd_buffer, cmd_buffer->state.dynamic.scissor.count, + cmd_buffer->state.dynamic.scissor.scissors); + } else { + /* Emit a default scissor based on the currently bound framebuffer */ + emit_scissor_state(cmd_buffer, 1, + &(VkRect2D) { + .offset = { .x = 0, .y = 0, }, + .extent = { + .width = cmd_buffer->state.framebuffer->width, + .height = cmd_buffer->state.framebuffer->height, + }, + }); + } +} + +static const uint32_t vk_to_gen_index_type[] = { + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, +}; + +static const uint32_t restart_index_for_type[] = { + [VK_INDEX_TYPE_UINT16] = UINT16_MAX, + [VK_INDEX_TYPE_UINT32] = UINT32_MAX, +}; + +void genX(CmdBindIndexBuffer)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; + if (ANV_IS_HASWELL) + cmd_buffer->state.restart_index = restart_index_for_type[indexType]; + cmd_buffer->state.gen7.index_buffer = buffer; + cmd_buffer->state.gen7.index_type = vk_to_gen_index_type[indexType]; + cmd_buffer->state.gen7.index_offset = offset; +} + +static VkResult +flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = anv_cmd_buffer_emit_samplers(cmd_buffer, + MESA_SHADER_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, + MESA_SHADER_COMPUTE, &surfaces); + if (result != VK_SUCCESS) + return result; + + struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); + + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; + unsigned push_constant_data_size = + (prog_data->nr_params + local_id_dwords) * 4; + unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); + unsigned push_constant_regs = reg_aligned_constant_size / 32; + + if (push_state.alloc_size) { + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), + .CURBETotalDataLength = push_state.alloc_size, + .CURBEDataStartAddress = push_state.offset); + } + + assert(prog_data->total_shared <= 64 * 1024); + uint32_t slm_size = 0; + if (prog_data->total_shared > 0) { + /* slm_size is in 4k increments, but must be a power of 2. */ + slm_size = 4 * 1024; + while (slm_size < prog_data->total_shared) + slm_size <<= 1; + slm_size /= 4 * 1024; + } + + struct anv_state state = + anv_state_pool_emit(&device->dynamic_state_pool, + GEN7_INTERFACE_DESCRIPTOR_DATA, 64, + .KernelStartPointer = pipeline->cs_simd, + .BindingTablePointer = surfaces.offset, + .SamplerStatePointer = samplers.offset, + .ConstantURBEntryReadLength = + push_constant_regs, + .ConstantURBEntryReadOffset = 0, + .BarrierEnable = cs_prog_data->uses_barrier, + .SharedLocalMemorySize = slm_size, + .NumberofThreadsinGPGPUThreadGroup = + pipeline->cs_thread_width_max); + + const uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); + anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD, + .InterfaceDescriptorTotalLength = size, + .InterfaceDescriptorDataStartAddress = state.offset); + + return VK_SUCCESS; +} + +static void +emit_lrm(struct anv_batch *batch, + uint32_t reg, struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN7_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); +} + +static void +emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GEN7_MI_LOAD_REGISTER_IMM, + .RegisterOffset = reg, + .DataDWord = imm); +} + +static void +cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + VkResult result; + + assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + + if (cmd_buffer->state.current_pipeline != GPGPU) { + anv_batch_emit(&cmd_buffer->batch, GEN7_PIPELINE_SELECT, + .PipelineSelection = GPGPU); + cmd_buffer->state.current_pipeline = GPGPU; + } + + if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { + /* FIXME: figure out descriptors for gen7 */ + result = flush_compute_descriptor_set(cmd_buffer); + assert(result == VK_SUCCESS); + cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; + } + + cmd_buffer->state.compute_dirty = 0; +} + +static void +cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t *p; + + uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; + + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); + + if (cmd_buffer->state.current_pipeline != _3D) { + anv_batch_emit(&cmd_buffer->batch, GEN7_PIPELINE_SELECT, + .PipelineSelection = _3D); + cmd_buffer->state.current_pipeline = _3D; + } + + if (vb_emit) { + const uint32_t num_buffers = __builtin_popcount(vb_emit); + const uint32_t num_dwords = 1 + num_buffers * 4; + + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GEN7_3DSTATE_VERTEX_BUFFERS); + uint32_t vb, i = 0; + for_each_bit(vb, vb_emit) { + struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; + + struct GEN7_VERTEX_BUFFER_STATE state = { + .VertexBufferIndex = vb, + .BufferAccessType = pipeline->instancing_enable[vb] ? INSTANCEDATA : VERTEXDATA, + .VertexBufferMemoryObjectControlState = GEN7_MOCS, + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .EndAddress = { buffer->bo, buffer->offset + buffer->size - 1}, + .InstanceDataStepRate = 1 + }; + + GEN7_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ + if (cmd_buffer->state.scratch_size < pipeline->total_scratch) + gen7_cmd_buffer_emit_state_base_address(cmd_buffer); + + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } + + if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT || + cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_VERTEX_BIT) { + /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: + * + * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth + * stall needs to be sent just prior to any 3DSTATE_VS, + * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS, + * 3DSTATE_BINDING_TABLE_POINTER_VS, + * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one + * PIPE_CONTROL needs to be sent before any combination of VS + * associated 3DSTATE." + */ + anv_batch_emit(&cmd_buffer->batch, GEN7_PIPE_CONTROL, + .DepthStallEnable = true, + .PostSyncOperation = WriteImmediateData, + .Address = { &cmd_buffer->device->workaround_bo, 0 }); + } + + uint32_t dirty = 0; + if (cmd_buffer->state.descriptors_dirty) { + dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); + gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); + } + + if (cmd_buffer->state.push_constants_dirty) + cmd_buffer_flush_push_constants(cmd_buffer); + + /* We use the gen8 state here because it only contains the additional + * min/max fields and, since they occur at the end of the packet and + * don't change the stride, they work on gen7 too. + */ + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) + gen8_cmd_buffer_emit_viewport(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) + gen7_cmd_buffer_emit_scissor(cmd_buffer); + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) { + + bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || + cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; + + uint32_t sf_dw[GEN7_3DSTATE_SF_length]; + struct GEN7_3DSTATE_SF sf = { + GEN7_3DSTATE_SF_header, + .LineWidth = cmd_buffer->state.dynamic.line_width, + .GlobalDepthOffsetEnableSolid = enable_bias, + .GlobalDepthOffsetEnableWireframe = enable_bias, + .GlobalDepthOffsetEnablePoint = enable_bias, + .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, + .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, + .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp + }; + GEN7_3DSTATE_SF_pack(NULL, sf_dw, &sf); + + anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen7.sf); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + GEN7_COLOR_CALC_STATE_length * 4, + 64); + struct GEN7_COLOR_CALC_STATE cc = { + .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], + .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], + .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], + .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], + .StencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.front, + .BackFaceStencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.back, + }; + GEN7_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(cc_state); + + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = cc_state.offset); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_RENDER_TARGETS | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { + uint32_t depth_stencil_dw[GEN7_DEPTH_STENCIL_STATE_length]; + + const struct anv_image_view *iview = + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); + + struct GEN7_DEPTH_STENCIL_STATE depth_stencil = { + .StencilBufferWriteEnable = iview && (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT), + + .StencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, + .StencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, + + .BackfaceStencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, + }; + GEN7_DEPTH_STENCIL_STATE_pack(NULL, depth_stencil_dw, &depth_stencil); + + struct anv_state ds_state = + anv_cmd_buffer_merge_dynamic(cmd_buffer, depth_stencil_dw, + pipeline->gen7.depth_stencil_state, + GEN7_DEPTH_STENCIL_STATE_length, 64); + + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, + .PointertoDEPTH_STENCIL_STATE = ds_state.offset); + } + + if (cmd_buffer->state.gen7.index_buffer && + cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_INDEX_BUFFER)) { + struct anv_buffer *buffer = cmd_buffer->state.gen7.index_buffer; + uint32_t offset = cmd_buffer->state.gen7.index_offset; + + if (ANV_IS_HASWELL) { + anv_batch_emit(&cmd_buffer->batch, GEN75_3DSTATE_VF, + .IndexedDrawCutIndexEnable = pipeline->primitive_restart, + .CutIndex = cmd_buffer->state.restart_index); + } + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_INDEX_BUFFER, + .CutIndexEnable = pipeline->primitive_restart, + .IndexFormat = cmd_buffer->state.gen7.index_type, + .MemoryObjectControlState = GEN7_MOCS, + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferEndingAddress = { buffer->bo, buffer->offset + buffer->size }); + } + + cmd_buffer->state.vb_dirty &= ~vb_emit; + cmd_buffer->state.dirty = 0; +} + +static void +emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, uint32_t offset) +{ + uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, 5, + GENX(3DSTATE_VERTEX_BUFFERS)); + + GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, p + 1, + &(struct GENX(VERTEX_BUFFER_STATE)) { + .VertexBufferIndex = 32, /* Reserved for this */ + .VertexBufferMemoryObjectControlState = GENX(MOCS), + .AddressModifyEnable = true, + .BufferPitch = 0, + .BufferStartingAddress = { bo, offset }, + .EndAddress = { bo, offset + 8 }, + }); +} + +static void +emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer, + uint32_t base_vertex, uint32_t base_instance) +{ + struct anv_state id_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4); + + ((uint32_t *)id_state.map)[0] = base_vertex; + ((uint32_t *)id_state.map)[1] = base_instance; + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(id_state); + + emit_base_vertex_instance_bo(cmd_buffer, + &cmd_buffer->device->dynamic_state_block_pool.bo, id_state.offset); +} + +void genX(CmdDraw)( + VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + + cmd_buffer_flush_state(cmd_buffer); + + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance); + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, + .VertexAccessType = SEQUENTIAL, + .PrimitiveTopologyType = pipeline->topology, + .VertexCountPerInstance = vertexCount, + .StartVertexLocation = firstVertex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = 0); +} + +void genX(CmdDrawIndexed)( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + + cmd_buffer_flush_state(cmd_buffer); + + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance); + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, + .VertexAccessType = RANDOM, + .PrimitiveTopologyType = pipeline->topology, + .VertexCountPerInstance = indexCount, + .StartVertexLocation = firstIndex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = vertexOffset); +} + +/* Auto-Draw / Indirect Registers */ +#define GEN7_3DPRIM_END_OFFSET 0x2420 +#define GEN7_3DPRIM_START_VERTEX 0x2430 +#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 +#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 +#define GEN7_3DPRIM_START_INSTANCE 0x243C +#define GEN7_3DPRIM_BASE_VERTEX 0x2440 + +void genX(CmdDrawIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + cmd_buffer_flush_state(cmd_buffer); + + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 8); + + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); + emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = SEQUENTIAL, + .PrimitiveTopologyType = pipeline->topology); +} + +void genX(CmdDrawIndexedIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + cmd_buffer_flush_state(cmd_buffer); + + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 12); + + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = RANDOM, + .PrimitiveTopologyType = pipeline->topology); +} + +void genX(CmdDispatch)( + VkCommandBuffer commandBuffer, + uint32_t x, + uint32_t y, + uint32_t z) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + + if (prog_data->uses_num_work_groups) { + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4); + uint32_t *sizes = state.map; + sizes[0] = x; + sizes[1] = y; + sizes[2] = z; + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + cmd_buffer->state.num_workgroups_offset = state.offset; + cmd_buffer->state.num_workgroups_bo = + &cmd_buffer->device->dynamic_state_block_pool.bo; + } + + cmd_buffer_flush_compute_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN7_GPGPU_WALKER, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, + .ThreadGroupIDXDimension = x, + .ThreadGroupIDYDimension = y, + .ThreadGroupIDZDimension = z, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_STATE_FLUSH); +} + +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 + +void genX(CmdDispatchIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + if (prog_data->uses_num_work_groups) { + cmd_buffer->state.num_workgroups_offset = bo_offset; + cmd_buffer->state.num_workgroups_bo = bo; + } + + cmd_buffer_flush_compute_state(cmd_buffer); + + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + + anv_batch_emit(&cmd_buffer->batch, GEN7_GPGPU_WALKER, + .IndirectParameterEnable = true, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_STATE_FLUSH); +} + +static void +cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) +{ + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct anv_image_view *iview = + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); + const struct anv_image *image = iview ? iview->image : NULL; + + /* XXX: isl needs to grow depth format support */ + const struct anv_format *anv_format = + iview ? anv_format_for_vk_format(iview->vk_format) : NULL; + + const bool has_depth = iview && anv_format->depth_format; + const bool has_stencil = iview && anv_format->has_stencil; + + /* Emit 3DSTATE_DEPTH_BUFFER */ + if (has_depth) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), + .SurfaceType = SURFTYPE_2D, + .DepthWriteEnable = true, + .StencilWriteEnable = has_stencil, + .HierarchicalDepthBufferEnable = false, + .SurfaceFormat = anv_format->depth_format, + .SurfacePitch = image->depth_surface.isl.row_pitch - 1, + .SurfaceBaseAddress = { + .bo = image->bo, + .offset = image->depth_surface.offset, + }, + .Height = fb->height - 1, + .Width = fb->width - 1, + .LOD = 0, + .Depth = 1 - 1, + .MinimumArrayElement = 0, + .DepthBufferObjectControlState = GENX(MOCS), + .RenderTargetViewExtent = 1 - 1); + } else { + /* Even when no depth buffer is present, the hardware requires that + * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says: + * + * If a null depth buffer is bound, the driver must instead bind depth as: + * 3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D + * 3DSTATE_DEPTH.Width = 1 + * 3DSTATE_DEPTH.Height = 1 + * 3DSTATE_DEPTH.SuraceFormat = D16_UNORM + * 3DSTATE_DEPTH.SurfaceBaseAddress = 0 + * 3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0 + * 3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0 + * 3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0 + * + * The PRM is wrong, though. The width and height must be programmed to + * actual framebuffer's width and height, even when neither depth buffer + * nor stencil buffer is present. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), + .SurfaceType = SURFTYPE_2D, + .SurfaceFormat = D16_UNORM, + .Width = fb->width - 1, + .Height = fb->height - 1, + .StencilWriteEnable = has_stencil); + } + + /* Emit 3DSTATE_STENCIL_BUFFER */ + if (has_stencil) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER), +# if (ANV_IS_HASWELL) + .StencilBufferEnable = true, +# endif + .StencilBufferObjectControlState = GENX(MOCS), + + /* Stencil buffers have strange pitch. The PRM says: + * + * The pitch must be set to 2x the value computed based on width, + * as the stencil buffer is stored with two rows interleaved. + */ + .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1, + + .SurfaceBaseAddress = { + .bo = image->bo, + .offset = image->offset + image->stencil_surface.offset, + }); + } else { + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_STENCIL_BUFFER); + } + + /* Disable hierarchial depth buffers. */ + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER); + + /* Clear the clear params. */ + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_CLEAR_PARAMS); +} + +/** + * @see anv_cmd_buffer_set_subpass() + */ +GENX_FUNC(GEN7, GEN7) void +genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) +{ + cmd_buffer->state.subpass = subpass; + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS; + + cmd_buffer_emit_depth_stencil(cmd_buffer); +} + +void genX(CmdBeginRenderPass)( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); + ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); + + cmd_buffer->state.framebuffer = framebuffer; + cmd_buffer->state.pass = pass; + anv_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin); + + const VkRect2D *render_area = &pRenderPassBegin->renderArea; + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DRAWING_RECTANGLE, + .ClippedDrawingRectangleYMin = render_area->offset.y, + .ClippedDrawingRectangleXMin = render_area->offset.x, + .ClippedDrawingRectangleYMax = + render_area->offset.y + render_area->extent.height - 1, + .ClippedDrawingRectangleXMax = + render_area->offset.x + render_area->extent.width - 1, + .DrawingRectangleOriginY = 0, + .DrawingRectangleOriginX = 0); + + gen7_cmd_buffer_set_subpass(cmd_buffer, pass->subpasses); + anv_cmd_buffer_clear_subpass(cmd_buffer); +} + +void genX(CmdNextSubpass)( + VkCommandBuffer commandBuffer, + VkSubpassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + + anv_cmd_buffer_resolve_subpass(cmd_buffer); + gen7_cmd_buffer_set_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); + anv_cmd_buffer_clear_subpass(cmd_buffer); +} + +void genX(CmdEndRenderPass)( + VkCommandBuffer commandBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + anv_cmd_buffer_resolve_subpass(cmd_buffer); + + /* Emit a flushing pipe control at the end of a pass. This is kind of a + * hack but it ensures that render targets always actually get written. + * Eventually, we should do flushing based on image format transitions + * or something of that nature. + */ + anv_batch_emit(&cmd_buffer->batch, GEN7_PIPE_CONTROL, + .PostSyncOperation = NoWrite, + .RenderTargetCacheFlushEnable = true, + .InstructionCacheInvalidateEnable = true, + .DepthCacheFlushEnable = true, + .VFCacheInvalidationEnable = true, + .TextureCacheInvalidationEnable = true, + .CommandStreamerStallEnable = true); +} + +void genX(CmdSetEvent)( + VkCommandBuffer commandBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + stub(); +} + +void genX(CmdResetEvent)( + VkCommandBuffer commandBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + stub(); +} + +void genX(CmdWaitEvents)( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) +{ + stub(); +} diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h new file mode 100644 index 00000000000..7b104c3ab3c --- /dev/null +++ b/src/vulkan/gen7_pack.h @@ -0,0 +1,7003 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +/* Instructions, enums and structures for IVB. + * + * This file has been generated, do not hand edit. + */ + +#pragma once + +#include <stdio.h> +#include <assert.h> + +#ifndef __gen_validate_value +#define __gen_validate_value(x) +#endif + +#ifndef __gen_field_functions +#define __gen_field_functions + +union __gen_value { + float f; + uint32_t dw; +}; + +static inline uint64_t +__gen_mbo(uint32_t start, uint32_t end) +{ + return (~0ull >> (64 - (end - start + 1))) << start; +} + +static inline uint64_t +__gen_field(uint64_t v, uint32_t start, uint32_t end) +{ + __gen_validate_value(v); +#if DEBUG + if (end - start + 1 < 64) + assert(v < 1ull << (end - start + 1)); +#endif + + return v << start; +} + +static inline uint64_t +__gen_fixed(float v, uint32_t start, uint32_t end, + bool is_signed, uint32_t fract_bits) +{ + __gen_validate_value(v); + + const float factor = (1 << fract_bits); + + float max, min; + if (is_signed) { + max = ((1 << (end - start)) - 1) / factor; + min = -(1 << (end - start)) / factor; + } else { + max = ((1 << (end - start + 1)) - 1) / factor; + min = 0.0f; + } + + if (v > max) + v = max; + else if (v < min) + v = min; + + int32_t int_val = roundf(v * factor); + + if (is_signed) + int_val &= (1 << (end - start + 1)) - 1; + + return int_val << start; +} + +static inline uint64_t +__gen_offset(uint64_t v, uint32_t start, uint32_t end) +{ + __gen_validate_value(v); +#if DEBUG + uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start; + + assert((v & ~mask) == 0); +#endif + + return v; +} + +static inline uint32_t +__gen_float(float v) +{ + __gen_validate_value(v); + return ((union __gen_value) { .f = (v) }).dw; +} + +#ifndef __gen_address_type +#error #define __gen_address_type before including this file +#endif + +#ifndef __gen_user_data +#error #define __gen_combine_address before including this file +#endif + +#endif + +#define GEN7_3DSTATE_URB_VS_length_bias 0x00000002 +#define GEN7_3DSTATE_URB_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 48, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_URB_VS_length 0x00000002 + +struct GEN7_3DSTATE_URB_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t VSURBStartingAddress; + uint32_t VSURBEntryAllocationSize; + uint32_t VSNumberofURBEntries; +}; + +static inline void +GEN7_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_URB_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->VSURBStartingAddress, 25, 29) | + __gen_field(values->VSURBEntryAllocationSize, 16, 24) | + __gen_field(values->VSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN7_MI_STORE_REGISTER_MEM_length_bias 0x00000002 +#define GEN7_MI_STORE_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 36, \ + .DwordLength = 1 + +#define GEN7_MI_STORE_REGISTER_MEM_length 0x00000003 + +struct GEN7_MI_STORE_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN7_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_STORE_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN7_PIPELINE_SELECT_length_bias 0x00000001 +#define GEN7_PIPELINE_SELECT_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4 + +#define GEN7_PIPELINE_SELECT_length 0x00000001 + +struct GEN7_PIPELINE_SELECT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; +#define _3D 0 +#define Media 1 +#define GPGPU 2 + uint32_t PipelineSelection; +}; + +static inline void +GEN7_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_PIPELINE_SELECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->PipelineSelection, 0, 1) | + 0; + +} + +#define GEN7_STATE_BASE_ADDRESS_length_bias 0x00000002 +#define GEN7_STATE_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 1, \ + .DwordLength = 8 + +#define GEN7_STATE_BASE_ADDRESS_length 0x0000000a + +#define GEN7_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 + +struct GEN7_MEMORY_OBJECT_CONTROL_STATE { + uint32_t GraphicsDataTypeGFDT; + uint32_t LLCCacheabilityControlLLCCC; + uint32_t L3CacheabilityControlL3CC; +}; + +static inline void +GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEMORY_OBJECT_CONTROL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->GraphicsDataTypeGFDT, 2, 2) | + __gen_field(values->LLCCacheabilityControlLLCCC, 1, 1) | + __gen_field(values->L3CacheabilityControlL3CC, 0, 0) | + 0; + +} + +struct GEN7_STATE_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GeneralStateBaseAddress; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE GeneralStateMemoryObjectControlState; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE StatelessDataPortAccessMemoryObjectControlState; + uint32_t StatelessDataPortAccessForceWriteThru; + bool GeneralStateBaseAddressModifyEnable; + __gen_address_type SurfaceStateBaseAddress; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE SurfaceStateMemoryObjectControlState; + bool SurfaceStateBaseAddressModifyEnable; + __gen_address_type DynamicStateBaseAddress; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE DynamicStateMemoryObjectControlState; + bool DynamicStateBaseAddressModifyEnable; + __gen_address_type IndirectObjectBaseAddress; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE IndirectObjectMemoryObjectControlState; + bool IndirectObjectBaseAddressModifyEnable; + __gen_address_type InstructionBaseAddress; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE InstructionMemoryObjectControlState; + bool InstructionBaseAddressModifyEnable; + __gen_address_type GeneralStateAccessUpperBound; + bool GeneralStateAccessUpperBoundModifyEnable; + __gen_address_type DynamicStateAccessUpperBound; + bool DynamicStateAccessUpperBoundModifyEnable; + __gen_address_type IndirectObjectAccessUpperBound; + bool IndirectObjectAccessUpperBoundModifyEnable; + __gen_address_type InstructionAccessUpperBound; + bool InstructionAccessUpperBoundModifyEnable; +}; + +static inline void +GEN7_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_STATE_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_GeneralStateMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_GeneralStateMemoryObjectControlState, &values->GeneralStateMemoryObjectControlState); + uint32_t dw_StatelessDataPortAccessMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StatelessDataPortAccessMemoryObjectControlState, &values->StatelessDataPortAccessMemoryObjectControlState); + uint32_t dw1 = + __gen_field(dw_GeneralStateMemoryObjectControlState, 8, 11) | + __gen_field(dw_StatelessDataPortAccessMemoryObjectControlState, 4, 7) | + __gen_field(values->StatelessDataPortAccessForceWriteThru, 3, 3) | + __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1); + + uint32_t dw_SurfaceStateMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceStateMemoryObjectControlState, &values->SurfaceStateMemoryObjectControlState); + uint32_t dw2 = + __gen_field(dw_SurfaceStateMemoryObjectControlState, 8, 11) | + __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceStateBaseAddress, dw2); + + uint32_t dw_DynamicStateMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DynamicStateMemoryObjectControlState, &values->DynamicStateMemoryObjectControlState); + uint32_t dw3 = + __gen_field(dw_DynamicStateMemoryObjectControlState, 8, 11) | + __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->DynamicStateBaseAddress, dw3); + + uint32_t dw_IndirectObjectMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_IndirectObjectMemoryObjectControlState, &values->IndirectObjectMemoryObjectControlState); + uint32_t dw4 = + __gen_field(dw_IndirectObjectMemoryObjectControlState, 8, 11) | + __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | + 0; + + dw[4] = + __gen_combine_address(data, &dw[4], values->IndirectObjectBaseAddress, dw4); + + uint32_t dw_InstructionMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_InstructionMemoryObjectControlState, &values->InstructionMemoryObjectControlState); + uint32_t dw5 = + __gen_field(dw_InstructionMemoryObjectControlState, 8, 11) | + __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | + 0; + + dw[5] = + __gen_combine_address(data, &dw[5], values->InstructionBaseAddress, dw5); + + uint32_t dw6 = + __gen_field(values->GeneralStateAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[6] = + __gen_combine_address(data, &dw[6], values->GeneralStateAccessUpperBound, dw6); + + uint32_t dw7 = + __gen_field(values->DynamicStateAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[7] = + __gen_combine_address(data, &dw[7], values->DynamicStateAccessUpperBound, dw7); + + uint32_t dw8 = + __gen_field(values->IndirectObjectAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[8] = + __gen_combine_address(data, &dw[8], values->IndirectObjectAccessUpperBound, dw8); + + uint32_t dw9 = + __gen_field(values->InstructionAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[9] = + __gen_combine_address(data, &dw[9], values->InstructionAccessUpperBound, dw9); + +} + +#define GEN7_STATE_PREFETCH_length_bias 0x00000002 +#define GEN7_STATE_PREFETCH_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 3, \ + .DwordLength = 0 + +#define GEN7_STATE_PREFETCH_length 0x00000002 + +struct GEN7_STATE_PREFETCH { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type PrefetchPointer; + uint32_t PrefetchCount; +}; + +static inline void +GEN7_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_STATE_PREFETCH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->PrefetchCount, 0, 2) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->PrefetchPointer, dw1); + +} + +#define GEN7_STATE_SIP_length_bias 0x00000002 +#define GEN7_STATE_SIP_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2, \ + .DwordLength = 0 + +#define GEN7_STATE_SIP_length 0x00000002 + +struct GEN7_STATE_SIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SystemInstructionPointer; +}; + +static inline void +GEN7_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_STATE_SIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SystemInstructionPointer, 4, 31) | + 0; + +} + +#define GEN7_SWTESS_BASE_ADDRESS_length_bias 0x00000002 +#define GEN7_SWTESS_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 3, \ + .DwordLength = 0 + +#define GEN7_SWTESS_BASE_ADDRESS_length 0x00000002 + +struct GEN7_SWTESS_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type SWTessellationBaseAddress; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE SWTessellationMemoryObjectControlState; +}; + +static inline void +GEN7_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SWTESS_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_SWTessellationMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SWTessellationMemoryObjectControlState, &values->SWTessellationMemoryObjectControlState); + uint32_t dw1 = + __gen_field(dw_SWTessellationMemoryObjectControlState, 8, 11) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->SWTessellationBaseAddress, dw1); + +} + +#define GEN7_3DPRIMITIVE_length_bias 0x00000002 +#define GEN7_3DPRIMITIVE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 3, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 5 + +#define GEN7_3DPRIMITIVE_length 0x00000007 + +struct GEN7_3DPRIMITIVE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool IndirectParameterEnable; + bool PredicateEnable; + uint32_t DwordLength; + bool EndOffsetEnable; +#define SEQUENTIAL 0 +#define RANDOM 1 + uint32_t VertexAccessType; + uint32_t PrimitiveTopologyType; + uint32_t VertexCountPerInstance; + uint32_t StartVertexLocation; + uint32_t InstanceCount; + uint32_t StartInstanceLocation; + uint32_t BaseVertexLocation; +}; + +static inline void +GEN7_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DPRIMITIVE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->EndOffsetEnable, 9, 9) | + __gen_field(values->VertexAccessType, 8, 8) | + __gen_field(values->PrimitiveTopologyType, 0, 5) | + 0; + + dw[2] = + __gen_field(values->VertexCountPerInstance, 0, 31) | + 0; + + dw[3] = + __gen_field(values->StartVertexLocation, 0, 31) | + 0; + + dw[4] = + __gen_field(values->InstanceCount, 0, 31) | + 0; + + dw[5] = + __gen_field(values->StartInstanceLocation, 0, 31) | + 0; + + dw[6] = + __gen_field(values->BaseVertexLocation, 0, 31) | + 0; + +} + +#define GEN7_3DSTATE_AA_LINE_PARAMETERS_length_bias 0x00000002 +#define GEN7_3DSTATE_AA_LINE_PARAMETERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 1 + +#define GEN7_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 + +struct GEN7_3DSTATE_AA_LINE_PARAMETERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float AACoverageBias; + float AACoverageSlope; + float AACoverageEndCapBias; + float AACoverageEndCapSlope; +}; + +static inline void +GEN7_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_AA_LINE_PARAMETERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AACoverageBias * (1 << 8), 16, 23) | + __gen_field(values->AACoverageSlope * (1 << 8), 0, 7) | + 0; + + dw[2] = + __gen_field(values->AACoverageEndCapBias * (1 << 8), 16, 23) | + __gen_field(values->AACoverageEndCapSlope * (1 << 8), 0, 7) | + 0; + +} + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_length_bias 0x00000002 +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 40, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 + +struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSBindingTable; +}; + +static inline void +GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSBindingTable, 5, 15) | + 0; + +} + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_length_bias 0x00000002 +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 41, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 + +struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSBindingTable; +}; + +static inline void +GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSBindingTable, 5, 15) | + 0; + +} + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_length_bias 0x00000002 +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 39, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 + +struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSBindingTable; +}; + +static inline void +GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSBindingTable, 5, 15) | + 0; + +} + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_length_bias 0x00000002 +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 42, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 + +struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSBindingTable; +}; + +static inline void +GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSBindingTable, 5, 15) | + 0; + +} + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_length_bias 0x00000002 +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 38, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 + +struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSBindingTable; +}; + +static inline void +GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSBindingTable, 5, 15) | + 0; + +} + +#define GEN7_3DSTATE_BLEND_STATE_POINTERS_length_bias 0x00000002 +#define GEN7_3DSTATE_BLEND_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 36, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 + +struct GEN7_3DSTATE_BLEND_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BlendStatePointer; +}; + +static inline void +GEN7_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_BLEND_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->BlendStatePointer, 6, 31) | + __gen_mbo(0, 0) | + 0; + +} + +#define GEN7_3DSTATE_CC_STATE_POINTERS_length_bias 0x00000002 +#define GEN7_3DSTATE_CC_STATE_POINTERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 14, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_CC_STATE_POINTERS_length 0x00000002 + +struct GEN7_3DSTATE_CC_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ColorCalcStatePointer; +}; + +static inline void +GEN7_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CC_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ColorCalcStatePointer, 6, 31) | + __gen_mbo(0, 0) | + 0; + +} + +#define GEN7_3DSTATE_CHROMA_KEY_length_bias 0x00000002 +#define GEN7_3DSTATE_CHROMA_KEY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 2 + +#define GEN7_3DSTATE_CHROMA_KEY_length 0x00000004 + +struct GEN7_3DSTATE_CHROMA_KEY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ChromaKeyTableIndex; + uint32_t ChromaKeyLowValue; + uint32_t ChromaKeyHighValue; +}; + +static inline void +GEN7_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CHROMA_KEY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ChromaKeyTableIndex, 30, 31) | + 0; + + dw[2] = + __gen_field(values->ChromaKeyLowValue, 0, 31) | + 0; + + dw[3] = + __gen_field(values->ChromaKeyHighValue, 0, 31) | + 0; + +} + +#define GEN7_3DSTATE_CLEAR_PARAMS_length_bias 0x00000002 +#define GEN7_3DSTATE_CLEAR_PARAMS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 1 + +#define GEN7_3DSTATE_CLEAR_PARAMS_length 0x00000003 + +struct GEN7_3DSTATE_CLEAR_PARAMS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t DepthClearValue; + bool DepthClearValueValid; +}; + +static inline void +GEN7_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CLEAR_PARAMS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DepthClearValue, 0, 31) | + 0; + + dw[2] = + __gen_field(values->DepthClearValueValid, 0, 0) | + 0; + +} + +#define GEN7_3DSTATE_CLIP_length_bias 0x00000002 +#define GEN7_3DSTATE_CLIP_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 2 + +#define GEN7_3DSTATE_CLIP_length 0x00000004 + +struct GEN7_3DSTATE_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t FrontWinding; + uint32_t VertexSubPixelPrecisionSelect; + bool EarlyCullEnable; +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_FRONT 2 +#define CULLMODE_BACK 3 + uint32_t CullMode; + bool ClipperStatisticsEnable; + uint32_t UserClipDistanceCullTestEnableBitmask; + bool ClipEnable; +#define APIMODE_OGL 0 + uint32_t APIMode; + bool ViewportXYClipTestEnable; + bool ViewportZClipTestEnable; + bool GuardbandClipTestEnable; + uint32_t UserClipDistanceClipTestEnableBitmask; +#define CLIPMODE_NORMAL 0 +#define CLIPMODE_REJECT_ALL 3 +#define CLIPMODE_ACCEPT_ALL 4 + uint32_t ClipMode; + bool PerspectiveDivideDisable; + bool NonPerspectiveBarycentricEnable; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleStripListProvokingVertexSelect; +#define Vertex0 0 +#define Vertex1 1 + uint32_t LineStripListProvokingVertexSelect; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleFanProvokingVertexSelect; + float MinimumPointWidth; + float MaximumPointWidth; + bool ForceZeroRTAIndexEnable; + uint32_t MaximumVPIndex; +}; + +static inline void +GEN7_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->FrontWinding, 20, 20) | + __gen_field(values->VertexSubPixelPrecisionSelect, 19, 19) | + __gen_field(values->EarlyCullEnable, 18, 18) | + __gen_field(values->CullMode, 16, 17) | + __gen_field(values->ClipperStatisticsEnable, 10, 10) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + + dw[2] = + __gen_field(values->ClipEnable, 31, 31) | + __gen_field(values->APIMode, 30, 30) | + __gen_field(values->ViewportXYClipTestEnable, 28, 28) | + __gen_field(values->ViewportZClipTestEnable, 27, 27) | + __gen_field(values->GuardbandClipTestEnable, 26, 26) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 16, 23) | + __gen_field(values->ClipMode, 13, 15) | + __gen_field(values->PerspectiveDivideDisable, 9, 9) | + __gen_field(values->NonPerspectiveBarycentricEnable, 8, 8) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 4, 5) | + __gen_field(values->LineStripListProvokingVertexSelect, 2, 3) | + __gen_field(values->TriangleFanProvokingVertexSelect, 0, 1) | + 0; + + dw[3] = + __gen_field(values->MinimumPointWidth * (1 << 3), 17, 27) | + __gen_field(values->MaximumPointWidth * (1 << 3), 6, 16) | + __gen_field(values->ForceZeroRTAIndexEnable, 5, 5) | + __gen_field(values->MaximumVPIndex, 0, 3) | + 0; + +} + +#define GEN7_3DSTATE_CONSTANT_DS_length_bias 0x00000002 +#define GEN7_3DSTATE_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 5 + +#define GEN7_3DSTATE_CONSTANT_DS_length 0x00000007 + +#define GEN7_3DSTATE_CONSTANT_BODY_length 0x00000006 + +struct GEN7_3DSTATE_CONSTANT_BODY { + uint32_t ConstantBuffer1ReadLength; + uint32_t ConstantBuffer0ReadLength; + uint32_t ConstantBuffer3ReadLength; + uint32_t ConstantBuffer2ReadLength; + __gen_address_type PointerToConstantBuffer0; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + __gen_address_type PointerToConstantBuffer1; + __gen_address_type PointerToConstantBuffer2; + __gen_address_type PointerToConstantBuffer3; +}; + +static inline void +GEN7_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CONSTANT_BODY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | + 0; + + uint32_t dw_ConstantBufferObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + uint32_t dw2 = + __gen_field(dw_ConstantBufferObjectControlState, 0, 4) | + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->PointerToConstantBuffer1, dw3); + + uint32_t dw4 = + 0; + + dw[4] = + __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer2, dw4); + + uint32_t dw5 = + 0; + + dw[5] = + __gen_combine_address(data, &dw[5], values->PointerToConstantBuffer3, dw5); + +} + +struct GEN7_3DSTATE_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN7_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN7_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN7_3DSTATE_CONSTANT_GS_length_bias 0x00000002 +#define GEN7_3DSTATE_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 5 + +#define GEN7_3DSTATE_CONSTANT_GS_length 0x00000007 + +struct GEN7_3DSTATE_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN7_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN7_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CONSTANT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN7_3DSTATE_CONSTANT_HS_length_bias 0x00000002 +#define GEN7_3DSTATE_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 5 + +#define GEN7_3DSTATE_CONSTANT_HS_length 0x00000007 + +struct GEN7_3DSTATE_CONSTANT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN7_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN7_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CONSTANT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN7_3DSTATE_CONSTANT_PS_length_bias 0x00000002 +#define GEN7_3DSTATE_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 23, \ + .DwordLength = 5 + +#define GEN7_3DSTATE_CONSTANT_PS_length 0x00000007 + +struct GEN7_3DSTATE_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN7_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN7_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN7_3DSTATE_CONSTANT_VS_length_bias 0x00000002 +#define GEN7_3DSTATE_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 5 + +#define GEN7_3DSTATE_CONSTANT_VS_length 0x00000007 + +struct GEN7_3DSTATE_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN7_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN7_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN7_3DSTATE_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN7_3DSTATE_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 5, \ + .DwordLength = 5 + +#define GEN7_3DSTATE_DEPTH_BUFFER_length 0x00000007 + +struct GEN7_3DSTATE_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + bool DepthWriteEnable; + bool StencilWriteEnable; + bool HierarchicalDepthBufferEnable; +#define D32_FLOAT 1 +#define D24_UNORM_X8_UINT 3 +#define D16_UNORM 5 + uint32_t SurfaceFormat; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t Height; + uint32_t Width; + uint32_t LOD; +#define SURFTYPE_CUBEmustbezero 0 + uint32_t Depth; + uint32_t MinimumArrayElement; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE DepthBufferObjectControlState; + uint32_t DepthCoordinateOffsetY; + uint32_t DepthCoordinateOffsetX; + uint32_t RenderTargetViewExtent; +}; + +static inline void +GEN7_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->DepthWriteEnable, 28, 28) | + __gen_field(values->StencilWriteEnable, 27, 27) | + __gen_field(values->HierarchicalDepthBufferEnable, 22, 22) | + __gen_field(values->SurfaceFormat, 18, 20) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[3] = + __gen_field(values->Height, 18, 31) | + __gen_field(values->Width, 4, 17) | + __gen_field(values->LOD, 0, 3) | + 0; + + uint32_t dw_DepthBufferObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DepthBufferObjectControlState, &values->DepthBufferObjectControlState); + dw[4] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->MinimumArrayElement, 10, 20) | + __gen_field(dw_DepthBufferObjectControlState, 0, 3) | + 0; + + dw[5] = + __gen_field(values->DepthCoordinateOffsetY, 16, 31) | + __gen_field(values->DepthCoordinateOffsetX, 0, 15) | + 0; + + dw[6] = + __gen_field(values->RenderTargetViewExtent, 21, 31) | + 0; + +} + +#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length_bias 0x00000002 +#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 37, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length 0x00000002 + +struct GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDEPTH_STENCIL_STATE; +}; + +static inline void +GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDEPTH_STENCIL_STATE, 6, 31) | + __gen_mbo(0, 0) | + 0; + +} + +#define GEN7_3DSTATE_DRAWING_RECTANGLE_length_bias 0x00000002 +#define GEN7_3DSTATE_DRAWING_RECTANGLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 2 + +#define GEN7_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 + +struct GEN7_3DSTATE_DRAWING_RECTANGLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ClippedDrawingRectangleYMin; + uint32_t ClippedDrawingRectangleXMin; + uint32_t ClippedDrawingRectangleYMax; + uint32_t ClippedDrawingRectangleXMax; + uint32_t DrawingRectangleOriginY; + uint32_t DrawingRectangleOriginX; +}; + +static inline void +GEN7_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_DRAWING_RECTANGLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ClippedDrawingRectangleYMin, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMin, 0, 15) | + 0; + + dw[2] = + __gen_field(values->ClippedDrawingRectangleYMax, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMax, 0, 15) | + 0; + + dw[3] = + __gen_field(values->DrawingRectangleOriginY, 16, 31) | + __gen_field(values->DrawingRectangleOriginX, 0, 15) | + 0; + +} + +#define GEN7_3DSTATE_DS_length_bias 0x00000002 +#define GEN7_3DSTATE_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 29, \ + .DwordLength = 4 + +#define GEN7_3DSTATE_DS_length 0x00000006 + +struct GEN7_3DSTATE_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleDomainPointDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t PatchURBEntryReadLength; + uint32_t PatchURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + bool StatisticsEnable; + bool ComputeWCoordinateEnable; + bool DSCacheDisable; + bool DSFunctionEnable; +}; + +static inline void +GEN7_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleDomainPointDispatch, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | + __gen_field(values->PatchURBEntryReadLength, 11, 17) | + __gen_field(values->PatchURBEntryReadOffset, 4, 9) | + 0; + + dw[5] = + __gen_field(values->MaximumNumberofThreads, 25, 31) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->ComputeWCoordinateEnable, 2, 2) | + __gen_field(values->DSCacheDisable, 1, 1) | + __gen_field(values->DSFunctionEnable, 0, 0) | + 0; + +} + +#define GEN7_3DSTATE_GS_length_bias 0x00000002 +#define GEN7_3DSTATE_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 5 + +#define GEN7_3DSTATE_GS_length 0x00000007 + +struct GEN7_3DSTATE_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; + uint32_t SingleProgramFlowSPF; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnableVME; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t OutputVertexSize; + uint32_t OutputTopology; + uint32_t VertexURBEntryReadLength; + bool IncludeVertexHandles; + uint32_t VertexURBEntryReadOffset; + uint32_t DispatchGRFStartRegisterforURBData; + uint32_t MaximumNumberofThreads; +#define GSCTL_CUT 0 +#define GSCTL_SID 1 + uint32_t ControlDataFormat; + uint32_t ControlDataHeaderSize; + uint32_t InstanceControl; + uint32_t DefaultStreamID; +#define SINGLE 0 +#define DUAL_INSTANCE 1 +#define DUAL_OBJECT 2 + uint32_t DispatchMode; + uint32_t GSStatisticsEnable; + uint32_t GSInvocationsIncrementValue; + bool IncludePrimitiveID; + uint32_t Hint; + bool ReorderEnable; + bool DiscardAdjacency; + bool GSEnable; + uint32_t SemaphoreHandle; +}; + +static inline void +GEN7_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleProgramFlowSPF, 31, 31) | + __gen_field(values->VectorMaskEnableVME, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->OutputVertexSize, 23, 28) | + __gen_field(values->OutputTopology, 17, 22) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->IncludeVertexHandles, 10, 10) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + __gen_field(values->DispatchGRFStartRegisterforURBData, 0, 3) | + 0; + + dw[5] = + __gen_field(values->MaximumNumberofThreads, 25, 31) | + __gen_field(values->ControlDataFormat, 24, 24) | + __gen_field(values->ControlDataHeaderSize, 20, 23) | + __gen_field(values->InstanceControl, 15, 19) | + __gen_field(values->DefaultStreamID, 13, 14) | + __gen_field(values->DispatchMode, 11, 12) | + __gen_field(values->GSStatisticsEnable, 10, 10) | + __gen_field(values->GSInvocationsIncrementValue, 5, 9) | + __gen_field(values->IncludePrimitiveID, 4, 4) | + __gen_field(values->Hint, 3, 3) | + __gen_field(values->ReorderEnable, 2, 2) | + __gen_field(values->DiscardAdjacency, 1, 1) | + __gen_field(values->GSEnable, 0, 0) | + 0; + + dw[6] = + __gen_offset(values->SemaphoreHandle, 0, 11) | + 0; + +} + +#define GEN7_3DSTATE_HIER_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN7_3DSTATE_HIER_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 1 + +#define GEN7_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000003 + +struct GEN7_3DSTATE_HIER_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE HierarchicalDepthBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; +}; + +static inline void +GEN7_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_HIER_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_HierarchicalDepthBufferObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_HierarchicalDepthBufferObjectControlState, &values->HierarchicalDepthBufferObjectControlState); + dw[1] = + __gen_field(dw_HierarchicalDepthBufferObjectControlState, 25, 28) | + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + +} + +#define GEN7_3DSTATE_HS_length_bias 0x00000002 +#define GEN7_3DSTATE_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 27, \ + .DwordLength = 5 + +#define GEN7_3DSTATE_HS_length 0x00000007 + +struct GEN7_3DSTATE_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define IEEE754 0 +#define alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t MaximumNumberofThreads; + bool Enable; + bool StatisticsEnable; + uint32_t InstanceCount; + uint32_t KernelStartPointer; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; + bool IncludeVertexHandles; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t SemaphoreHandle; +}; + +static inline void +GEN7_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + __gen_field(values->MaximumNumberofThreads, 0, 6) | + 0; + + dw[2] = + __gen_field(values->Enable, 31, 31) | + __gen_field(values->StatisticsEnable, 29, 29) | + __gen_field(values->InstanceCount, 0, 3) | + 0; + + dw[3] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[4] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[5] = + __gen_field(values->SingleProgramFlow, 27, 27) | + __gen_field(values->VectorMaskEnable, 26, 26) | + __gen_field(values->IncludeVertexHandles, 24, 24) | + __gen_field(values->DispatchGRFStartRegisterForURBData, 19, 23) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[6] = + __gen_offset(values->SemaphoreHandle, 0, 11) | + 0; + +} + +#define GEN7_3DSTATE_INDEX_BUFFER_length_bias 0x00000002 +#define GEN7_3DSTATE_INDEX_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 1 + +#define GEN7_3DSTATE_INDEX_BUFFER_length 0x00000003 + +struct GEN7_3DSTATE_INDEX_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; + bool CutIndexEnable; +#define INDEX_BYTE 0 +#define INDEX_WORD 1 +#define INDEX_DWORD 2 + uint32_t IndexFormat; + uint32_t DwordLength; + __gen_address_type BufferStartingAddress; + __gen_address_type BufferEndingAddress; +}; + +static inline void +GEN7_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_INDEX_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_MemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_MemoryObjectControlState, 12, 15) | + __gen_field(values->CutIndexEnable, 10, 10) | + __gen_field(values->IndexFormat, 8, 9) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->BufferEndingAddress, dw2); + +} + +#define GEN7_3DSTATE_LINE_STIPPLE_length_bias 0x00000002 +#define GEN7_3DSTATE_LINE_STIPPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 8, \ + .DwordLength = 1 + +#define GEN7_3DSTATE_LINE_STIPPLE_length 0x00000003 + +struct GEN7_3DSTATE_LINE_STIPPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool ModifyEnableCurrentRepeatCounterCurrentStippleIndex; + uint32_t CurrentRepeatCounter; + uint32_t CurrentStippleIndex; + uint32_t LineStipplePattern; + float LineStippleInverseRepeatCount; + uint32_t LineStippleRepeatCount; +}; + +static inline void +GEN7_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_LINE_STIPPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ModifyEnableCurrentRepeatCounterCurrentStippleIndex, 31, 31) | + __gen_field(values->CurrentRepeatCounter, 21, 29) | + __gen_field(values->CurrentStippleIndex, 16, 19) | + __gen_field(values->LineStipplePattern, 0, 15) | + 0; + + dw[2] = + __gen_field(values->LineStippleInverseRepeatCount * (1 << 16), 15, 31) | + __gen_field(values->LineStippleRepeatCount, 0, 8) | + 0; + +} + +#define GEN7_3DSTATE_MONOFILTER_SIZE_length_bias 0x00000002 +#define GEN7_3DSTATE_MONOFILTER_SIZE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_MONOFILTER_SIZE_length 0x00000002 + +struct GEN7_3DSTATE_MONOFILTER_SIZE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t MonochromeFilterWidth; + uint32_t MonochromeFilterHeight; +}; + +static inline void +GEN7_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_MONOFILTER_SIZE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->MonochromeFilterWidth, 3, 5) | + __gen_field(values->MonochromeFilterHeight, 0, 2) | + 0; + +} + +#define GEN7_3DSTATE_MULTISAMPLE_length_bias 0x00000002 +#define GEN7_3DSTATE_MULTISAMPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 13, \ + .DwordLength = 2 + +#define GEN7_3DSTATE_MULTISAMPLE_length 0x00000004 + +struct GEN7_3DSTATE_MULTISAMPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define PIXLOC_CENTER 0 +#define PIXLOC_UL_CORNER 1 + uint32_t PixelLocation; +#define NUMSAMPLES_1 0 +#define NUMSAMPLES_4 2 +#define NUMSAMPLES_8 3 + uint32_t NumberofMultisamples; + float Sample3XOffset; + float Sample3YOffset; + float Sample2XOffset; + float Sample2YOffset; + float Sample1XOffset; + float Sample1YOffset; + float Sample0XOffset; + float Sample0YOffset; + float Sample7XOffset; + float Sample7YOffset; + float Sample6XOffset; + float Sample6YOffset; + float Sample5XOffset; + float Sample5YOffset; + float Sample4XOffset; + float Sample4YOffset; +}; + +static inline void +GEN7_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_MULTISAMPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PixelLocation, 4, 4) | + __gen_field(values->NumberofMultisamples, 1, 3) | + 0; + + dw[2] = + __gen_field(values->Sample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample0YOffset * (1 << 4), 0, 3) | + 0; + + dw[3] = + __gen_field(values->Sample7XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample7YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample6XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample6YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample5XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample5YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample4XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample4YOffset * (1 << 4), 0, 3) | + 0; + +} + +#define GEN7_3DSTATE_POLY_STIPPLE_OFFSET_length_bias 0x00000002 +#define GEN7_3DSTATE_POLY_STIPPLE_OFFSET_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 + +struct GEN7_3DSTATE_POLY_STIPPLE_OFFSET { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PolygonStippleXOffset; + uint32_t PolygonStippleYOffset; +}; + +static inline void +GEN7_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PolygonStippleXOffset, 8, 12) | + __gen_field(values->PolygonStippleYOffset, 0, 4) | + 0; + +} + +#define GEN7_3DSTATE_POLY_STIPPLE_PATTERN_length_bias 0x00000002 +#define GEN7_3DSTATE_POLY_STIPPLE_PATTERN_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 31 + +#define GEN7_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 + +struct GEN7_3DSTATE_POLY_STIPPLE_PATTERN { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PatternRow[32]; +}; + +static inline void +GEN7_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + for (uint32_t i = 0, j = 1; i < 32; i += 1, j++) { + dw[j] = + __gen_field(values->PatternRow[i + 0], 0, 31) | + 0; + } + +} + +#define GEN7_3DSTATE_PS_length_bias 0x00000002 +#define GEN7_3DSTATE_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 32, \ + .DwordLength = 6 + +#define GEN7_3DSTATE_PS_length 0x00000008 + +struct GEN7_3DSTATE_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer0; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlowSPF; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnableVME; + uint32_t SamplerCount; +#define FTZ 0 +#define RET 1 + uint32_t DenormalMode; + uint32_t BindingTableEntryCount; +#define IEEE745 0 +#define Alt 1 + uint32_t FloatingPointMode; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t MaximumNumberofThreads; + bool PushConstantEnable; + bool AttributeEnable; + bool oMaskPresenttoRenderTarget; + bool RenderTargetFastClearEnable; + bool DualSourceBlendEnable; + bool RenderTargetResolveEnable; +#define POSOFFSET_NONE 0 +#define POSOFFSET_CENTROID 2 +#define POSOFFSET_SAMPLE 3 + uint32_t PositionXYOffsetSelect; + bool _32PixelDispatchEnable; + bool _16PixelDispatchEnable; + bool _8PixelDispatchEnable; + uint32_t DispatchGRFStartRegisterforConstantSetupData0; + uint32_t DispatchGRFStartRegisterforConstantSetupData1; + uint32_t DispatchGRFStartRegisterforConstantSetupData2; + uint32_t KernelStartPointer1; + uint32_t KernelStartPointer2; +}; + +static inline void +GEN7_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer0, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleProgramFlowSPF, 31, 31) | + __gen_field(values->VectorMaskEnableVME, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->DenormalMode, 26, 26) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->RoundingMode, 14, 15) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->MaximumNumberofThreads, 24, 31) | + __gen_field(values->PushConstantEnable, 11, 11) | + __gen_field(values->AttributeEnable, 10, 10) | + __gen_field(values->oMaskPresenttoRenderTarget, 9, 9) | + __gen_field(values->RenderTargetFastClearEnable, 8, 8) | + __gen_field(values->DualSourceBlendEnable, 7, 7) | + __gen_field(values->RenderTargetResolveEnable, 6, 6) | + __gen_field(values->PositionXYOffsetSelect, 3, 4) | + __gen_field(values->_32PixelDispatchEnable, 2, 2) | + __gen_field(values->_16PixelDispatchEnable, 1, 1) | + __gen_field(values->_8PixelDispatchEnable, 0, 0) | + 0; + + dw[5] = + __gen_field(values->DispatchGRFStartRegisterforConstantSetupData0, 16, 22) | + __gen_field(values->DispatchGRFStartRegisterforConstantSetupData1, 8, 14) | + __gen_field(values->DispatchGRFStartRegisterforConstantSetupData2, 0, 6) | + 0; + + dw[6] = + __gen_offset(values->KernelStartPointer1, 6, 31) | + 0; + + dw[7] = + __gen_offset(values->KernelStartPointer2, 6, 31) | + 0; + +} + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length_bias 0x00000002 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 + +struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define _0KB 0 + uint32_t ConstantBufferOffset; +#define _0KB 0 + uint32_t ConstantBufferSize; +}; + +static inline void +GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 19) | + __gen_field(values->ConstantBufferSize, 0, 4) | + 0; + +} + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length_bias 0x00000002 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 + +struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define _0KB 0 + uint32_t ConstantBufferOffset; +#define _0KB 0 + uint32_t ConstantBufferSize; +}; + +static inline void +GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 19) | + __gen_field(values->ConstantBufferSize, 0, 4) | + 0; + +} + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length_bias 0x00000002 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 + +struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define _0KB 0 + uint32_t ConstantBufferOffset; +#define _0KB 0 + uint32_t ConstantBufferSize; +}; + +static inline void +GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 19) | + __gen_field(values->ConstantBufferSize, 0, 4) | + 0; + +} + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length_bias 0x00000002 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 + +struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define _0KB 0 + uint32_t ConstantBufferOffset; +#define _0KB 0 + uint32_t ConstantBufferSize; +}; + +static inline void +GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 19) | + __gen_field(values->ConstantBufferSize, 0, 4) | + 0; + +} + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length_bias 0x00000002 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 + +struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define _0KB 0 + uint32_t ConstantBufferOffset; +#define _0KB 0 + uint32_t ConstantBufferSize; +}; + +static inline void +GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 19) | + __gen_field(values->ConstantBufferSize, 0, 4) | + 0; + +} + +#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0_length_bias 0x00000002 +#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2 + +#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0_length 0x00000000 + +#define GEN7_PALETTE_ENTRY_length 0x00000001 + +struct GEN7_PALETTE_ENTRY { + uint32_t Alpha; + uint32_t Red; + uint32_t Green; + uint32_t Blue; +}; + +static inline void +GEN7_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_PALETTE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->Alpha, 24, 31) | + __gen_field(values->Red, 16, 23) | + __gen_field(values->Green, 8, 15) | + __gen_field(values->Blue, 0, 7) | + 0; + +} + +struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1_length_bias 0x00000002 +#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 12 + +#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1_length 0x00000000 + +struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_length_bias 0x00000002 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 45, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 + +struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSSamplerState; +}; + +static inline void +GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSSamplerState, 5, 31) | + 0; + +} + +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_length_bias 0x00000002 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 46, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 + +struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSSamplerState; +}; + +static inline void +GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSSamplerState, 5, 31) | + 0; + +} + +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_length_bias 0x00000002 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 44, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 + +struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSSamplerState; +}; + +static inline void +GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSSamplerState, 5, 31) | + 0; + +} + +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_length_bias 0x00000002 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 47, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 + +struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSSamplerState; +}; + +static inline void +GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSSamplerState, 5, 31) | + 0; + +} + +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_length_bias 0x00000002 +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 43, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 + +struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSSamplerState; +}; + +static inline void +GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSSamplerState, 5, 31) | + 0; + +} + +#define GEN7_3DSTATE_SAMPLE_MASK_length_bias 0x00000002 +#define GEN7_3DSTATE_SAMPLE_MASK_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_SAMPLE_MASK_length 0x00000002 + +struct GEN7_3DSTATE_SAMPLE_MASK { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SampleMask; +}; + +static inline void +GEN7_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SAMPLE_MASK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SampleMask, 0, 7) | + 0; + +} + +#define GEN7_3DSTATE_SBE_length_bias 0x00000002 +#define GEN7_3DSTATE_SBE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 31, \ + .DwordLength = 12 + +#define GEN7_3DSTATE_SBE_length 0x0000000e + +struct GEN7_3DSTATE_SBE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define SWIZ_0_15 0 +#define SWIZ_16_31 1 + uint32_t AttributeSwizzleControlMode; + uint32_t NumberofSFOutputAttributes; + bool AttributeSwizzleEnable; +#define UPPERLEFT 0 +#define LOWERLEFT 1 + uint32_t PointSpriteTextureCoordinateOrigin; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + bool Attribute2n1ComponentOverrideW; + bool Attribute2n1ComponentOverrideZ; + bool Attribute2n1ComponentOverrideY; + bool Attribute2n1ComponentOverrideX; +#define CONST_0000 0 +#define CONST_0001_FLOAT 1 +#define CONST_1111_FLOAT 2 +#define PRIM_ID 3 + uint32_t Attribute2n1ConstantSource; +#define INPUTATTR 0 +#define INPUTATTR_FACING 1 +#define INPUTATTR_W 2 +#define INPUTATTR_FACING_W 3 + uint32_t Attribute2n1SwizzleSelect; + uint32_t Attribute2n1SourceAttribute; + bool Attribute2nComponentOverrideW; + bool Attribute2nComponentOverrideZ; + bool Attribute2nComponentOverrideY; + bool Attribute2nComponentOverrideX; +#define CONST_0000 0 +#define CONST_0001_FLOAT 1 +#define CONST_1111_FLOAT 2 +#define PRIM_ID 3 + uint32_t Attribute2nConstantSource; +#define INPUTATTR 0 +#define INPUTATTR_FACING 1 +#define INPUTATTR_W 2 +#define INPUTATTR_FACING_W 3 + uint32_t Attribute2nSwizzleSelect; + uint32_t Attribute2nSourceAttribute; + uint32_t PointSpriteTextureCoordinateEnable; + uint32_t ConstantInterpolationEnable310; + uint32_t Attribute7WrapShortestEnables; + uint32_t Attribute6WrapShortestEnables; + uint32_t Attribute5WrapShortestEnables; + uint32_t Attribute4WrapShortestEnables; + uint32_t Attribute3WrapShortestEnables; + uint32_t Attribute2WrapShortestEnables; + uint32_t Attribute1WrapShortestEnables; + uint32_t Attribute0WrapShortestEnables; + uint32_t Attribute15WrapShortestEnables; + uint32_t Attribute14WrapShortestEnables; + uint32_t Attribute13WrapShortestEnables; + uint32_t Attribute12WrapShortestEnables; + uint32_t Attribute11WrapShortestEnables; + uint32_t Attribute10WrapShortestEnables; + uint32_t Attribute9WrapShortestEnables; + uint32_t Attribute8WrapShortestEnables; +}; + +static inline void +GEN7_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SBE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AttributeSwizzleControlMode, 28, 28) | + __gen_field(values->NumberofSFOutputAttributes, 22, 27) | + __gen_field(values->AttributeSwizzleEnable, 21, 21) | + __gen_field(values->PointSpriteTextureCoordinateOrigin, 20, 20) | + __gen_field(values->VertexURBEntryReadLength, 11, 15) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[2] = + __gen_field(values->Attribute2n1ComponentOverrideW, 31, 31) | + __gen_field(values->Attribute2n1ComponentOverrideZ, 30, 30) | + __gen_field(values->Attribute2n1ComponentOverrideY, 29, 29) | + __gen_field(values->Attribute2n1ComponentOverrideX, 28, 28) | + __gen_field(values->Attribute2n1ConstantSource, 25, 26) | + __gen_field(values->Attribute2n1SwizzleSelect, 22, 23) | + __gen_field(values->Attribute2n1SourceAttribute, 16, 20) | + __gen_field(values->Attribute2nComponentOverrideW, 15, 15) | + __gen_field(values->Attribute2nComponentOverrideZ, 14, 14) | + __gen_field(values->Attribute2nComponentOverrideY, 13, 13) | + __gen_field(values->Attribute2nComponentOverrideX, 12, 12) | + __gen_field(values->Attribute2nConstantSource, 9, 10) | + __gen_field(values->Attribute2nSwizzleSelect, 6, 7) | + __gen_field(values->Attribute2nSourceAttribute, 0, 4) | + 0; + + dw[10] = + __gen_field(values->PointSpriteTextureCoordinateEnable, 0, 31) | + 0; + + dw[11] = + __gen_field(values->ConstantInterpolationEnable310, 0, 31) | + 0; + + dw[12] = + __gen_field(values->Attribute7WrapShortestEnables, 28, 31) | + __gen_field(values->Attribute6WrapShortestEnables, 24, 27) | + __gen_field(values->Attribute5WrapShortestEnables, 20, 23) | + __gen_field(values->Attribute4WrapShortestEnables, 16, 19) | + __gen_field(values->Attribute3WrapShortestEnables, 12, 15) | + __gen_field(values->Attribute2WrapShortestEnables, 8, 11) | + __gen_field(values->Attribute1WrapShortestEnables, 4, 7) | + __gen_field(values->Attribute0WrapShortestEnables, 0, 3) | + 0; + + dw[13] = + __gen_field(values->Attribute15WrapShortestEnables, 28, 31) | + __gen_field(values->Attribute14WrapShortestEnables, 24, 27) | + __gen_field(values->Attribute13WrapShortestEnables, 20, 23) | + __gen_field(values->Attribute12WrapShortestEnables, 16, 19) | + __gen_field(values->Attribute11WrapShortestEnables, 12, 15) | + __gen_field(values->Attribute10WrapShortestEnables, 8, 11) | + __gen_field(values->Attribute9WrapShortestEnables, 4, 7) | + __gen_field(values->Attribute8WrapShortestEnables, 0, 3) | + 0; + +} + +#define GEN7_3DSTATE_SCISSOR_STATE_POINTERS_length_bias 0x00000002 +#define GEN7_3DSTATE_SCISSOR_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 15, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 + +struct GEN7_3DSTATE_SCISSOR_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ScissorRectPointer; +}; + +static inline void +GEN7_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ScissorRectPointer, 5, 31) | + 0; + +} + +#define GEN7_3DSTATE_SF_length_bias 0x00000002 +#define GEN7_3DSTATE_SF_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 5 + +#define GEN7_3DSTATE_SF_length 0x00000007 + +struct GEN7_3DSTATE_SF { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define D32_FLOAT_S8X24_UINT 0 +#define D32_FLOAT 1 +#define D24_UNORM_S8_UINT 2 +#define D24_UNORM_X8_UINT 3 +#define D16_UNORM 5 + uint32_t DepthBufferSurfaceFormat; + bool LegacyGlobalDepthBiasEnable; + bool StatisticsEnable; + bool GlobalDepthOffsetEnableSolid; + bool GlobalDepthOffsetEnableWireframe; + bool GlobalDepthOffsetEnablePoint; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t FrontFaceFillMode; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t BackFaceFillMode; + bool ViewTransformEnable; + uint32_t FrontWinding; + bool AntiAliasingEnable; +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_FRONT 2 +#define CULLMODE_BACK 3 + uint32_t CullMode; + float LineWidth; + uint32_t LineEndCapAntialiasingRegionWidth; + bool ScissorRectangleEnable; + uint32_t MultisampleRasterizationMode; + bool LastPixelEnable; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleStripListProvokingVertexSelect; + uint32_t LineStripListProvokingVertexSelect; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleFanProvokingVertexSelect; +#define AALINEDISTANCE_TRUE 1 + uint32_t AALineDistanceMode; + uint32_t VertexSubPixelPrecisionSelect; + uint32_t UsePointWidthState; + float PointWidth; + float GlobalDepthOffsetConstant; + float GlobalDepthOffsetScale; + float GlobalDepthOffsetClamp; +}; + +static inline void +GEN7_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DepthBufferSurfaceFormat, 12, 14) | + __gen_field(values->LegacyGlobalDepthBiasEnable, 11, 11) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->GlobalDepthOffsetEnableSolid, 9, 9) | + __gen_field(values->GlobalDepthOffsetEnableWireframe, 8, 8) | + __gen_field(values->GlobalDepthOffsetEnablePoint, 7, 7) | + __gen_field(values->FrontFaceFillMode, 5, 6) | + __gen_field(values->BackFaceFillMode, 3, 4) | + __gen_field(values->ViewTransformEnable, 1, 1) | + __gen_field(values->FrontWinding, 0, 0) | + 0; + + dw[2] = + __gen_field(values->AntiAliasingEnable, 31, 31) | + __gen_field(values->CullMode, 29, 30) | + __gen_field(values->LineWidth * (1 << 7), 18, 27) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 16, 17) | + __gen_field(values->ScissorRectangleEnable, 11, 11) | + __gen_field(values->MultisampleRasterizationMode, 8, 9) | + 0; + + dw[3] = + __gen_field(values->LastPixelEnable, 31, 31) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 29, 30) | + __gen_field(values->LineStripListProvokingVertexSelect, 27, 28) | + __gen_field(values->TriangleFanProvokingVertexSelect, 25, 26) | + __gen_field(values->AALineDistanceMode, 14, 14) | + __gen_field(values->VertexSubPixelPrecisionSelect, 12, 12) | + __gen_field(values->UsePointWidthState, 11, 11) | + __gen_field(values->PointWidth * (1 << 3), 0, 10) | + 0; + + dw[4] = + __gen_float(values->GlobalDepthOffsetConstant) | + 0; + + dw[5] = + __gen_float(values->GlobalDepthOffsetScale) | + 0; + + dw[6] = + __gen_float(values->GlobalDepthOffsetClamp) | + 0; + +} + +#define GEN7_3DSTATE_SO_BUFFER_length_bias 0x00000002 +#define GEN7_3DSTATE_SO_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 2 + +#define GEN7_3DSTATE_SO_BUFFER_length 0x00000004 + +struct GEN7_3DSTATE_SO_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SOBufferIndex; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE SOBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + __gen_address_type SurfaceEndAddress; +}; + +static inline void +GEN7_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SO_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_SOBufferObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SOBufferObjectControlState, &values->SOBufferObjectControlState); + dw[1] = + __gen_field(values->SOBufferIndex, 29, 30) | + __gen_field(dw_SOBufferObjectControlState, 25, 28) | + __gen_field(values->SurfacePitch, 0, 11) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->SurfaceEndAddress, dw3); + +} + +#define GEN7_3DSTATE_SO_DECL_LIST_length_bias 0x00000002 +#define GEN7_3DSTATE_SO_DECL_LIST_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 23 + +#define GEN7_3DSTATE_SO_DECL_LIST_length 0x00000000 + +#define GEN7_SO_DECL_ENTRY_length 0x00000002 + +#define GEN7_SO_DECL_length 0x00000001 + +struct GEN7_SO_DECL { + uint32_t OutputBufferSlot; + uint32_t HoleFlag; + uint32_t RegisterIndex; + uint32_t ComponentMask; +}; + +static inline void +GEN7_SO_DECL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SO_DECL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->OutputBufferSlot, 12, 13) | + __gen_field(values->HoleFlag, 11, 11) | + __gen_field(values->RegisterIndex, 4, 9) | + __gen_field(values->ComponentMask, 0, 3) | + 0; + +} + +struct GEN7_SO_DECL_ENTRY { + struct GEN7_SO_DECL Stream3Decl; + struct GEN7_SO_DECL Stream2Decl; + struct GEN7_SO_DECL Stream1Decl; + struct GEN7_SO_DECL Stream0Decl; +}; + +static inline void +GEN7_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SO_DECL_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_Stream3Decl; + GEN7_SO_DECL_pack(data, &dw_Stream3Decl, &values->Stream3Decl); + uint32_t dw_Stream2Decl; + GEN7_SO_DECL_pack(data, &dw_Stream2Decl, &values->Stream2Decl); + uint32_t dw_Stream1Decl; + GEN7_SO_DECL_pack(data, &dw_Stream1Decl, &values->Stream1Decl); + uint32_t dw_Stream0Decl; + GEN7_SO_DECL_pack(data, &dw_Stream0Decl, &values->Stream0Decl); + uint64_t qw0 = + __gen_field(dw_Stream3Decl, 48, 63) | + __gen_field(dw_Stream2Decl, 32, 47) | + __gen_field(dw_Stream1Decl, 16, 31) | + __gen_field(dw_Stream0Decl, 0, 15) | + 0; + + dw[0] = qw0; + dw[1] = qw0 >> 32; + +} + +struct GEN7_3DSTATE_SO_DECL_LIST { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StreamtoBufferSelects3; + uint32_t StreamtoBufferSelects2; + uint32_t StreamtoBufferSelects1; + uint32_t StreamtoBufferSelects0; + uint32_t NumEntries3; + uint32_t NumEntries2; + uint32_t NumEntries1; + uint32_t NumEntries0; + /* variable length fields follow */ +}; + +static inline void +GEN7_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_SO_DECL_LIST * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->StreamtoBufferSelects3, 12, 15) | + __gen_field(values->StreamtoBufferSelects2, 8, 11) | + __gen_field(values->StreamtoBufferSelects1, 4, 7) | + __gen_field(values->StreamtoBufferSelects0, 0, 3) | + 0; + + dw[2] = + __gen_field(values->NumEntries3, 24, 31) | + __gen_field(values->NumEntries2, 16, 23) | + __gen_field(values->NumEntries1, 8, 15) | + __gen_field(values->NumEntries0, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN7_3DSTATE_STENCIL_BUFFER_length_bias 0x00000002 +#define GEN7_3DSTATE_STENCIL_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 1 + +#define GEN7_3DSTATE_STENCIL_BUFFER_length 0x00000003 + +struct GEN7_3DSTATE_STENCIL_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE StencilBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; +}; + +static inline void +GEN7_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_STENCIL_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_StencilBufferObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StencilBufferObjectControlState, &values->StencilBufferObjectControlState); + dw[1] = + __gen_field(dw_StencilBufferObjectControlState, 25, 28) | + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + +} + +#define GEN7_3DSTATE_STREAMOUT_length_bias 0x00000002 +#define GEN7_3DSTATE_STREAMOUT_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 30, \ + .DwordLength = 1 + +#define GEN7_3DSTATE_STREAMOUT_length 0x00000003 + +struct GEN7_3DSTATE_STREAMOUT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SOFunctionEnable; + uint32_t RenderingDisable; + uint32_t RenderStreamSelect; +#define LEADING 0 +#define TRAILING 1 + uint32_t ReorderMode; + bool SOStatisticsEnable; + uint32_t SOBufferEnable3; + uint32_t SOBufferEnable2; + uint32_t SOBufferEnable1; + uint32_t SOBufferEnable0; + uint32_t Stream3VertexReadOffset; + uint32_t Stream3VertexReadLength; + uint32_t Stream2VertexReadOffset; + uint32_t Stream2VertexReadLength; + uint32_t Stream1VertexReadOffset; + uint32_t Stream1VertexReadLength; + uint32_t Stream0VertexReadOffset; + uint32_t Stream0VertexReadLength; +}; + +static inline void +GEN7_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_STREAMOUT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SOFunctionEnable, 31, 31) | + __gen_field(values->RenderingDisable, 30, 30) | + __gen_field(values->RenderStreamSelect, 27, 28) | + __gen_field(values->ReorderMode, 26, 26) | + __gen_field(values->SOStatisticsEnable, 25, 25) | + __gen_field(values->SOBufferEnable3, 11, 11) | + __gen_field(values->SOBufferEnable2, 10, 10) | + __gen_field(values->SOBufferEnable1, 9, 9) | + __gen_field(values->SOBufferEnable0, 8, 8) | + 0; + + dw[2] = + __gen_field(values->Stream3VertexReadOffset, 29, 29) | + __gen_field(values->Stream3VertexReadLength, 24, 28) | + __gen_field(values->Stream2VertexReadOffset, 21, 21) | + __gen_field(values->Stream2VertexReadLength, 16, 20) | + __gen_field(values->Stream1VertexReadOffset, 13, 13) | + __gen_field(values->Stream1VertexReadLength, 8, 12) | + __gen_field(values->Stream0VertexReadOffset, 5, 5) | + __gen_field(values->Stream0VertexReadLength, 0, 4) | + 0; + +} + +#define GEN7_3DSTATE_TE_length_bias 0x00000002 +#define GEN7_3DSTATE_TE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 28, \ + .DwordLength = 2 + +#define GEN7_3DSTATE_TE_length 0x00000004 + +struct GEN7_3DSTATE_TE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define INTEGER 0 +#define ODD_FRACTIONAL 1 +#define EVEN_FRACTIONAL 2 + uint32_t Partitioning; +#define POINT 0 +#define OUTPUT_LINE 1 +#define OUTPUT_TRI_CW 2 +#define OUTPUT_TRI_CCW 3 + uint32_t OutputTopology; +#define QUAD 0 +#define TRI 1 +#define ISOLINE 2 + uint32_t TEDomain; +#define HW_TESS 0 +#define SW_TESS 1 + uint32_t TEMode; + bool TEEnable; + float MaximumTessellationFactorOdd; + float MaximumTessellationFactorNotOdd; +}; + +static inline void +GEN7_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_TE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Partitioning, 12, 13) | + __gen_field(values->OutputTopology, 8, 9) | + __gen_field(values->TEDomain, 4, 5) | + __gen_field(values->TEMode, 1, 2) | + __gen_field(values->TEEnable, 0, 0) | + 0; + + dw[2] = + __gen_float(values->MaximumTessellationFactorOdd) | + 0; + + dw[3] = + __gen_float(values->MaximumTessellationFactorNotOdd) | + 0; + +} + +#define GEN7_3DSTATE_URB_DS_length_bias 0x00000002 +#define GEN7_3DSTATE_URB_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 50, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_URB_DS_length 0x00000002 + +struct GEN7_3DSTATE_URB_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t DSURBStartingAddress; + uint32_t DSURBEntryAllocationSize; + uint32_t DSNumberofURBEntries; +}; + +static inline void +GEN7_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_URB_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DSURBStartingAddress, 25, 29) | + __gen_field(values->DSURBEntryAllocationSize, 16, 24) | + __gen_field(values->DSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN7_3DSTATE_URB_GS_length_bias 0x00000002 +#define GEN7_3DSTATE_URB_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 51, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_URB_GS_length 0x00000002 + +struct GEN7_3DSTATE_URB_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t GSURBStartingAddress; + uint32_t GSURBEntryAllocationSize; + uint32_t GSNumberofURBEntries; +}; + +static inline void +GEN7_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_URB_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->GSURBStartingAddress, 25, 29) | + __gen_field(values->GSURBEntryAllocationSize, 16, 24) | + __gen_field(values->GSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN7_3DSTATE_URB_HS_length_bias 0x00000002 +#define GEN7_3DSTATE_URB_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 49, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_URB_HS_length 0x00000002 + +struct GEN7_3DSTATE_URB_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t HSURBStartingAddress; + uint32_t HSURBEntryAllocationSize; + uint32_t HSNumberofURBEntries; +}; + +static inline void +GEN7_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_URB_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->HSURBStartingAddress, 25, 29) | + __gen_field(values->HSURBEntryAllocationSize, 16, 24) | + __gen_field(values->HSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN7_3DSTATE_VERTEX_BUFFERS_length_bias 0x00000002 +#define GEN7_3DSTATE_VERTEX_BUFFERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 8 + +#define GEN7_3DSTATE_VERTEX_BUFFERS_length 0x00000000 + +#define GEN7_VERTEX_BUFFER_STATE_length 0x00000004 + +struct GEN7_VERTEX_BUFFER_STATE { + uint32_t VertexBufferIndex; +#define VERTEXDATA 0 +#define INSTANCEDATA 1 + uint32_t BufferAccessType; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE VertexBufferMemoryObjectControlState; + uint32_t AddressModifyEnable; + bool NullVertexBuffer; + uint32_t VertexFetchInvalidate; + uint32_t BufferPitch; + __gen_address_type BufferStartingAddress; + __gen_address_type EndAddress; + uint32_t InstanceDataStepRate; +}; + +static inline void +GEN7_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_VERTEX_BUFFER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_VertexBufferMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_VertexBufferMemoryObjectControlState, &values->VertexBufferMemoryObjectControlState); + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->BufferAccessType, 20, 20) | + __gen_field(dw_VertexBufferMemoryObjectControlState, 16, 19) | + __gen_field(values->AddressModifyEnable, 14, 14) | + __gen_field(values->NullVertexBuffer, 13, 13) | + __gen_field(values->VertexFetchInvalidate, 12, 12) | + __gen_field(values->BufferPitch, 0, 11) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->EndAddress, dw2); + + dw[3] = + __gen_field(values->InstanceDataStepRate, 0, 31) | + 0; + +} + +struct GEN7_3DSTATE_VERTEX_BUFFERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN7_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_VERTEX_BUFFERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN7_3DSTATE_VERTEX_ELEMENTS_length_bias 0x00000002 +#define GEN7_3DSTATE_VERTEX_ELEMENTS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 9 + +#define GEN7_3DSTATE_VERTEX_ELEMENTS_length 0x00000000 + +#define GEN7_VERTEX_ELEMENT_STATE_length 0x00000002 + +struct GEN7_VERTEX_ELEMENT_STATE { + uint32_t VertexBufferIndex; + bool Valid; + uint32_t SourceElementFormat; + bool EdgeFlagEnable; + uint32_t SourceElementOffset; + uint32_t Component0Control; + uint32_t Component1Control; + uint32_t Component2Control; + uint32_t Component3Control; +}; + +static inline void +GEN7_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_VERTEX_ELEMENT_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->Valid, 25, 25) | + __gen_field(values->SourceElementFormat, 16, 24) | + __gen_field(values->EdgeFlagEnable, 15, 15) | + __gen_field(values->SourceElementOffset, 0, 11) | + 0; + + dw[1] = + __gen_field(values->Component0Control, 28, 30) | + __gen_field(values->Component1Control, 24, 26) | + __gen_field(values->Component2Control, 20, 22) | + __gen_field(values->Component3Control, 16, 18) | + 0; + +} + +struct GEN7_3DSTATE_VERTEX_ELEMENTS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN7_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_VERTEX_ELEMENTS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN7_3DSTATE_VF_STATISTICS_length_bias 0x00000001 +#define GEN7_3DSTATE_VF_STATISTICS_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 11 + +#define GEN7_3DSTATE_VF_STATISTICS_length 0x00000001 + +struct GEN7_3DSTATE_VF_STATISTICS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool StatisticsEnable; +}; + +static inline void +GEN7_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_VF_STATISTICS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->StatisticsEnable, 0, 0) | + 0; + +} + +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length_bias 0x00000002 +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 35, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 + +struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t CCViewportPointer; +}; + +static inline void +GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->CCViewportPointer, 5, 31) | + 0; + +} + +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length_bias 0x00000002 +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 33, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 + +struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SFClipViewportPointer; +}; + +static inline void +GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SFClipViewportPointer, 6, 31) | + 0; + +} + +#define GEN7_3DSTATE_VS_length_bias 0x00000002 +#define GEN7_3DSTATE_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 16, \ + .DwordLength = 4 + +#define GEN7_3DSTATE_VS_length 0x00000006 + +struct GEN7_3DSTATE_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleVertexDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnableVME; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t ScratchSpaceBaseOffset; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterforURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + bool StatisticsEnable; + bool VertexCacheDisable; + bool VSFunctionEnable; +}; + +static inline void +GEN7_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleVertexDispatch, 31, 31) | + __gen_field(values->VectorMaskEnableVME, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBaseOffset, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->DispatchGRFStartRegisterforURBData, 20, 24) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[5] = + __gen_field(values->MaximumNumberofThreads, 25, 31) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->VertexCacheDisable, 1, 1) | + __gen_field(values->VSFunctionEnable, 0, 0) | + 0; + +} + +#define GEN7_3DSTATE_WM_length_bias 0x00000002 +#define GEN7_3DSTATE_WM_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 1 + +#define GEN7_3DSTATE_WM_length 0x00000003 + +struct GEN7_3DSTATE_WM { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool StatisticsEnable; + bool DepthBufferClear; + bool ThreadDispatchEnable; + bool DepthBufferResolveEnable; + bool HierarchicalDepthBufferResolveEnable; + bool LegacyDiamondLineRasterization; + bool PixelShaderKillPixel; +#define PSCDEPTH_OFF 0 +#define PSCDEPTH_ON 1 +#define PSCDEPTH_ON_GE 2 +#define PSCDEPTH_ON_LE 3 + uint32_t PixelShaderComputedDepthMode; +#define EDSC_NORMAL 0 +#define EDSC_PSEXEC 1 +#define EDSC_PREPS 2 + uint32_t EarlyDepthStencilControl; + bool PixelShaderUsesSourceDepth; + bool PixelShaderUsesSourceW; +#define INTERP_PIXEL 0 +#define INTERP_CENTROID 2 +#define INTERP_SAMPLE 3 + uint32_t PositionZWInterpolationMode; + uint32_t BarycentricInterpolationMode; + bool PixelShaderUsesInputCoverageMask; + uint32_t LineEndCapAntialiasingRegionWidth; + uint32_t LineAntialiasingRegionWidth; + bool PolygonStippleEnable; + bool LineStippleEnable; +#define RASTRULE_UPPER_LEFT 0 +#define RASTRULE_UPPER_RIGHT 1 + uint32_t PointRasterizationRule; +#define MSRASTMODE_OFF_PIXEL 0 +#define MSRASTMODE_OFF_PATTERN 1 +#define MSRASTMODE_ON_PIXEL 2 +#define MSRASTMODE_ON_PATTERN 3 + uint32_t MultisampleRasterizationMode; +#define MSDISPMODE_PERSAMPLE 0 +#define MSDISPMODE_PERPIXEL 1 + uint32_t MultisampleDispatchMode; +}; + +static inline void +GEN7_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_WM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StatisticsEnable, 31, 31) | + __gen_field(values->DepthBufferClear, 30, 30) | + __gen_field(values->ThreadDispatchEnable, 29, 29) | + __gen_field(values->DepthBufferResolveEnable, 28, 28) | + __gen_field(values->HierarchicalDepthBufferResolveEnable, 27, 27) | + __gen_field(values->LegacyDiamondLineRasterization, 26, 26) | + __gen_field(values->PixelShaderKillPixel, 25, 25) | + __gen_field(values->PixelShaderComputedDepthMode, 23, 24) | + __gen_field(values->EarlyDepthStencilControl, 21, 22) | + __gen_field(values->PixelShaderUsesSourceDepth, 20, 20) | + __gen_field(values->PixelShaderUsesSourceW, 19, 19) | + __gen_field(values->PositionZWInterpolationMode, 17, 18) | + __gen_field(values->BarycentricInterpolationMode, 11, 16) | + __gen_field(values->PixelShaderUsesInputCoverageMask, 10, 10) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 8, 9) | + __gen_field(values->LineAntialiasingRegionWidth, 6, 7) | + __gen_field(values->PolygonStippleEnable, 4, 4) | + __gen_field(values->LineStippleEnable, 3, 3) | + __gen_field(values->PointRasterizationRule, 2, 2) | + __gen_field(values->MultisampleRasterizationMode, 0, 1) | + 0; + + dw[2] = + __gen_field(values->MultisampleDispatchMode, 31, 31) | + 0; + +} + +#define GEN7_GPGPU_OBJECT_length_bias 0x00000002 +#define GEN7_GPGPU_OBJECT_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 4, \ + .DwordLength = 6 + +#define GEN7_GPGPU_OBJECT_length 0x00000008 + +struct GEN7_GPGPU_OBJECT { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + bool PredicateEnable; + uint32_t DwordLength; + uint32_t SharedLocalMemoryFixedOffset; + uint32_t InterfaceDescriptorOffset; + uint32_t SharedLocalMemoryOffset; + uint32_t EndofThreadGroup; +#define HalfSlice1 2 +#define HalfSlice0 1 +#define EitherHalfSlice 0 + uint32_t HalfSliceDestinationSelect; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; + uint32_t ThreadGroupIDX; + uint32_t ThreadGroupIDY; + uint32_t ThreadGroupIDZ; + uint32_t ExecutionMask; +}; + +static inline void +GEN7_GPGPU_OBJECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_GPGPU_OBJECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SharedLocalMemoryFixedOffset, 7, 7) | + __gen_field(values->InterfaceDescriptorOffset, 0, 4) | + 0; + + dw[2] = + __gen_field(values->SharedLocalMemoryOffset, 28, 31) | + __gen_field(values->EndofThreadGroup, 24, 24) | + __gen_field(values->HalfSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 0, 31) | + 0; + + dw[4] = + __gen_field(values->ThreadGroupIDX, 0, 31) | + 0; + + dw[5] = + __gen_field(values->ThreadGroupIDY, 0, 31) | + 0; + + dw[6] = + __gen_field(values->ThreadGroupIDZ, 0, 31) | + 0; + + dw[7] = + __gen_field(values->ExecutionMask, 0, 31) | + 0; + +} + +#define GEN7_GPGPU_WALKER_length_bias 0x00000002 +#define GEN7_GPGPU_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcodeA = 5, \ + .DwordLength = 9 + +#define GEN7_GPGPU_WALKER_length 0x0000000b + +struct GEN7_GPGPU_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcodeA; + bool IndirectParameterEnable; + bool PredicateEnable; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; +#define SIMD8 0 +#define SIMD16 1 +#define SIMD32 2 + uint32_t SIMDSize; + uint32_t ThreadDepthCounterMaximum; + uint32_t ThreadHeightCounterMaximum; + uint32_t ThreadWidthCounterMaximum; + uint32_t ThreadGroupIDStartingX; + uint32_t ThreadGroupIDXDimension; + uint32_t ThreadGroupIDStartingY; + uint32_t ThreadGroupIDYDimension; + uint32_t ThreadGroupIDStartingZ; + uint32_t ThreadGroupIDZDimension; + uint32_t RightExecutionMask; + uint32_t BottomExecutionMask; +}; + +static inline void +GEN7_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_GPGPU_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcodeA, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 4) | + 0; + + dw[2] = + __gen_field(values->SIMDSize, 30, 31) | + __gen_field(values->ThreadDepthCounterMaximum, 16, 21) | + __gen_field(values->ThreadHeightCounterMaximum, 8, 13) | + __gen_field(values->ThreadWidthCounterMaximum, 0, 5) | + 0; + + dw[3] = + __gen_field(values->ThreadGroupIDStartingX, 0, 31) | + 0; + + dw[4] = + __gen_field(values->ThreadGroupIDXDimension, 0, 31) | + 0; + + dw[5] = + __gen_field(values->ThreadGroupIDStartingY, 0, 31) | + 0; + + dw[6] = + __gen_field(values->ThreadGroupIDYDimension, 0, 31) | + 0; + + dw[7] = + __gen_field(values->ThreadGroupIDStartingZ, 0, 31) | + 0; + + dw[8] = + __gen_field(values->ThreadGroupIDZDimension, 0, 31) | + 0; + + dw[9] = + __gen_field(values->RightExecutionMask, 0, 31) | + 0; + + dw[10] = + __gen_field(values->BottomExecutionMask, 0, 31) | + 0; + +} + +#define GEN7_MEDIA_CURBE_LOAD_length_bias 0x00000002 +#define GEN7_MEDIA_CURBE_LOAD_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 1, \ + .DwordLength = 2 + +#define GEN7_MEDIA_CURBE_LOAD_length 0x00000004 + +struct GEN7_MEDIA_CURBE_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t CURBETotalDataLength; + uint32_t CURBEDataStartAddress; +}; + +static inline void +GEN7_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_CURBE_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->CURBETotalDataLength, 0, 16) | + 0; + + dw[3] = + __gen_field(values->CURBEDataStartAddress, 0, 31) | + 0; + +} + +#define GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length_bias 0x00000002 +#define GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_header\ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 2, \ + .DwordLength = 2 + +#define GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 + +struct GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorTotalLength; + uint32_t InterfaceDescriptorDataStartAddress; +}; + +static inline void +GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->InterfaceDescriptorTotalLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->InterfaceDescriptorDataStartAddress, 0, 31) | + 0; + +} + +#define GEN7_MEDIA_OBJECT_length_bias 0x00000002 +#define GEN7_MEDIA_OBJECT_header \ + .CommandType = 3, \ + .MediaCommandPipeline = 2, \ + .MediaCommandOpcode = 1, \ + .MediaCommandSubOpcode = 0 + +#define GEN7_MEDIA_OBJECT_length 0x00000000 + +struct GEN7_MEDIA_OBJECT { + uint32_t CommandType; + uint32_t MediaCommandPipeline; + uint32_t MediaCommandOpcode; + uint32_t MediaCommandSubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; +#define HalfSlice1 2 +#define HalfSlice0 1 +#define Eitherhalfslice 0 + uint32_t HalfSliceDestinationSelect; + uint32_t IndirectDataLength; + __gen_address_type IndirectDataStartAddress; + uint32_t ScoredboardY; + uint32_t ScoreboardX; + uint32_t ScoreboardColor; + bool ScoreboardMask; + /* variable length fields follow */ +}; + +static inline void +GEN7_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_OBJECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MediaCommandPipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->MediaCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 4) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->HalfSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); + + dw[4] = + __gen_field(values->ScoredboardY, 16, 24) | + __gen_field(values->ScoreboardX, 0, 8) | + 0; + + dw[5] = + __gen_field(values->ScoreboardColor, 16, 19) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN7_MEDIA_OBJECT_PRT_length_bias 0x00000002 +#define GEN7_MEDIA_OBJECT_PRT_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 2, \ + .DwordLength = 14 + +#define GEN7_MEDIA_OBJECT_PRT_length 0x00000010 + +struct GEN7_MEDIA_OBJECT_PRT { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; + bool PRT_FenceNeeded; +#define Rootthreadqueue 0 +#define VFEstateflush 1 + uint32_t PRT_FenceType; + uint32_t InlineData[12]; +}; + +static inline void +GEN7_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_OBJECT_PRT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 4) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->PRT_FenceNeeded, 23, 23) | + __gen_field(values->PRT_FenceType, 22, 22) | + 0; + + dw[3] = + 0; + + for (uint32_t i = 0, j = 4; i < 12; i += 1, j++) { + dw[j] = + __gen_field(values->InlineData[i + 0], 0, 31) | + 0; + } + +} + +#define GEN7_MEDIA_OBJECT_WALKER_length_bias 0x00000002 +#define GEN7_MEDIA_OBJECT_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 3 + +#define GEN7_MEDIA_OBJECT_WALKER_length 0x00000000 + +struct GEN7_MEDIA_OBJECT_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; + bool ScoreboardMask; + bool DualMode; + bool Repel; + uint32_t ColorCountMinusOne; + uint32_t MiddleLoopExtraSteps; + uint32_t LocalMidLoopUnitY; + uint32_t MidLoopUnitX; + uint32_t GlobalLoopExecCount; + uint32_t LocalLoopExecCount; + uint32_t BlockResolutionY; + uint32_t BlockResolutionX; + uint32_t LocalStartY; + uint32_t LocalStartX; + uint32_t LocalEndY; + uint32_t LocalEndX; + uint32_t LocalOuterLoopStrideY; + uint32_t LocalOuterLoopStrideX; + uint32_t LocalInnerLoopUnitY; + uint32_t LocalInnerLoopUnitX; + uint32_t GlobalResolutionY; + uint32_t GlobalResolutionX; + uint32_t GlobalStartY; + uint32_t GlobalStartX; + uint32_t GlobalOuterLoopStrideY; + uint32_t GlobalOuterLoopStrideX; + uint32_t GlobalInnerLoopUnitY; + uint32_t GlobalInnerLoopUnitX; + /* variable length fields follow */ +}; + +static inline void +GEN7_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_OBJECT_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 4) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 0, 31) | + 0; + + dw[4] = + 0; + + dw[5] = + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->DualMode, 31, 31) | + __gen_field(values->Repel, 30, 30) | + __gen_field(values->ColorCountMinusOne, 24, 27) | + __gen_field(values->MiddleLoopExtraSteps, 16, 20) | + __gen_field(values->LocalMidLoopUnitY, 12, 13) | + __gen_field(values->MidLoopUnitX, 8, 9) | + 0; + + dw[7] = + __gen_field(values->GlobalLoopExecCount, 16, 25) | + __gen_field(values->LocalLoopExecCount, 0, 9) | + 0; + + dw[8] = + __gen_field(values->BlockResolutionY, 16, 24) | + __gen_field(values->BlockResolutionX, 0, 8) | + 0; + + dw[9] = + __gen_field(values->LocalStartY, 16, 24) | + __gen_field(values->LocalStartX, 0, 8) | + 0; + + dw[10] = + __gen_field(values->LocalEndY, 16, 24) | + __gen_field(values->LocalEndX, 0, 8) | + 0; + + dw[11] = + __gen_field(values->LocalOuterLoopStrideY, 16, 25) | + __gen_field(values->LocalOuterLoopStrideX, 0, 9) | + 0; + + dw[12] = + __gen_field(values->LocalInnerLoopUnitY, 16, 25) | + __gen_field(values->LocalInnerLoopUnitX, 0, 9) | + 0; + + dw[13] = + __gen_field(values->GlobalResolutionY, 16, 24) | + __gen_field(values->GlobalResolutionX, 0, 8) | + 0; + + dw[14] = + __gen_field(values->GlobalStartY, 16, 25) | + __gen_field(values->GlobalStartX, 0, 9) | + 0; + + dw[15] = + __gen_field(values->GlobalOuterLoopStrideY, 16, 25) | + __gen_field(values->GlobalOuterLoopStrideX, 0, 9) | + 0; + + dw[16] = + __gen_field(values->GlobalInnerLoopUnitY, 16, 25) | + __gen_field(values->GlobalInnerLoopUnitX, 0, 9) | + 0; + + /* variable length fields follow */ +} + +#define GEN7_MEDIA_STATE_FLUSH_length_bias 0x00000002 +#define GEN7_MEDIA_STATE_FLUSH_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 4, \ + .DwordLength = 0 + +#define GEN7_MEDIA_STATE_FLUSH_length 0x00000002 + +struct GEN7_MEDIA_STATE_FLUSH { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t WatermarkRequired; + uint32_t InterfaceDescriptorOffset; +}; + +static inline void +GEN7_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_STATE_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->WatermarkRequired, 6, 6) | + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + +} + +#define GEN7_MEDIA_VFE_STATE_length_bias 0x00000002 +#define GEN7_MEDIA_VFE_STATE_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 0, \ + .DwordLength = 6 + +#define GEN7_MEDIA_VFE_STATE_length 0x00000008 + +struct GEN7_MEDIA_VFE_STATE { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t MaximumNumberofThreads; + uint32_t NumberofURBEntries; +#define Maintainingtheexistingtimestampstate 0 +#define Resettingrelativetimerandlatchingtheglobaltimestamp 1 + uint32_t ResetGatewayTimer; +#define MaintainingOpenGatewayForwardMsgCloseGatewayprotocollegacymode 0 +#define BypassingOpenGatewayCloseGatewayprotocol 1 + uint32_t BypassGatewayControl; +#define NoMMIOreadwriteallowed 0 +#define MMIOreadwritetoanyaddress 2 + uint32_t GatewayMMIOAccessControl; + uint32_t GPGPUMode; + uint32_t URBEntryAllocationSize; + uint32_t CURBEAllocationSize; +#define Scoreboarddisabled 0 +#define Scoreboardenabled 1 + uint32_t ScoreboardEnable; +#define StallingScoreboard 0 +#define NonStallingScoreboard 1 + uint32_t ScoreboardType; + uint32_t ScoreboardMask; + uint32_t Scoreboard3DeltaY; + uint32_t Scoreboard3DeltaX; + uint32_t Scoreboard2DeltaY; + uint32_t Scoreboard2DeltaX; + uint32_t Scoreboard1DeltaY; + uint32_t Scoreboard1DeltaX; + uint32_t Scoreboard0DeltaY; + uint32_t Scoreboard0DeltaX; + uint32_t Scoreboard7DeltaY; + uint32_t Scoreboard7DeltaX; + uint32_t Scoreboard6DeltaY; + uint32_t Scoreboard6DeltaX; + uint32_t Scoreboard5DeltaY; + uint32_t Scoreboard5DeltaX; + uint32_t Scoreboard4DeltaY; + uint32_t Scoreboard4DeltaX; +}; + +static inline void +GEN7_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_VFE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[2] = + __gen_field(values->MaximumNumberofThreads, 16, 31) | + __gen_field(values->NumberofURBEntries, 8, 15) | + __gen_field(values->ResetGatewayTimer, 7, 7) | + __gen_field(values->BypassGatewayControl, 6, 6) | + __gen_field(values->GatewayMMIOAccessControl, 3, 4) | + __gen_field(values->GPGPUMode, 2, 2) | + 0; + + dw[3] = + 0; + + dw[4] = + __gen_field(values->URBEntryAllocationSize, 16, 31) | + __gen_field(values->CURBEAllocationSize, 0, 15) | + 0; + + dw[5] = + __gen_field(values->ScoreboardEnable, 31, 31) | + __gen_field(values->ScoreboardType, 30, 30) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->Scoreboard3DeltaY, 28, 31) | + __gen_field(values->Scoreboard3DeltaX, 24, 27) | + __gen_field(values->Scoreboard2DeltaY, 20, 23) | + __gen_field(values->Scoreboard2DeltaX, 16, 19) | + __gen_field(values->Scoreboard1DeltaY, 12, 15) | + __gen_field(values->Scoreboard1DeltaX, 8, 11) | + __gen_field(values->Scoreboard0DeltaY, 4, 7) | + __gen_field(values->Scoreboard0DeltaX, 0, 3) | + 0; + + dw[7] = + __gen_field(values->Scoreboard7DeltaY, 28, 31) | + __gen_field(values->Scoreboard7DeltaX, 24, 27) | + __gen_field(values->Scoreboard6DeltaY, 20, 23) | + __gen_field(values->Scoreboard6DeltaX, 16, 19) | + __gen_field(values->Scoreboard5DeltaY, 12, 15) | + __gen_field(values->Scoreboard5DeltaX, 8, 11) | + __gen_field(values->Scoreboard4DeltaY, 4, 7) | + __gen_field(values->Scoreboard4DeltaX, 0, 3) | + 0; + +} + +#define GEN7_MI_ARB_CHECK_length_bias 0x00000001 +#define GEN7_MI_ARB_CHECK_header \ + .CommandType = 0, \ + .MICommandOpcode = 5 + +#define GEN7_MI_ARB_CHECK_length 0x00000001 + +struct GEN7_MI_ARB_CHECK { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN7_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_ARB_CHECK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN7_MI_ARB_ON_OFF_length_bias 0x00000001 +#define GEN7_MI_ARB_ON_OFF_header \ + .CommandType = 0, \ + .MICommandOpcode = 8 + +#define GEN7_MI_ARB_ON_OFF_length 0x00000001 + +struct GEN7_MI_ARB_ON_OFF { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool ArbitrationEnable; +}; + +static inline void +GEN7_MI_ARB_ON_OFF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_ARB_ON_OFF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ArbitrationEnable, 0, 0) | + 0; + +} + +#define GEN7_MI_BATCH_BUFFER_END_length_bias 0x00000001 +#define GEN7_MI_BATCH_BUFFER_END_header \ + .CommandType = 0, \ + .MICommandOpcode = 10 + +#define GEN7_MI_BATCH_BUFFER_END_length 0x00000001 + +struct GEN7_MI_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN7_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN7_MI_BATCH_BUFFER_START_length_bias 0x00000002 +#define GEN7_MI_BATCH_BUFFER_START_header \ + .CommandType = 0, \ + .MICommandOpcode = 49, \ + .DwordLength = 0 + +#define GEN7_MI_BATCH_BUFFER_START_length 0x00000002 + +struct GEN7_MI_BATCH_BUFFER_START { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool ClearCommandBufferEnable; +#define ASI_GGTT 0 +#define ASI_PPGTT 1 + uint32_t AddressSpaceIndicator; + uint32_t DwordLength; + __gen_address_type BatchBufferStartAddress; +}; + +static inline void +GEN7_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_BATCH_BUFFER_START * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ClearCommandBufferEnable, 11, 11) | + __gen_field(values->AddressSpaceIndicator, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BatchBufferStartAddress, dw1); + +} + +#define GEN7_MI_CLFLUSH_length_bias 0x00000002 +#define GEN7_MI_CLFLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 39 + +#define GEN7_MI_CLFLUSH_length 0x00000000 + +struct GEN7_MI_CLFLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTT; + uint32_t DwordLength; + __gen_address_type PageBaseAddress; + uint32_t StartingCachelineOffset; + __gen_address_type PageBaseAddressHigh; + /* variable length fields follow */ +}; + +static inline void +GEN7_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_CLFLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + __gen_field(values->StartingCachelineOffset, 6, 11) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->PageBaseAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->PageBaseAddressHigh, dw2); + + /* variable length fields follow */ +} + +#define GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_length_bias 0x00000002 +#define GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_header\ + .CommandType = 0, \ + .MICommandOpcode = 54, \ + .UseGlobalGTT = 0, \ + .CompareSemaphore = 0, \ + .DwordLength = 0 + +#define GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000002 + +struct GEN7_MI_CONDITIONAL_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t CompareSemaphore; + uint32_t DwordLength; + uint32_t CompareDataDword; + __gen_address_type CompareAddress; +}; + +static inline void +GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->CompareSemaphore, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CompareDataDword, 0, 31) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->CompareAddress, dw2); + +} + +#define GEN7_MI_FLUSH_length_bias 0x00000001 +#define GEN7_MI_FLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 4 + +#define GEN7_MI_FLUSH_length 0x00000001 + +struct GEN7_MI_FLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool IndirectStatePointersDisable; + bool GenericMediaStateClear; +#define DontReset 0 +#define Reset 1 + bool GlobalSnapshotCountReset; +#define Flush 0 +#define DontFlush 1 + bool RenderCacheFlushInhibit; +#define DontInvalidate 0 +#define Invalidate 1 + bool StateInstructionCacheInvalidate; +}; + +static inline void +GEN7_MI_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->IndirectStatePointersDisable, 5, 5) | + __gen_field(values->GenericMediaStateClear, 4, 4) | + __gen_field(values->GlobalSnapshotCountReset, 3, 3) | + __gen_field(values->RenderCacheFlushInhibit, 2, 2) | + __gen_field(values->StateInstructionCacheInvalidate, 1, 1) | + 0; + +} + +#define GEN7_MI_LOAD_REGISTER_IMM_length_bias 0x00000002 +#define GEN7_MI_LOAD_REGISTER_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 34, \ + .DwordLength = 1 + +#define GEN7_MI_LOAD_REGISTER_IMM_length 0x00000003 + +struct GEN7_MI_LOAD_REGISTER_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t ByteWriteDisables; + uint32_t DwordLength; + uint32_t RegisterOffset; + uint32_t DataDWord; +}; + +static inline void +GEN7_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_LOAD_REGISTER_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ByteWriteDisables, 8, 11) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterOffset, 2, 22) | + 0; + + dw[2] = + __gen_field(values->DataDWord, 0, 31) | + 0; + +} + +#define GEN7_MI_LOAD_REGISTER_MEM_length_bias 0x00000002 +#define GEN7_MI_LOAD_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 41, \ + .DwordLength = 1 + +#define GEN7_MI_LOAD_REGISTER_MEM_length 0x00000003 + +struct GEN7_MI_LOAD_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t AsyncModeEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN7_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_LOAD_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->AsyncModeEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN7_MI_NOOP_length_bias 0x00000001 +#define GEN7_MI_NOOP_header \ + .CommandType = 0, \ + .MICommandOpcode = 0 + +#define GEN7_MI_NOOP_length 0x00000001 + +struct GEN7_MI_NOOP { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool IdentificationNumberRegisterWriteEnable; + uint32_t IdentificationNumber; +}; + +static inline void +GEN7_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_NOOP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->IdentificationNumberRegisterWriteEnable, 22, 22) | + __gen_field(values->IdentificationNumber, 0, 21) | + 0; + +} + +#define GEN7_MI_PREDICATE_length_bias 0x00000001 +#define GEN7_MI_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 12 + +#define GEN7_MI_PREDICATE_length 0x00000001 + +struct GEN7_MI_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define LOAD_KEEP 0 +#define LOAD_LOAD 2 +#define LOAD_LOADINV 3 + uint32_t LoadOperation; +#define COMBINE_SET 0 +#define COMBINE_AND 1 +#define COMBINE_OR 2 +#define COMBINE_XOR 3 + uint32_t CombineOperation; +#define COMPARE_SRCS_EQUAL 2 +#define COMPARE_DELTAS_EQUAL 3 + uint32_t CompareOperation; +}; + +static inline void +GEN7_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->LoadOperation, 6, 7) | + __gen_field(values->CombineOperation, 3, 4) | + __gen_field(values->CompareOperation, 0, 1) | + 0; + +} + +#define GEN7_MI_REPORT_HEAD_length_bias 0x00000001 +#define GEN7_MI_REPORT_HEAD_header \ + .CommandType = 0, \ + .MICommandOpcode = 7 + +#define GEN7_MI_REPORT_HEAD_length 0x00000001 + +struct GEN7_MI_REPORT_HEAD { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN7_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_REPORT_HEAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN7_MI_SEMAPHORE_MBOX_length_bias 0x00000002 +#define GEN7_MI_SEMAPHORE_MBOX_header \ + .CommandType = 0, \ + .MICommandOpcode = 22, \ + .DwordLength = 1 + +#define GEN7_MI_SEMAPHORE_MBOX_length 0x00000003 + +struct GEN7_MI_SEMAPHORE_MBOX { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define RVSYNC 0 +#define RBSYNC 2 +#define UseGeneralRegisterSelect 3 + uint32_t RegisterSelect; + uint32_t DwordLength; + uint32_t SemaphoreDataDword; +}; + +static inline void +GEN7_MI_SEMAPHORE_MBOX_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_SEMAPHORE_MBOX * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->RegisterSelect, 16, 17) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SemaphoreDataDword, 0, 31) | + 0; + + dw[2] = + 0; + +} + +#define GEN7_MI_SET_CONTEXT_length_bias 0x00000002 +#define GEN7_MI_SET_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 24, \ + .DwordLength = 0 + +#define GEN7_MI_SET_CONTEXT_length 0x00000002 + +struct GEN7_MI_SET_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type LogicalContextAddress; + uint32_t ReservedMustbe1; + bool ExtendedStateSaveEnable; + bool ExtendedStateRestoreEnable; + uint32_t ForceRestore; + uint32_t RestoreInhibit; +}; + +static inline void +GEN7_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_SET_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->ReservedMustbe1, 8, 8) | + __gen_field(values->ExtendedStateSaveEnable, 3, 3) | + __gen_field(values->ExtendedStateRestoreEnable, 2, 2) | + __gen_field(values->ForceRestore, 1, 1) | + __gen_field(values->RestoreInhibit, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->LogicalContextAddress, dw1); + +} + +#define GEN7_MI_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN7_MI_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 32, \ + .DwordLength = 2 + +#define GEN7_MI_STORE_DATA_IMM_length 0x00000004 + +struct GEN7_MI_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t DwordLength; + uint32_t Address; + uint32_t CoreModeEnable; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN7_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->Address, 2, 31) | + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[4] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN7_MI_STORE_DATA_INDEX_length_bias 0x00000002 +#define GEN7_MI_STORE_DATA_INDEX_header \ + .CommandType = 0, \ + .MICommandOpcode = 33, \ + .DwordLength = 1 + +#define GEN7_MI_STORE_DATA_INDEX_length 0x00000003 + +struct GEN7_MI_STORE_DATA_INDEX { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t Offset; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN7_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_STORE_DATA_INDEX * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Offset, 2, 11) | + 0; + + dw[2] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[3] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN7_MI_SUSPEND_FLUSH_length_bias 0x00000001 +#define GEN7_MI_SUSPEND_FLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 11 + +#define GEN7_MI_SUSPEND_FLUSH_length 0x00000001 + +struct GEN7_MI_SUSPEND_FLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool SuspendFlush; +}; + +static inline void +GEN7_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_SUSPEND_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->SuspendFlush, 0, 0) | + 0; + +} + +#define GEN7_MI_TOPOLOGY_FILTER_length_bias 0x00000001 +#define GEN7_MI_TOPOLOGY_FILTER_header \ + .CommandType = 0, \ + .MICommandOpcode = 13 + +#define GEN7_MI_TOPOLOGY_FILTER_length 0x00000001 + +struct GEN7_MI_TOPOLOGY_FILTER { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t TopologyFilterValue; +}; + +static inline void +GEN7_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_TOPOLOGY_FILTER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->TopologyFilterValue, 0, 5) | + 0; + +} + +#define GEN7_MI_UPDATE_GTT_length_bias 0x00000002 +#define GEN7_MI_UPDATE_GTT_header \ + .CommandType = 0, \ + .MICommandOpcode = 35 + +#define GEN7_MI_UPDATE_GTT_length 0x00000000 + +struct GEN7_MI_UPDATE_GTT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTT; + uint32_t DwordLength; + __gen_address_type EntryAddress; + /* variable length fields follow */ +}; + +static inline void +GEN7_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_UPDATE_GTT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->EntryAddress, dw1); + + /* variable length fields follow */ +} + +#define GEN7_MI_URB_CLEAR_length_bias 0x00000002 +#define GEN7_MI_URB_CLEAR_header \ + .CommandType = 0, \ + .MICommandOpcode = 25, \ + .DwordLength = 0 + +#define GEN7_MI_URB_CLEAR_length 0x00000002 + +struct GEN7_MI_URB_CLEAR { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBClearLength; + uint32_t URBAddress; +}; + +static inline void +GEN7_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_URB_CLEAR * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBClearLength, 16, 28) | + __gen_offset(values->URBAddress, 0, 13) | + 0; + +} + +#define GEN7_MI_USER_INTERRUPT_length_bias 0x00000001 +#define GEN7_MI_USER_INTERRUPT_header \ + .CommandType = 0, \ + .MICommandOpcode = 2 + +#define GEN7_MI_USER_INTERRUPT_length 0x00000001 + +struct GEN7_MI_USER_INTERRUPT { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN7_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_USER_INTERRUPT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN7_MI_WAIT_FOR_EVENT_length_bias 0x00000001 +#define GEN7_MI_WAIT_FOR_EVENT_header \ + .CommandType = 0, \ + .MICommandOpcode = 3 + +#define GEN7_MI_WAIT_FOR_EVENT_length 0x00000001 + +struct GEN7_MI_WAIT_FOR_EVENT { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool DisplayPipeCHorizontalBlankWaitEnable; + bool DisplayPipeCVerticalBlankWaitEnable; + bool DisplaySpriteCFlipPendingWaitEnable; +#define Notenabled 0 + uint32_t ConditionCodeWaitSelect; + bool DisplayPlaneCFlipPendingWaitEnable; + bool DisplayPipeCScanLineWaitEnable; + bool DisplayPipeBHorizontalBlankWaitEnable; + bool DisplayPipeBVerticalBlankWaitEnable; + bool DisplaySpriteBFlipPendingWaitEnable; + bool DisplayPlaneBFlipPendingWaitEnable; + bool DisplayPipeBScanLineWaitEnable; + bool DisplayPipeAHorizontalBlankWaitEnable; + bool DisplayPipeAVerticalBlankWaitEnable; + bool DisplaySpriteAFlipPendingWaitEnable; + bool DisplayPlaneAFlipPendingWaitEnable; + bool DisplayPipeAScanLineWaitEnable; +}; + +static inline void +GEN7_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_WAIT_FOR_EVENT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPipeCHorizontalBlankWaitEnable, 22, 22) | + __gen_field(values->DisplayPipeCVerticalBlankWaitEnable, 21, 21) | + __gen_field(values->DisplaySpriteCFlipPendingWaitEnable, 20, 20) | + __gen_field(values->ConditionCodeWaitSelect, 16, 19) | + __gen_field(values->DisplayPlaneCFlipPendingWaitEnable, 15, 15) | + __gen_field(values->DisplayPipeCScanLineWaitEnable, 14, 14) | + __gen_field(values->DisplayPipeBHorizontalBlankWaitEnable, 13, 13) | + __gen_field(values->DisplayPipeBVerticalBlankWaitEnable, 11, 11) | + __gen_field(values->DisplaySpriteBFlipPendingWaitEnable, 10, 10) | + __gen_field(values->DisplayPlaneBFlipPendingWaitEnable, 9, 9) | + __gen_field(values->DisplayPipeBScanLineWaitEnable, 8, 8) | + __gen_field(values->DisplayPipeAHorizontalBlankWaitEnable, 5, 5) | + __gen_field(values->DisplayPipeAVerticalBlankWaitEnable, 3, 3) | + __gen_field(values->DisplaySpriteAFlipPendingWaitEnable, 2, 2) | + __gen_field(values->DisplayPlaneAFlipPendingWaitEnable, 1, 1) | + __gen_field(values->DisplayPipeAScanLineWaitEnable, 0, 0) | + 0; + +} + +#define GEN7_PIPE_CONTROL_length_bias 0x00000002 +#define GEN7_PIPE_CONTROL_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 2, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 3 + +#define GEN7_PIPE_CONTROL_length 0x00000005 + +struct GEN7_PIPE_CONTROL { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define DAT_PPGTT 0 +#define DAT_GGTT 1 + uint32_t DestinationAddressType; +#define NoLRIOperation 0 +#define MMIOWriteImmediateData 1 + uint32_t LRIPostSyncOperation; + uint32_t StoreDataIndex; + uint32_t CommandStreamerStallEnable; +#define DontReset 0 +#define Reset 1 + uint32_t GlobalSnapshotCountReset; + uint32_t TLBInvalidate; + bool GenericMediaStateClear; +#define NoWrite 0 +#define WriteImmediateData 1 +#define WritePSDepthCount 2 +#define WriteTimestamp 3 + uint32_t PostSyncOperation; + bool DepthStallEnable; +#define DisableFlush 0 +#define EnableFlush 1 + bool RenderTargetCacheFlushEnable; + bool InstructionCacheInvalidateEnable; + bool TextureCacheInvalidationEnable; + bool IndirectStatePointersDisable; + bool NotifyEnable; + bool PipeControlFlushEnable; + bool DCFlushEnable; + bool VFCacheInvalidationEnable; + bool ConstantCacheInvalidationEnable; + bool StateCacheInvalidationEnable; + bool StallAtPixelScoreboard; +#define FlushDisabled 0 +#define FlushEnabled 1 + bool DepthCacheFlushEnable; + __gen_address_type Address; + uint32_t ImmediateData; + uint32_t ImmediateData0; +}; + +static inline void +GEN7_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_PIPE_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DestinationAddressType, 24, 24) | + __gen_field(values->LRIPostSyncOperation, 23, 23) | + __gen_field(values->StoreDataIndex, 21, 21) | + __gen_field(values->CommandStreamerStallEnable, 20, 20) | + __gen_field(values->GlobalSnapshotCountReset, 19, 19) | + __gen_field(values->TLBInvalidate, 18, 18) | + __gen_field(values->GenericMediaStateClear, 16, 16) | + __gen_field(values->PostSyncOperation, 14, 15) | + __gen_field(values->DepthStallEnable, 13, 13) | + __gen_field(values->RenderTargetCacheFlushEnable, 12, 12) | + __gen_field(values->InstructionCacheInvalidateEnable, 11, 11) | + __gen_field(values->TextureCacheInvalidationEnable, 10, 10) | + __gen_field(values->IndirectStatePointersDisable, 9, 9) | + __gen_field(values->NotifyEnable, 8, 8) | + __gen_field(values->PipeControlFlushEnable, 7, 7) | + __gen_field(values->DCFlushEnable, 5, 5) | + __gen_field(values->VFCacheInvalidationEnable, 4, 4) | + __gen_field(values->ConstantCacheInvalidationEnable, 3, 3) | + __gen_field(values->StateCacheInvalidationEnable, 2, 2) | + __gen_field(values->StallAtPixelScoreboard, 1, 1) | + __gen_field(values->DepthCacheFlushEnable, 0, 0) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->Address, dw2); + + dw[3] = + __gen_field(values->ImmediateData, 0, 31) | + 0; + + dw[4] = + __gen_field(values->ImmediateData, 0, 31) | + 0; + +} + +#define GEN7_SCISSOR_RECT_length 0x00000002 + +struct GEN7_SCISSOR_RECT { + uint32_t ScissorRectangleYMin; + uint32_t ScissorRectangleXMin; + uint32_t ScissorRectangleYMax; + uint32_t ScissorRectangleXMax; +}; + +static inline void +GEN7_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SCISSOR_RECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ScissorRectangleYMin, 16, 31) | + __gen_field(values->ScissorRectangleXMin, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ScissorRectangleYMax, 16, 31) | + __gen_field(values->ScissorRectangleXMax, 0, 15) | + 0; + +} + +#define GEN7_SF_CLIP_VIEWPORT_length 0x00000010 + +struct GEN7_SF_CLIP_VIEWPORT { + float ViewportMatrixElementm00; + float ViewportMatrixElementm11; + float ViewportMatrixElementm22; + float ViewportMatrixElementm30; + float ViewportMatrixElementm31; + float ViewportMatrixElementm32; + float XMinClipGuardband; + float XMaxClipGuardband; + float YMinClipGuardband; + float YMaxClipGuardband; +}; + +static inline void +GEN7_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SF_CLIP_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->ViewportMatrixElementm00) | + 0; + + dw[1] = + __gen_float(values->ViewportMatrixElementm11) | + 0; + + dw[2] = + __gen_float(values->ViewportMatrixElementm22) | + 0; + + dw[3] = + __gen_float(values->ViewportMatrixElementm30) | + 0; + + dw[4] = + __gen_float(values->ViewportMatrixElementm31) | + 0; + + dw[5] = + __gen_float(values->ViewportMatrixElementm32) | + 0; + + dw[6] = + 0; + + dw[7] = + 0; + + dw[8] = + __gen_float(values->XMinClipGuardband) | + 0; + + dw[9] = + __gen_float(values->XMaxClipGuardband) | + 0; + + dw[10] = + __gen_float(values->YMinClipGuardband) | + 0; + + dw[11] = + __gen_float(values->YMaxClipGuardband) | + 0; + + for (uint32_t i = 0, j = 12; i < 4; i += 1, j++) { + dw[j] = + 0; + } + +} + +#define GEN7_BLEND_STATE_length 0x00000002 + +struct GEN7_BLEND_STATE { + bool ColorBufferBlendEnable; + bool IndependentAlphaBlendEnable; +#define BLENDFUNCTION_ADD 0 +#define BLENDFUNCTION_SUBTRACT 1 +#define BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BLENDFUNCTION_MIN 3 +#define BLENDFUNCTION_MAX 4 + uint32_t AlphaBlendFunction; +#define BLENDFACTOR_ONE 1 +#define BLENDFACTOR_SRC_COLOR 2 +#define BLENDFACTOR_SRC_ALPHA 3 +#define BLENDFACTOR_DST_ALPHA 4 +#define BLENDFACTOR_DST_COLOR 5 +#define BLENDFACTOR_SRC_ALPHA_SATURATE 6 +#define BLENDFACTOR_CONST_COLOR 7 +#define BLENDFACTOR_CONST_ALPHA 8 +#define BLENDFACTOR_SRC1_COLOR 9 +#define BLENDFACTOR_SRC1_ALPHA 10 +#define BLENDFACTOR_ZERO 17 +#define BLENDFACTOR_INV_SRC_COLOR 18 +#define BLENDFACTOR_INV_SRC_ALPHA 19 +#define BLENDFACTOR_INV_DST_ALPHA 20 +#define BLENDFACTOR_INV_DST_COLOR 21 +#define BLENDFACTOR_INV_CONST_COLOR 23 +#define BLENDFACTOR_INV_CONST_ALPHA 24 +#define BLENDFACTOR_INV_SRC1_COLOR 25 +#define BLENDFACTOR_INV_SRC1_ALPHA 26 + uint32_t SourceAlphaBlendFactor; + uint32_t DestinationAlphaBlendFactor; +#define BLENDFUNCTION_ADD 0 +#define BLENDFUNCTION_SUBTRACT 1 +#define BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BLENDFUNCTION_MIN 3 +#define BLENDFUNCTION_MAX 4 + uint32_t ColorBlendFunction; + uint32_t SourceBlendFactor; + uint32_t DestinationBlendFactor; + bool AlphaToCoverageEnable; + bool AlphaToOneEnable; + bool AlphaToCoverageDitherEnable; + bool WriteDisableAlpha; + bool WriteDisableRed; + bool WriteDisableGreen; + bool WriteDisableBlue; + bool LogicOpEnable; +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 1 +#define LOGICOP_AND_INVERTED 2 +#define LOGICOP_COPY_INVERTED 3 +#define LOGICOP_AND_REVERSE 4 +#define LOGICOP_INVERT 5 +#define LOGICOP_XOR 6 +#define LOGICOP_NAND 7 +#define LOGICOP_AND 8 +#define LOGICOP_EQUIV 9 +#define LOGICOP_NOOP 10 +#define LOGICOP_OR_INVERTED 11 +#define LOGICOP_COPY 12 +#define LOGICOP_OR_REVERSE 13 +#define LOGICOP_OR 14 +#define LOGICOP_SET 15 + uint32_t LogicOpFunction; + bool AlphaTestEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t AlphaTestFunction; + bool ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; +#define COLORCLAMP_UNORM 0 +#define COLORCLAMP_SNORM 1 +#define COLORCLAMP_RTFORMAT 2 + uint32_t ColorClampRange; + bool PreBlendColorClampEnable; + bool PostBlendColorClampEnable; +}; + +static inline void +GEN7_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_BLEND_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ColorBufferBlendEnable, 31, 31) | + __gen_field(values->IndependentAlphaBlendEnable, 30, 30) | + __gen_field(values->AlphaBlendFunction, 26, 28) | + __gen_field(values->SourceAlphaBlendFactor, 20, 24) | + __gen_field(values->DestinationAlphaBlendFactor, 15, 19) | + __gen_field(values->ColorBlendFunction, 11, 13) | + __gen_field(values->SourceBlendFactor, 5, 9) | + __gen_field(values->DestinationBlendFactor, 0, 4) | + 0; + + dw[1] = + __gen_field(values->AlphaToCoverageEnable, 31, 31) | + __gen_field(values->AlphaToOneEnable, 30, 30) | + __gen_field(values->AlphaToCoverageDitherEnable, 29, 29) | + __gen_field(values->WriteDisableAlpha, 27, 27) | + __gen_field(values->WriteDisableRed, 26, 26) | + __gen_field(values->WriteDisableGreen, 25, 25) | + __gen_field(values->WriteDisableBlue, 24, 24) | + __gen_field(values->LogicOpEnable, 22, 22) | + __gen_field(values->LogicOpFunction, 18, 21) | + __gen_field(values->AlphaTestEnable, 16, 16) | + __gen_field(values->AlphaTestFunction, 13, 15) | + __gen_field(values->ColorDitherEnable, 12, 12) | + __gen_field(values->XDitherOffset, 10, 11) | + __gen_field(values->YDitherOffset, 8, 9) | + __gen_field(values->ColorClampRange, 2, 3) | + __gen_field(values->PreBlendColorClampEnable, 1, 1) | + __gen_field(values->PostBlendColorClampEnable, 0, 0) | + 0; + +} + +#define GEN7_CC_VIEWPORT_length 0x00000002 + +struct GEN7_CC_VIEWPORT { + float MinimumDepth; + float MaximumDepth; +}; + +static inline void +GEN7_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_CC_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->MinimumDepth) | + 0; + + dw[1] = + __gen_float(values->MaximumDepth) | + 0; + +} + +#define GEN7_COLOR_CALC_STATE_length 0x00000006 + +struct GEN7_COLOR_CALC_STATE { + uint32_t StencilReferenceValue; + uint32_t BackFaceStencilReferenceValue; +#define Cancelled 0 +#define NotCancelled 1 + uint32_t RoundDisableFunctionDisable; +#define ALPHATEST_UNORM8 0 +#define ALPHATEST_FLOAT32 1 + uint32_t AlphaTestFormat; + uint32_t AlphaReferenceValueAsUNORM8; + float AlphaReferenceValueAsFLOAT32; + float BlendConstantColorRed; + float BlendConstantColorGreen; + float BlendConstantColorBlue; + float BlendConstantColorAlpha; +}; + +static inline void +GEN7_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_COLOR_CALC_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->StencilReferenceValue, 24, 31) | + __gen_field(values->BackFaceStencilReferenceValue, 16, 23) | + __gen_field(values->RoundDisableFunctionDisable, 15, 15) | + __gen_field(values->AlphaTestFormat, 0, 0) | + 0; + + dw[1] = + __gen_field(values->AlphaReferenceValueAsUNORM8, 0, 31) | + __gen_float(values->AlphaReferenceValueAsFLOAT32) | + 0; + + dw[2] = + __gen_float(values->BlendConstantColorRed) | + 0; + + dw[3] = + __gen_float(values->BlendConstantColorGreen) | + 0; + + dw[4] = + __gen_float(values->BlendConstantColorBlue) | + 0; + + dw[5] = + __gen_float(values->BlendConstantColorAlpha) | + 0; + +} + +#define GEN7_DEPTH_STENCIL_STATE_length 0x00000003 + +struct GEN7_DEPTH_STENCIL_STATE { + bool StencilTestEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t StencilTestFunction; +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 1 +#define STENCILOP_REPLACE 2 +#define STENCILOP_INCRSAT 3 +#define STENCILOP_DECRSAT 4 +#define STENCILOP_INCR 5 +#define STENCILOP_DECR 6 +#define STENCILOP_INVERT 7 + uint32_t StencilFailOp; + uint32_t StencilPassDepthFailOp; + uint32_t StencilPassDepthPassOp; + bool StencilBufferWriteEnable; + bool DoubleSidedStencilEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t BackFaceStencilTestFunction; +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 1 +#define STENCILOP_REPLACE 2 +#define STENCILOP_INCRSAT 3 +#define STENCILOP_DECRSAT 4 +#define STENCILOP_INCR 5 +#define STENCILOP_DECR 6 +#define STENCILOP_INVERT 7 + uint32_t BackfaceStencilFailOp; + uint32_t BackfaceStencilPassDepthFailOp; + uint32_t BackfaceStencilPassDepthPassOp; + uint32_t StencilTestMask; + uint32_t StencilWriteMask; + uint32_t BackfaceStencilTestMask; + uint32_t BackfaceStencilWriteMask; + bool DepthTestEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t DepthTestFunction; + bool DepthBufferWriteEnable; +}; + +static inline void +GEN7_DEPTH_STENCIL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_DEPTH_STENCIL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->StencilTestEnable, 31, 31) | + __gen_field(values->StencilTestFunction, 28, 30) | + __gen_field(values->StencilFailOp, 25, 27) | + __gen_field(values->StencilPassDepthFailOp, 22, 24) | + __gen_field(values->StencilPassDepthPassOp, 19, 21) | + __gen_field(values->StencilBufferWriteEnable, 18, 18) | + __gen_field(values->DoubleSidedStencilEnable, 15, 15) | + __gen_field(values->BackFaceStencilTestFunction, 12, 14) | + __gen_field(values->BackfaceStencilFailOp, 9, 11) | + __gen_field(values->BackfaceStencilPassDepthFailOp, 6, 8) | + __gen_field(values->BackfaceStencilPassDepthPassOp, 3, 5) | + 0; + + dw[1] = + __gen_field(values->StencilTestMask, 24, 31) | + __gen_field(values->StencilWriteMask, 16, 23) | + __gen_field(values->BackfaceStencilTestMask, 8, 15) | + __gen_field(values->BackfaceStencilWriteMask, 0, 7) | + 0; + + dw[2] = + __gen_field(values->DepthTestEnable, 31, 31) | + __gen_field(values->DepthTestFunction, 27, 29) | + __gen_field(values->DepthBufferWriteEnable, 26, 26) | + 0; + +} + +#define GEN7_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 + +struct GEN7_INTERFACE_DESCRIPTOR_DATA { + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlow; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t SamplerStatePointer; +#define Nosamplersused 0 +#define Between1and4samplersused 1 +#define Between5and8samplersused 2 +#define Between9and12samplersused 3 +#define Between13and16samplersused 4 + uint32_t SamplerCount; + uint32_t BindingTablePointer; + uint32_t BindingTableEntryCount; + uint32_t ConstantURBEntryReadLength; + uint32_t ConstantURBEntryReadOffset; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + bool BarrierEnable; + uint32_t SharedLocalMemorySize; + uint32_t NumberofThreadsinGPGPUThreadGroup; +}; + +static inline void +GEN7_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_INTERFACE_DESCRIPTOR_DATA * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[1] = + __gen_field(values->SingleProgramFlow, 18, 18) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[2] = + __gen_offset(values->SamplerStatePointer, 5, 31) | + __gen_field(values->SamplerCount, 2, 4) | + 0; + + dw[3] = + __gen_offset(values->BindingTablePointer, 5, 15) | + __gen_field(values->BindingTableEntryCount, 0, 4) | + 0; + + dw[4] = + __gen_field(values->ConstantURBEntryReadLength, 16, 31) | + __gen_field(values->ConstantURBEntryReadOffset, 0, 15) | + 0; + + dw[5] = + __gen_field(values->RoundingMode, 22, 23) | + __gen_field(values->BarrierEnable, 21, 21) | + __gen_field(values->SharedLocalMemorySize, 16, 20) | + __gen_field(values->NumberofThreadsinGPGPUThreadGroup, 0, 7) | + 0; + + dw[6] = + 0; + + dw[7] = + 0; + +} + +#define GEN7_BINDING_TABLE_STATE_length 0x00000001 + +struct GEN7_BINDING_TABLE_STATE { + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN7_BINDING_TABLE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_BINDING_TABLE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->SurfaceStatePointer, 5, 31) | + 0; + +} + +#define GEN7_RENDER_SURFACE_STATE_length 0x00000008 + +struct GEN7_RENDER_SURFACE_STATE { +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_BUFFER 4 +#define SURFTYPE_STRBUF 5 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + bool SurfaceArray; + uint32_t SurfaceFormat; +#define VALIGN_2 0 +#define VALIGN_4 1 + uint32_t SurfaceVerticalAlignment; +#define HALIGN_4 0 +#define HALIGN_8 1 + uint32_t SurfaceHorizontalAlignment; + uint32_t TiledSurface; +#define TILEWALK_XMAJOR 0 +#define TILEWALK_YMAJOR 1 + uint32_t TileWalk; + uint32_t VerticalLineStride; + uint32_t VerticalLineStrideOffset; +#define ARYSPC_FULL 0 +#define ARYSPC_LOD0 1 + uint32_t SurfaceArraySpacing; + uint32_t RenderCacheReadWriteMode; +#define NORMAL_MODE 0 +#define PROGRESSIVE_FRAME 2 +#define INTERLACED_FRAME 3 + uint32_t MediaBoundaryPixelMode; + uint32_t CubeFaceEnables; + __gen_address_type SurfaceBaseAddress; + uint32_t Height; + uint32_t Width; + uint32_t Depth; + uint32_t SurfacePitch; +#define RTROTATE_0DEG 0 +#define RTROTATE_90DEG 1 +#define RTROTATE_270DEG 3 + uint32_t RenderTargetRotation; + uint32_t MinimumArrayElement; + uint32_t RenderTargetViewExtent; +#define MSFMT_MSS 0 +#define MSFMT_DEPTH_STENCIL 1 + uint32_t MultisampledSurfaceStorageFormat; +#define MULTISAMPLECOUNT_1 0 +#define MULTISAMPLECOUNT_4 2 +#define MULTISAMPLECOUNT_8 3 + uint32_t NumberofMultisamples; + uint32_t MultisamplePositionPaletteIndex; + uint32_t MinimumArrayElement0; + uint32_t XOffset; + uint32_t YOffset; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE SurfaceObjectControlState; + uint32_t SurfaceMinLOD; + uint32_t MIPCountLOD; + __gen_address_type MCSBaseAddress; + uint32_t MCSSurfacePitch; + __gen_address_type AppendCounterAddress; + bool AppendCounterEnable; + bool MCSEnable; + uint32_t XOffsetforUVPlane; + uint32_t YOffsetforUVPlane; +#define CC_ZERO 0 +#define CC_ONE 1 + uint32_t RedClearColor; +#define CC_ZERO 0 +#define CC_ONE 1 + uint32_t GreenClearColor; +#define CC_ZERO 0 +#define CC_ONE 1 + uint32_t BlueClearColor; +#define CC_ZERO 0 +#define CC_ONE 1 + uint32_t AlphaClearColor; + float ResourceMinLOD; +}; + +static inline void +GEN7_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_RENDER_SURFACE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->SurfaceArray, 28, 28) | + __gen_field(values->SurfaceFormat, 18, 26) | + __gen_field(values->SurfaceVerticalAlignment, 16, 17) | + __gen_field(values->SurfaceHorizontalAlignment, 15, 15) | + __gen_field(values->TiledSurface, 14, 14) | + __gen_field(values->TileWalk, 13, 13) | + __gen_field(values->VerticalLineStride, 12, 12) | + __gen_field(values->VerticalLineStrideOffset, 11, 11) | + __gen_field(values->SurfaceArraySpacing, 10, 10) | + __gen_field(values->RenderCacheReadWriteMode, 8, 8) | + __gen_field(values->MediaBoundaryPixelMode, 6, 7) | + __gen_field(values->CubeFaceEnables, 0, 5) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->SurfaceBaseAddress, dw1); + + dw[2] = + __gen_field(values->Height, 16, 29) | + __gen_field(values->Width, 0, 13) | + 0; + + dw[3] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + dw[4] = + __gen_field(values->RenderTargetRotation, 29, 30) | + __gen_field(values->MinimumArrayElement, 18, 28) | + __gen_field(values->RenderTargetViewExtent, 7, 17) | + __gen_field(values->MultisampledSurfaceStorageFormat, 6, 6) | + __gen_field(values->NumberofMultisamples, 3, 5) | + __gen_field(values->MultisamplePositionPaletteIndex, 0, 2) | + __gen_field(values->MinimumArrayElement, 0, 26) | + 0; + + uint32_t dw_SurfaceObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); + dw[5] = + __gen_offset(values->XOffset, 25, 31) | + __gen_offset(values->YOffset, 20, 23) | + __gen_field(dw_SurfaceObjectControlState, 16, 19) | + __gen_field(values->SurfaceMinLOD, 4, 7) | + __gen_field(values->MIPCountLOD, 0, 3) | + 0; + + uint32_t dw6 = + __gen_field(values->MCSSurfacePitch, 3, 11) | + __gen_field(values->AppendCounterEnable, 1, 1) | + __gen_field(values->MCSEnable, 0, 0) | + __gen_field(values->XOffsetforUVPlane, 16, 29) | + __gen_field(values->YOffsetforUVPlane, 0, 13) | + 0; + + dw[6] = + __gen_combine_address(data, &dw[6], values->AppendCounterAddress, dw6); + + dw[7] = + __gen_field(values->RedClearColor, 31, 31) | + __gen_field(values->GreenClearColor, 30, 30) | + __gen_field(values->BlueClearColor, 29, 29) | + __gen_field(values->AlphaClearColor, 28, 28) | + __gen_field(values->ResourceMinLOD * (1 << 8), 0, 11) | + 0; + +} + +#define GEN7_SAMPLER_BORDER_COLOR_STATE_length 0x00000004 + +struct GEN7_SAMPLER_BORDER_COLOR_STATE { + float BorderColorRedDX100GL; + uint32_t BorderColorAlpha; + uint32_t BorderColorBlue; + uint32_t BorderColorGreen; + uint32_t BorderColorRedDX9; + float BorderColorGreen0; + float BorderColorBlue0; + float BorderColorAlpha0; +}; + +static inline void +GEN7_SAMPLER_BORDER_COLOR_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SAMPLER_BORDER_COLOR_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->BorderColorRedDX100GL) | + __gen_field(values->BorderColorAlpha, 24, 31) | + __gen_field(values->BorderColorBlue, 16, 23) | + __gen_field(values->BorderColorGreen, 8, 15) | + __gen_field(values->BorderColorRedDX9, 0, 7) | + 0; + + dw[1] = + __gen_float(values->BorderColorGreen) | + 0; + + dw[2] = + __gen_float(values->BorderColorBlue) | + 0; + + dw[3] = + __gen_float(values->BorderColorAlpha) | + 0; + +} + +#define GEN7_SAMPLER_STATE_length 0x00000004 + +struct GEN7_SAMPLER_STATE { + bool SamplerDisable; +#define DX10OGL 0 +#define DX9 1 + uint32_t TextureBorderColorMode; +#define OGL 1 + uint32_t LODPreClampEnable; + float BaseMipLevel; +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 + uint32_t MipModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MagModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MinModeFilter; + float TextureLODBias; +#define LEGACY 0 +#define EWAApproximation 1 + uint32_t AnisotropicAlgorithm; + float MinLOD; + float MaxLOD; +#define PREFILTEROPALWAYS 0 +#define PREFILTEROPNEVER 1 +#define PREFILTEROPLESS 2 +#define PREFILTEROPEQUAL 3 +#define PREFILTEROPLEQUAL 4 +#define PREFILTEROPGREATER 5 +#define PREFILTEROPNOTEQUAL 6 +#define PREFILTEROPGEQUAL 7 + uint32_t ShadowFunction; +#define PROGRAMMED 0 +#define OVERRIDE 1 + uint32_t CubeSurfaceControlMode; + uint32_t BorderColorPointer; + bool ChromaKeyEnable; + uint32_t ChromaKeyIndex; +#define KEYFILTER_KILL_ON_ANY_MATCH 0 +#define KEYFILTER_REPLACE_BLACK 1 + uint32_t ChromaKeyMode; +#define RATIO21 0 +#define RATIO41 1 +#define RATIO61 2 +#define RATIO81 3 +#define RATIO101 4 +#define RATIO121 5 +#define RATIO141 6 +#define RATIO161 7 + uint32_t MaximumAnisotropy; + bool RAddressMinFilterRoundingEnable; + bool RAddressMagFilterRoundingEnable; + bool VAddressMinFilterRoundingEnable; + bool VAddressMagFilterRoundingEnable; + bool UAddressMinFilterRoundingEnable; + bool UAddressMagFilterRoundingEnable; +#define FULL 0 +#define MED 2 +#define LOW 3 + uint32_t TrilinearFilterQuality; + bool NonnormalizedCoordinateEnable; + uint32_t TCXAddressControlMode; + uint32_t TCYAddressControlMode; + uint32_t TCZAddressControlMode; +}; + +static inline void +GEN7_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SAMPLER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SamplerDisable, 31, 31) | + __gen_field(values->TextureBorderColorMode, 29, 29) | + __gen_field(values->LODPreClampEnable, 28, 28) | + __gen_field(values->BaseMipLevel * (1 << 1), 22, 26) | + __gen_field(values->MipModeFilter, 20, 21) | + __gen_field(values->MagModeFilter, 17, 19) | + __gen_field(values->MinModeFilter, 14, 16) | + __gen_fixed(values->TextureLODBias, 1, 13, true, 8) | + __gen_field(values->AnisotropicAlgorithm, 0, 0) | + 0; + + dw[1] = + __gen_field(values->MinLOD * (1 << 8), 20, 31) | + __gen_field(values->MaxLOD * (1 << 8), 8, 19) | + __gen_field(values->ShadowFunction, 1, 3) | + __gen_field(values->CubeSurfaceControlMode, 0, 0) | + 0; + + dw[2] = + __gen_offset(values->BorderColorPointer, 5, 31) | + 0; + + dw[3] = + __gen_field(values->ChromaKeyEnable, 25, 25) | + __gen_field(values->ChromaKeyIndex, 23, 24) | + __gen_field(values->ChromaKeyMode, 22, 22) | + __gen_field(values->MaximumAnisotropy, 19, 21) | + __gen_field(values->RAddressMinFilterRoundingEnable, 13, 13) | + __gen_field(values->RAddressMagFilterRoundingEnable, 14, 14) | + __gen_field(values->VAddressMinFilterRoundingEnable, 15, 15) | + __gen_field(values->VAddressMagFilterRoundingEnable, 16, 16) | + __gen_field(values->UAddressMinFilterRoundingEnable, 17, 17) | + __gen_field(values->UAddressMagFilterRoundingEnable, 18, 18) | + __gen_field(values->TrilinearFilterQuality, 11, 12) | + __gen_field(values->NonnormalizedCoordinateEnable, 10, 10) | + __gen_field(values->TCXAddressControlMode, 6, 8) | + __gen_field(values->TCYAddressControlMode, 3, 5) | + __gen_field(values->TCZAddressControlMode, 0, 2) | + 0; + +} + +/* Enum 3D_Prim_Topo_Type */ +#define _3DPRIM_POINTLIST 1 +#define _3DPRIM_LINELIST 2 +#define _3DPRIM_LINESTRIP 3 +#define _3DPRIM_TRILIST 4 +#define _3DPRIM_TRISTRIP 5 +#define _3DPRIM_TRIFAN 6 +#define _3DPRIM_QUADLIST 7 +#define _3DPRIM_QUADSTRIP 8 +#define _3DPRIM_LINELIST_ADJ 9 +#define _3DPRIM_LINESTRIP_ADJ 10 +#define _3DPRIM_TRILIST_ADJ 11 +#define _3DPRIM_TRISTRIP_ADJ 12 +#define _3DPRIM_TRISTRIP_REVERSE 13 +#define _3DPRIM_POLYGON 14 +#define _3DPRIM_RECTLIST 15 +#define _3DPRIM_LINELOOP 16 +#define _3DPRIM_POINTLIST_BF 17 +#define _3DPRIM_LINESTRIP_CONT 18 +#define _3DPRIM_LINESTRIP_BF 19 +#define _3DPRIM_LINESTRIP_CONT_BF 20 +#define _3DPRIM_TRIFAN_NOSTIPPLE 22 +#define _3DPRIM_PATCHLIST_1 32 +#define _3DPRIM_PATCHLIST_2 33 +#define _3DPRIM_PATCHLIST_3 34 +#define _3DPRIM_PATCHLIST_4 35 +#define _3DPRIM_PATCHLIST_5 36 +#define _3DPRIM_PATCHLIST_6 37 +#define _3DPRIM_PATCHLIST_7 38 +#define _3DPRIM_PATCHLIST_8 39 +#define _3DPRIM_PATCHLIST_9 40 +#define _3DPRIM_PATCHLIST_10 41 +#define _3DPRIM_PATCHLIST_11 42 +#define _3DPRIM_PATCHLIST_12 43 +#define _3DPRIM_PATCHLIST_13 44 +#define _3DPRIM_PATCHLIST_14 45 +#define _3DPRIM_PATCHLIST_15 46 +#define _3DPRIM_PATCHLIST_16 47 +#define _3DPRIM_PATCHLIST_17 48 +#define _3DPRIM_PATCHLIST_18 49 +#define _3DPRIM_PATCHLIST_19 50 +#define _3DPRIM_PATCHLIST_20 51 +#define _3DPRIM_PATCHLIST_21 52 +#define _3DPRIM_PATCHLIST_22 53 +#define _3DPRIM_PATCHLIST_23 54 +#define _3DPRIM_PATCHLIST_24 55 +#define _3DPRIM_PATCHLIST_25 56 +#define _3DPRIM_PATCHLIST_26 57 +#define _3DPRIM_PATCHLIST_27 58 +#define _3DPRIM_PATCHLIST_28 59 +#define _3DPRIM_PATCHLIST_29 60 +#define _3DPRIM_PATCHLIST_30 61 +#define _3DPRIM_PATCHLIST_31 62 +#define _3DPRIM_PATCHLIST_32 63 + +/* Enum 3D_Vertex_Component_Control */ +#define VFCOMP_NOSTORE 0 +#define VFCOMP_STORE_SRC 1 +#define VFCOMP_STORE_0 2 +#define VFCOMP_STORE_1_FP 3 +#define VFCOMP_STORE_1_INT 4 +#define VFCOMP_STORE_VID 5 +#define VFCOMP_STORE_IID 6 +#define VFCOMP_STORE_PID 7 + +/* Enum 3D_Compare_Function */ +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + +/* Enum SURFACE_FORMAT */ +#define R32G32B32A32_FLOAT 0 +#define R32G32B32A32_SINT 1 +#define R32G32B32A32_UINT 2 +#define R32G32B32A32_UNORM 3 +#define R32G32B32A32_SNORM 4 +#define R64G64_FLOAT 5 +#define R32G32B32X32_FLOAT 6 +#define R32G32B32A32_SSCALED 7 +#define R32G32B32A32_USCALED 8 +#define R32G32B32A32_SFIXED 32 +#define R64G64_PASSTHRU 33 +#define R32G32B32_FLOAT 64 +#define R32G32B32_SINT 65 +#define R32G32B32_UINT 66 +#define R32G32B32_UNORM 67 +#define R32G32B32_SNORM 68 +#define R32G32B32_SSCALED 69 +#define R32G32B32_USCALED 70 +#define R32G32B32_SFIXED 80 +#define R16G16B16A16_UNORM 128 +#define R16G16B16A16_SNORM 129 +#define R16G16B16A16_SINT 130 +#define R16G16B16A16_UINT 131 +#define R16G16B16A16_FLOAT 132 +#define R32G32_FLOAT 133 +#define R32G32_SINT 134 +#define R32G32_UINT 135 +#define R32_FLOAT_X8X24_TYPELESS 136 +#define X32_TYPELESS_G8X24_UINT 137 +#define L32A32_FLOAT 138 +#define R32G32_UNORM 139 +#define R32G32_SNORM 140 +#define R64_FLOAT 141 +#define R16G16B16X16_UNORM 142 +#define R16G16B16X16_FLOAT 143 +#define A32X32_FLOAT 144 +#define L32X32_FLOAT 145 +#define I32X32_FLOAT 146 +#define R16G16B16A16_SSCALED 147 +#define R16G16B16A16_USCALED 148 +#define R32G32_SSCALED 149 +#define R32G32_USCALED 150 +#define R32G32_SFIXED 160 +#define R64_PASSTHRU 161 +#define B8G8R8A8_UNORM 192 +#define B8G8R8A8_UNORM_SRGB 193 +#define R10G10B10A2_UNORM 194 +#define R10G10B10A2_UNORM_SRGB 195 +#define R10G10B10A2_UINT 196 +#define R10G10B10_SNORM_A2_UNORM 197 +#define R8G8B8A8_UNORM 199 +#define R8G8B8A8_UNORM_SRGB 200 +#define R8G8B8A8_SNORM 201 +#define R8G8B8A8_SINT 202 +#define R8G8B8A8_UINT 203 +#define R16G16_UNORM 204 +#define R16G16_SNORM 205 +#define R16G16_SINT 206 +#define R16G16_UINT 207 +#define R16G16_FLOAT 208 +#define B10G10R10A2_UNORM 209 +#define B10G10R10A2_UNORM_SRGB 210 +#define R11G11B10_FLOAT 211 +#define R32_SINT 214 +#define R32_UINT 215 +#define R32_FLOAT 216 +#define R24_UNORM_X8_TYPELESS 217 +#define X24_TYPELESS_G8_UINT 218 +#define L32_UNORM 221 +#define A32_UNORM 222 +#define L16A16_UNORM 223 +#define I24X8_UNORM 224 +#define L24X8_UNORM 225 +#define A24X8_UNORM 226 +#define I32_FLOAT 227 +#define L32_FLOAT 228 +#define A32_FLOAT 229 +#define X8B8_UNORM_G8R8_SNORM 230 +#define A8X8_UNORM_G8R8_SNORM 231 +#define B8X8_UNORM_G8R8_SNORM 232 +#define B8G8R8X8_UNORM 233 +#define B8G8R8X8_UNORM_SRGB 234 +#define R8G8B8X8_UNORM 235 +#define R8G8B8X8_UNORM_SRGB 236 +#define R9G9B9E5_SHAREDEXP 237 +#define B10G10R10X2_UNORM 238 +#define L16A16_FLOAT 240 +#define R32_UNORM 241 +#define R32_SNORM 242 +#define R10G10B10X2_USCALED 243 +#define R8G8B8A8_SSCALED 244 +#define R8G8B8A8_USCALED 245 +#define R16G16_SSCALED 246 +#define R16G16_USCALED 247 +#define R32_SSCALED 248 +#define R32_USCALED 249 +#define B5G6R5_UNORM 256 +#define B5G6R5_UNORM_SRGB 257 +#define B5G5R5A1_UNORM 258 +#define B5G5R5A1_UNORM_SRGB 259 +#define B4G4R4A4_UNORM 260 +#define B4G4R4A4_UNORM_SRGB 261 +#define R8G8_UNORM 262 +#define R8G8_SNORM 263 +#define R8G8_SINT 264 +#define R8G8_UINT 265 +#define R16_UNORM 266 +#define R16_SNORM 267 +#define R16_SINT 268 +#define R16_UINT 269 +#define R16_FLOAT 270 +#define A8P8_UNORM_PALETTE0 271 +#define A8P8_UNORM_PALETTE1 272 +#define I16_UNORM 273 +#define L16_UNORM 274 +#define A16_UNORM 275 +#define L8A8_UNORM 276 +#define I16_FLOAT 277 +#define L16_FLOAT 278 +#define A16_FLOAT 279 +#define L8A8_UNORM_SRGB 280 +#define R5G5_SNORM_B6_UNORM 281 +#define B5G5R5X1_UNORM 282 +#define B5G5R5X1_UNORM_SRGB 283 +#define R8G8_SSCALED 284 +#define R8G8_USCALED 285 +#define R16_SSCALED 286 +#define R16_USCALED 287 +#define P8A8_UNORM_PALETTE0 290 +#define P8A8_UNORM_PALETTE1 291 +#define A1B5G5R5_UNORM 292 +#define A4B4G4R4_UNORM 293 +#define L8A8_UINT 294 +#define L8A8_SINT 295 +#define R8_UNORM 320 +#define R8_SNORM 321 +#define R8_SINT 322 +#define R8_UINT 323 +#define A8_UNORM 324 +#define I8_UNORM 325 +#define L8_UNORM 326 +#define P4A4_UNORM_PALETTE0 327 +#define A4P4_UNORM_PALETTE0 328 +#define R8_SSCALED 329 +#define R8_USCALED 330 +#define P8_UNORM_PALETTE0 331 +#define L8_UNORM_SRGB 332 +#define P8_UNORM_PALETTE1 333 +#define P4A4_UNORM_PALETTE1 334 +#define A4P4_UNORM_PALETTE1 335 +#define Y8_UNORM 336 +#define L8_UINT 338 +#define L8_SINT 339 +#define I8_UINT 340 +#define I8_SINT 341 +#define DXT1_RGB_SRGB 384 +#define R1_UNORM 385 +#define YCRCB_NORMAL 386 +#define YCRCB_SWAPUVY 387 +#define P2_UNORM_PALETTE0 388 +#define P2_UNORM_PALETTE1 389 +#define BC1_UNORM 390 +#define BC2_UNORM 391 +#define BC3_UNORM 392 +#define BC4_UNORM 393 +#define BC5_UNORM 394 +#define BC1_UNORM_SRGB 395 +#define BC2_UNORM_SRGB 396 +#define BC3_UNORM_SRGB 397 +#define MONO8 398 +#define YCRCB_SWAPUV 399 +#define YCRCB_SWAPY 400 +#define DXT1_RGB 401 +#define FXT1 402 +#define R8G8B8_UNORM 403 +#define R8G8B8_SNORM 404 +#define R8G8B8_SSCALED 405 +#define R8G8B8_USCALED 406 +#define R64G64B64A64_FLOAT 407 +#define R64G64B64_FLOAT 408 +#define BC4_SNORM 409 +#define BC5_SNORM 410 +#define R16G16B16_FLOAT 411 +#define R16G16B16_UNORM 412 +#define R16G16B16_SNORM 413 +#define R16G16B16_SSCALED 414 +#define R16G16B16_USCALED 415 +#define BC6H_SF16 417 +#define BC7_UNORM 418 +#define BC7_UNORM_SRGB 419 +#define BC6H_UF16 420 +#define PLANAR_420_8 421 +#define R8G8B8_UNORM_SRGB 424 +#define ETC1_RGB8 425 +#define ETC2_RGB8 426 +#define EAC_R11 427 +#define EAC_RG11 428 +#define EAC_SIGNED_R11 429 +#define EAC_SIGNED_RG11 430 +#define ETC2_SRGB8 431 +#define R16G16B16_UINT 432 +#define R16G16B16_SINT 433 +#define R32_SFIXED 434 +#define R10G10B10A2_SNORM 435 +#define R10G10B10A2_USCALED 436 +#define R10G10B10A2_SSCALED 437 +#define R10G10B10A2_SINT 438 +#define B10G10R10A2_SNORM 439 +#define B10G10R10A2_USCALED 440 +#define B10G10R10A2_SSCALED 441 +#define B10G10R10A2_UINT 442 +#define B10G10R10A2_SINT 443 +#define R64G64B64A64_PASSTHRU 444 +#define R64G64B64_PASSTHRU 445 +#define ETC2_RGB8_PTA 448 +#define ETC2_SRGB8_PTA 449 +#define ETC2_EAC_RGBA8 450 +#define ETC2_EAC_SRGB8_A8 451 +#define R8G8B8_UINT 456 +#define R8G8B8_SINT 457 +#define RAW 511 + +/* Enum Texture Coordinate Mode */ +#define TCM_WRAP 0 +#define TCM_MIRROR 1 +#define TCM_CLAMP 2 +#define TCM_CUBE 3 +#define TCM_CLAMP_BORDER 4 +#define TCM_MIRROR_ONCE 5 + diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c new file mode 100644 index 00000000000..42c50310e37 --- /dev/null +++ b/src/vulkan/gen7_pipeline.c @@ -0,0 +1,452 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +#include "gen7_pack.h" +#include "gen75_pack.h" + +#include "genX_pipeline_util.h" + +static void +gen7_emit_rs_state(struct anv_pipeline *pipeline, + const VkPipelineRasterizationStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) +{ + struct GEN7_3DSTATE_SF sf = { + GEN7_3DSTATE_SF_header, + + /* FIXME: Get this from pass info */ + .DepthBufferSurfaceFormat = D24_UNORM_X8_UINT, + + /* LegacyGlobalDepthBiasEnable */ + + .StatisticsEnable = true, + .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], + .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], + .ViewTransformEnable = !(extra && extra->disable_viewport), + .FrontWinding = vk_to_gen_front_face[info->frontFace], + /* bool AntiAliasingEnable; */ + + .CullMode = vk_to_gen_cullmode[info->cullMode], + + /* uint32_t LineEndCapAntialiasingRegionWidth; */ + .ScissorRectangleEnable = !(extra && extra->disable_scissor), + + /* uint32_t MultisampleRasterizationMode; */ + /* bool LastPixelEnable; */ + + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + + /* uint32_t AALineDistanceMode; */ + /* uint32_t VertexSubPixelPrecisionSelect; */ + .UsePointWidthState = !pipeline->writes_point_size, + .PointWidth = 1.0, + }; + + GEN7_3DSTATE_SF_pack(NULL, &pipeline->gen7.sf, &sf); +} + +static void +gen7_emit_ds_state(struct anv_pipeline *pipeline, + const VkPipelineDepthStencilStateCreateInfo *info) +{ + if (info == NULL) { + /* We're going to OR this together with the dynamic state. We need + * to make sure it's initialized to something useful. + */ + memset(pipeline->gen7.depth_stencil_state, 0, + sizeof(pipeline->gen7.depth_stencil_state)); + return; + } + + struct GEN7_DEPTH_STENCIL_STATE state = { + .DepthTestEnable = info->depthTestEnable, + .DepthBufferWriteEnable = info->depthWriteEnable, + .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], + .DoubleSidedStencilEnable = true, + + .StencilTestEnable = info->stencilTestEnable, + .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp], + .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp], + .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp], + .StencilTestFunction = vk_to_gen_compare_op[info->front.compareOp], + + .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.failOp], + .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.passOp], + .BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[info->back.depthFailOp], + .BackFaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp], + }; + + GEN7_DEPTH_STENCIL_STATE_pack(NULL, &pipeline->gen7.depth_stencil_state, &state); +} + +static void +gen7_emit_cb_state(struct anv_pipeline *pipeline, + const VkPipelineColorBlendStateCreateInfo *info, + const VkPipelineMultisampleStateCreateInfo *ms_info) +{ + struct anv_device *device = pipeline->device; + + if (info->pAttachments == NULL) { + pipeline->blend_state = + anv_state_pool_emit(&device->dynamic_state_pool, + GEN7_BLEND_STATE, 64, + .ColorBufferBlendEnable = false, + .WriteDisableAlpha = false, + .WriteDisableRed = false, + .WriteDisableGreen = false, + .WriteDisableBlue = false); + } else { + /* FIXME-GEN7: All render targets share blend state settings on gen7, we + * can't implement this. + */ + const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0]; + pipeline->blend_state = + anv_state_pool_emit(&device->dynamic_state_pool, + GEN7_BLEND_STATE, 64, + + .ColorBufferBlendEnable = a->blendEnable, + .IndependentAlphaBlendEnable = true, /* FIXME: yes? */ + .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], + + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], + + .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], + .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], + .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], + .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, + +# if 0 + bool AlphaToOneEnable; + bool AlphaToCoverageDitherEnable; +# endif + + .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), + .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), + .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), + .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), + + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + +# if 0 + bool AlphaTestEnable; + uint32_t AlphaTestFunction; + bool ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; + uint32_t ColorClampRange; + bool PreBlendColorClampEnable; + bool PostBlendColorClampEnable; +# endif + ); + } + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_BLEND_STATE_POINTERS, + .BlendStatePointer = pipeline->blend_state.offset); +} + +static inline uint32_t +scratch_space(const struct brw_stage_prog_data *prog_data) +{ + return ffs(prog_data->total_scratch / 1024); +} + +GENX_FUNC(GEN7, GEN75) VkResult +genX(graphics_pipeline_create)( + VkDevice _device, + struct anv_pipeline_cache * cache, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_pipeline_init(pipeline, device, cache, + pCreateInfo, extra, pAllocator); + if (result != VK_SUCCESS) { + anv_free2(&device->alloc, pAllocator, pipeline); + return result; + } + + assert(pCreateInfo->pVertexInputState); + emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra); + + assert(pCreateInfo->pRasterizationState); + gen7_emit_rs_state(pipeline, pCreateInfo->pRasterizationState, extra); + + gen7_emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); + + gen7_emit_cb_state(pipeline, pCreateInfo->pColorBlendState, + pCreateInfo->pMultisampleState); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_VF_STATISTICS, + .StatisticsEnable = true); + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_HS, .Enable = false); + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_TE, .TEEnable = false); + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_DS, .DSFunctionEnable = false); + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_STREAMOUT, .SOFunctionEnable = false); + + /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: + * + * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall + * needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS, + * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS, + * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL + * needs to be sent before any combination of VS associated 3DSTATE." + */ + anv_batch_emit(&pipeline->batch, GEN7_PIPE_CONTROL, + .DepthStallEnable = true, + .PostSyncOperation = WriteImmediateData, + .Address = { &device->workaround_bo, 0 }); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS, + .ConstantBufferOffset = 0, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS, + .ConstantBufferOffset = 4, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS, + .ConstantBufferOffset = 8, + .ConstantBufferSize = 4); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_AA_LINE_PARAMETERS); + + const VkPipelineRasterizationStateCreateInfo *rs_info = + pCreateInfo->pRasterizationState; + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_CLIP, + .FrontWinding = vk_to_gen_front_face[rs_info->frontFace], + .CullMode = vk_to_gen_cullmode[rs_info->cullMode], + .ClipEnable = true, + .APIMode = APIMODE_OGL, + .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + .ClipMode = CLIPMODE_NORMAL, + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + .MinimumPointWidth = 0.125, + .MaximumPointWidth = 255.875, + .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); + + if (pCreateInfo->pMultisampleState && + pCreateInfo->pMultisampleState->rasterizationSamples > 1) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); + + uint32_t samples = 1; + uint32_t log2_samples = __builtin_ffs(samples) - 1; + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_MULTISAMPLE, + .PixelLocation = PIXLOC_CENTER, + .NumberofMultisamples = log2_samples); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SAMPLE_MASK, + .SampleMask = 0xff); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_URB_VS, + .VSURBStartingAddress = pipeline->urb.vs_start, + .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, + .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_URB_GS, + .GSURBStartingAddress = pipeline->urb.gs_start, + .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, + .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_URB_HS, + .HSURBStartingAddress = pipeline->urb.vs_start, + .HSURBEntryAllocationSize = 0, + .HSNumberofURBEntries = 0); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_URB_DS, + .DSURBStartingAddress = pipeline->urb.vs_start, + .DSURBEntryAllocationSize = 0, + .DSNumberofURBEntries = 0); + + const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + /* The last geometry producing stage will set urb_offset and urb_length, + * which we use in 3DSTATE_SBE. Skip the VUE header and position slots. */ + uint32_t urb_offset = 1; + uint32_t urb_length = (vue_prog_data->vue_map.num_slots + 1) / 2 - urb_offset; + +#if 0 + /* From gen7_vs_state.c */ + + /** + * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > + * Geometry > Geometry Shader > State: + * + * "Note: Because of corruption in IVB:GT2, software needs to flush the + * whole fixed function pipeline when the GS enable changes value in + * the 3DSTATE_GS." + * + * The hardware architects have clarified that in this context "flush the + * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS + * Stall" bit set. + */ + if (!brw->is_haswell && !brw->is_baytrail) + gen7_emit_vs_workaround_flush(brw); +#endif + + if (pipeline->vs_vec4 == NO_KERNEL || (extra && extra->disable_vs)) + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), .VSFunctionEnable = false); + else + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), + .KernelStartPointer = pipeline->vs_vec4, + .ScratchSpaceBaseOffset = pipeline->scratch_start[MESA_SHADER_VERTEX], + .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), + + .DispatchGRFStartRegisterforURBData = + vue_prog_data->base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + .VertexURBEntryReadOffset = 0, + + .MaximumNumberofThreads = device->info.max_vs_threads - 1, + .StatisticsEnable = true, + .VSFunctionEnable = true); + + const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; + + if (pipeline->gs_kernel == NO_KERNEL || (extra && extra->disable_vs)) { + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .GSEnable = false); + } else { + urb_offset = 1; + urb_length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - urb_offset; + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), + .KernelStartPointer = pipeline->gs_kernel, + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY], + .PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base), + + .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, + .OutputTopology = gs_prog_data->output_topology, + .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, + .IncludeVertexHandles = gs_prog_data->base.include_vue_handles, + .DispatchGRFStartRegisterforURBData = + gs_prog_data->base.base.dispatch_grf_start_reg, + + .MaximumNumberofThreads = device->info.max_gs_threads - 1, + /* This in the next dword on HSW. */ + .ControlDataFormat = gs_prog_data->control_data_format, + .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, + .InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1, + .DispatchMode = gs_prog_data->base.dispatch_mode, + .GSStatisticsEnable = true, + .IncludePrimitiveID = gs_prog_data->include_primitive_id, +# if (ANV_IS_HASWELL) + .ReorderMode = REORDER_TRAILING, +# else + .ReorderEnable = true, +# endif + .GSEnable = true); + } + + if (pipeline->ps_ksp0 == NO_KERNEL) { + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); + anv_finishme("gen7 alternative to " + "3DSTATE_PS_EXTRA.PixelShaderValid = false"); + } else { + const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 || + wm_prog_data->urb_setup[VARYING_SLOT_BFC1] != -1) + anv_finishme("two-sided color needs sbe swizzling setup"); + if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1) + anv_finishme("primitive_id needs sbe swizzling setup"); + + /* FIXME: generated header doesn't emit attr swizzle fields */ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE, + .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs, + .VertexURBEntryReadLength = urb_length, + .VertexURBEntryReadOffset = urb_offset, + .PointSpriteTextureCoordinateOrigin = UPPERLEFT); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), + .KernelStartPointer0 = pipeline->ps_ksp0, + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], + .PerThreadScratchSpace = scratch_space(&wm_prog_data->base), + + .MaximumNumberofThreads = device->info.max_wm_threads - 1, + .PushConstantEnable = wm_prog_data->base.nr_params > 0, + .AttributeEnable = wm_prog_data->num_varying_inputs > 0, + .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, + + .RenderTargetFastClearEnable = false, + .DualSourceBlendEnable = false, + .RenderTargetResolveEnable = false, + + .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? + POSOFFSET_SAMPLE : POSOFFSET_NONE, + + ._32PixelDispatchEnable = false, + ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, + ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, + + .DispatchGRFStartRegisterforConstantSetupData0 = pipeline->ps_grf_start0, + .DispatchGRFStartRegisterforConstantSetupData1 = 0, + .DispatchGRFStartRegisterforConstantSetupData2 = pipeline->ps_grf_start2, + +#if 0 + /* Haswell requires the sample mask to be set in this packet as well as + * in 3DSTATE_SAMPLE_MASK; the values should match. */ + /* _NEW_BUFFERS, _NEW_MULTISAMPLE */ +#endif + + .KernelStartPointer1 = 0, + .KernelStartPointer2 = pipeline->ps_ksp2); + + /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). */ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, + .StatisticsEnable = true, + .ThreadDispatchEnable = true, + .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ + .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */ + .EarlyDepthStencilControl = EDSC_NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT, + .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes); + } + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c new file mode 100644 index 00000000000..2375070636e --- /dev/null +++ b/src/vulkan/gen7_state.c @@ -0,0 +1,238 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +#include "gen7_pack.h" +#include "gen75_pack.h" + +#include "genX_state_util.h" + +GENX_FUNC(GEN7, GEN75) void +genX(fill_buffer_surface_state)(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride) +{ + uint32_t num_elements = range / stride; + + struct GENX(RENDER_SURFACE_STATE) surface_state = { + .SurfaceType = SURFTYPE_BUFFER, + .SurfaceFormat = format, + .SurfaceVerticalAlignment = VALIGN_4, + .SurfaceHorizontalAlignment = HALIGN_4, + .TiledSurface = false, + .RenderCacheReadWriteMode = false, + .SurfaceObjectControlState = GENX(MOCS), + .Height = ((num_elements - 1) >> 7) & 0x3fff, + .Width = (num_elements - 1) & 0x7f, + .Depth = ((num_elements - 1) >> 21) & 0x3f, + .SurfacePitch = stride - 1, +# if (ANV_IS_HASWELL) + .ShaderChannelSelectR = SCS_RED, + .ShaderChannelSelectG = SCS_GREEN, + .ShaderChannelSelectB = SCS_BLUE, + .ShaderChannelSelectA = SCS_ALPHA, +# endif + .SurfaceBaseAddress = { NULL, offset }, + }; + + GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); +} + +VkResult genX(CreateSampler)( + VkDevice _device, + const VkSamplerCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSampler* pSampler) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_sampler *sampler; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!sampler) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct GEN7_SAMPLER_STATE sampler_state = { + .SamplerDisable = false, + .TextureBorderColorMode = DX10OGL, + .LODPreClampEnable = OGL, + .BaseMipLevel = 0.0, + .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], + .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, + pCreateInfo->anisotropyEnable), + .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter, + pCreateInfo->anisotropyEnable), + .TextureLODBias = pCreateInfo->mipLodBias * 256, + .AnisotropicAlgorithm = EWAApproximation, + .MinLOD = pCreateInfo->minLod, + .MaxLOD = pCreateInfo->maxLod, + .ChromaKeyEnable = 0, + .ChromaKeyIndex = 0, + .ChromaKeyMode = 0, + .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], + .CubeSurfaceControlMode = OVERRIDE, + + .BorderColorPointer = + device->border_colors.offset + + pCreateInfo->borderColor * sizeof(float) * 4, + + .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), + .RAddressMinFilterRoundingEnable = 0, + .RAddressMagFilterRoundingEnable = 0, + .VAddressMinFilterRoundingEnable = 0, + .VAddressMagFilterRoundingEnable = 0, + .UAddressMinFilterRoundingEnable = 0, + .UAddressMagFilterRoundingEnable = 0, + .TrilinearFilterQuality = 0, + .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, + .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], + .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], + .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], + }; + + GEN7_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); + + *pSampler = anv_sampler_to_handle(sampler); + + return VK_SUCCESS; +} + +static const uint8_t anv_halign[] = { + [4] = HALIGN_4, + [8] = HALIGN_8, +}; + +static const uint8_t anv_valign[] = { + [2] = VALIGN_2, + [4] = VALIGN_4, +}; + +void +genX(fill_image_surface_state)(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage) +{ + if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) + anv_finishme("non-2D image views"); + + assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)); + assert(util_is_power_of_two(usage)); + + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + bool is_storage = (usage == VK_IMAGE_USAGE_STORAGE_BIT); + struct anv_surface *surface = + anv_image_get_surface_for_aspect_mask(image, range->aspectMask); + + uint32_t depth = 1; + if (range->layerCount > 1) { + depth = range->layerCount; + } else if (image->extent.depth > 1) { + depth = image->extent.depth; + } + + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(&surface->isl); + + struct GENX(RENDER_SURFACE_STATE) template = { + .SurfaceType = anv_surftype(image, pCreateInfo->viewType, + usage == VK_IMAGE_USAGE_STORAGE_BIT), + .SurfaceArray = image->array_size > 1, + .SurfaceFormat = anv_surface_format(device, iview->format, is_storage), + .SurfaceVerticalAlignment = anv_valign[image_align_sa.height], + .SurfaceHorizontalAlignment = anv_halign[image_align_sa.width], + + /* From bspec (DevSNB, DevIVB): "Set Tile Walk to TILEWALK_XMAJOR if + * Tiled Surface is False." + */ + .TiledSurface = surface->isl.tiling != ISL_TILING_LINEAR, + .TileWalk = surface->isl.tiling == ISL_TILING_Y0 ? + TILEWALK_YMAJOR : TILEWALK_XMAJOR, + + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + + .RenderCacheReadWriteMode = 0, /* TEMPLATE */ + + .Height = image->extent.height - 1, + .Width = image->extent.width - 1, + .Depth = depth - 1, + .SurfacePitch = surface->isl.row_pitch - 1, + .MinimumArrayElement = range->baseArrayLayer, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + + .SurfaceObjectControlState = GENX(MOCS), + + .MIPCountLOD = 0, /* TEMPLATE */ + .SurfaceMinLOD = 0, /* TEMPLATE */ + + .MCSEnable = false, +# if (ANV_IS_HASWELL) + .ShaderChannelSelectR = vk_to_gen_swizzle[iview->swizzle.r], + .ShaderChannelSelectG = vk_to_gen_swizzle[iview->swizzle.g], + .ShaderChannelSelectB = vk_to_gen_swizzle[iview->swizzle.b], + .ShaderChannelSelectA = vk_to_gen_swizzle[iview->swizzle.a], +# else /* XXX: Seriously? */ + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, +# endif + .ResourceMinLOD = 0.0, + .SurfaceBaseAddress = { NULL, iview->offset }, + }; + + if (usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + /* For render target surfaces, the hardware interprets field + * MIPCount/LOD as LOD. The Broadwell PRM says: + * + * MIPCountLOD defines the LOD that will be rendered into. + * SurfaceMinLOD is ignored. + */ + template.MIPCountLOD = range->baseMipLevel; + template.SurfaceMinLOD = 0; + } else { + /* For non render target surfaces, the hardware interprets field + * MIPCount/LOD as MIPCount. The range of levels accessible by the + * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. + */ + template.SurfaceMinLOD = range->baseMipLevel; + template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; + } + + GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); +} diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c new file mode 100644 index 00000000000..56d80e26eeb --- /dev/null +++ b/src/vulkan/gen8_cmd_buffer.c @@ -0,0 +1,1339 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +#include "gen8_pack.h" +#include "gen9_pack.h" + +static uint32_t +cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) +{ + static const uint32_t push_constant_opcodes[] = { + [MESA_SHADER_VERTEX] = 21, + [MESA_SHADER_TESS_CTRL] = 25, /* HS */ + [MESA_SHADER_TESS_EVAL] = 26, /* DS */ + [MESA_SHADER_GEOMETRY] = 22, + [MESA_SHADER_FRAGMENT] = 23, + [MESA_SHADER_COMPUTE] = 0, + }; + + VkShaderStageFlags flushed = 0; + + anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { + if (stage == MESA_SHADER_COMPUTE) + continue; + + struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); + + if (state.offset == 0) + continue; + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), + ._3DCommandSubOpcode = push_constant_opcodes[stage], + .ConstantBody = { + .PointerToConstantBuffer2 = { &cmd_buffer->device->dynamic_state_block_pool.bo, state.offset }, + .ConstantBuffer2ReadLength = DIV_ROUND_UP(state.alloc_size, 32), + }); + + flushed |= mesa_to_vk_shader_stage(stage); + } + + cmd_buffer->state.push_constants_dirty &= ~flushed; + + return flushed; +} + +#if ANV_GEN == 8 +static void +emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t count, const VkViewport *viewports) +{ + struct anv_state sf_clip_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64); + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); + + for (uint32_t i = 0; i < count; i++) { + const VkViewport *vp = &viewports[i]; + + /* The gen7 state struct has just the matrix and guardband fields, the + * gen8 struct adds the min/max viewport fields. */ + struct GENX(SF_CLIP_VIEWPORT) sf_clip_viewport = { + .ViewportMatrixElementm00 = vp->width / 2, + .ViewportMatrixElementm11 = vp->height / 2, + .ViewportMatrixElementm22 = 1.0, + .ViewportMatrixElementm30 = vp->x + vp->width / 2, + .ViewportMatrixElementm31 = vp->y + vp->height / 2, + .ViewportMatrixElementm32 = 0.0, + .XMinClipGuardband = -1.0f, + .XMaxClipGuardband = 1.0f, + .YMinClipGuardband = -1.0f, + .YMaxClipGuardband = 1.0f, + .XMinViewPort = vp->x, + .XMaxViewPort = vp->x + vp->width - 1, + .YMinViewPort = vp->y, + .YMaxViewPort = vp->y + vp->height - 1, + }; + + struct GENX(CC_VIEWPORT) cc_viewport = { + .MinimumDepth = vp->minDepth, + .MaximumDepth = vp->maxDepth + }; + + GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, + &sf_clip_viewport); + GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport); + } + + if (!cmd_buffer->device->info.has_llc) { + anv_state_clflush(sf_clip_state); + anv_state_clflush(cc_state); + } + + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), + .CCViewportPointer = cc_state.offset); + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), + .SFClipViewportPointer = sf_clip_state.offset); +} + +void +gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->state.dynamic.viewport.count > 0) { + emit_viewport_state(cmd_buffer, cmd_buffer->state.dynamic.viewport.count, + cmd_buffer->state.dynamic.viewport.viewports); + } else { + /* If viewport count is 0, this is taken to mean "use the default" */ + emit_viewport_state(cmd_buffer, 1, + &(VkViewport) { + .x = 0.0f, + .y = 0.0f, + .width = cmd_buffer->state.framebuffer->width, + .height = cmd_buffer->state.framebuffer->height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); + } +} +#endif + +static void +emit_lrm(struct anv_batch *batch, + uint32_t reg, struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); +} + +static void +emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), + .RegisterOffset = reg, + .DataDWord = imm); +} + +#define GEN8_L3CNTLREG 0x7034 + +static void +config_l3(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) +{ + /* References for GL state: + * + * - commits e307cfa..228d5a3 + * - src/mesa/drivers/dri/i965/gen7_l3_state.c + */ + + uint32_t val = enable_slm ? + /* All = 48 ways; URB = 16 ways; DC and RO = 0, SLM = 1 */ + 0x60000021 : + /* All = 48 ways; URB = 48 ways; DC, RO and SLM = 0 */ + 0x60000060; + bool changed = cmd_buffer->state.current_l3_config != val; + + if (changed) { + /* According to the hardware docs, the L3 partitioning can only be changed + * while the pipeline is completely drained and the caches are flushed, + * which involves a first PIPE_CONTROL flush which stalls the pipeline and + * initiates invalidation of the relevant caches... + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .TextureCacheInvalidationEnable = true, + .ConstantCacheInvalidationEnable = true, + .InstructionCacheInvalidateEnable = true, + .DCFlushEnable = true, + .PostSyncOperation = NoWrite, + .CommandStreamerStallEnable = true); + + /* ...followed by a second stalling flush which guarantees that + * invalidation is complete when the L3 configuration registers are + * modified. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DCFlushEnable = true, + .PostSyncOperation = NoWrite, + .CommandStreamerStallEnable = true); + + emit_lri(&cmd_buffer->batch, GEN8_L3CNTLREG, val); + cmd_buffer->state.current_l3_config = val; + } +} + +static void +flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer) +{ + config_l3(cmd_buffer, false); + + if (cmd_buffer->state.current_pipeline != _3D) { + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), +#if ANV_GEN >= 9 + .MaskBits = 3, +#endif + .PipelineSelection = _3D); + cmd_buffer->state.current_pipeline = _3D; + } +} + +static void +__emit_genx_sf_state(struct anv_cmd_buffer *cmd_buffer) +{ + uint32_t sf_dw[GENX(3DSTATE_SF_length)]; + struct GENX(3DSTATE_SF) sf = { + GENX(3DSTATE_SF_header), + .LineWidth = cmd_buffer->state.dynamic.line_width, + }; + GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf); + /* FIXME: gen9.fs */ + anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, + cmd_buffer->state.pipeline->gen8.sf); +} +static void +__emit_gen9_sf_state(struct anv_cmd_buffer *cmd_buffer) +{ + uint32_t sf_dw[GENX(3DSTATE_SF_length)]; + struct GEN9_3DSTATE_SF sf = { + GEN9_3DSTATE_SF_header, + .LineWidth = cmd_buffer->state.dynamic.line_width, + }; + GEN9_3DSTATE_SF_pack(NULL, sf_dw, &sf); + /* FIXME: gen9.fs */ + anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, + cmd_buffer->state.pipeline->gen8.sf); +} + +static void +__emit_sf_state(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->device->info.is_cherryview) + __emit_gen9_sf_state(cmd_buffer); + else + __emit_genx_sf_state(cmd_buffer); +} + +static void +cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t *p; + + uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; + + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); + + flush_pipeline_select_3d(cmd_buffer); + + if (vb_emit) { + const uint32_t num_buffers = __builtin_popcount(vb_emit); + const uint32_t num_dwords = 1 + num_buffers * 4; + + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GENX(3DSTATE_VERTEX_BUFFERS)); + uint32_t vb, i = 0; + for_each_bit(vb, vb_emit) { + struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; + + struct GENX(VERTEX_BUFFER_STATE) state = { + .VertexBufferIndex = vb, + .MemoryObjectControlState = GENX(MOCS), + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset + }; + + GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ + if (cmd_buffer->state.scratch_size < pipeline->total_scratch) + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } + + /* We emit the binding tables and sampler tables first, then emit push + * constants and then finally emit binding table and sampler table + * pointers. It has to happen in this order, since emitting the binding + * tables may change the push constants (in case of storage images). After + * emitting push constants, on SKL+ we have to emit the corresponding + * 3DSTATE_BINDING_TABLE_POINTER_* for the push constants to take effect. + */ + uint32_t dirty = 0; + if (cmd_buffer->state.descriptors_dirty) + dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); + + if (cmd_buffer->state.push_constants_dirty) + dirty |= cmd_buffer_flush_push_constants(cmd_buffer); + + if (dirty) + gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) + gen8_cmd_buffer_emit_viewport(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) + gen7_cmd_buffer_emit_scissor(cmd_buffer); + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) { + __emit_sf_state(cmd_buffer); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)){ + bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || + cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; + + uint32_t raster_dw[GENX(3DSTATE_RASTER_length)]; + struct GENX(3DSTATE_RASTER) raster = { + GENX(3DSTATE_RASTER_header), + .GlobalDepthOffsetEnableSolid = enable_bias, + .GlobalDepthOffsetEnableWireframe = enable_bias, + .GlobalDepthOffsetEnablePoint = enable_bias, + .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, + .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, + .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp + }; + GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster); + anv_batch_emit_merge(&cmd_buffer->batch, raster_dw, + pipeline->gen8.raster); + } + + /* Stencil reference values moved from COLOR_CALC_STATE in gen8 to + * 3DSTATE_WM_DEPTH_STENCIL in gen9. That means the dirty bits gets split + * across different state packets for gen8 and gen9. We handle that by + * using a big old #if switch here. + */ +#if ANV_GEN == 8 + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + GEN8_COLOR_CALC_STATE_length * 4, + 64); + struct GEN8_COLOR_CALC_STATE cc = { + .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], + .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], + .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], + .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], + .StencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.front, + .BackFaceStencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.back, + }; + GEN8_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(cc_state); + + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = cc_state.offset, + .ColorCalcStatePointerValid = true); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { + uint32_t wm_depth_stencil_dw[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; + + struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + GEN8_3DSTATE_WM_DEPTH_STENCIL_header, + + /* Is this what we need to do? */ + .StencilBufferWriteEnable = + cmd_buffer->state.dynamic.stencil_write_mask.front != 0, + + .StencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, + .StencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, + + .BackfaceStencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, + }; + GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, wm_depth_stencil_dw, + &wm_depth_stencil); + + anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw, + pipeline->gen8.wm_depth_stencil); + } +#else + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) { + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + GEN9_COLOR_CALC_STATE_length * 4, + 64); + struct GEN9_COLOR_CALC_STATE cc = { + .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], + .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], + .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], + .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], + }; + GEN9_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(cc_state); + + anv_batch_emit(&cmd_buffer->batch, + GEN9_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = cc_state.offset, + .ColorCalcStatePointerValid = true); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + uint32_t dwords[GEN9_3DSTATE_WM_DEPTH_STENCIL_length]; + struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; + struct GEN9_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + GEN9_3DSTATE_WM_DEPTH_STENCIL_header, + + .StencilBufferWriteEnable = d->stencil_write_mask.front != 0, + + .StencilTestMask = d->stencil_compare_mask.front & 0xff, + .StencilWriteMask = d->stencil_write_mask.front & 0xff, + + .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff, + + .StencilReferenceValue = d->stencil_reference.front, + .BackfaceStencilReferenceValue = d->stencil_reference.back + }; + GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, dwords, &wm_depth_stencil); + + anv_batch_emit_merge(&cmd_buffer->batch, dwords, + pipeline->gen9.wm_depth_stencil); + } +#endif + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_INDEX_BUFFER)) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), + .IndexedDrawCutIndexEnable = pipeline->primitive_restart, + .CutIndex = cmd_buffer->state.restart_index, + ); + } + + cmd_buffer->state.vb_dirty &= ~vb_emit; + cmd_buffer->state.dirty = 0; +} + +static void +emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, uint32_t offset) +{ + uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, 5, + GENX(3DSTATE_VERTEX_BUFFERS)); + + GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, p + 1, + &(struct GENX(VERTEX_BUFFER_STATE)) { + .VertexBufferIndex = 32, /* Reserved for this */ + .MemoryObjectControlState = GENX(MOCS), + .AddressModifyEnable = true, + .BufferPitch = 0, + .BufferStartingAddress = { bo, offset }, + .BufferSize = 8 + }); +} + +static void +emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer, + uint32_t base_vertex, uint32_t base_instance) +{ + struct anv_state id_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4); + + ((uint32_t *)id_state.map)[0] = base_vertex; + ((uint32_t *)id_state.map)[1] = base_instance; + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(id_state); + + emit_base_vertex_instance_bo(cmd_buffer, + &cmd_buffer->device->dynamic_state_block_pool.bo, id_state.offset); +} + +void genX(CmdDraw)( + VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer_flush_state(cmd_buffer); + + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .VertexAccessType = SEQUENTIAL, + .VertexCountPerInstance = vertexCount, + .StartVertexLocation = firstVertex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = 0); +} + +void genX(CmdDrawIndexed)( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer_flush_state(cmd_buffer); + + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .VertexAccessType = RANDOM, + .VertexCountPerInstance = indexCount, + .StartVertexLocation = firstIndex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = vertexOffset); +} + +/* Auto-Draw / Indirect Registers */ +#define GEN7_3DPRIM_END_OFFSET 0x2420 +#define GEN7_3DPRIM_START_VERTEX 0x2430 +#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 +#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 +#define GEN7_3DPRIM_START_INSTANCE 0x243C +#define GEN7_3DPRIM_BASE_VERTEX 0x2440 + +void genX(CmdDrawIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + cmd_buffer_flush_state(cmd_buffer); + + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 8); + + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); + emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .IndirectParameterEnable = true, + .VertexAccessType = SEQUENTIAL); +} + +void genX(CmdBindIndexBuffer)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + static const uint32_t vk_to_gen_index_type[] = { + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, + }; + + static const uint32_t restart_index_for_type[] = { + [VK_INDEX_TYPE_UINT16] = UINT16_MAX, + [VK_INDEX_TYPE_UINT32] = UINT32_MAX, + }; + + cmd_buffer->state.restart_index = restart_index_for_type[indexType]; + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), + .IndexFormat = vk_to_gen_index_type[indexType], + .MemoryObjectControlState = GENX(MOCS), + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; +} + +static VkResult +flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = anv_cmd_buffer_emit_samplers(cmd_buffer, + MESA_SHADER_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, + MESA_SHADER_COMPUTE, &surfaces); + if (result != VK_SUCCESS) + return result; + + struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); + + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; + unsigned push_constant_data_size = + (prog_data->nr_params + local_id_dwords) * 4; + unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); + unsigned push_constant_regs = reg_aligned_constant_size / 32; + + if (push_state.alloc_size) { + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), + .CURBETotalDataLength = push_state.alloc_size, + .CURBEDataStartAddress = push_state.offset); + } + + assert(prog_data->total_shared <= 64 * 1024); + uint32_t slm_size = 0; + if (prog_data->total_shared > 0) { + /* slm_size is in 4k increments, but must be a power of 2. */ + slm_size = 4 * 1024; + while (slm_size < prog_data->total_shared) + slm_size <<= 1; + slm_size /= 4 * 1024; + } + + struct anv_state state = + anv_state_pool_emit(&device->dynamic_state_pool, + GENX(INTERFACE_DESCRIPTOR_DATA), 64, + .KernelStartPointer = pipeline->cs_simd, + .KernelStartPointerHigh = 0, + .BindingTablePointer = surfaces.offset, + .BindingTableEntryCount = 0, + .SamplerStatePointer = samplers.offset, + .SamplerCount = 0, + .ConstantIndirectURBEntryReadLength = push_constant_regs, + .ConstantURBEntryReadOffset = 0, + .BarrierEnable = cs_prog_data->uses_barrier, + .SharedLocalMemorySize = slm_size, + .NumberofThreadsinGPGPUThreadGroup = + pipeline->cs_thread_width_max); + + uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), + .InterfaceDescriptorTotalLength = size, + .InterfaceDescriptorDataStartAddress = state.offset); + + return VK_SUCCESS; +} + +static void +cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + VkResult result; + + assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + + bool needs_slm = pipeline->cs_prog_data.base.total_shared > 0; + config_l3(cmd_buffer, needs_slm); + + if (cmd_buffer->state.current_pipeline != GPGPU) { +#if ANV_GEN < 10 + /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: + * + * Software must clear the COLOR_CALC_STATE Valid field in + * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT + * with Pipeline Select set to GPGPU. + * + * The internal hardware docs recommend the same workaround for Gen9 + * hardware too. + */ + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_CC_STATE_POINTERS)); +#endif + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), +#if ANV_GEN >= 9 + .MaskBits = 3, +#endif + .PipelineSelection = GPGPU); + cmd_buffer->state.current_pipeline = GPGPU; + } + + if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { + result = flush_compute_descriptor_set(cmd_buffer); + assert(result == VK_SUCCESS); + cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; + } + + cmd_buffer->state.compute_dirty = 0; +} + +void genX(CmdDrawIndexedIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + cmd_buffer_flush_state(cmd_buffer); + + /* TODO: We need to stomp base vertex to 0 somehow */ + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 12); + + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .IndirectParameterEnable = true, + .VertexAccessType = RANDOM); +} + +void genX(CmdDispatch)( + VkCommandBuffer commandBuffer, + uint32_t x, + uint32_t y, + uint32_t z) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + + if (prog_data->uses_num_work_groups) { + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4); + uint32_t *sizes = state.map; + sizes[0] = x; + sizes[1] = y; + sizes[2] = z; + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + cmd_buffer->state.num_workgroups_offset = state.offset; + cmd_buffer->state.num_workgroups_bo = + &cmd_buffer->device->dynamic_state_block_pool.bo; + } + + cmd_buffer_flush_compute_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, + .ThreadGroupIDXDimension = x, + .ThreadGroupIDYDimension = y, + .ThreadGroupIDZDimension = z, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); +} + +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 + +void genX(CmdDispatchIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + if (prog_data->uses_num_work_groups) { + cmd_buffer->state.num_workgroups_offset = bo_offset; + cmd_buffer->state.num_workgroups_bo = bo; + } + + cmd_buffer_flush_compute_state(cmd_buffer); + + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + + anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), + .IndirectParameterEnable = true, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); +} + +static void +cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) +{ + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct anv_image_view *iview = + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); + const struct anv_image *image = iview ? iview->image : NULL; + + /* XXX: isl needs to grow depth format support */ + const struct anv_format *anv_format = + iview ? anv_format_for_vk_format(iview->vk_format) : NULL; + + const bool has_depth = iview && anv_format->depth_format; + const bool has_stencil = iview && anv_format->has_stencil; + + /* FIXME: Implement the PMA stall W/A */ + /* FIXME: Width and Height are wrong */ + + /* Emit 3DSTATE_DEPTH_BUFFER */ + if (has_depth) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), + .SurfaceType = SURFTYPE_2D, + .DepthWriteEnable = anv_format->depth_format, + .StencilWriteEnable = has_stencil, + .HierarchicalDepthBufferEnable = false, + .SurfaceFormat = anv_format->depth_format, + .SurfacePitch = image->depth_surface.isl.row_pitch - 1, + .SurfaceBaseAddress = { + .bo = image->bo, + .offset = image->depth_surface.offset, + }, + .Height = fb->height - 1, + .Width = fb->width - 1, + .LOD = 0, + .Depth = 1 - 1, + .MinimumArrayElement = 0, + .DepthBufferObjectControlState = GENX(MOCS), + .RenderTargetViewExtent = 1 - 1, + .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->depth_surface.isl) >> 2); + } else { + /* Even when no depth buffer is present, the hardware requires that + * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says: + * + * If a null depth buffer is bound, the driver must instead bind depth as: + * 3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D + * 3DSTATE_DEPTH.Width = 1 + * 3DSTATE_DEPTH.Height = 1 + * 3DSTATE_DEPTH.SuraceFormat = D16_UNORM + * 3DSTATE_DEPTH.SurfaceBaseAddress = 0 + * 3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0 + * 3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0 + * 3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0 + * + * The PRM is wrong, though. The width and height must be programmed to + * actual framebuffer's width and height, even when neither depth buffer + * nor stencil buffer is present. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), + .SurfaceType = SURFTYPE_2D, + .SurfaceFormat = D16_UNORM, + .Width = fb->width - 1, + .Height = fb->height - 1, + .StencilWriteEnable = has_stencil); + } + + /* Emit 3DSTATE_STENCIL_BUFFER */ + if (has_stencil) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER), + .StencilBufferEnable = true, + .StencilBufferObjectControlState = GENX(MOCS), + + /* Stencil buffers have strange pitch. The PRM says: + * + * The pitch must be set to 2x the value computed based on width, + * as the stencil buffer is stored with two rows interleaved. + */ + .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1, + + .SurfaceBaseAddress = { + .bo = image->bo, + .offset = image->offset + image->stencil_surface.offset, + }, + .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->stencil_surface.isl) >> 2); + } else { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER)); + } + + /* Disable hierarchial depth buffers. */ + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER)); + + /* Clear the clear params. */ + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS)); +} + +/** + * @see anv_cmd_buffer_set_subpass() + */ +void +genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) +{ + cmd_buffer->state.subpass = subpass; + + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + + cmd_buffer_emit_depth_stencil(cmd_buffer); +} + +void genX(CmdBeginRenderPass)( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); + ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); + + cmd_buffer->state.framebuffer = framebuffer; + cmd_buffer->state.pass = pass; + anv_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin); + + flush_pipeline_select_3d(cmd_buffer); + + const VkRect2D *render_area = &pRenderPassBegin->renderArea; + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DRAWING_RECTANGLE), + .ClippedDrawingRectangleYMin = render_area->offset.y, + .ClippedDrawingRectangleXMin = render_area->offset.x, + .ClippedDrawingRectangleYMax = + render_area->offset.y + render_area->extent.height - 1, + .ClippedDrawingRectangleXMax = + render_area->offset.x + render_area->extent.width - 1, + .DrawingRectangleOriginY = 0, + .DrawingRectangleOriginX = 0); + + genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses); + anv_cmd_buffer_clear_subpass(cmd_buffer); +} + +void genX(CmdNextSubpass)( + VkCommandBuffer commandBuffer, + VkSubpassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + + anv_cmd_buffer_resolve_subpass(cmd_buffer); + genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1); + anv_cmd_buffer_clear_subpass(cmd_buffer); +} + +void genX(CmdEndRenderPass)( + VkCommandBuffer commandBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + anv_cmd_buffer_resolve_subpass(cmd_buffer); + + /* Emit a flushing pipe control at the end of a pass. This is kind of a + * hack but it ensures that render targets always actually get written. + * Eventually, we should do flushing based on image format transitions + * or something of that nature. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .PostSyncOperation = NoWrite, + .RenderTargetCacheFlushEnable = true, + .InstructionCacheInvalidateEnable = true, + .DepthCacheFlushEnable = true, + .VFCacheInvalidationEnable = true, + .TextureCacheInvalidationEnable = true, + .CommandStreamerStallEnable = true); +} + +static void +emit_ps_depth_count(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WritePSDepthCount, + .DepthStallEnable = true, + .Address = { bo, offset }); +} + +static void +emit_query_availability(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteImmediateData, + .Address = { bo, offset }, + .ImmediateData = 1); +} + +void genX(CmdBeginQuery)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query, + VkQueryControlFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + /* Workaround: When meta uses the pipeline with the VS disabled, it seems + * that the pipelining of the depth write breaks. What we see is that + * samples from the render pass clear leaks into the first query + * immediately after the clear. Doing a pipecontrol with a post-sync + * operation and DepthStallEnable seems to work around the issue. + */ + if (cmd_buffer->state.need_query_wa) { + cmd_buffer->state.need_query_wa = false; + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DepthCacheFlushEnable = true, + .DepthStallEnable = true); + } + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + query * sizeof(struct anv_query_pool_slot)); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +void genX(CmdEndQuery)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + query * sizeof(struct anv_query_pool_slot) + 8); + + emit_query_availability(&cmd_buffer->batch, &pool->bo, + query * sizeof(struct anv_query_pool_slot) + 16); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +#define TIMESTAMP 0x2358 + +void genX(CmdWriteTimestamp)( + VkCommandBuffer commandBuffer, + VkPipelineStageFlagBits pipelineStage, + VkQueryPool queryPool, + uint32_t query) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + uint32_t offset = query * sizeof(struct anv_query_pool_slot); + + assert(pool->type == VK_QUERY_TYPE_TIMESTAMP); + + switch (pipelineStage) { + case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = TIMESTAMP, + .MemoryAddress = { &pool->bo, offset }); + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = TIMESTAMP + 4, + .MemoryAddress = { &pool->bo, offset + 4 }); + break; + + default: + /* Everything else is bottom-of-pipe */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteTimestamp, + .Address = { &pool->bo, offset }); + break; + } + + emit_query_availability(&cmd_buffer->batch, &pool->bo, query + 16); +} + +#define alu_opcode(v) __gen_field((v), 20, 31) +#define alu_operand1(v) __gen_field((v), 10, 19) +#define alu_operand2(v) __gen_field((v), 0, 9) +#define alu(opcode, operand1, operand2) \ + alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) + +#define OPCODE_NOOP 0x000 +#define OPCODE_LOAD 0x080 +#define OPCODE_LOADINV 0x480 +#define OPCODE_LOAD0 0x081 +#define OPCODE_LOAD1 0x481 +#define OPCODE_ADD 0x100 +#define OPCODE_SUB 0x101 +#define OPCODE_AND 0x102 +#define OPCODE_OR 0x103 +#define OPCODE_XOR 0x104 +#define OPCODE_STORE 0x180 +#define OPCODE_STOREINV 0x580 + +#define OPERAND_R0 0x00 +#define OPERAND_R1 0x01 +#define OPERAND_R2 0x02 +#define OPERAND_R3 0x03 +#define OPERAND_R4 0x04 +#define OPERAND_SRCA 0x20 +#define OPERAND_SRCB 0x21 +#define OPERAND_ACCU 0x31 +#define OPERAND_ZF 0x32 +#define OPERAND_CF 0x33 + +#define CS_GPR(n) (0x2600 + (n) * 8) + +static void +emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + +static void +store_query_result(struct anv_batch *batch, uint32_t reg, + struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags) +{ + anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + + if (flags & VK_QUERY_RESULT_64_BIT) + anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + +void genX(CmdCopyQueryPoolResults)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize destStride, + VkQueryResultFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); + uint32_t slot_offset, dst_offset; + + if (flags & VK_QUERY_RESULT_WAIT_BIT) + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .CommandStreamerStallEnable = true, + .StallAtPixelScoreboard = true); + + dst_offset = buffer->offset + destOffset; + for (uint32_t i = 0; i < queryCount; i++) { + + slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot); + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(0), &pool->bo, slot_offset); + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(1), &pool->bo, slot_offset + 8); + + /* FIXME: We need to clamp the result for 32 bit. */ + + uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); + dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); + dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); + dw[3] = alu(OPCODE_SUB, 0, 0); + dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); + break; + + case VK_QUERY_TYPE_TIMESTAMP: + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(2), &pool->bo, slot_offset); + break; + + default: + unreachable("unhandled query type"); + } + + store_query_result(&cmd_buffer->batch, + CS_GPR(2), buffer->bo, dst_offset, flags); + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), + &pool->bo, slot_offset + 16); + if (flags & VK_QUERY_RESULT_64_BIT) + store_query_result(&cmd_buffer->batch, + CS_GPR(0), buffer->bo, dst_offset + 8, flags); + else + store_query_result(&cmd_buffer->batch, + CS_GPR(0), buffer->bo, dst_offset + 4, flags); + } + + dst_offset += destStride; + } +} + +void genX(CmdSetEvent)( + VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_event, event, _event); + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteImmediateData, + .Address = { + &cmd_buffer->device->dynamic_state_block_pool.bo, + event->state.offset + }, + .ImmediateData = VK_EVENT_SET); +} + +void genX(CmdResetEvent)( + VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_event, event, _event); + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteImmediateData, + .Address = { + &cmd_buffer->device->dynamic_state_block_pool.bo, + event->state.offset + }, + .ImmediateData = VK_EVENT_RESET); +} + +void genX(CmdWaitEvents)( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + for (uint32_t i = 0; i < eventCount; i++) { + ANV_FROM_HANDLE(anv_event, event, pEvents[i]); + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT), + .WaitMode = PollingMode, + .CompareOperation = SAD_EQUAL_SDD, + .SemaphoreDataDword = VK_EVENT_SET, + .SemaphoreAddress = { + &cmd_buffer->device->dynamic_state_block_pool.bo, + event->state.offset + }); + } + + genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask, + false, /* byRegion */ + memoryBarrierCount, pMemoryBarriers, + bufferMemoryBarrierCount, pBufferMemoryBarriers, + imageMemoryBarrierCount, pImageMemoryBarriers); +} diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h new file mode 100644 index 00000000000..3c014b96147 --- /dev/null +++ b/src/vulkan/gen8_pack.h @@ -0,0 +1,9209 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +/* Instructions, enums and structures for BDW. + * + * This file has been generated, do not hand edit. + */ + +#pragma once + +#include <stdio.h> +#include <assert.h> + +#ifndef __gen_validate_value +#define __gen_validate_value(x) +#endif + +#ifndef __gen_field_functions +#define __gen_field_functions + +union __gen_value { + float f; + uint32_t dw; +}; + +static inline uint64_t +__gen_mbo(uint32_t start, uint32_t end) +{ + return (~0ull >> (64 - (end - start + 1))) << start; +} + +static inline uint64_t +__gen_field(uint64_t v, uint32_t start, uint32_t end) +{ + __gen_validate_value(v); +#if DEBUG + if (end - start + 1 < 64) + assert(v < 1ull << (end - start + 1)); +#endif + + return v << start; +} + +static inline uint64_t +__gen_fixed(float v, uint32_t start, uint32_t end, + bool is_signed, uint32_t fract_bits) +{ + __gen_validate_value(v); + + const float factor = (1 << fract_bits); + + float max, min; + if (is_signed) { + max = ((1 << (end - start)) - 1) / factor; + min = -(1 << (end - start)) / factor; + } else { + max = ((1 << (end - start + 1)) - 1) / factor; + min = 0.0f; + } + + if (v > max) + v = max; + else if (v < min) + v = min; + + int32_t int_val = roundf(v * factor); + + if (is_signed) + int_val &= (1 << (end - start + 1)) - 1; + + return int_val << start; +} + +static inline uint64_t +__gen_offset(uint64_t v, uint32_t start, uint32_t end) +{ + __gen_validate_value(v); +#if DEBUG + uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start; + + assert((v & ~mask) == 0); +#endif + + return v; +} + +static inline uint32_t +__gen_float(float v) +{ + __gen_validate_value(v); + return ((union __gen_value) { .f = (v) }).dw; +} + +#ifndef __gen_address_type +#error #define __gen_address_type before including this file +#endif + +#ifndef __gen_user_data +#error #define __gen_combine_address before including this file +#endif + +#endif + +#define GEN8_3DSTATE_URB_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_URB_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 48, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_URB_VS_length 0x00000002 + +struct GEN8_3DSTATE_URB_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t VSURBStartingAddress; + uint32_t VSURBEntryAllocationSize; + uint32_t VSNumberofURBEntries; +}; + +static inline void +GEN8_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_URB_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->VSURBStartingAddress, 25, 31) | + __gen_field(values->VSURBEntryAllocationSize, 16, 24) | + __gen_field(values->VSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN8_3DSTATE_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 16, \ + .DwordLength = 7 + +#define GEN8_3DSTATE_VS_length 0x00000009 + +struct GEN8_3DSTATE_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleVertexDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool AccessesUAV; + bool SoftwareExceptionEnable; + uint64_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + bool StatisticsEnable; + bool SIMD8DispatchEnable; + bool VertexCacheDisable; + bool FunctionEnable; + uint32_t VertexURBEntryOutputReadOffset; + uint32_t VertexURBEntryOutputLength; + uint32_t UserClipDistanceClipTestEnableBitmask; + uint32_t UserClipDistanceCullTestEnableBitmask; +}; + +static inline void +GEN8_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint64_t qw1 = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->SingleVertexDispatch, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->AccessesUAV, 12, 12) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + uint64_t qw4 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + dw[6] = + __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[7] = + __gen_field(values->MaximumNumberofThreads, 23, 31) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->SIMD8DispatchEnable, 2, 2) | + __gen_field(values->VertexCacheDisable, 1, 1) | + __gen_field(values->FunctionEnable, 0, 0) | + 0; + + dw[8] = + __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | + __gen_field(values->VertexURBEntryOutputLength, 16, 20) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + +} + +#define GEN8_GPGPU_CSR_BASE_ADDRESS_length_bias 0x00000002 +#define GEN8_GPGPU_CSR_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 1 + +#define GEN8_GPGPU_CSR_BASE_ADDRESS_length 0x00000003 + +struct GEN8_GPGPU_CSR_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GPGPUCSRBaseAddress; +}; + +static inline void +GEN8_GPGPU_CSR_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_GPGPU_CSR_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->GPGPUCSRBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + +} + +#define GEN8_MI_ATOMIC_length_bias 0x00000002 +#define GEN8_MI_ATOMIC_header \ + .CommandType = 0, \ + .MICommandOpcode = 47 + +#define GEN8_MI_ATOMIC_length 0x00000003 + +struct GEN8_MI_ATOMIC { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t MemoryType; + uint32_t PostSyncOperation; +#define DWORD 0 +#define QWORD 1 +#define OCTWORD 2 +#define RESERVED 3 + uint32_t DataSize; + uint32_t InlineData; + uint32_t CSSTALL; + uint32_t ReturnDataControl; + uint32_t ATOMICOPCODE; + uint32_t DwordLength; + __gen_address_type MemoryAddress; + uint32_t Operand1DataDword0; + uint32_t Operand2DataDword0; + uint32_t Operand1DataDword1; + uint32_t Operand2DataDword1; + uint32_t Operand1DataDword2; + uint32_t Operand2DataDword2; + uint32_t Operand1DataDword3; + uint32_t Operand2DataDword3; +}; + +static inline void +GEN8_MI_ATOMIC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_ATOMIC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->MemoryType, 22, 22) | + __gen_field(values->PostSyncOperation, 21, 21) | + __gen_field(values->DataSize, 19, 20) | + __gen_field(values->InlineData, 18, 18) | + __gen_field(values->CSSTALL, 17, 17) | + __gen_field(values->ReturnDataControl, 16, 16) | + __gen_field(values->ATOMICOPCODE, 8, 15) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->MemoryAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->Operand1DataDword0, 0, 31) | + 0; + + dw[4] = + __gen_field(values->Operand2DataDword0, 0, 31) | + 0; + + dw[5] = + __gen_field(values->Operand1DataDword1, 0, 31) | + 0; + + dw[6] = + __gen_field(values->Operand2DataDword1, 0, 31) | + 0; + + dw[7] = + __gen_field(values->Operand1DataDword2, 0, 31) | + 0; + + dw[8] = + __gen_field(values->Operand2DataDword2, 0, 31) | + 0; + + dw[9] = + __gen_field(values->Operand1DataDword3, 0, 31) | + 0; + + dw[10] = + __gen_field(values->Operand2DataDword3, 0, 31) | + 0; + +} + +#define GEN8_MI_LOAD_REGISTER_REG_length_bias 0x00000002 +#define GEN8_MI_LOAD_REGISTER_REG_header \ + .CommandType = 0, \ + .MICommandOpcode = 42, \ + .DwordLength = 1 + +#define GEN8_MI_LOAD_REGISTER_REG_length 0x00000003 + +struct GEN8_MI_LOAD_REGISTER_REG { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t SourceRegisterAddress; + uint32_t DestinationRegisterAddress; +}; + +static inline void +GEN8_MI_LOAD_REGISTER_REG_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_REGISTER_REG * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SourceRegisterAddress, 2, 22) | + 0; + + dw[2] = + __gen_offset(values->DestinationRegisterAddress, 2, 22) | + 0; + +} + +#define GEN8_MI_SEMAPHORE_SIGNAL_length_bias 0x00000002 +#define GEN8_MI_SEMAPHORE_SIGNAL_header \ + .CommandType = 0, \ + .MICommandOpcode = 27, \ + .DwordLength = 0 + +#define GEN8_MI_SEMAPHORE_SIGNAL_length 0x00000002 + +struct GEN8_MI_SEMAPHORE_SIGNAL { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t PostSyncOperation; +#define RCS 0 +#define VCS0 1 +#define BCS 2 +#define VECS 3 +#define VCS1 4 + uint32_t TargetEngineSelect; + uint32_t DwordLength; + uint32_t TargetContextID; +}; + +static inline void +GEN8_MI_SEMAPHORE_SIGNAL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_SEMAPHORE_SIGNAL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->PostSyncOperation, 21, 21) | + __gen_field(values->TargetEngineSelect, 15, 17) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->TargetContextID, 0, 31) | + 0; + +} + +#define GEN8_MI_SEMAPHORE_WAIT_length_bias 0x00000002 +#define GEN8_MI_SEMAPHORE_WAIT_header \ + .CommandType = 0, \ + .MICommandOpcode = 28, \ + .DwordLength = 2 + +#define GEN8_MI_SEMAPHORE_WAIT_length 0x00000004 + +struct GEN8_MI_SEMAPHORE_WAIT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t MemoryType; +#define PollingMode 1 +#define SignalMode 0 + uint32_t WaitMode; +#define SAD_GREATER_THAN_SDD 0 +#define SAD_GREATER_THAN_OR_EQUAL_SDD 1 +#define SAD_LESS_THAN_SDD 2 +#define SAD_LESS_THAN_OR_EQUAL_SDD 3 +#define SAD_EQUAL_SDD 4 +#define SAD_NOT_EQUAL_SDD 5 + uint32_t CompareOperation; + uint32_t DwordLength; + uint32_t SemaphoreDataDword; + __gen_address_type SemaphoreAddress; +}; + +static inline void +GEN8_MI_SEMAPHORE_WAIT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_SEMAPHORE_WAIT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->MemoryType, 22, 22) | + __gen_field(values->WaitMode, 15, 15) | + __gen_field(values->CompareOperation, 12, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SemaphoreDataDword, 0, 31) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SemaphoreAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN8_MI_STORE_REGISTER_MEM_length_bias 0x00000002 +#define GEN8_MI_STORE_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 36, \ + .DwordLength = 2 + +#define GEN8_MI_STORE_REGISTER_MEM_length 0x00000004 + +struct GEN8_MI_STORE_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t PredicateEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN8_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_STORE_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->PredicateEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN8_PIPELINE_SELECT_length_bias 0x00000001 +#define GEN8_PIPELINE_SELECT_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4 + +#define GEN8_PIPELINE_SELECT_length 0x00000001 + +struct GEN8_PIPELINE_SELECT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; +#define _3D 0 +#define Media 1 +#define GPGPU 2 + uint32_t PipelineSelection; +}; + +static inline void +GEN8_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_PIPELINE_SELECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->PipelineSelection, 0, 1) | + 0; + +} + +#define GEN8_STATE_BASE_ADDRESS_length_bias 0x00000002 +#define GEN8_STATE_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 1, \ + .DwordLength = 14 + +#define GEN8_STATE_BASE_ADDRESS_length 0x00000010 + +#define GEN8_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 + +struct GEN8_MEMORY_OBJECT_CONTROL_STATE { +#define UCwithFenceifcoherentcycle 0 +#define UCUncacheable 1 +#define WT 2 +#define WB 3 + uint32_t MemoryTypeLLCeLLCCacheabilityControl; +#define eLLCOnlywheneDRAMispresentelsegetsallocatedinLLC 0 +#define LLCOnly 1 +#define LLCeLLCAllowed 2 +#define L3DefertoPATforLLCeLLCselection 3 + uint32_t TargetCache; + uint32_t AgeforQUADLRU; +}; + +static inline void +GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEMORY_OBJECT_CONTROL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->MemoryTypeLLCeLLCCacheabilityControl, 5, 6) | + __gen_field(values->TargetCache, 3, 4) | + __gen_field(values->AgeforQUADLRU, 0, 1) | + 0; + +} + +struct GEN8_STATE_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GeneralStateBaseAddress; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE GeneralStateMemoryObjectControlState; + bool GeneralStateBaseAddressModifyEnable; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE StatelessDataPortAccessMemoryObjectControlState; + __gen_address_type SurfaceStateBaseAddress; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE SurfaceStateMemoryObjectControlState; + bool SurfaceStateBaseAddressModifyEnable; + __gen_address_type DynamicStateBaseAddress; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE DynamicStateMemoryObjectControlState; + bool DynamicStateBaseAddressModifyEnable; + __gen_address_type IndirectObjectBaseAddress; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE IndirectObjectMemoryObjectControlState; + bool IndirectObjectBaseAddressModifyEnable; + __gen_address_type InstructionBaseAddress; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE InstructionMemoryObjectControlState; + bool InstructionBaseAddressModifyEnable; + uint32_t GeneralStateBufferSize; + bool GeneralStateBufferSizeModifyEnable; + uint32_t DynamicStateBufferSize; + bool DynamicStateBufferSizeModifyEnable; + uint32_t IndirectObjectBufferSize; + bool IndirectObjectBufferSizeModifyEnable; + uint32_t InstructionBufferSize; + bool InstructionBuffersizeModifyEnable; +}; + +static inline void +GEN8_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_STATE_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_GeneralStateMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_GeneralStateMemoryObjectControlState, &values->GeneralStateMemoryObjectControlState); + uint32_t dw1 = + __gen_field(dw_GeneralStateMemoryObjectControlState, 4, 10) | + __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + uint32_t dw_StatelessDataPortAccessMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StatelessDataPortAccessMemoryObjectControlState, &values->StatelessDataPortAccessMemoryObjectControlState); + dw[3] = + __gen_field(dw_StatelessDataPortAccessMemoryObjectControlState, 16, 22) | + 0; + + uint32_t dw_SurfaceStateMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceStateMemoryObjectControlState, &values->SurfaceStateMemoryObjectControlState); + uint32_t dw4 = + __gen_field(dw_SurfaceStateMemoryObjectControlState, 4, 10) | + __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw4 = + __gen_combine_address(data, &dw[4], values->SurfaceStateBaseAddress, dw4); + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + uint32_t dw_DynamicStateMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DynamicStateMemoryObjectControlState, &values->DynamicStateMemoryObjectControlState); + uint32_t dw6 = + __gen_field(dw_DynamicStateMemoryObjectControlState, 4, 10) | + __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw6 = + __gen_combine_address(data, &dw[6], values->DynamicStateBaseAddress, dw6); + + dw[6] = qw6; + dw[7] = qw6 >> 32; + + uint32_t dw_IndirectObjectMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_IndirectObjectMemoryObjectControlState, &values->IndirectObjectMemoryObjectControlState); + uint32_t dw8 = + __gen_field(dw_IndirectObjectMemoryObjectControlState, 4, 10) | + __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw8 = + __gen_combine_address(data, &dw[8], values->IndirectObjectBaseAddress, dw8); + + dw[8] = qw8; + dw[9] = qw8 >> 32; + + uint32_t dw_InstructionMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_InstructionMemoryObjectControlState, &values->InstructionMemoryObjectControlState); + uint32_t dw10 = + __gen_field(dw_InstructionMemoryObjectControlState, 4, 10) | + __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw10 = + __gen_combine_address(data, &dw[10], values->InstructionBaseAddress, dw10); + + dw[10] = qw10; + dw[11] = qw10 >> 32; + + dw[12] = + __gen_field(values->GeneralStateBufferSize, 12, 31) | + __gen_field(values->GeneralStateBufferSizeModifyEnable, 0, 0) | + 0; + + dw[13] = + __gen_field(values->DynamicStateBufferSize, 12, 31) | + __gen_field(values->DynamicStateBufferSizeModifyEnable, 0, 0) | + 0; + + dw[14] = + __gen_field(values->IndirectObjectBufferSize, 12, 31) | + __gen_field(values->IndirectObjectBufferSizeModifyEnable, 0, 0) | + 0; + + dw[15] = + __gen_field(values->InstructionBufferSize, 12, 31) | + __gen_field(values->InstructionBuffersizeModifyEnable, 0, 0) | + 0; + +} + +#define GEN8_STATE_PREFETCH_length_bias 0x00000002 +#define GEN8_STATE_PREFETCH_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 3, \ + .DwordLength = 0 + +#define GEN8_STATE_PREFETCH_length 0x00000002 + +struct GEN8_STATE_PREFETCH { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type PrefetchPointer; + uint32_t PrefetchCount; +}; + +static inline void +GEN8_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_STATE_PREFETCH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->PrefetchCount, 0, 2) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->PrefetchPointer, dw1); + +} + +#define GEN8_STATE_SIP_length_bias 0x00000002 +#define GEN8_STATE_SIP_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2, \ + .DwordLength = 1 + +#define GEN8_STATE_SIP_length 0x00000003 + +struct GEN8_STATE_SIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t SystemInstructionPointer; +}; + +static inline void +GEN8_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_STATE_SIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint64_t qw1 = + __gen_offset(values->SystemInstructionPointer, 4, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + +} + +#define GEN8_SWTESS_BASE_ADDRESS_length_bias 0x00000002 +#define GEN8_SWTESS_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 3, \ + .DwordLength = 0 + +#define GEN8_SWTESS_BASE_ADDRESS_length 0x00000002 + +struct GEN8_SWTESS_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type SWTessellationBaseAddress; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE SWTessellationMemoryObjectControlState; +}; + +static inline void +GEN8_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SWTESS_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_SWTessellationMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SWTessellationMemoryObjectControlState, &values->SWTessellationMemoryObjectControlState); + uint32_t dw1 = + __gen_field(dw_SWTessellationMemoryObjectControlState, 8, 11) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->SWTessellationBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + +} + +#define GEN8_3DPRIMITIVE_length_bias 0x00000002 +#define GEN8_3DPRIMITIVE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 3, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 5 + +#define GEN8_3DPRIMITIVE_length 0x00000007 + +struct GEN8_3DPRIMITIVE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool IndirectParameterEnable; + uint32_t UAVCoherencyRequired; + bool PredicateEnable; + uint32_t DwordLength; + bool EndOffsetEnable; +#define SEQUENTIAL 0 +#define RANDOM 1 + uint32_t VertexAccessType; + uint32_t PrimitiveTopologyType; + uint32_t VertexCountPerInstance; + uint32_t StartVertexLocation; + uint32_t InstanceCount; + uint32_t StartInstanceLocation; + uint32_t BaseVertexLocation; +}; + +static inline void +GEN8_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DPRIMITIVE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->UAVCoherencyRequired, 9, 9) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->EndOffsetEnable, 9, 9) | + __gen_field(values->VertexAccessType, 8, 8) | + __gen_field(values->PrimitiveTopologyType, 0, 5) | + 0; + + dw[2] = + __gen_field(values->VertexCountPerInstance, 0, 31) | + 0; + + dw[3] = + __gen_field(values->StartVertexLocation, 0, 31) | + 0; + + dw[4] = + __gen_field(values->InstanceCount, 0, 31) | + 0; + + dw[5] = + __gen_field(values->StartInstanceLocation, 0, 31) | + 0; + + dw[6] = + __gen_field(values->BaseVertexLocation, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_AA_LINE_PARAMETERS_length_bias 0x00000002 +#define GEN8_3DSTATE_AA_LINE_PARAMETERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 1 + +#define GEN8_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 + +struct GEN8_3DSTATE_AA_LINE_PARAMETERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float AAPointCoverageBias; + float AACoverageBias; + float AAPointCoverageSlope; + float AACoverageSlope; + float AAPointCoverageEndCapBias; + float AACoverageEndCapBias; + float AAPointCoverageEndCapSlope; + float AACoverageEndCapSlope; +}; + +static inline void +GEN8_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_AA_LINE_PARAMETERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AAPointCoverageBias * (1 << 8), 24, 31) | + __gen_field(values->AACoverageBias * (1 << 8), 16, 23) | + __gen_field(values->AAPointCoverageSlope * (1 << 8), 8, 15) | + __gen_field(values->AACoverageSlope * (1 << 8), 0, 7) | + 0; + + dw[2] = + __gen_field(values->AAPointCoverageEndCapBias * (1 << 8), 24, 31) | + __gen_field(values->AACoverageEndCapBias * (1 << 8), 16, 23) | + __gen_field(values->AAPointCoverageEndCapSlope * (1 << 8), 8, 15) | + __gen_field(values->AACoverageEndCapSlope * (1 << 8), 0, 7) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 70 + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_length 0x00000000 + +#define GEN8_BINDING_TABLE_EDIT_ENTRY_length 0x00000001 + +struct GEN8_BINDING_TABLE_EDIT_ENTRY { + uint32_t BindingTableIndex; + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN8_BINDING_TABLE_EDIT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_BINDING_TABLE_EDIT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BindingTableIndex, 16, 23) | + __gen_offset(values->SurfaceStatePointer, 0, 15) | + 0; + +} + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 68 + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_length 0x00000000 + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 69 + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_length 0x00000000 + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 71 + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_length 0x00000000 + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 67 + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_length 0x00000000 + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 40, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 + +struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSBindingTable; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 41, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 + +struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSBindingTable; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 39, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 + +struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSBindingTable; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 42, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 + +struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSBindingTable; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 38, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 + +struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSBindingTable; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 2 + +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_length 0x00000004 + +struct GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type BindingTablePoolBaseAddress; + uint32_t BindingTablePoolEnable; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE SurfaceObjectControlState; +#define NoValidData 0 + uint32_t BindingTablePoolBufferSize; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_SurfaceObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); + uint32_t dw1 = + __gen_field(values->BindingTablePoolEnable, 11, 11) | + __gen_field(dw_SurfaceObjectControlState, 0, 6) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->BindingTablePoolBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->BindingTablePoolBufferSize, 12, 31) | + 0; + +} + +#define GEN8_3DSTATE_BLEND_STATE_POINTERS_length_bias 0x00000002 +#define GEN8_3DSTATE_BLEND_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 36, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 + +struct GEN8_3DSTATE_BLEND_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BlendStatePointer; + bool BlendStatePointerValid; +}; + +static inline void +GEN8_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BLEND_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->BlendStatePointer, 6, 31) | + __gen_field(values->BlendStatePointerValid, 0, 0) | + 0; + +} + +#define GEN8_3DSTATE_CC_STATE_POINTERS_length_bias 0x00000002 +#define GEN8_3DSTATE_CC_STATE_POINTERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 14, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_CC_STATE_POINTERS_length 0x00000002 + +struct GEN8_3DSTATE_CC_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ColorCalcStatePointer; + bool ColorCalcStatePointerValid; +}; + +static inline void +GEN8_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CC_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ColorCalcStatePointer, 6, 31) | + __gen_field(values->ColorCalcStatePointerValid, 0, 0) | + 0; + +} + +#define GEN8_3DSTATE_CHROMA_KEY_length_bias 0x00000002 +#define GEN8_3DSTATE_CHROMA_KEY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 2 + +#define GEN8_3DSTATE_CHROMA_KEY_length 0x00000004 + +struct GEN8_3DSTATE_CHROMA_KEY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ChromaKeyTableIndex; + uint32_t ChromaKeyLowValue; + uint32_t ChromaKeyHighValue; +}; + +static inline void +GEN8_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CHROMA_KEY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ChromaKeyTableIndex, 30, 31) | + 0; + + dw[2] = + __gen_field(values->ChromaKeyLowValue, 0, 31) | + 0; + + dw[3] = + __gen_field(values->ChromaKeyHighValue, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_CLEAR_PARAMS_length_bias 0x00000002 +#define GEN8_3DSTATE_CLEAR_PARAMS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 1 + +#define GEN8_3DSTATE_CLEAR_PARAMS_length 0x00000003 + +struct GEN8_3DSTATE_CLEAR_PARAMS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float DepthClearValue; + bool DepthClearValueValid; +}; + +static inline void +GEN8_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CLEAR_PARAMS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_float(values->DepthClearValue) | + 0; + + dw[2] = + __gen_field(values->DepthClearValueValid, 0, 0) | + 0; + +} + +#define GEN8_3DSTATE_CLIP_length_bias 0x00000002 +#define GEN8_3DSTATE_CLIP_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 2 + +#define GEN8_3DSTATE_CLIP_length 0x00000004 + +struct GEN8_3DSTATE_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define Normal 0 +#define Force 1 + bool ForceUserClipDistanceCullTestEnableBitmask; +#define _8Bit 0 +#define _4Bit 1 + uint32_t VertexSubPixelPrecisionSelect; + bool EarlyCullEnable; +#define Normal 0 +#define Force 1 + bool ForceUserClipDistanceClipTestEnableBitmask; +#define Normal 0 +#define Force 1 + bool ForceClipMode; + bool ClipperStatisticsEnable; + uint32_t UserClipDistanceCullTestEnableBitmask; + bool ClipEnable; +#define API_OGL 0 + uint32_t APIMode; + bool ViewportXYClipTestEnable; + bool GuardbandClipTestEnable; + uint32_t UserClipDistanceClipTestEnableBitmask; +#define NORMAL 0 +#define REJECT_ALL 3 +#define ACCEPT_ALL 4 + uint32_t ClipMode; + bool PerspectiveDivideDisable; + bool NonPerspectiveBarycentricEnable; + uint32_t TriangleStripListProvokingVertexSelect; + uint32_t LineStripListProvokingVertexSelect; + uint32_t TriangleFanProvokingVertexSelect; + float MinimumPointWidth; + float MaximumPointWidth; + bool ForceZeroRTAIndexEnable; + uint32_t MaximumVPIndex; +}; + +static inline void +GEN8_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ForceUserClipDistanceCullTestEnableBitmask, 20, 20) | + __gen_field(values->VertexSubPixelPrecisionSelect, 19, 19) | + __gen_field(values->EarlyCullEnable, 18, 18) | + __gen_field(values->ForceUserClipDistanceClipTestEnableBitmask, 17, 17) | + __gen_field(values->ForceClipMode, 16, 16) | + __gen_field(values->ClipperStatisticsEnable, 10, 10) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + + dw[2] = + __gen_field(values->ClipEnable, 31, 31) | + __gen_field(values->APIMode, 30, 30) | + __gen_field(values->ViewportXYClipTestEnable, 28, 28) | + __gen_field(values->GuardbandClipTestEnable, 26, 26) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 16, 23) | + __gen_field(values->ClipMode, 13, 15) | + __gen_field(values->PerspectiveDivideDisable, 9, 9) | + __gen_field(values->NonPerspectiveBarycentricEnable, 8, 8) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 4, 5) | + __gen_field(values->LineStripListProvokingVertexSelect, 2, 3) | + __gen_field(values->TriangleFanProvokingVertexSelect, 0, 1) | + 0; + + dw[3] = + __gen_field(values->MinimumPointWidth * (1 << 3), 17, 27) | + __gen_field(values->MaximumPointWidth * (1 << 3), 6, 16) | + __gen_field(values->ForceZeroRTAIndexEnable, 5, 5) | + __gen_field(values->MaximumVPIndex, 0, 3) | + 0; + +} + +#define GEN8_3DSTATE_CONSTANT_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 9 + +#define GEN8_3DSTATE_CONSTANT_DS_length 0x0000000b + +#define GEN8_3DSTATE_CONSTANT_BODY_length 0x0000000a + +struct GEN8_3DSTATE_CONSTANT_BODY { + uint32_t ConstantBuffer1ReadLength; + uint32_t ConstantBuffer0ReadLength; + uint32_t ConstantBuffer3ReadLength; + uint32_t ConstantBuffer2ReadLength; + __gen_address_type PointerToConstantBuffer0; + __gen_address_type PointerToConstantBuffer1; + __gen_address_type PointerToConstantBuffer2; + __gen_address_type PointerToConstantBuffer3; +}; + +static inline void +GEN8_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_BODY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + uint32_t dw4 = + 0; + + uint64_t qw4 = + __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer1, dw4); + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + uint32_t dw6 = + 0; + + uint64_t qw6 = + __gen_combine_address(data, &dw[6], values->PointerToConstantBuffer2, dw6); + + dw[6] = qw6; + dw[7] = qw6 >> 32; + + uint32_t dw8 = + 0; + + uint64_t qw8 = + __gen_combine_address(data, &dw[8], values->PointerToConstantBuffer3, dw8); + + dw[8] = qw8; + dw[9] = qw8 >> 32; + +} + +struct GEN8_3DSTATE_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN8_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN8_3DSTATE_CONSTANT_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 9 + +#define GEN8_3DSTATE_CONSTANT_GS_length 0x0000000b + +struct GEN8_3DSTATE_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN8_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN8_3DSTATE_CONSTANT_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 9 + +#define GEN8_3DSTATE_CONSTANT_HS_length 0x0000000b + +struct GEN8_3DSTATE_CONSTANT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN8_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN8_3DSTATE_CONSTANT_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 23, \ + .DwordLength = 9 + +#define GEN8_3DSTATE_CONSTANT_PS_length 0x0000000b + +struct GEN8_3DSTATE_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN8_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN8_3DSTATE_CONSTANT_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 9 + +#define GEN8_3DSTATE_CONSTANT_VS_length 0x0000000b + +struct GEN8_3DSTATE_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN8_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN8_3DSTATE_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN8_3DSTATE_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 5, \ + .DwordLength = 6 + +#define GEN8_3DSTATE_DEPTH_BUFFER_length 0x00000008 + +struct GEN8_3DSTATE_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + bool DepthWriteEnable; + bool StencilWriteEnable; + bool HierarchicalDepthBufferEnable; +#define D32_FLOAT 1 +#define D24_UNORM_X8_UINT 3 +#define D16_UNORM 5 + uint32_t SurfaceFormat; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t Height; + uint32_t Width; + uint32_t LOD; + uint32_t Depth; + uint32_t MinimumArrayElement; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE DepthBufferObjectControlState; + uint32_t RenderTargetViewExtent; + uint32_t SurfaceQPitch; +}; + +static inline void +GEN8_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->DepthWriteEnable, 28, 28) | + __gen_field(values->StencilWriteEnable, 27, 27) | + __gen_field(values->HierarchicalDepthBufferEnable, 22, 22) | + __gen_field(values->SurfaceFormat, 18, 20) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->Height, 18, 31) | + __gen_field(values->Width, 4, 17) | + __gen_field(values->LOD, 0, 3) | + 0; + + uint32_t dw_DepthBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DepthBufferObjectControlState, &values->DepthBufferObjectControlState); + dw[5] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->MinimumArrayElement, 10, 20) | + __gen_field(dw_DepthBufferObjectControlState, 0, 6) | + 0; + + dw[6] = + 0; + + dw[7] = + __gen_field(values->RenderTargetViewExtent, 21, 31) | + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + +} + +#define GEN8_3DSTATE_DRAWING_RECTANGLE_length_bias 0x00000002 +#define GEN8_3DSTATE_DRAWING_RECTANGLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 2 + +#define GEN8_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 + +struct GEN8_3DSTATE_DRAWING_RECTANGLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; +#define Legacy 0 +#define Core0Enabled 1 +#define Core1Enabled 2 + uint32_t CoreModeSelect; + uint32_t DwordLength; + uint32_t ClippedDrawingRectangleYMin; + uint32_t ClippedDrawingRectangleXMin; + uint32_t ClippedDrawingRectangleYMax; + uint32_t ClippedDrawingRectangleXMax; + uint32_t DrawingRectangleOriginY; + uint32_t DrawingRectangleOriginX; +}; + +static inline void +GEN8_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_DRAWING_RECTANGLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->CoreModeSelect, 14, 15) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ClippedDrawingRectangleYMin, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMin, 0, 15) | + 0; + + dw[2] = + __gen_field(values->ClippedDrawingRectangleYMax, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMax, 0, 15) | + 0; + + dw[3] = + __gen_field(values->DrawingRectangleOriginY, 16, 31) | + __gen_field(values->DrawingRectangleOriginX, 0, 15) | + 0; + +} + +#define GEN8_3DSTATE_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 29, \ + .DwordLength = 7 + +#define GEN8_3DSTATE_DS_length 0x00000009 + +struct GEN8_3DSTATE_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleDomainPointDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool AccessesUAV; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; + uint64_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t PatchURBEntryReadLength; + uint32_t PatchURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + bool StatisticsEnable; + bool SIMD8DispatchEnable; + bool ComputeWCoordinateEnable; + bool CacheDisable; + bool FunctionEnable; + uint32_t VertexURBEntryOutputReadOffset; + uint32_t VertexURBEntryOutputLength; + uint32_t UserClipDistanceClipTestEnableBitmask; + uint32_t UserClipDistanceCullTestEnableBitmask; +}; + +static inline void +GEN8_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint64_t qw1 = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->SingleDomainPointDispatch, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->AccessesUAV, 14, 14) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + uint64_t qw4 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + dw[6] = + __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | + __gen_field(values->PatchURBEntryReadLength, 11, 17) | + __gen_field(values->PatchURBEntryReadOffset, 4, 9) | + 0; + + dw[7] = + __gen_field(values->MaximumNumberofThreads, 21, 29) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->SIMD8DispatchEnable, 3, 3) | + __gen_field(values->ComputeWCoordinateEnable, 2, 2) | + __gen_field(values->CacheDisable, 1, 1) | + __gen_field(values->FunctionEnable, 0, 0) | + 0; + + dw[8] = + __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | + __gen_field(values->VertexURBEntryOutputLength, 16, 20) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + +} + +#define GEN8_3DSTATE_GATHER_CONSTANT_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 55 + +#define GEN8_3DSTATE_GATHER_CONSTANT_DS_length 0x00000000 + +#define GEN8_GATHER_CONSTANT_ENTRY_length 0x00000001 + +struct GEN8_GATHER_CONSTANT_ENTRY { + uint32_t ConstantBufferOffset; + uint32_t ChannelMask; + uint32_t BindingTableIndexOffset; +}; + +static inline void +GEN8_GATHER_CONSTANT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_GATHER_CONSTANT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->ConstantBufferOffset, 8, 15) | + __gen_field(values->ChannelMask, 4, 7) | + __gen_field(values->BindingTableIndexOffset, 0, 3) | + 0; + +} + +struct GEN8_3DSTATE_GATHER_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_GATHER_CONSTANT_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 53 + +#define GEN8_3DSTATE_GATHER_CONSTANT_GS_length 0x00000000 + +struct GEN8_3DSTATE_GATHER_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_CONSTANT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_GATHER_CONSTANT_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 54 + +#define GEN8_3DSTATE_GATHER_CONSTANT_HS_length 0x00000000 + +struct GEN8_3DSTATE_GATHER_CONSTANT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_CONSTANT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_GATHER_CONSTANT_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 56 + +#define GEN8_3DSTATE_GATHER_CONSTANT_PS_length 0x00000000 + +struct GEN8_3DSTATE_GATHER_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; + bool ConstantBufferDx9Enable; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + __gen_field(values->ConstantBufferDx9Enable, 4, 4) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_GATHER_CONSTANT_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 52 + +#define GEN8_3DSTATE_GATHER_CONSTANT_VS_length 0x00000000 + +struct GEN8_3DSTATE_GATHER_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; + bool ConstantBufferDx9Enable; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + __gen_field(values->ConstantBufferDx9Enable, 4, 4) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_GATHER_POOL_ALLOC_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_POOL_ALLOC_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 2 + +#define GEN8_3DSTATE_GATHER_POOL_ALLOC_length 0x00000004 + +struct GEN8_3DSTATE_GATHER_POOL_ALLOC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GatherPoolBaseAddress; + bool GatherPoolEnable; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; + uint32_t GatherPoolBufferSize; +}; + +static inline void +GEN8_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_POOL_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_MemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + uint32_t dw1 = + __gen_field(values->GatherPoolEnable, 11, 11) | + __gen_field(dw_MemoryObjectControlState, 0, 6) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->GatherPoolBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->GatherPoolBufferSize, 12, 31) | + 0; + +} + +#define GEN8_3DSTATE_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 8 + +#define GEN8_3DSTATE_GS_length 0x0000000a + +struct GEN8_3DSTATE_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t KernelStartPointer; + uint32_t SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool AccessesUAV; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t ExpectedVertexCount; + uint64_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t OutputVertexSize; + uint32_t OutputTopology; + uint32_t VertexURBEntryReadLength; + bool IncludeVertexHandles; + uint32_t VertexURBEntryReadOffset; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t MaximumNumberofThreads; + uint32_t ControlDataHeaderSize; + uint32_t InstanceControl; + uint32_t DefaultStreamId; +#define DispatchModeSingle 0 +#define DispatchModeDualInstance 1 +#define DispatchModeDualObject 2 +#define DispatchModeSIMD8 3 + uint32_t DispatchMode; + bool StatisticsEnable; + uint32_t InvocationsIncrementValue; + bool IncludePrimitiveID; + uint32_t Hint; +#define LEADING 0 +#define TRAILING 1 + uint32_t ReorderMode; + bool DiscardAdjacency; + bool Enable; +#define CUT 0 +#define SID 1 + uint32_t ControlDataFormat; + bool StaticOutput; + uint32_t StaticOutputVertexCount; + uint32_t VertexURBEntryOutputReadOffset; + uint32_t VertexURBEntryOutputLength; + uint32_t UserClipDistanceClipTestEnableBitmask; + uint32_t UserClipDistanceCullTestEnableBitmask; +}; + +static inline void +GEN8_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint64_t qw1 = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->SingleProgramFlow, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->AccessesUAV, 12, 12) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + __gen_field(values->ExpectedVertexCount, 0, 5) | + 0; + + uint64_t qw4 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + dw[6] = + __gen_field(values->OutputVertexSize, 23, 28) | + __gen_field(values->OutputTopology, 17, 22) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->IncludeVertexHandles, 10, 10) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + __gen_field(values->DispatchGRFStartRegisterForURBData, 0, 3) | + 0; + + dw[7] = + __gen_field(values->MaximumNumberofThreads, 24, 31) | + __gen_field(values->ControlDataHeaderSize, 20, 23) | + __gen_field(values->InstanceControl, 15, 19) | + __gen_field(values->DefaultStreamId, 13, 14) | + __gen_field(values->DispatchMode, 11, 12) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->InvocationsIncrementValue, 5, 9) | + __gen_field(values->IncludePrimitiveID, 4, 4) | + __gen_field(values->Hint, 3, 3) | + __gen_field(values->ReorderMode, 2, 2) | + __gen_field(values->DiscardAdjacency, 1, 1) | + __gen_field(values->Enable, 0, 0) | + 0; + + dw[8] = + __gen_field(values->ControlDataFormat, 31, 31) | + __gen_field(values->StaticOutput, 30, 30) | + __gen_field(values->StaticOutputVertexCount, 16, 26) | + 0; + + dw[9] = + __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | + __gen_field(values->VertexURBEntryOutputLength, 16, 20) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + +} + +#define GEN8_3DSTATE_HIER_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN8_3DSTATE_HIER_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 3 + +#define GEN8_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000005 + +struct GEN8_3DSTATE_HIER_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE HierarchicalDepthBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t SurfaceQPitch; +}; + +static inline void +GEN8_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_HIER_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_HierarchicalDepthBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_HierarchicalDepthBufferObjectControlState, &values->HierarchicalDepthBufferObjectControlState); + dw[1] = + __gen_field(dw_HierarchicalDepthBufferObjectControlState, 25, 31) | + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + +} + +#define GEN8_3DSTATE_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 27, \ + .DwordLength = 7 + +#define GEN8_3DSTATE_HS_length 0x00000009 + +struct GEN8_3DSTATE_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; + bool Enable; + bool StatisticsEnable; + uint32_t MaximumNumberofThreads; + uint32_t InstanceCount; + uint64_t KernelStartPointer; + uint64_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + bool SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; + bool AccessesUAV; + bool IncludeVertexHandles; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; +}; + +static inline void +GEN8_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 12, 12) | + 0; + + dw[2] = + __gen_field(values->Enable, 31, 31) | + __gen_field(values->StatisticsEnable, 29, 29) | + __gen_field(values->MaximumNumberofThreads, 8, 16) | + __gen_field(values->InstanceCount, 0, 3) | + 0; + + uint64_t qw3 = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[3] = qw3; + dw[4] = qw3 >> 32; + + uint64_t qw5 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[5] = qw5; + dw[6] = qw5 >> 32; + + dw[7] = + __gen_field(values->SingleProgramFlow, 27, 27) | + __gen_field(values->VectorMaskEnable, 26, 26) | + __gen_field(values->AccessesUAV, 25, 25) | + __gen_field(values->IncludeVertexHandles, 24, 24) | + __gen_field(values->DispatchGRFStartRegisterForURBData, 19, 23) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[8] = + 0; + +} + +#define GEN8_3DSTATE_INDEX_BUFFER_length_bias 0x00000002 +#define GEN8_3DSTATE_INDEX_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 3 + +#define GEN8_3DSTATE_INDEX_BUFFER_length 0x00000005 + +struct GEN8_3DSTATE_INDEX_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define INDEX_BYTE 0 +#define INDEX_WORD 1 +#define INDEX_DWORD 2 + uint32_t IndexFormat; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; + __gen_address_type BufferStartingAddress; + uint32_t BufferSize; +}; + +static inline void +GEN8_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_INDEX_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_MemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + dw[1] = + __gen_field(values->IndexFormat, 8, 9) | + __gen_field(dw_MemoryObjectControlState, 0, 6) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->BufferStartingAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->BufferSize, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_LINE_STIPPLE_length_bias 0x00000002 +#define GEN8_3DSTATE_LINE_STIPPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 8, \ + .DwordLength = 1 + +#define GEN8_3DSTATE_LINE_STIPPLE_length 0x00000003 + +struct GEN8_3DSTATE_LINE_STIPPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool ModifyEnableCurrentRepeatCounterCurrentStippleIndex; + uint32_t CurrentRepeatCounter; + uint32_t CurrentStippleIndex; + uint32_t LineStipplePattern; + float LineStippleInverseRepeatCount; + uint32_t LineStippleRepeatCount; +}; + +static inline void +GEN8_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_LINE_STIPPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ModifyEnableCurrentRepeatCounterCurrentStippleIndex, 31, 31) | + __gen_field(values->CurrentRepeatCounter, 21, 29) | + __gen_field(values->CurrentStippleIndex, 16, 19) | + __gen_field(values->LineStipplePattern, 0, 15) | + 0; + + dw[2] = + __gen_field(values->LineStippleInverseRepeatCount * (1 << 16), 15, 31) | + __gen_field(values->LineStippleRepeatCount, 0, 8) | + 0; + +} + +#define GEN8_3DSTATE_MONOFILTER_SIZE_length_bias 0x00000002 +#define GEN8_3DSTATE_MONOFILTER_SIZE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_MONOFILTER_SIZE_length 0x00000002 + +struct GEN8_3DSTATE_MONOFILTER_SIZE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t MonochromeFilterWidth; + uint32_t MonochromeFilterHeight; +}; + +static inline void +GEN8_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_MONOFILTER_SIZE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->MonochromeFilterWidth, 3, 5) | + __gen_field(values->MonochromeFilterHeight, 0, 2) | + 0; + +} + +#define GEN8_3DSTATE_MULTISAMPLE_length_bias 0x00000002 +#define GEN8_3DSTATE_MULTISAMPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 13, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_MULTISAMPLE_length 0x00000002 + +struct GEN8_3DSTATE_MULTISAMPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PixelPositionOffsetEnable; +#define CENTER 0 +#define UL_CORNER 1 + uint32_t PixelLocation; + uint32_t NumberofMultisamples; +}; + +static inline void +GEN8_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_MULTISAMPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PixelPositionOffsetEnable, 5, 5) | + __gen_field(values->PixelLocation, 4, 4) | + __gen_field(values->NumberofMultisamples, 1, 3) | + 0; + +} + +#define GEN8_3DSTATE_POLY_STIPPLE_OFFSET_length_bias 0x00000002 +#define GEN8_3DSTATE_POLY_STIPPLE_OFFSET_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 + +struct GEN8_3DSTATE_POLY_STIPPLE_OFFSET { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PolygonStippleXOffset; + uint32_t PolygonStippleYOffset; +}; + +static inline void +GEN8_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PolygonStippleXOffset, 8, 12) | + __gen_field(values->PolygonStippleYOffset, 0, 4) | + 0; + +} + +#define GEN8_3DSTATE_POLY_STIPPLE_PATTERN_length_bias 0x00000002 +#define GEN8_3DSTATE_POLY_STIPPLE_PATTERN_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 31 + +#define GEN8_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 + +struct GEN8_3DSTATE_POLY_STIPPLE_PATTERN { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PatternRow[32]; +}; + +static inline void +GEN8_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + for (uint32_t i = 0, j = 1; i < 32; i += 1, j++) { + dw[j] = + __gen_field(values->PatternRow[i + 0], 0, 31) | + 0; + } + +} + +#define GEN8_3DSTATE_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 32, \ + .DwordLength = 10 + +#define GEN8_3DSTATE_PS_length 0x0000000c + +struct GEN8_3DSTATE_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t KernelStartPointer0; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; +#define FlushedtoZero 0 +#define Retained 1 + uint32_t SinglePrecisionDenormalMode; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint64_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t MaximumNumberofThreadsPerPSD; + bool PushConstantEnable; + bool RenderTargetFastClearEnable; + bool RenderTargetResolveEnable; +#define POSOFFSET_NONE 0 +#define POSOFFSET_CENTROID 2 +#define POSOFFSET_SAMPLE 3 + uint32_t PositionXYOffsetSelect; + bool _32PixelDispatchEnable; + bool _16PixelDispatchEnable; + bool _8PixelDispatchEnable; + uint32_t DispatchGRFStartRegisterForConstantSetupData0; + uint32_t DispatchGRFStartRegisterForConstantSetupData1; + uint32_t DispatchGRFStartRegisterForConstantSetupData2; + uint64_t KernelStartPointer1; + uint64_t KernelStartPointer2; +}; + +static inline void +GEN8_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint64_t qw1 = + __gen_offset(values->KernelStartPointer0, 6, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->SingleProgramFlow, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->SinglePrecisionDenormalMode, 26, 26) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->RoundingMode, 14, 15) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + uint64_t qw4 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + dw[6] = + __gen_field(values->MaximumNumberofThreadsPerPSD, 23, 31) | + __gen_field(values->PushConstantEnable, 11, 11) | + __gen_field(values->RenderTargetFastClearEnable, 8, 8) | + __gen_field(values->RenderTargetResolveEnable, 6, 6) | + __gen_field(values->PositionXYOffsetSelect, 3, 4) | + __gen_field(values->_32PixelDispatchEnable, 2, 2) | + __gen_field(values->_16PixelDispatchEnable, 1, 1) | + __gen_field(values->_8PixelDispatchEnable, 0, 0) | + 0; + + dw[7] = + __gen_field(values->DispatchGRFStartRegisterForConstantSetupData0, 16, 22) | + __gen_field(values->DispatchGRFStartRegisterForConstantSetupData1, 8, 14) | + __gen_field(values->DispatchGRFStartRegisterForConstantSetupData2, 0, 6) | + 0; + + uint64_t qw8 = + __gen_offset(values->KernelStartPointer1, 6, 63) | + 0; + + dw[8] = qw8; + dw[9] = qw8 >> 32; + + uint64_t qw10 = + __gen_offset(values->KernelStartPointer2, 6, 63) | + 0; + + dw[10] = qw10; + dw[11] = qw10 >> 32; + +} + +#define GEN8_3DSTATE_PS_BLEND_length_bias 0x00000002 +#define GEN8_3DSTATE_PS_BLEND_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 77, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_PS_BLEND_length 0x00000002 + +struct GEN8_3DSTATE_PS_BLEND { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool AlphaToCoverageEnable; + bool HasWriteableRT; + bool ColorBufferBlendEnable; + uint32_t SourceAlphaBlendFactor; + uint32_t DestinationAlphaBlendFactor; + uint32_t SourceBlendFactor; + uint32_t DestinationBlendFactor; + bool AlphaTestEnable; + bool IndependentAlphaBlendEnable; +}; + +static inline void +GEN8_3DSTATE_PS_BLEND_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PS_BLEND * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AlphaToCoverageEnable, 31, 31) | + __gen_field(values->HasWriteableRT, 30, 30) | + __gen_field(values->ColorBufferBlendEnable, 29, 29) | + __gen_field(values->SourceAlphaBlendFactor, 24, 28) | + __gen_field(values->DestinationAlphaBlendFactor, 19, 23) | + __gen_field(values->SourceBlendFactor, 14, 18) | + __gen_field(values->DestinationBlendFactor, 9, 13) | + __gen_field(values->AlphaTestEnable, 8, 8) | + __gen_field(values->IndependentAlphaBlendEnable, 7, 7) | + 0; + +} + +#define GEN8_3DSTATE_PS_EXTRA_length_bias 0x00000002 +#define GEN8_3DSTATE_PS_EXTRA_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 79, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_PS_EXTRA_length 0x00000002 + +struct GEN8_3DSTATE_PS_EXTRA { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool PixelShaderValid; + bool PixelShaderDoesnotwritetoRT; + bool oMaskPresenttoRenderTarget; + bool PixelShaderKillsPixel; +#define PSCDEPTH_OFF 0 +#define PSCDEPTH_ON 1 +#define PSCDEPTH_ON_GE 2 +#define PSCDEPTH_ON_LE 3 + uint32_t PixelShaderComputedDepthMode; + bool ForceComputedDepth; + bool PixelShaderUsesSourceDepth; + bool PixelShaderUsesSourceW; + uint32_t Removed; + bool AttributeEnable; + bool PixelShaderDisablesAlphaToCoverage; + bool PixelShaderIsPerSample; + bool PixelShaderHasUAV; + bool PixelShaderUsesInputCoverageMask; +}; + +static inline void +GEN8_3DSTATE_PS_EXTRA_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PS_EXTRA * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PixelShaderValid, 31, 31) | + __gen_field(values->PixelShaderDoesnotwritetoRT, 30, 30) | + __gen_field(values->oMaskPresenttoRenderTarget, 29, 29) | + __gen_field(values->PixelShaderKillsPixel, 28, 28) | + __gen_field(values->PixelShaderComputedDepthMode, 26, 27) | + __gen_field(values->ForceComputedDepth, 25, 25) | + __gen_field(values->PixelShaderUsesSourceDepth, 24, 24) | + __gen_field(values->PixelShaderUsesSourceW, 23, 23) | + __gen_field(values->Removed, 17, 17) | + __gen_field(values->AttributeEnable, 8, 8) | + __gen_field(values->PixelShaderDisablesAlphaToCoverage, 7, 7) | + __gen_field(values->PixelShaderIsPerSample, 6, 6) | + __gen_field(values->PixelShaderHasUAV, 2, 2) | + __gen_field(values->PixelShaderUsesInputCoverageMask, 1, 1) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_RASTER_length_bias 0x00000002 +#define GEN8_3DSTATE_RASTER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 80, \ + .DwordLength = 3 + +#define GEN8_3DSTATE_RASTER_length 0x00000005 + +struct GEN8_3DSTATE_RASTER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define DX9OGL 0 +#define DX100 1 +#define DX101 2 + uint32_t APIMode; +#define Clockwise 0 +#define CounterClockwise 1 + uint32_t FrontWinding; +#define FSC_NUMRASTSAMPLES_0 0 +#define FSC_NUMRASTSAMPLES_1 1 +#define FSC_NUMRASTSAMPLES_2 2 +#define FSC_NUMRASTSAMPLES_4 3 +#define FSC_NUMRASTSAMPLES_8 4 +#define FSC_NUMRASTSAMPLES_16 5 + uint32_t ForcedSampleCount; +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_FRONT 2 +#define CULLMODE_BACK 3 + uint32_t CullMode; +#define Normal 0 +#define Force 1 + uint32_t ForceMultisampling; + bool SmoothPointEnable; + bool DXMultisampleRasterizationEnable; +#define MSRASTMODE_OFF_PIXEL 0 +#define MSRASTMODE_OFF_PATTERN 1 +#define MSRASTMODE_ON_PIXEL 2 +#define MSRASTMODE_ON_PATTERN 3 + uint32_t DXMultisampleRasterizationMode; + bool GlobalDepthOffsetEnableSolid; + bool GlobalDepthOffsetEnableWireframe; + bool GlobalDepthOffsetEnablePoint; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t FrontFaceFillMode; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t BackFaceFillMode; + bool AntialiasingEnable; + bool ScissorRectangleEnable; + bool ViewportZClipTestEnable; + float GlobalDepthOffsetConstant; + float GlobalDepthOffsetScale; + float GlobalDepthOffsetClamp; +}; + +static inline void +GEN8_3DSTATE_RASTER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_RASTER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->APIMode, 22, 23) | + __gen_field(values->FrontWinding, 21, 21) | + __gen_field(values->ForcedSampleCount, 18, 20) | + __gen_field(values->CullMode, 16, 17) | + __gen_field(values->ForceMultisampling, 14, 14) | + __gen_field(values->SmoothPointEnable, 13, 13) | + __gen_field(values->DXMultisampleRasterizationEnable, 12, 12) | + __gen_field(values->DXMultisampleRasterizationMode, 10, 11) | + __gen_field(values->GlobalDepthOffsetEnableSolid, 9, 9) | + __gen_field(values->GlobalDepthOffsetEnableWireframe, 8, 8) | + __gen_field(values->GlobalDepthOffsetEnablePoint, 7, 7) | + __gen_field(values->FrontFaceFillMode, 5, 6) | + __gen_field(values->BackFaceFillMode, 3, 4) | + __gen_field(values->AntialiasingEnable, 2, 2) | + __gen_field(values->ScissorRectangleEnable, 1, 1) | + __gen_field(values->ViewportZClipTestEnable, 0, 0) | + 0; + + dw[2] = + __gen_float(values->GlobalDepthOffsetConstant) | + 0; + + dw[3] = + __gen_float(values->GlobalDepthOffsetScale) | + 0; + + dw[4] = + __gen_float(values->GlobalDepthOffsetClamp) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2 + +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_length 0x00000000 + +#define GEN8_PALETTE_ENTRY_length 0x00000001 + +struct GEN8_PALETTE_ENTRY { + uint32_t Alpha; + uint32_t Red; + uint32_t Green; + uint32_t Blue; +}; + +static inline void +GEN8_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_PALETTE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->Alpha, 24, 31) | + __gen_field(values->Red, 16, 23) | + __gen_field(values->Green, 8, 15) | + __gen_field(values->Blue, 0, 7) | + 0; + +} + +struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 12 + +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_length 0x00000000 + +struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 45, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSSamplerState, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 46, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSSamplerState, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 44, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSSamplerState, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 47, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSSamplerState, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 43, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSSamplerState, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLE_MASK_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLE_MASK_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_SAMPLE_MASK_length 0x00000002 + +struct GEN8_3DSTATE_SAMPLE_MASK { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SampleMask; +}; + +static inline void +GEN8_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLE_MASK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SampleMask, 0, 15) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLE_PATTERN_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLE_PATTERN_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 28, \ + .DwordLength = 7 + +#define GEN8_3DSTATE_SAMPLE_PATTERN_length 0x00000009 + +struct GEN8_3DSTATE_SAMPLE_PATTERN { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float _8xSample7XOffset; + float _8xSample7YOffset; + float _8xSample6XOffset; + float _8xSample6YOffset; + float _8xSample5XOffset; + float _8xSample5YOffset; + float _8xSample4XOffset; + float _8xSample4YOffset; + float _8xSample3XOffset; + float _8xSample3YOffset; + float _8xSample2XOffset; + float _8xSample2YOffset; + float _8xSample1XOffset; + float _8xSample1YOffset; + float _8xSample0XOffset; + float _8xSample0YOffset; + float _4xSample3XOffset; + float _4xSample3YOffset; + float _4xSample2XOffset; + float _4xSample2YOffset; + float _4xSample1XOffset; + float _4xSample1YOffset; + float _4xSample0XOffset; + float _4xSample0YOffset; + float _1xSample0XOffset; + float _1xSample0YOffset; + float _2xSample1XOffset; + float _2xSample1YOffset; + float _2xSample0XOffset; + float _2xSample0YOffset; +}; + +static inline void +GEN8_3DSTATE_SAMPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLE_PATTERN * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + for (uint32_t i = 0, j = 1; i < 4; i += 1, j++) { + dw[j] = + 0; + } + + dw[5] = + __gen_field(values->_8xSample7XOffset * (1 << 4), 28, 31) | + __gen_field(values->_8xSample7YOffset * (1 << 4), 24, 27) | + __gen_field(values->_8xSample6XOffset * (1 << 4), 20, 23) | + __gen_field(values->_8xSample6YOffset * (1 << 4), 16, 19) | + __gen_field(values->_8xSample5XOffset * (1 << 4), 12, 15) | + __gen_field(values->_8xSample5YOffset * (1 << 4), 8, 11) | + __gen_field(values->_8xSample4XOffset * (1 << 4), 4, 7) | + __gen_field(values->_8xSample4YOffset * (1 << 4), 0, 3) | + 0; + + dw[6] = + __gen_field(values->_8xSample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->_8xSample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->_8xSample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->_8xSample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->_8xSample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->_8xSample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->_8xSample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->_8xSample0YOffset * (1 << 4), 0, 3) | + 0; + + dw[7] = + __gen_field(values->_4xSample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->_4xSample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->_4xSample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->_4xSample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->_4xSample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->_4xSample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->_4xSample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->_4xSample0YOffset * (1 << 4), 0, 3) | + 0; + + dw[8] = + __gen_field(values->_1xSample0XOffset * (1 << 4), 20, 23) | + __gen_field(values->_1xSample0YOffset * (1 << 4), 16, 19) | + __gen_field(values->_2xSample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->_2xSample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->_2xSample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->_2xSample0YOffset * (1 << 4), 0, 3) | + 0; + +} + +#define GEN8_3DSTATE_SBE_length_bias 0x00000002 +#define GEN8_3DSTATE_SBE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 31, \ + .DwordLength = 2 + +#define GEN8_3DSTATE_SBE_length 0x00000004 + +struct GEN8_3DSTATE_SBE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool ForceVertexURBEntryReadLength; + bool ForceVertexURBEntryReadOffset; + uint32_t NumberofSFOutputAttributes; + bool AttributeSwizzleEnable; +#define UPPERLEFT 0 +#define LOWERLEFT 1 + uint32_t PointSpriteTextureCoordinateOrigin; + bool PrimitiveIDOverrideComponentW; + bool PrimitiveIDOverrideComponentZ; + bool PrimitiveIDOverrideComponentY; + bool PrimitiveIDOverrideComponentX; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t PrimitiveIDOverrideAttributeSelect; + uint32_t PointSpriteTextureCoordinateEnable; + uint32_t ConstantInterpolationEnable; +}; + +static inline void +GEN8_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SBE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ForceVertexURBEntryReadLength, 29, 29) | + __gen_field(values->ForceVertexURBEntryReadOffset, 28, 28) | + __gen_field(values->NumberofSFOutputAttributes, 22, 27) | + __gen_field(values->AttributeSwizzleEnable, 21, 21) | + __gen_field(values->PointSpriteTextureCoordinateOrigin, 20, 20) | + __gen_field(values->PrimitiveIDOverrideComponentW, 19, 19) | + __gen_field(values->PrimitiveIDOverrideComponentZ, 18, 18) | + __gen_field(values->PrimitiveIDOverrideComponentY, 17, 17) | + __gen_field(values->PrimitiveIDOverrideComponentX, 16, 16) | + __gen_field(values->VertexURBEntryReadLength, 11, 15) | + __gen_field(values->VertexURBEntryReadOffset, 5, 10) | + __gen_field(values->PrimitiveIDOverrideAttributeSelect, 0, 4) | + 0; + + dw[2] = + __gen_field(values->PointSpriteTextureCoordinateEnable, 0, 31) | + 0; + + dw[3] = + __gen_field(values->ConstantInterpolationEnable, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_SBE_SWIZ_length_bias 0x00000002 +#define GEN8_3DSTATE_SBE_SWIZ_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 81, \ + .DwordLength = 9 + +#define GEN8_3DSTATE_SBE_SWIZ_length 0x0000000b + +#define GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_length 0x00000001 + +struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL { + bool ComponentOverrideW; + bool ComponentOverrideZ; + bool ComponentOverrideY; + bool ComponentOverrideX; + uint32_t SwizzleControlMode; +#define CONST_0000 0 +#define CONST_0001_FLOAT 1 +#define CONST_1111_FLOAT 2 +#define PRIM_ID 3 + uint32_t ConstantSource; +#define INPUTATTR 0 +#define INPUTATTR_FACING 1 +#define INPUTATTR_W 2 +#define INPUTATTR_FACING_W 3 + uint32_t SwizzleSelect; + uint32_t SourceAttribute; +}; + +static inline void +GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ComponentOverrideW, 15, 15) | + __gen_field(values->ComponentOverrideZ, 14, 14) | + __gen_field(values->ComponentOverrideY, 13, 13) | + __gen_field(values->ComponentOverrideX, 12, 12) | + __gen_field(values->SwizzleControlMode, 11, 11) | + __gen_field(values->ConstantSource, 9, 10) | + __gen_field(values->SwizzleSelect, 6, 7) | + __gen_field(values->SourceAttribute, 0, 4) | + 0; + +} + +struct GEN8_3DSTATE_SBE_SWIZ { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL Attribute[16]; + uint32_t AttributeWrapShortestEnables[16]; +}; + +static inline void +GEN8_3DSTATE_SBE_SWIZ_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SBE_SWIZ * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + for (uint32_t i = 0, j = 1; i < 16; i += 2, j++) { + uint32_t dw_Attribute0; + GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(data, &dw_Attribute0, &values->Attribute[i + 0]); + uint32_t dw_Attribute1; + GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(data, &dw_Attribute1, &values->Attribute[i + 1]); + dw[j] = + __gen_field(dw_Attribute0, 0, 15) | + __gen_field(dw_Attribute1, 16, 31) | + 0; + } + + for (uint32_t i = 0, j = 9; i < 16; i += 8, j++) { + dw[j] = + __gen_field(values->AttributeWrapShortestEnables[i + 0], 0, 3) | + __gen_field(values->AttributeWrapShortestEnables[i + 1], 4, 7) | + __gen_field(values->AttributeWrapShortestEnables[i + 2], 8, 11) | + __gen_field(values->AttributeWrapShortestEnables[i + 3], 12, 15) | + __gen_field(values->AttributeWrapShortestEnables[i + 4], 16, 19) | + __gen_field(values->AttributeWrapShortestEnables[i + 5], 20, 23) | + __gen_field(values->AttributeWrapShortestEnables[i + 6], 24, 27) | + __gen_field(values->AttributeWrapShortestEnables[i + 7], 28, 31) | + 0; + } + +} + +#define GEN8_3DSTATE_SCISSOR_STATE_POINTERS_length_bias 0x00000002 +#define GEN8_3DSTATE_SCISSOR_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 15, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 + +struct GEN8_3DSTATE_SCISSOR_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ScissorRectPointer; +}; + +static inline void +GEN8_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ScissorRectPointer, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SF_length_bias 0x00000002 +#define GEN8_3DSTATE_SF_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 2 + +#define GEN8_3DSTATE_SF_length 0x00000004 + +struct GEN8_3DSTATE_SF { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool LegacyGlobalDepthBiasEnable; + bool StatisticsEnable; + bool ViewportTransformEnable; + float LineWidth; +#define _05pixels 0 +#define _10pixels 1 +#define _20pixels 2 +#define _40pixels 3 + uint32_t LineEndCapAntialiasingRegionWidth; + bool LastPixelEnable; + uint32_t TriangleStripListProvokingVertexSelect; + uint32_t LineStripListProvokingVertexSelect; + uint32_t TriangleFanProvokingVertexSelect; +#define AALINEDISTANCE_TRUE 1 + uint32_t AALineDistanceMode; + bool SmoothPointEnable; + uint32_t VertexSubPixelPrecisionSelect; +#define Vertex 0 +#define State 1 + uint32_t PointWidthSource; + float PointWidth; +}; + +static inline void +GEN8_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->LegacyGlobalDepthBiasEnable, 11, 11) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->ViewportTransformEnable, 1, 1) | + 0; + + dw[2] = + __gen_field(values->LineWidth * (1 << 7), 18, 27) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 16, 17) | + 0; + + dw[3] = + __gen_field(values->LastPixelEnable, 31, 31) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 29, 30) | + __gen_field(values->LineStripListProvokingVertexSelect, 27, 28) | + __gen_field(values->TriangleFanProvokingVertexSelect, 25, 26) | + __gen_field(values->AALineDistanceMode, 14, 14) | + __gen_field(values->SmoothPointEnable, 13, 13) | + __gen_field(values->VertexSubPixelPrecisionSelect, 12, 12) | + __gen_field(values->PointWidthSource, 11, 11) | + __gen_field(values->PointWidth * (1 << 3), 0, 10) | + 0; + +} + +#define GEN8_3DSTATE_SO_BUFFER_length_bias 0x00000002 +#define GEN8_3DSTATE_SO_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 6 + +#define GEN8_3DSTATE_SO_BUFFER_length 0x00000008 + +struct GEN8_3DSTATE_SO_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool SOBufferEnable; + uint32_t SOBufferIndex; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE SOBufferObjectControlState; + bool StreamOffsetWriteEnable; + bool StreamOutputBufferOffsetAddressEnable; + __gen_address_type SurfaceBaseAddress; + uint32_t SurfaceSize; + __gen_address_type StreamOutputBufferOffsetAddress; + uint32_t StreamOffset; +}; + +static inline void +GEN8_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SO_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_SOBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SOBufferObjectControlState, &values->SOBufferObjectControlState); + dw[1] = + __gen_field(values->SOBufferEnable, 31, 31) | + __gen_field(values->SOBufferIndex, 29, 30) | + __gen_field(dw_SOBufferObjectControlState, 22, 28) | + __gen_field(values->StreamOffsetWriteEnable, 21, 21) | + __gen_field(values->StreamOutputBufferOffsetAddressEnable, 20, 20) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->SurfaceSize, 0, 29) | + 0; + + uint32_t dw5 = + 0; + + uint64_t qw5 = + __gen_combine_address(data, &dw[5], values->StreamOutputBufferOffsetAddress, dw5); + + dw[5] = qw5; + dw[6] = qw5 >> 32; + + dw[7] = + __gen_field(values->StreamOffset, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_SO_DECL_LIST_length_bias 0x00000002 +#define GEN8_3DSTATE_SO_DECL_LIST_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 23 + +#define GEN8_3DSTATE_SO_DECL_LIST_length 0x00000000 + +#define GEN8_SO_DECL_ENTRY_length 0x00000002 + +#define GEN8_SO_DECL_length 0x00000001 + +struct GEN8_SO_DECL { + uint32_t OutputBufferSlot; + uint32_t HoleFlag; + uint32_t RegisterIndex; + uint32_t ComponentMask; +}; + +static inline void +GEN8_SO_DECL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SO_DECL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->OutputBufferSlot, 12, 13) | + __gen_field(values->HoleFlag, 11, 11) | + __gen_field(values->RegisterIndex, 4, 9) | + __gen_field(values->ComponentMask, 0, 3) | + 0; + +} + +struct GEN8_SO_DECL_ENTRY { + struct GEN8_SO_DECL Stream3Decl; + struct GEN8_SO_DECL Stream2Decl; + struct GEN8_SO_DECL Stream1Decl; + struct GEN8_SO_DECL Stream0Decl; +}; + +static inline void +GEN8_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SO_DECL_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_Stream3Decl; + GEN8_SO_DECL_pack(data, &dw_Stream3Decl, &values->Stream3Decl); + uint32_t dw_Stream2Decl; + GEN8_SO_DECL_pack(data, &dw_Stream2Decl, &values->Stream2Decl); + uint32_t dw_Stream1Decl; + GEN8_SO_DECL_pack(data, &dw_Stream1Decl, &values->Stream1Decl); + uint32_t dw_Stream0Decl; + GEN8_SO_DECL_pack(data, &dw_Stream0Decl, &values->Stream0Decl); + uint64_t qw0 = + __gen_field(dw_Stream3Decl, 48, 63) | + __gen_field(dw_Stream2Decl, 32, 47) | + __gen_field(dw_Stream1Decl, 16, 31) | + __gen_field(dw_Stream0Decl, 0, 15) | + 0; + + dw[0] = qw0; + dw[1] = qw0 >> 32; + +} + +struct GEN8_3DSTATE_SO_DECL_LIST { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StreamtoBufferSelects3; + uint32_t StreamtoBufferSelects2; + uint32_t StreamtoBufferSelects1; + uint32_t StreamtoBufferSelects0; + uint32_t NumEntries3; + uint32_t NumEntries2; + uint32_t NumEntries1; + uint32_t NumEntries0; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SO_DECL_LIST * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->StreamtoBufferSelects3, 12, 15) | + __gen_field(values->StreamtoBufferSelects2, 8, 11) | + __gen_field(values->StreamtoBufferSelects1, 4, 7) | + __gen_field(values->StreamtoBufferSelects0, 0, 3) | + 0; + + dw[2] = + __gen_field(values->NumEntries3, 24, 31) | + __gen_field(values->NumEntries2, 16, 23) | + __gen_field(values->NumEntries1, 8, 15) | + __gen_field(values->NumEntries0, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_STENCIL_BUFFER_length_bias 0x00000002 +#define GEN8_3DSTATE_STENCIL_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 3 + +#define GEN8_3DSTATE_STENCIL_BUFFER_length 0x00000005 + +struct GEN8_3DSTATE_STENCIL_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StencilBufferEnable; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE StencilBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t SurfaceQPitch; +}; + +static inline void +GEN8_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_STENCIL_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_StencilBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StencilBufferObjectControlState, &values->StencilBufferObjectControlState); + dw[1] = + __gen_field(values->StencilBufferEnable, 31, 31) | + __gen_field(dw_StencilBufferObjectControlState, 22, 28) | + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + +} + +#define GEN8_3DSTATE_STREAMOUT_length_bias 0x00000002 +#define GEN8_3DSTATE_STREAMOUT_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 30, \ + .DwordLength = 3 + +#define GEN8_3DSTATE_STREAMOUT_length 0x00000005 + +struct GEN8_3DSTATE_STREAMOUT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SOFunctionEnable; + uint32_t APIRenderingDisable; + uint32_t RenderStreamSelect; +#define LEADING 0 +#define TRAILING 1 + uint32_t ReorderMode; + bool SOStatisticsEnable; +#define Normal 0 +#define Resreved 1 +#define Force_Off 2 +#define Force_on 3 + uint32_t ForceRendering; + uint32_t Stream3VertexReadOffset; + uint32_t Stream3VertexReadLength; + uint32_t Stream2VertexReadOffset; + uint32_t Stream2VertexReadLength; + uint32_t Stream1VertexReadOffset; + uint32_t Stream1VertexReadLength; + uint32_t Stream0VertexReadOffset; + uint32_t Stream0VertexReadLength; + uint32_t Buffer1SurfacePitch; + uint32_t Buffer0SurfacePitch; + uint32_t Buffer3SurfacePitch; + uint32_t Buffer2SurfacePitch; +}; + +static inline void +GEN8_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_STREAMOUT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SOFunctionEnable, 31, 31) | + __gen_field(values->APIRenderingDisable, 30, 30) | + __gen_field(values->RenderStreamSelect, 27, 28) | + __gen_field(values->ReorderMode, 26, 26) | + __gen_field(values->SOStatisticsEnable, 25, 25) | + __gen_field(values->ForceRendering, 23, 24) | + 0; + + dw[2] = + __gen_field(values->Stream3VertexReadOffset, 29, 29) | + __gen_field(values->Stream3VertexReadLength, 24, 28) | + __gen_field(values->Stream2VertexReadOffset, 21, 21) | + __gen_field(values->Stream2VertexReadLength, 16, 20) | + __gen_field(values->Stream1VertexReadOffset, 13, 13) | + __gen_field(values->Stream1VertexReadLength, 8, 12) | + __gen_field(values->Stream0VertexReadOffset, 5, 5) | + __gen_field(values->Stream0VertexReadLength, 0, 4) | + 0; + + dw[3] = + __gen_field(values->Buffer1SurfacePitch, 16, 27) | + __gen_field(values->Buffer0SurfacePitch, 0, 11) | + 0; + + dw[4] = + __gen_field(values->Buffer3SurfacePitch, 16, 27) | + __gen_field(values->Buffer2SurfacePitch, 0, 11) | + 0; + +} + +#define GEN8_3DSTATE_TE_length_bias 0x00000002 +#define GEN8_3DSTATE_TE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 28, \ + .DwordLength = 2 + +#define GEN8_3DSTATE_TE_length 0x00000004 + +struct GEN8_3DSTATE_TE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define INTEGER 0 +#define ODD_FRACTIONAL 1 +#define EVEN_FRACTIONAL 2 + uint32_t Partitioning; +#define POINT 0 +#define OUTPUT_LINE 1 +#define OUTPUT_TRI_CW 2 +#define OUTPUT_TRI_CCW 3 + uint32_t OutputTopology; +#define QUAD 0 +#define TRI 1 +#define ISOLINE 2 + uint32_t TEDomain; +#define HW_TESS 0 +#define SW_TESS 1 + uint32_t TEMode; + bool TEEnable; + float MaximumTessellationFactorOdd; + float MaximumTessellationFactorNotOdd; +}; + +static inline void +GEN8_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_TE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Partitioning, 12, 13) | + __gen_field(values->OutputTopology, 8, 9) | + __gen_field(values->TEDomain, 4, 5) | + __gen_field(values->TEMode, 1, 2) | + __gen_field(values->TEEnable, 0, 0) | + 0; + + dw[2] = + __gen_float(values->MaximumTessellationFactorOdd) | + 0; + + dw[3] = + __gen_float(values->MaximumTessellationFactorNotOdd) | + 0; + +} + +#define GEN8_3DSTATE_URB_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_URB_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 50, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_URB_DS_length 0x00000002 + +struct GEN8_3DSTATE_URB_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t DSURBStartingAddress; + uint32_t DSURBEntryAllocationSize; + uint32_t DSNumberofURBEntries; +}; + +static inline void +GEN8_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_URB_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DSURBStartingAddress, 25, 31) | + __gen_field(values->DSURBEntryAllocationSize, 16, 24) | + __gen_field(values->DSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN8_3DSTATE_URB_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_URB_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 51, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_URB_GS_length 0x00000002 + +struct GEN8_3DSTATE_URB_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t GSURBStartingAddress; + uint32_t GSURBEntryAllocationSize; + uint32_t GSNumberofURBEntries; +}; + +static inline void +GEN8_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_URB_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->GSURBStartingAddress, 25, 31) | + __gen_field(values->GSURBEntryAllocationSize, 16, 24) | + __gen_field(values->GSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN8_3DSTATE_URB_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_URB_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 49, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_URB_HS_length 0x00000002 + +struct GEN8_3DSTATE_URB_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t HSURBStartingAddress; + uint32_t HSURBEntryAllocationSize; + uint32_t HSNumberofURBEntries; +}; + +static inline void +GEN8_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_URB_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->HSURBStartingAddress, 25, 31) | + __gen_field(values->HSURBEntryAllocationSize, 16, 24) | + __gen_field(values->HSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN8_3DSTATE_VERTEX_BUFFERS_length_bias 0x00000002 +#define GEN8_3DSTATE_VERTEX_BUFFERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 8 + +#define GEN8_3DSTATE_VERTEX_BUFFERS_length 0x00000000 + +#define GEN8_VERTEX_BUFFER_STATE_length 0x00000004 + +struct GEN8_VERTEX_BUFFER_STATE { + uint32_t VertexBufferIndex; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; + uint32_t AddressModifyEnable; + bool NullVertexBuffer; + uint32_t BufferPitch; + __gen_address_type BufferStartingAddress; + uint32_t BufferSize; +}; + +static inline void +GEN8_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_VERTEX_BUFFER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_MemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(dw_MemoryObjectControlState, 16, 22) | + __gen_field(values->AddressModifyEnable, 14, 14) | + __gen_field(values->NullVertexBuffer, 13, 13) | + __gen_field(values->BufferPitch, 0, 11) | + 0; + + uint32_t dw1 = + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->BufferSize, 0, 31) | + 0; + +} + +struct GEN8_3DSTATE_VERTEX_BUFFERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VERTEX_BUFFERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_VERTEX_ELEMENTS_length_bias 0x00000002 +#define GEN8_3DSTATE_VERTEX_ELEMENTS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 9 + +#define GEN8_3DSTATE_VERTEX_ELEMENTS_length 0x00000000 + +#define GEN8_VERTEX_ELEMENT_STATE_length 0x00000002 + +struct GEN8_VERTEX_ELEMENT_STATE { + uint32_t VertexBufferIndex; + bool Valid; + uint32_t SourceElementFormat; + bool EdgeFlagEnable; + uint32_t SourceElementOffset; + uint32_t Component0Control; + uint32_t Component1Control; + uint32_t Component2Control; + uint32_t Component3Control; +}; + +static inline void +GEN8_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_VERTEX_ELEMENT_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->Valid, 25, 25) | + __gen_field(values->SourceElementFormat, 16, 24) | + __gen_field(values->EdgeFlagEnable, 15, 15) | + __gen_field(values->SourceElementOffset, 0, 11) | + 0; + + dw[1] = + __gen_field(values->Component0Control, 28, 30) | + __gen_field(values->Component1Control, 24, 26) | + __gen_field(values->Component2Control, 20, 22) | + __gen_field(values->Component3Control, 16, 18) | + 0; + +} + +struct GEN8_3DSTATE_VERTEX_ELEMENTS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VERTEX_ELEMENTS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_VF_length_bias 0x00000002 +#define GEN8_3DSTATE_VF_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 12, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_VF_length 0x00000002 + +struct GEN8_3DSTATE_VF { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool IndexedDrawCutIndexEnable; + uint32_t DwordLength; + uint32_t CutIndex; +}; + +static inline void +GEN8_3DSTATE_VF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->IndexedDrawCutIndexEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CutIndex, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_VF_INSTANCING_length_bias 0x00000002 +#define GEN8_3DSTATE_VF_INSTANCING_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 73, \ + .DwordLength = 1 + +#define GEN8_3DSTATE_VF_INSTANCING_length 0x00000003 + +struct GEN8_3DSTATE_VF_INSTANCING { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool InstancingEnable; + uint32_t VertexElementIndex; + uint32_t InstanceDataStepRate; +}; + +static inline void +GEN8_3DSTATE_VF_INSTANCING_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VF_INSTANCING * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InstancingEnable, 8, 8) | + __gen_field(values->VertexElementIndex, 0, 5) | + 0; + + dw[2] = + __gen_field(values->InstanceDataStepRate, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_VF_SGVS_length_bias 0x00000002 +#define GEN8_3DSTATE_VF_SGVS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 74, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_VF_SGVS_length 0x00000002 + +struct GEN8_3DSTATE_VF_SGVS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool InstanceIDEnable; +#define COMP_0 0 +#define COMP_1 1 +#define COMP_2 2 +#define COMP_3 3 + uint32_t InstanceIDComponentNumber; + uint32_t InstanceIDElementOffset; + bool VertexIDEnable; +#define COMP_0 0 +#define COMP_1 1 +#define COMP_2 2 +#define COMP_3 3 + uint32_t VertexIDComponentNumber; + uint32_t VertexIDElementOffset; +}; + +static inline void +GEN8_3DSTATE_VF_SGVS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VF_SGVS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InstanceIDEnable, 31, 31) | + __gen_field(values->InstanceIDComponentNumber, 29, 30) | + __gen_field(values->InstanceIDElementOffset, 16, 21) | + __gen_field(values->VertexIDEnable, 15, 15) | + __gen_field(values->VertexIDComponentNumber, 13, 14) | + __gen_field(values->VertexIDElementOffset, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_VF_STATISTICS_length_bias 0x00000001 +#define GEN8_3DSTATE_VF_STATISTICS_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 11 + +#define GEN8_3DSTATE_VF_STATISTICS_length 0x00000001 + +struct GEN8_3DSTATE_VF_STATISTICS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool StatisticsEnable; +}; + +static inline void +GEN8_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VF_STATISTICS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->StatisticsEnable, 0, 0) | + 0; + +} + +#define GEN8_3DSTATE_VF_TOPOLOGY_length_bias 0x00000002 +#define GEN8_3DSTATE_VF_TOPOLOGY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 75, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_VF_TOPOLOGY_length 0x00000002 + +struct GEN8_3DSTATE_VF_TOPOLOGY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PrimitiveTopologyType; +}; + +static inline void +GEN8_3DSTATE_VF_TOPOLOGY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VF_TOPOLOGY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PrimitiveTopologyType, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length_bias 0x00000002 +#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 35, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 + +struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t CCViewportPointer; +}; + +static inline void +GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->CCViewportPointer, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length_bias 0x00000002 +#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 33, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 + +struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SFClipViewportPointer; +}; + +static inline void +GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SFClipViewportPointer, 6, 31) | + 0; + +} + +#define GEN8_3DSTATE_WM_length_bias 0x00000002 +#define GEN8_3DSTATE_WM_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_WM_length 0x00000002 + +struct GEN8_3DSTATE_WM { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool StatisticsEnable; + bool LegacyDepthBufferClearEnable; + bool LegacyDepthBufferResolveEnable; + bool LegacyHierarchicalDepthBufferResolveEnable; + bool LegacyDiamondLineRasterization; +#define NORMAL 0 +#define PSEXEC 1 +#define PREPS 2 + uint32_t EarlyDepthStencilControl; +#define Normal 0 +#define ForceOff 1 +#define ForceON 2 + uint32_t ForceThreadDispatchEnable; +#define INTERP_PIXEL 0 +#define INTERP_CENTROID 2 +#define INTERP_SAMPLE 3 + uint32_t PositionZWInterpolationMode; + uint32_t BarycentricInterpolationMode; +#define _05pixels 0 +#define _10pixels 1 +#define _20pixels 2 +#define _40pixels 3 + uint32_t LineEndCapAntialiasingRegionWidth; +#define _05pixels 0 +#define _10pixels 1 +#define _20pixels 2 +#define _40pixels 3 + uint32_t LineAntialiasingRegionWidth; + bool PolygonStippleEnable; + bool LineStippleEnable; +#define RASTRULE_UPPER_LEFT 0 +#define RASTRULE_UPPER_RIGHT 1 + uint32_t PointRasterizationRule; +#define Normal 0 +#define ForceOff 1 +#define ForceON 2 + uint32_t ForceKillPixelEnable; +}; + +static inline void +GEN8_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_WM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StatisticsEnable, 31, 31) | + __gen_field(values->LegacyDepthBufferClearEnable, 30, 30) | + __gen_field(values->LegacyDepthBufferResolveEnable, 28, 28) | + __gen_field(values->LegacyHierarchicalDepthBufferResolveEnable, 27, 27) | + __gen_field(values->LegacyDiamondLineRasterization, 26, 26) | + __gen_field(values->EarlyDepthStencilControl, 21, 22) | + __gen_field(values->ForceThreadDispatchEnable, 19, 20) | + __gen_field(values->PositionZWInterpolationMode, 17, 18) | + __gen_field(values->BarycentricInterpolationMode, 11, 16) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 8, 9) | + __gen_field(values->LineAntialiasingRegionWidth, 6, 7) | + __gen_field(values->PolygonStippleEnable, 4, 4) | + __gen_field(values->LineStippleEnable, 3, 3) | + __gen_field(values->PointRasterizationRule, 2, 2) | + __gen_field(values->ForceKillPixelEnable, 0, 1) | + 0; + +} + +#define GEN8_3DSTATE_WM_CHROMAKEY_length_bias 0x00000002 +#define GEN8_3DSTATE_WM_CHROMAKEY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 76, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_WM_CHROMAKEY_length 0x00000002 + +struct GEN8_3DSTATE_WM_CHROMAKEY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool ChromaKeyKillEnable; +}; + +static inline void +GEN8_3DSTATE_WM_CHROMAKEY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_WM_CHROMAKEY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ChromaKeyKillEnable, 31, 31) | + 0; + +} + +#define GEN8_3DSTATE_WM_DEPTH_STENCIL_length_bias 0x00000002 +#define GEN8_3DSTATE_WM_DEPTH_STENCIL_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 78, \ + .DwordLength = 1 + +#define GEN8_3DSTATE_WM_DEPTH_STENCIL_length 0x00000003 + +struct GEN8_3DSTATE_WM_DEPTH_STENCIL { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StencilFailOp; + uint32_t StencilPassDepthFailOp; + uint32_t StencilPassDepthPassOp; + uint32_t BackfaceStencilTestFunction; + uint32_t BackfaceStencilFailOp; + uint32_t BackfaceStencilPassDepthFailOp; + uint32_t BackfaceStencilPassDepthPassOp; + uint32_t StencilTestFunction; + uint32_t DepthTestFunction; + bool DoubleSidedStencilEnable; + bool StencilTestEnable; + bool StencilBufferWriteEnable; + bool DepthTestEnable; + bool DepthBufferWriteEnable; + uint32_t StencilTestMask; + uint32_t StencilWriteMask; + uint32_t BackfaceStencilTestMask; + uint32_t BackfaceStencilWriteMask; +}; + +static inline void +GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_WM_DEPTH_STENCIL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StencilFailOp, 29, 31) | + __gen_field(values->StencilPassDepthFailOp, 26, 28) | + __gen_field(values->StencilPassDepthPassOp, 23, 25) | + __gen_field(values->BackfaceStencilTestFunction, 20, 22) | + __gen_field(values->BackfaceStencilFailOp, 17, 19) | + __gen_field(values->BackfaceStencilPassDepthFailOp, 14, 16) | + __gen_field(values->BackfaceStencilPassDepthPassOp, 11, 13) | + __gen_field(values->StencilTestFunction, 8, 10) | + __gen_field(values->DepthTestFunction, 5, 7) | + __gen_field(values->DoubleSidedStencilEnable, 4, 4) | + __gen_field(values->StencilTestEnable, 3, 3) | + __gen_field(values->StencilBufferWriteEnable, 2, 2) | + __gen_field(values->DepthTestEnable, 1, 1) | + __gen_field(values->DepthBufferWriteEnable, 0, 0) | + 0; + + dw[2] = + __gen_field(values->StencilTestMask, 24, 31) | + __gen_field(values->StencilWriteMask, 16, 23) | + __gen_field(values->BackfaceStencilTestMask, 8, 15) | + __gen_field(values->BackfaceStencilWriteMask, 0, 7) | + 0; + +} + +#define GEN8_3DSTATE_WM_HZ_OP_length_bias 0x00000002 +#define GEN8_3DSTATE_WM_HZ_OP_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 82, \ + .DwordLength = 3 + +#define GEN8_3DSTATE_WM_HZ_OP_length 0x00000005 + +struct GEN8_3DSTATE_WM_HZ_OP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool StencilBufferClearEnable; + bool DepthBufferClearEnable; + bool ScissorRectangleEnable; + bool DepthBufferResolveEnable; + bool HierarchicalDepthBufferResolveEnable; + uint32_t PixelPositionOffsetEnable; + bool FullSurfaceDepthClear; + uint32_t StencilClearValue; + uint32_t NumberofMultisamples; + uint32_t ClearRectangleYMin; + uint32_t ClearRectangleXMin; + uint32_t ClearRectangleYMax; + uint32_t ClearRectangleXMax; + uint32_t SampleMask; +}; + +static inline void +GEN8_3DSTATE_WM_HZ_OP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_WM_HZ_OP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StencilBufferClearEnable, 31, 31) | + __gen_field(values->DepthBufferClearEnable, 30, 30) | + __gen_field(values->ScissorRectangleEnable, 29, 29) | + __gen_field(values->DepthBufferResolveEnable, 28, 28) | + __gen_field(values->HierarchicalDepthBufferResolveEnable, 27, 27) | + __gen_field(values->PixelPositionOffsetEnable, 26, 26) | + __gen_field(values->FullSurfaceDepthClear, 25, 25) | + __gen_field(values->StencilClearValue, 16, 23) | + __gen_field(values->NumberofMultisamples, 13, 15) | + 0; + + dw[2] = + __gen_field(values->ClearRectangleYMin, 16, 31) | + __gen_field(values->ClearRectangleXMin, 0, 15) | + 0; + + dw[3] = + __gen_field(values->ClearRectangleYMax, 16, 31) | + __gen_field(values->ClearRectangleXMax, 0, 15) | + 0; + + dw[4] = + __gen_field(values->SampleMask, 0, 15) | + 0; + +} + +#define GEN8_GPGPU_WALKER_length_bias 0x00000002 +#define GEN8_GPGPU_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 5, \ + .DwordLength = 13 + +#define GEN8_GPGPU_WALKER_length 0x0000000f + +struct GEN8_GPGPU_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + bool IndirectParameterEnable; + bool PredicateEnable; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; +#define SIMD8 0 +#define SIMD16 1 +#define SIMD32 2 + uint32_t SIMDSize; + uint32_t ThreadDepthCounterMaximum; + uint32_t ThreadHeightCounterMaximum; + uint32_t ThreadWidthCounterMaximum; + uint32_t ThreadGroupIDStartingX; + uint32_t ThreadGroupIDXDimension; + uint32_t ThreadGroupIDStartingY; + uint32_t ThreadGroupIDYDimension; + uint32_t ThreadGroupIDStartingResumeZ; + uint32_t ThreadGroupIDZDimension; + uint32_t RightExecutionMask; + uint32_t BottomExecutionMask; +}; + +static inline void +GEN8_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_GPGPU_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 6, 31) | + 0; + + dw[4] = + __gen_field(values->SIMDSize, 30, 31) | + __gen_field(values->ThreadDepthCounterMaximum, 16, 21) | + __gen_field(values->ThreadHeightCounterMaximum, 8, 13) | + __gen_field(values->ThreadWidthCounterMaximum, 0, 5) | + 0; + + dw[5] = + __gen_field(values->ThreadGroupIDStartingX, 0, 31) | + 0; + + dw[6] = + 0; + + dw[7] = + __gen_field(values->ThreadGroupIDXDimension, 0, 31) | + 0; + + dw[8] = + __gen_field(values->ThreadGroupIDStartingY, 0, 31) | + 0; + + dw[9] = + 0; + + dw[10] = + __gen_field(values->ThreadGroupIDYDimension, 0, 31) | + 0; + + dw[11] = + __gen_field(values->ThreadGroupIDStartingResumeZ, 0, 31) | + 0; + + dw[12] = + __gen_field(values->ThreadGroupIDZDimension, 0, 31) | + 0; + + dw[13] = + __gen_field(values->RightExecutionMask, 0, 31) | + 0; + + dw[14] = + __gen_field(values->BottomExecutionMask, 0, 31) | + 0; + +} + +#define GEN8_MEDIA_CURBE_LOAD_length_bias 0x00000002 +#define GEN8_MEDIA_CURBE_LOAD_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 1, \ + .DwordLength = 2 + +#define GEN8_MEDIA_CURBE_LOAD_length 0x00000004 + +struct GEN8_MEDIA_CURBE_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t CURBETotalDataLength; + uint32_t CURBEDataStartAddress; +}; + +static inline void +GEN8_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_CURBE_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->CURBETotalDataLength, 0, 16) | + 0; + + dw[3] = + __gen_field(values->CURBEDataStartAddress, 0, 31) | + 0; + +} + +#define GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length_bias 0x00000002 +#define GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_header\ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 2, \ + .DwordLength = 2 + +#define GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 + +struct GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorTotalLength; + uint32_t InterfaceDescriptorDataStartAddress; +}; + +static inline void +GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->InterfaceDescriptorTotalLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->InterfaceDescriptorDataStartAddress, 0, 31) | + 0; + +} + +#define GEN8_MEDIA_OBJECT_length_bias 0x00000002 +#define GEN8_MEDIA_OBJECT_header \ + .CommandType = 3, \ + .MediaCommandPipeline = 2, \ + .MediaCommandOpcode = 1, \ + .MediaCommandSubOpcode = 0 + +#define GEN8_MEDIA_OBJECT_length 0x00000000 + +struct GEN8_MEDIA_OBJECT { + uint32_t CommandType; + uint32_t MediaCommandPipeline; + uint32_t MediaCommandOpcode; + uint32_t MediaCommandSubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; + uint32_t ForceDestination; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; +#define Slice0 0 +#define Slice1 1 +#define Slice2 2 + uint32_t SliceDestinationSelect; +#define SubSlice2 2 +#define SubSlice1 1 +#define SubSlice0 0 + uint32_t SubSliceDestinationSelect; + uint32_t IndirectDataLength; + __gen_address_type IndirectDataStartAddress; + uint32_t ScoredboardY; + uint32_t ScoreboardX; + uint32_t ScoreboardColor; + bool ScoreboardMask; + /* variable length fields follow */ +}; + +static inline void +GEN8_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_OBJECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MediaCommandPipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->MediaCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->ForceDestination, 22, 22) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->SliceDestinationSelect, 19, 20) | + __gen_field(values->SubSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); + + dw[4] = + __gen_field(values->ScoredboardY, 16, 24) | + __gen_field(values->ScoreboardX, 0, 8) | + 0; + + dw[5] = + __gen_field(values->ScoreboardColor, 16, 19) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_MEDIA_OBJECT_GRPID_length_bias 0x00000002 +#define GEN8_MEDIA_OBJECT_GRPID_header \ + .CommandType = 3, \ + .MediaCommandPipeline = 2, \ + .MediaCommandOpcode = 1, \ + .MediaCommandSubOpcode = 6 + +#define GEN8_MEDIA_OBJECT_GRPID_length 0x00000000 + +struct GEN8_MEDIA_OBJECT_GRPID { + uint32_t CommandType; + uint32_t MediaCommandPipeline; + uint32_t MediaCommandOpcode; + uint32_t MediaCommandSubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t EndofThreadGroup; + uint32_t ForceDestination; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; +#define Slice0 0 +#define Slice1 1 +#define Slice2 2 + uint32_t SliceDestinationSelect; +#define SubSlice2 2 +#define SubSlice1 1 +#define SubSlice0 0 + uint32_t SubSliceDestinationSelect; + uint32_t IndirectDataLength; + __gen_address_type IndirectDataStartAddress; + uint32_t ScoreboardY; + uint32_t ScoreboardX; + uint32_t ScoreboardColor; + bool ScoreboardMask; + uint32_t GroupID; + /* variable length fields follow */ +}; + +static inline void +GEN8_MEDIA_OBJECT_GRPID_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_OBJECT_GRPID * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MediaCommandPipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->MediaCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->EndofThreadGroup, 23, 23) | + __gen_field(values->ForceDestination, 22, 22) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->SliceDestinationSelect, 19, 20) | + __gen_field(values->SubSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); + + dw[4] = + __gen_field(values->ScoreboardY, 16, 24) | + __gen_field(values->ScoreboardX, 0, 8) | + 0; + + dw[5] = + __gen_field(values->ScoreboardColor, 16, 19) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->GroupID, 0, 31) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_MEDIA_OBJECT_PRT_length_bias 0x00000002 +#define GEN8_MEDIA_OBJECT_PRT_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 2, \ + .DwordLength = 14 + +#define GEN8_MEDIA_OBJECT_PRT_length 0x00000010 + +struct GEN8_MEDIA_OBJECT_PRT { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; + bool PRT_FenceNeeded; +#define Rootthreadqueue 0 +#define VFEstateflush 1 + uint32_t PRT_FenceType; + uint32_t InlineData[12]; +}; + +static inline void +GEN8_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_OBJECT_PRT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->PRT_FenceNeeded, 23, 23) | + __gen_field(values->PRT_FenceType, 22, 22) | + 0; + + dw[3] = + 0; + + for (uint32_t i = 0, j = 4; i < 12; i += 1, j++) { + dw[j] = + __gen_field(values->InlineData[i + 0], 0, 31) | + 0; + } + +} + +#define GEN8_MEDIA_OBJECT_WALKER_length_bias 0x00000002 +#define GEN8_MEDIA_OBJECT_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 3 + +#define GEN8_MEDIA_OBJECT_WALKER_length 0x00000000 + +struct GEN8_MEDIA_OBJECT_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; + uint32_t GroupIDLoopSelect; + bool ScoreboardMask; + uint32_t ColorCountMinusOne; + uint32_t MiddleLoopExtraSteps; + uint32_t LocalMidLoopUnitY; + uint32_t MidLoopUnitX; + uint32_t GlobalLoopExecCount; + uint32_t LocalLoopExecCount; + uint32_t BlockResolutionY; + uint32_t BlockResolutionX; + uint32_t LocalStartY; + uint32_t LocalStartX; + uint32_t LocalOuterLoopStrideY; + uint32_t LocalOuterLoopStrideX; + uint32_t LocalInnerLoopUnitY; + uint32_t LocalInnerLoopUnitX; + uint32_t GlobalResolutionY; + uint32_t GlobalResolutionX; + uint32_t GlobalStartY; + uint32_t GlobalStartX; + uint32_t GlobalOuterLoopStrideY; + uint32_t GlobalOuterLoopStrideX; + uint32_t GlobalInnerLoopUnitY; + uint32_t GlobalInnerLoopUnitX; + /* variable length fields follow */ +}; + +static inline void +GEN8_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_OBJECT_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 0, 31) | + 0; + + dw[4] = + 0; + + dw[5] = + __gen_field(values->GroupIDLoopSelect, 8, 31) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->ColorCountMinusOne, 24, 27) | + __gen_field(values->MiddleLoopExtraSteps, 16, 20) | + __gen_field(values->LocalMidLoopUnitY, 12, 13) | + __gen_field(values->MidLoopUnitX, 8, 9) | + 0; + + dw[7] = + __gen_field(values->GlobalLoopExecCount, 16, 25) | + __gen_field(values->LocalLoopExecCount, 0, 9) | + 0; + + dw[8] = + __gen_field(values->BlockResolutionY, 16, 24) | + __gen_field(values->BlockResolutionX, 0, 8) | + 0; + + dw[9] = + __gen_field(values->LocalStartY, 16, 24) | + __gen_field(values->LocalStartX, 0, 8) | + 0; + + dw[10] = + 0; + + dw[11] = + __gen_field(values->LocalOuterLoopStrideY, 16, 25) | + __gen_field(values->LocalOuterLoopStrideX, 0, 9) | + 0; + + dw[12] = + __gen_field(values->LocalInnerLoopUnitY, 16, 25) | + __gen_field(values->LocalInnerLoopUnitX, 0, 9) | + 0; + + dw[13] = + __gen_field(values->GlobalResolutionY, 16, 24) | + __gen_field(values->GlobalResolutionX, 0, 8) | + 0; + + dw[14] = + __gen_field(values->GlobalStartY, 16, 25) | + __gen_field(values->GlobalStartX, 0, 9) | + 0; + + dw[15] = + __gen_field(values->GlobalOuterLoopStrideY, 16, 25) | + __gen_field(values->GlobalOuterLoopStrideX, 0, 9) | + 0; + + dw[16] = + __gen_field(values->GlobalInnerLoopUnitY, 16, 25) | + __gen_field(values->GlobalInnerLoopUnitX, 0, 9) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_MEDIA_STATE_FLUSH_length_bias 0x00000002 +#define GEN8_MEDIA_STATE_FLUSH_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 4, \ + .DwordLength = 0 + +#define GEN8_MEDIA_STATE_FLUSH_length 0x00000002 + +struct GEN8_MEDIA_STATE_FLUSH { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + bool FlushtoGO; + uint32_t WatermarkRequired; + uint32_t InterfaceDescriptorOffset; +}; + +static inline void +GEN8_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_STATE_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->FlushtoGO, 7, 7) | + __gen_field(values->WatermarkRequired, 6, 6) | + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + +} + +#define GEN8_MEDIA_VFE_STATE_length_bias 0x00000002 +#define GEN8_MEDIA_VFE_STATE_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 0, \ + .DwordLength = 7 + +#define GEN8_MEDIA_VFE_STATE_length 0x00000009 + +struct GEN8_MEDIA_VFE_STATE { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t ScratchSpaceBasePointer; + uint32_t StackSize; + uint32_t PerThreadScratchSpace; + uint32_t ScratchSpaceBasePointerHigh; + uint32_t MaximumNumberofThreads; + uint32_t NumberofURBEntries; +#define Maintainingtheexistingtimestampstate 0 +#define Resettingrelativetimerandlatchingtheglobaltimestamp 1 + uint32_t ResetGatewayTimer; +#define MaintainingOpenGatewayForwardMsgCloseGatewayprotocollegacymode 0 +#define BypassingOpenGatewayCloseGatewayprotocol 1 + uint32_t BypassGatewayControl; + uint32_t SliceDisable; + uint32_t URBEntryAllocationSize; + uint32_t CURBEAllocationSize; +#define Scoreboarddisabled 0 +#define Scoreboardenabled 1 + uint32_t ScoreboardEnable; +#define StallingScoreboard 0 +#define NonStallingScoreboard 1 + uint32_t ScoreboardType; + uint32_t ScoreboardMask; + uint32_t Scoreboard3DeltaY; + uint32_t Scoreboard3DeltaX; + uint32_t Scoreboard2DeltaY; + uint32_t Scoreboard2DeltaX; + uint32_t Scoreboard1DeltaY; + uint32_t Scoreboard1DeltaX; + uint32_t Scoreboard0DeltaY; + uint32_t Scoreboard0DeltaX; + uint32_t Scoreboard7DeltaY; + uint32_t Scoreboard7DeltaX; + uint32_t Scoreboard6DeltaY; + uint32_t Scoreboard6DeltaX; + uint32_t Scoreboard5DeltaY; + uint32_t Scoreboard5DeltaX; + uint32_t Scoreboard4DeltaY; + uint32_t Scoreboard4DeltaX; +}; + +static inline void +GEN8_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_VFE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->StackSize, 4, 7) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[2] = + __gen_offset(values->ScratchSpaceBasePointerHigh, 0, 15) | + 0; + + dw[3] = + __gen_field(values->MaximumNumberofThreads, 16, 31) | + __gen_field(values->NumberofURBEntries, 8, 15) | + __gen_field(values->ResetGatewayTimer, 7, 7) | + __gen_field(values->BypassGatewayControl, 6, 6) | + 0; + + dw[4] = + __gen_field(values->SliceDisable, 0, 1) | + 0; + + dw[5] = + __gen_field(values->URBEntryAllocationSize, 16, 31) | + __gen_field(values->CURBEAllocationSize, 0, 15) | + 0; + + dw[6] = + __gen_field(values->ScoreboardEnable, 31, 31) | + __gen_field(values->ScoreboardType, 30, 30) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[7] = + __gen_field(values->Scoreboard3DeltaY, 28, 31) | + __gen_field(values->Scoreboard3DeltaX, 24, 27) | + __gen_field(values->Scoreboard2DeltaY, 20, 23) | + __gen_field(values->Scoreboard2DeltaX, 16, 19) | + __gen_field(values->Scoreboard1DeltaY, 12, 15) | + __gen_field(values->Scoreboard1DeltaX, 8, 11) | + __gen_field(values->Scoreboard0DeltaY, 4, 7) | + __gen_field(values->Scoreboard0DeltaX, 0, 3) | + 0; + + dw[8] = + __gen_field(values->Scoreboard7DeltaY, 28, 31) | + __gen_field(values->Scoreboard7DeltaX, 24, 27) | + __gen_field(values->Scoreboard6DeltaY, 20, 23) | + __gen_field(values->Scoreboard6DeltaX, 16, 19) | + __gen_field(values->Scoreboard5DeltaY, 12, 15) | + __gen_field(values->Scoreboard5DeltaX, 8, 11) | + __gen_field(values->Scoreboard4DeltaY, 4, 7) | + __gen_field(values->Scoreboard4DeltaX, 0, 3) | + 0; + +} + +#define GEN8_MI_ARB_CHECK_length_bias 0x00000001 +#define GEN8_MI_ARB_CHECK_header \ + .CommandType = 0, \ + .MICommandOpcode = 5 + +#define GEN8_MI_ARB_CHECK_length 0x00000001 + +struct GEN8_MI_ARB_CHECK { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN8_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_ARB_CHECK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN8_MI_BATCH_BUFFER_END_length_bias 0x00000001 +#define GEN8_MI_BATCH_BUFFER_END_header \ + .CommandType = 0, \ + .MICommandOpcode = 10 + +#define GEN8_MI_BATCH_BUFFER_END_length 0x00000001 + +struct GEN8_MI_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN8_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN8_MI_BATCH_BUFFER_START_length_bias 0x00000002 +#define GEN8_MI_BATCH_BUFFER_START_header \ + .CommandType = 0, \ + .MICommandOpcode = 49, \ + .DwordLength = 1 + +#define GEN8_MI_BATCH_BUFFER_START_length 0x00000003 + +struct GEN8_MI_BATCH_BUFFER_START { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define _1stlevelbatch 0 +#define _2ndlevelbatch 1 + uint32_t _2ndLevelBatchBuffer; + bool AddOffsetEnable; + uint32_t PredicationEnable; + bool ResourceStreamerEnable; +#define ASI_GGTT 0 +#define ASI_PPGTT 1 + uint32_t AddressSpaceIndicator; + uint32_t DwordLength; + __gen_address_type BatchBufferStartAddress; +}; + +static inline void +GEN8_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_BATCH_BUFFER_START * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->_2ndLevelBatchBuffer, 22, 22) | + __gen_field(values->AddOffsetEnable, 16, 16) | + __gen_field(values->PredicationEnable, 15, 15) | + __gen_field(values->ResourceStreamerEnable, 10, 10) | + __gen_field(values->AddressSpaceIndicator, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->BatchBufferStartAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + +} + +#define GEN8_MI_CLFLUSH_length_bias 0x00000002 +#define GEN8_MI_CLFLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 39 + +#define GEN8_MI_CLFLUSH_length 0x00000000 + +struct GEN8_MI_CLFLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTT; + uint32_t DwordLength; + __gen_address_type PageBaseAddress; + uint32_t StartingCachelineOffset; + /* variable length fields follow */ +}; + +static inline void +GEN8_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_CLFLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + __gen_field(values->StartingCachelineOffset, 6, 11) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->PageBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + /* variable length fields follow */ +} + +#define GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_length_bias 0x00000002 +#define GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_header\ + .CommandType = 0, \ + .MICommandOpcode = 54, \ + .UseGlobalGTT = 0, \ + .CompareSemaphore = 0, \ + .DwordLength = 1 + +#define GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000003 + +struct GEN8_MI_CONDITIONAL_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t CompareSemaphore; + uint32_t DwordLength; + uint32_t CompareDataDword; + __gen_address_type CompareAddress; +}; + +static inline void +GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->CompareSemaphore, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CompareDataDword, 0, 31) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->CompareAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN8_MI_COPY_MEM_MEM_length_bias 0x00000002 +#define GEN8_MI_COPY_MEM_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 46, \ + .DwordLength = 3 + +#define GEN8_MI_COPY_MEM_MEM_length 0x00000005 + +struct GEN8_MI_COPY_MEM_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTTSource; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTTDestination; + uint32_t DwordLength; + __gen_address_type DestinationMemoryAddress; + __gen_address_type SourceMemoryAddress; +}; + +static inline void +GEN8_MI_COPY_MEM_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_COPY_MEM_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTTSource, 22, 22) | + __gen_field(values->UseGlobalGTTDestination, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->DestinationMemoryAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + uint32_t dw3 = + 0; + + uint64_t qw3 = + __gen_combine_address(data, &dw[3], values->SourceMemoryAddress, dw3); + + dw[3] = qw3; + dw[4] = qw3 >> 32; + +} + +#define GEN8_MI_LOAD_REGISTER_IMM_length_bias 0x00000002 +#define GEN8_MI_LOAD_REGISTER_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 34, \ + .DwordLength = 1 + +#define GEN8_MI_LOAD_REGISTER_IMM_length 0x00000003 + +struct GEN8_MI_LOAD_REGISTER_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t ByteWriteDisables; + uint32_t DwordLength; + uint32_t RegisterOffset; + uint32_t DataDWord; +}; + +static inline void +GEN8_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_REGISTER_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ByteWriteDisables, 8, 11) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterOffset, 2, 22) | + 0; + + dw[2] = + __gen_field(values->DataDWord, 0, 31) | + 0; + +} + +#define GEN8_MI_LOAD_REGISTER_MEM_length_bias 0x00000002 +#define GEN8_MI_LOAD_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 41, \ + .DwordLength = 2 + +#define GEN8_MI_LOAD_REGISTER_MEM_length 0x00000004 + +struct GEN8_MI_LOAD_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t AsyncModeEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN8_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->AsyncModeEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN8_MI_LOAD_SCAN_LINES_EXCL_length_bias 0x00000002 +#define GEN8_MI_LOAD_SCAN_LINES_EXCL_header \ + .CommandType = 0, \ + .MICommandOpcode = 19, \ + .DwordLength = 0 + +#define GEN8_MI_LOAD_SCAN_LINES_EXCL_length 0x00000002 + +struct GEN8_MI_LOAD_SCAN_LINES_EXCL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define DisplayPlaneA 0 +#define DisplayPlaneB 1 +#define DisplayPlaneC 4 + uint32_t DisplayPlaneSelect; + uint32_t DwordLength; + uint32_t StartScanLineNumber; + uint32_t EndScanLineNumber; +}; + +static inline void +GEN8_MI_LOAD_SCAN_LINES_EXCL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_SCAN_LINES_EXCL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlaneSelect, 19, 21) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->StartScanLineNumber, 16, 28) | + __gen_field(values->EndScanLineNumber, 0, 12) | + 0; + +} + +#define GEN8_MI_LOAD_SCAN_LINES_INCL_length_bias 0x00000002 +#define GEN8_MI_LOAD_SCAN_LINES_INCL_header \ + .CommandType = 0, \ + .MICommandOpcode = 18, \ + .DwordLength = 0 + +#define GEN8_MI_LOAD_SCAN_LINES_INCL_length 0x00000002 + +struct GEN8_MI_LOAD_SCAN_LINES_INCL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define DisplayPlaneA 0 +#define DisplayPlaneB 1 +#define DisplayPlaneC 4 + uint32_t DisplayPlaneSelect; +#define NeverForward 0 +#define AlwaysForward 1 +#define ConditionallyForward 2 + bool ScanLineEventDoneForward; + uint32_t DwordLength; + uint32_t StartScanLineNumber; + uint32_t EndScanLineNumber; +}; + +static inline void +GEN8_MI_LOAD_SCAN_LINES_INCL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_SCAN_LINES_INCL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlaneSelect, 19, 21) | + __gen_field(values->ScanLineEventDoneForward, 17, 18) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->StartScanLineNumber, 16, 28) | + __gen_field(values->EndScanLineNumber, 0, 12) | + 0; + +} + +#define GEN8_MI_LOAD_URB_MEM_length_bias 0x00000002 +#define GEN8_MI_LOAD_URB_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 44, \ + .DwordLength = 2 + +#define GEN8_MI_LOAD_URB_MEM_length 0x00000004 + +struct GEN8_MI_LOAD_URB_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN8_MI_LOAD_URB_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_URB_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBAddress, 2, 14) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN8_MI_MATH_length_bias 0x00000002 +#define GEN8_MI_MATH_header \ + .CommandType = 0, \ + .MICommandOpcode = 26 + +#define GEN8_MI_MATH_length 0x00000000 + +struct GEN8_MI_MATH { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t ALUINSTRUCTION1; + uint32_t ALUINSTRUCTION2; + /* variable length fields follow */ +}; + +static inline void +GEN8_MI_MATH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_MATH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->ALUINSTRUCTION1, 0, 31) | + 0; + + dw[2] = + __gen_field(values->ALUINSTRUCTION2, 0, 31) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_MI_NOOP_length_bias 0x00000001 +#define GEN8_MI_NOOP_header \ + .CommandType = 0, \ + .MICommandOpcode = 0 + +#define GEN8_MI_NOOP_length 0x00000001 + +struct GEN8_MI_NOOP { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool IdentificationNumberRegisterWriteEnable; + uint32_t IdentificationNumber; +}; + +static inline void +GEN8_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_NOOP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->IdentificationNumberRegisterWriteEnable, 22, 22) | + __gen_field(values->IdentificationNumber, 0, 21) | + 0; + +} + +#define GEN8_MI_PREDICATE_length_bias 0x00000001 +#define GEN8_MI_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 12 + +#define GEN8_MI_PREDICATE_length 0x00000001 + +struct GEN8_MI_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define LOAD_KEEP 0 +#define LOAD_LOAD 2 +#define LOAD_LOADINV 3 + uint32_t LoadOperation; +#define COMBINE_SET 0 +#define COMBINE_AND 1 +#define COMBINE_OR 2 +#define COMBINE_XOR 3 + uint32_t CombineOperation; +#define COMPARE_SRCS_EQUAL 2 +#define COMPARE_DELTAS_EQUAL 3 + uint32_t CompareOperation; +}; + +static inline void +GEN8_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->LoadOperation, 6, 7) | + __gen_field(values->CombineOperation, 3, 4) | + __gen_field(values->CompareOperation, 0, 1) | + 0; + +} + +#define GEN8_MI_REPORT_HEAD_length_bias 0x00000001 +#define GEN8_MI_REPORT_HEAD_header \ + .CommandType = 0, \ + .MICommandOpcode = 7 + +#define GEN8_MI_REPORT_HEAD_length 0x00000001 + +struct GEN8_MI_REPORT_HEAD { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN8_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_REPORT_HEAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN8_MI_RS_CONTEXT_length_bias 0x00000001 +#define GEN8_MI_RS_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 15 + +#define GEN8_MI_RS_CONTEXT_length 0x00000001 + +struct GEN8_MI_RS_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define RS_RESTORE 0 +#define RS_SAVE 1 + uint32_t ResourceStreamerSave; +}; + +static inline void +GEN8_MI_RS_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_RS_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ResourceStreamerSave, 0, 0) | + 0; + +} + +#define GEN8_MI_RS_CONTROL_length_bias 0x00000001 +#define GEN8_MI_RS_CONTROL_header \ + .CommandType = 0, \ + .MICommandOpcode = 6 + +#define GEN8_MI_RS_CONTROL_length 0x00000001 + +struct GEN8_MI_RS_CONTROL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define RS_STOP 0 +#define RS_START 1 + uint32_t ResourceStreamerControl; +}; + +static inline void +GEN8_MI_RS_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_RS_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ResourceStreamerControl, 0, 0) | + 0; + +} + +#define GEN8_MI_RS_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN8_MI_RS_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 43, \ + .DwordLength = 2 + +#define GEN8_MI_RS_STORE_DATA_IMM_length 0x00000004 + +struct GEN8_MI_RS_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type DestinationAddress; + uint32_t CoreModeEnable; + uint32_t DataDWord0; +}; + +static inline void +GEN8_MI_RS_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_RS_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->DestinationAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + +} + +#define GEN8_MI_SET_CONTEXT_length_bias 0x00000002 +#define GEN8_MI_SET_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 24, \ + .DwordLength = 0 + +#define GEN8_MI_SET_CONTEXT_length 0x00000002 + +struct GEN8_MI_SET_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type LogicalContextAddress; + uint32_t ReservedMustbe1; + bool CoreModeEnable; + bool ResourceStreamerStateSaveEnable; + bool ResourceStreamerStateRestoreEnable; + uint32_t ForceRestore; + uint32_t RestoreInhibit; +}; + +static inline void +GEN8_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_SET_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->ReservedMustbe1, 8, 8) | + __gen_field(values->CoreModeEnable, 4, 4) | + __gen_field(values->ResourceStreamerStateSaveEnable, 3, 3) | + __gen_field(values->ResourceStreamerStateRestoreEnable, 2, 2) | + __gen_field(values->ForceRestore, 1, 1) | + __gen_field(values->RestoreInhibit, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->LogicalContextAddress, dw1); + +} + +#define GEN8_MI_SET_PREDICATE_length_bias 0x00000001 +#define GEN8_MI_SET_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 1 + +#define GEN8_MI_SET_PREDICATE_length 0x00000001 + +struct GEN8_MI_SET_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define NOOPNever 0 +#define NOOPonResult2clear 1 +#define NOOPonResult2set 2 +#define NOOPonResultclear 3 +#define NOOPonResultset 4 +#define Executewhenonesliceenabled 5 +#define Executewhentwoslicesareenabled 6 +#define Executewhenthreeslicesareenabled 7 +#define NOOPAlways 15 + uint32_t PREDICATEENABLE; +}; + +static inline void +GEN8_MI_SET_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_SET_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->PREDICATEENABLE, 0, 3) | + 0; + +} + +#define GEN8_MI_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN8_MI_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 32, \ + .DwordLength = 2 + +#define GEN8_MI_STORE_DATA_IMM_length 0x00000004 + +struct GEN8_MI_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + bool StoreQword; + uint32_t DwordLength; + __gen_address_type Address; + uint32_t CoreModeEnable; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN8_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->StoreQword, 21, 21) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->Address, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[4] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN8_MI_STORE_DATA_INDEX_length_bias 0x00000002 +#define GEN8_MI_STORE_DATA_INDEX_header \ + .CommandType = 0, \ + .MICommandOpcode = 33, \ + .DwordLength = 1 + +#define GEN8_MI_STORE_DATA_INDEX_length 0x00000003 + +struct GEN8_MI_STORE_DATA_INDEX { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UsePerProcessHardwareStatusPage; + uint32_t DwordLength; + uint32_t Offset; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN8_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_STORE_DATA_INDEX * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UsePerProcessHardwareStatusPage, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Offset, 2, 11) | + 0; + + dw[2] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[3] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN8_MI_STORE_URB_MEM_length_bias 0x00000002 +#define GEN8_MI_STORE_URB_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 45, \ + .DwordLength = 2 + +#define GEN8_MI_STORE_URB_MEM_length 0x00000004 + +struct GEN8_MI_STORE_URB_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN8_MI_STORE_URB_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_STORE_URB_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBAddress, 2, 14) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN8_MI_SUSPEND_FLUSH_length_bias 0x00000001 +#define GEN8_MI_SUSPEND_FLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 11 + +#define GEN8_MI_SUSPEND_FLUSH_length 0x00000001 + +struct GEN8_MI_SUSPEND_FLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool SuspendFlush; +}; + +static inline void +GEN8_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_SUSPEND_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->SuspendFlush, 0, 0) | + 0; + +} + +#define GEN8_MI_TOPOLOGY_FILTER_length_bias 0x00000001 +#define GEN8_MI_TOPOLOGY_FILTER_header \ + .CommandType = 0, \ + .MICommandOpcode = 13 + +#define GEN8_MI_TOPOLOGY_FILTER_length 0x00000001 + +struct GEN8_MI_TOPOLOGY_FILTER { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t TopologyFilterValue; +}; + +static inline void +GEN8_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_TOPOLOGY_FILTER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->TopologyFilterValue, 0, 5) | + 0; + +} + +#define GEN8_MI_UPDATE_GTT_length_bias 0x00000002 +#define GEN8_MI_UPDATE_GTT_header \ + .CommandType = 0, \ + .MICommandOpcode = 35 + +#define GEN8_MI_UPDATE_GTT_length 0x00000000 + +struct GEN8_MI_UPDATE_GTT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type EntryAddress; + /* variable length fields follow */ +}; + +static inline void +GEN8_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_UPDATE_GTT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->EntryAddress, dw1); + + /* variable length fields follow */ +} + +#define GEN8_MI_URB_ATOMIC_ALLOC_length_bias 0x00000001 +#define GEN8_MI_URB_ATOMIC_ALLOC_header \ + .CommandType = 0, \ + .MICommandOpcode = 9 + +#define GEN8_MI_URB_ATOMIC_ALLOC_length 0x00000001 + +struct GEN8_MI_URB_ATOMIC_ALLOC { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t URBAtomicStorageOffset; + uint32_t URBAtomicStorageSize; +}; + +static inline void +GEN8_MI_URB_ATOMIC_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_URB_ATOMIC_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->URBAtomicStorageOffset, 12, 19) | + __gen_field(values->URBAtomicStorageSize, 0, 8) | + 0; + +} + +#define GEN8_MI_URB_CLEAR_length_bias 0x00000002 +#define GEN8_MI_URB_CLEAR_header \ + .CommandType = 0, \ + .MICommandOpcode = 25, \ + .DwordLength = 0 + +#define GEN8_MI_URB_CLEAR_length 0x00000002 + +struct GEN8_MI_URB_CLEAR { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBClearLength; + uint32_t URBAddress; +}; + +static inline void +GEN8_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_URB_CLEAR * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBClearLength, 16, 29) | + __gen_offset(values->URBAddress, 0, 14) | + 0; + +} + +#define GEN8_MI_USER_INTERRUPT_length_bias 0x00000001 +#define GEN8_MI_USER_INTERRUPT_header \ + .CommandType = 0, \ + .MICommandOpcode = 2 + +#define GEN8_MI_USER_INTERRUPT_length 0x00000001 + +struct GEN8_MI_USER_INTERRUPT { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN8_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_USER_INTERRUPT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN8_MI_WAIT_FOR_EVENT_length_bias 0x00000001 +#define GEN8_MI_WAIT_FOR_EVENT_header \ + .CommandType = 0, \ + .MICommandOpcode = 3 + +#define GEN8_MI_WAIT_FOR_EVENT_length 0x00000001 + +struct GEN8_MI_WAIT_FOR_EVENT { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool DisplayPipeCVerticalBlankWaitEnable; + bool DisplaySpriteCFlipPendingWaitEnable; + bool DisplayPlaneCFlipPendingWaitEnable; + bool DisplayPipeCScanLineWaitEnable; + bool DisplayPipeBVerticalBlankWaitEnable; + bool DisplaySpriteBFlipPendingWaitEnable; + bool DisplayPlaneBFlipPendingWaitEnable; + bool DisplayPipeBScanLineWaitEnable; + bool DisplayPipeAVerticalBlankWaitEnable; + bool DisplaySpriteAFlipPendingWaitEnable; + bool DisplayPlaneAFlipPendingWaitEnable; + bool DisplayPipeAScanLineWaitEnable; +}; + +static inline void +GEN8_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_WAIT_FOR_EVENT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPipeCVerticalBlankWaitEnable, 21, 21) | + __gen_field(values->DisplaySpriteCFlipPendingWaitEnable, 20, 20) | + __gen_field(values->DisplayPlaneCFlipPendingWaitEnable, 15, 15) | + __gen_field(values->DisplayPipeCScanLineWaitEnable, 14, 14) | + __gen_field(values->DisplayPipeBVerticalBlankWaitEnable, 11, 11) | + __gen_field(values->DisplaySpriteBFlipPendingWaitEnable, 10, 10) | + __gen_field(values->DisplayPlaneBFlipPendingWaitEnable, 9, 9) | + __gen_field(values->DisplayPipeBScanLineWaitEnable, 8, 8) | + __gen_field(values->DisplayPipeAVerticalBlankWaitEnable, 3, 3) | + __gen_field(values->DisplaySpriteAFlipPendingWaitEnable, 2, 2) | + __gen_field(values->DisplayPlaneAFlipPendingWaitEnable, 1, 1) | + __gen_field(values->DisplayPipeAScanLineWaitEnable, 0, 0) | + 0; + +} + +#define GEN8_PIPE_CONTROL_length_bias 0x00000002 +#define GEN8_PIPE_CONTROL_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 2, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 4 + +#define GEN8_PIPE_CONTROL_length 0x00000006 + +struct GEN8_PIPE_CONTROL { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define DAT_PPGTT 0 +#define DAT_GGTT 1 + uint32_t DestinationAddressType; +#define NoLRIOperation 0 +#define MMIOWriteImmediateData 1 + uint32_t LRIPostSyncOperation; + uint32_t StoreDataIndex; + uint32_t CommandStreamerStallEnable; +#define DontReset 0 +#define Reset 1 + uint32_t GlobalSnapshotCountReset; + uint32_t TLBInvalidate; + bool GenericMediaStateClear; +#define NoWrite 0 +#define WriteImmediateData 1 +#define WritePSDepthCount 2 +#define WriteTimestamp 3 + uint32_t PostSyncOperation; + bool DepthStallEnable; +#define DisableFlush 0 +#define EnableFlush 1 + bool RenderTargetCacheFlushEnable; + bool InstructionCacheInvalidateEnable; + bool TextureCacheInvalidationEnable; + bool IndirectStatePointersDisable; + bool NotifyEnable; + bool PipeControlFlushEnable; + bool DCFlushEnable; + bool VFCacheInvalidationEnable; + bool ConstantCacheInvalidationEnable; + bool StateCacheInvalidationEnable; + bool StallAtPixelScoreboard; +#define FlushDisabled 0 +#define FlushEnabled 1 + bool DepthCacheFlushEnable; + __gen_address_type Address; + uint64_t ImmediateData; +}; + +static inline void +GEN8_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_PIPE_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DestinationAddressType, 24, 24) | + __gen_field(values->LRIPostSyncOperation, 23, 23) | + __gen_field(values->StoreDataIndex, 21, 21) | + __gen_field(values->CommandStreamerStallEnable, 20, 20) | + __gen_field(values->GlobalSnapshotCountReset, 19, 19) | + __gen_field(values->TLBInvalidate, 18, 18) | + __gen_field(values->GenericMediaStateClear, 16, 16) | + __gen_field(values->PostSyncOperation, 14, 15) | + __gen_field(values->DepthStallEnable, 13, 13) | + __gen_field(values->RenderTargetCacheFlushEnable, 12, 12) | + __gen_field(values->InstructionCacheInvalidateEnable, 11, 11) | + __gen_field(values->TextureCacheInvalidationEnable, 10, 10) | + __gen_field(values->IndirectStatePointersDisable, 9, 9) | + __gen_field(values->NotifyEnable, 8, 8) | + __gen_field(values->PipeControlFlushEnable, 7, 7) | + __gen_field(values->DCFlushEnable, 5, 5) | + __gen_field(values->VFCacheInvalidationEnable, 4, 4) | + __gen_field(values->ConstantCacheInvalidationEnable, 3, 3) | + __gen_field(values->StateCacheInvalidationEnable, 2, 2) | + __gen_field(values->StallAtPixelScoreboard, 1, 1) | + __gen_field(values->DepthCacheFlushEnable, 0, 0) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->Address, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + uint64_t qw4 = + __gen_field(values->ImmediateData, 0, 63) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + +} + +#define GEN8_SCISSOR_RECT_length 0x00000002 + +struct GEN8_SCISSOR_RECT { + uint32_t ScissorRectangleYMin; + uint32_t ScissorRectangleXMin; + uint32_t ScissorRectangleYMax; + uint32_t ScissorRectangleXMax; +}; + +static inline void +GEN8_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SCISSOR_RECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ScissorRectangleYMin, 16, 31) | + __gen_field(values->ScissorRectangleXMin, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ScissorRectangleYMax, 16, 31) | + __gen_field(values->ScissorRectangleXMax, 0, 15) | + 0; + +} + +#define GEN8_SF_CLIP_VIEWPORT_length 0x00000010 + +struct GEN8_SF_CLIP_VIEWPORT { + float ViewportMatrixElementm00; + float ViewportMatrixElementm11; + float ViewportMatrixElementm22; + float ViewportMatrixElementm30; + float ViewportMatrixElementm31; + float ViewportMatrixElementm32; + float XMinClipGuardband; + float XMaxClipGuardband; + float YMinClipGuardband; + float YMaxClipGuardband; + float XMinViewPort; + float XMaxViewPort; + float YMinViewPort; + float YMaxViewPort; +}; + +static inline void +GEN8_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SF_CLIP_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->ViewportMatrixElementm00) | + 0; + + dw[1] = + __gen_float(values->ViewportMatrixElementm11) | + 0; + + dw[2] = + __gen_float(values->ViewportMatrixElementm22) | + 0; + + dw[3] = + __gen_float(values->ViewportMatrixElementm30) | + 0; + + dw[4] = + __gen_float(values->ViewportMatrixElementm31) | + 0; + + dw[5] = + __gen_float(values->ViewportMatrixElementm32) | + 0; + + dw[6] = + 0; + + dw[7] = + 0; + + dw[8] = + __gen_float(values->XMinClipGuardband) | + 0; + + dw[9] = + __gen_float(values->XMaxClipGuardband) | + 0; + + dw[10] = + __gen_float(values->YMinClipGuardband) | + 0; + + dw[11] = + __gen_float(values->YMaxClipGuardband) | + 0; + + dw[12] = + __gen_float(values->XMinViewPort) | + 0; + + dw[13] = + __gen_float(values->XMaxViewPort) | + 0; + + dw[14] = + __gen_float(values->YMinViewPort) | + 0; + + dw[15] = + __gen_float(values->YMaxViewPort) | + 0; + +} + +#define GEN8_BLEND_STATE_length 0x00000011 + +#define GEN8_BLEND_STATE_ENTRY_length 0x00000002 + +struct GEN8_BLEND_STATE_ENTRY { + bool LogicOpEnable; + uint32_t LogicOpFunction; + uint32_t PreBlendSourceOnlyClampEnable; +#define COLORCLAMP_UNORM 0 +#define COLORCLAMP_SNORM 1 +#define COLORCLAMP_RTFORMAT 2 + uint32_t ColorClampRange; + bool PreBlendColorClampEnable; + bool PostBlendColorClampEnable; + bool ColorBufferBlendEnable; + uint32_t SourceBlendFactor; + uint32_t DestinationBlendFactor; + uint32_t ColorBlendFunction; + uint32_t SourceAlphaBlendFactor; + uint32_t DestinationAlphaBlendFactor; + uint32_t AlphaBlendFunction; + bool WriteDisableAlpha; + bool WriteDisableRed; + bool WriteDisableGreen; + bool WriteDisableBlue; +}; + +static inline void +GEN8_BLEND_STATE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_BLEND_STATE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint64_t qw0 = + __gen_field(values->LogicOpEnable, 63, 63) | + __gen_field(values->LogicOpFunction, 59, 62) | + __gen_field(values->PreBlendSourceOnlyClampEnable, 36, 36) | + __gen_field(values->ColorClampRange, 34, 35) | + __gen_field(values->PreBlendColorClampEnable, 33, 33) | + __gen_field(values->PostBlendColorClampEnable, 32, 32) | + __gen_field(values->ColorBufferBlendEnable, 31, 31) | + __gen_field(values->SourceBlendFactor, 26, 30) | + __gen_field(values->DestinationBlendFactor, 21, 25) | + __gen_field(values->ColorBlendFunction, 18, 20) | + __gen_field(values->SourceAlphaBlendFactor, 13, 17) | + __gen_field(values->DestinationAlphaBlendFactor, 8, 12) | + __gen_field(values->AlphaBlendFunction, 5, 7) | + __gen_field(values->WriteDisableAlpha, 3, 3) | + __gen_field(values->WriteDisableRed, 2, 2) | + __gen_field(values->WriteDisableGreen, 1, 1) | + __gen_field(values->WriteDisableBlue, 0, 0) | + 0; + + dw[0] = qw0; + dw[1] = qw0 >> 32; + +} + +struct GEN8_BLEND_STATE { + bool AlphaToCoverageEnable; + bool IndependentAlphaBlendEnable; + bool AlphaToOneEnable; + bool AlphaToCoverageDitherEnable; + bool AlphaTestEnable; + uint32_t AlphaTestFunction; + bool ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; + struct GEN8_BLEND_STATE_ENTRY Entry[8]; +}; + +static inline void +GEN8_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_BLEND_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->AlphaToCoverageEnable, 31, 31) | + __gen_field(values->IndependentAlphaBlendEnable, 30, 30) | + __gen_field(values->AlphaToOneEnable, 29, 29) | + __gen_field(values->AlphaToCoverageDitherEnable, 28, 28) | + __gen_field(values->AlphaTestEnable, 27, 27) | + __gen_field(values->AlphaTestFunction, 24, 26) | + __gen_field(values->ColorDitherEnable, 23, 23) | + __gen_field(values->XDitherOffset, 21, 22) | + __gen_field(values->YDitherOffset, 19, 20) | + 0; + + for (uint32_t i = 0, j = 1; i < 8; i++, j += 2) + GEN8_BLEND_STATE_ENTRY_pack(data, &dw[j], &values->Entry[i]); +} + +#define GEN8_CC_VIEWPORT_length 0x00000002 + +struct GEN8_CC_VIEWPORT { + float MinimumDepth; + float MaximumDepth; +}; + +static inline void +GEN8_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_CC_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->MinimumDepth) | + 0; + + dw[1] = + __gen_float(values->MaximumDepth) | + 0; + +} + +#define GEN8_COLOR_CALC_STATE_length 0x00000006 + +struct GEN8_COLOR_CALC_STATE { + uint32_t StencilReferenceValue; + uint32_t BackFaceStencilReferenceValue; +#define Cancelled 0 +#define NotCancelled 1 + uint32_t RoundDisableFunctionDisable; +#define ALPHATEST_UNORM8 0 +#define ALPHATEST_FLOAT32 1 + uint32_t AlphaTestFormat; + uint32_t AlphaReferenceValueAsUNORM8; + float AlphaReferenceValueAsFLOAT32; + float BlendConstantColorRed; + float BlendConstantColorGreen; + float BlendConstantColorBlue; + float BlendConstantColorAlpha; +}; + +static inline void +GEN8_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_COLOR_CALC_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->StencilReferenceValue, 24, 31) | + __gen_field(values->BackFaceStencilReferenceValue, 16, 23) | + __gen_field(values->RoundDisableFunctionDisable, 15, 15) | + __gen_field(values->AlphaTestFormat, 0, 0) | + 0; + + dw[1] = + __gen_field(values->AlphaReferenceValueAsUNORM8, 0, 31) | + __gen_float(values->AlphaReferenceValueAsFLOAT32) | + 0; + + dw[2] = + __gen_float(values->BlendConstantColorRed) | + 0; + + dw[3] = + __gen_float(values->BlendConstantColorGreen) | + 0; + + dw[4] = + __gen_float(values->BlendConstantColorBlue) | + 0; + + dw[5] = + __gen_float(values->BlendConstantColorAlpha) | + 0; + +} + +#define GEN8_BLACK_LEVEL_CORRECTION_STATE__DW7576_length 0x00000002 + +struct GEN8_BLACK_LEVEL_CORRECTION_STATE__DW7576 { + uint32_t BlackPointOffsetR; + uint32_t BlackPointOffsetG; + uint32_t BlackPointOffsetB; +}; + +static inline void +GEN8_BLACK_LEVEL_CORRECTION_STATE__DW7576_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_BLACK_LEVEL_CORRECTION_STATE__DW7576 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BlackPointOffsetR, 0, 12) | + 0; + + dw[1] = + __gen_field(values->BlackPointOffsetG, 13, 25) | + __gen_field(values->BlackPointOffsetB, 0, 12) | + 0; + +} + +#define GEN8_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 + +struct GEN8_INTERFACE_DESCRIPTOR_DATA { + uint32_t KernelStartPointer; + uint32_t KernelStartPointerHigh; +#define Ftz 0 +#define SetByKernel 1 + uint32_t DenormMode; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlow; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t SamplerStatePointer; +#define Nosamplersused 0 +#define Between1and4samplersused 1 +#define Between5and8samplersused 2 +#define Between9and12samplersused 3 +#define Between13and16samplersused 4 + uint32_t SamplerCount; + uint32_t BindingTablePointer; + uint32_t BindingTableEntryCount; + uint32_t ConstantIndirectURBEntryReadLength; + uint32_t ConstantURBEntryReadOffset; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + bool BarrierEnable; +#define Encodes0k 0 +#define Encodes4k 1 +#define Encodes8k 2 +#define Encodes16k 4 +#define Encodes32k 8 +#define Encodes64k 16 + uint32_t SharedLocalMemorySize; + uint32_t NumberofThreadsinGPGPUThreadGroup; + uint32_t CrossThreadConstantDataReadLength; +}; + +static inline void +GEN8_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_INTERFACE_DESCRIPTOR_DATA * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointerHigh, 0, 15) | + 0; + + dw[2] = + __gen_field(values->DenormMode, 19, 19) | + __gen_field(values->SingleProgramFlow, 18, 18) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->SamplerStatePointer, 5, 31) | + __gen_field(values->SamplerCount, 2, 4) | + 0; + + dw[4] = + __gen_offset(values->BindingTablePointer, 5, 15) | + __gen_field(values->BindingTableEntryCount, 0, 4) | + 0; + + dw[5] = + __gen_field(values->ConstantIndirectURBEntryReadLength, 16, 31) | + __gen_field(values->ConstantURBEntryReadOffset, 0, 15) | + 0; + + dw[6] = + __gen_field(values->RoundingMode, 22, 23) | + __gen_field(values->BarrierEnable, 21, 21) | + __gen_field(values->SharedLocalMemorySize, 16, 20) | + __gen_field(values->NumberofThreadsinGPGPUThreadGroup, 0, 9) | + 0; + + dw[7] = + __gen_field(values->CrossThreadConstantDataReadLength, 0, 7) | + 0; + +} + +#define GEN8_BINDING_TABLE_STATE_length 0x00000001 + +struct GEN8_BINDING_TABLE_STATE { + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN8_BINDING_TABLE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_BINDING_TABLE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->SurfaceStatePointer, 6, 31) | + 0; + +} + +#define GEN8_RENDER_SURFACE_STATE_length 0x00000010 + +struct GEN8_RENDER_SURFACE_STATE { +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_BUFFER 4 +#define SURFTYPE_STRBUF 5 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + bool SurfaceArray; + uint32_t SurfaceFormat; +#define VALIGN4 1 +#define VALIGN8 2 +#define VALIGN16 3 + uint32_t SurfaceVerticalAlignment; +#define HALIGN4 1 +#define HALIGN8 2 +#define HALIGN16 3 + uint32_t SurfaceHorizontalAlignment; +#define LINEAR 0 +#define WMAJOR 1 +#define XMAJOR 2 +#define YMAJOR 3 + uint32_t TileMode; + uint32_t VerticalLineStride; + uint32_t VerticalLineStrideOffset; + bool SamplerL2BypassModeDisable; +#define WriteOnlyCache 0 +#define ReadWriteCache 1 + uint32_t RenderCacheReadWriteMode; +#define NORMAL_MODE 0 +#define PROGRESSIVE_FRAME 2 +#define INTERLACED_FRAME 3 + uint32_t MediaBoundaryPixelMode; + bool CubeFaceEnablePositiveZ; + bool CubeFaceEnableNegativeZ; + bool CubeFaceEnablePositiveY; + bool CubeFaceEnableNegativeY; + bool CubeFaceEnablePositiveX; + bool CubeFaceEnableNegativeX; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; + float BaseMipLevel; + uint32_t SurfaceQPitch; + uint32_t Height; + uint32_t Width; + uint32_t Depth; + uint32_t SurfacePitch; +#define _0DEG 0 +#define _90DEG 1 +#define _270DEG 3 + uint32_t RenderTargetAndSampleUnormRotation; + uint32_t MinimumArrayElement; + uint32_t RenderTargetViewExtent; +#define MSS 0 +#define DEPTH_STENCIL 1 + uint32_t MultisampledSurfaceStorageFormat; +#define MULTISAMPLECOUNT_1 0 +#define MULTISAMPLECOUNT_2 1 +#define MULTISAMPLECOUNT_4 2 +#define MULTISAMPLECOUNT_8 3 + uint32_t NumberofMultisamples; + uint32_t MultisamplePositionPaletteIndex; + uint32_t XOffset; + uint32_t YOffset; + bool EWADisableForCube; +#define GPUcoherent 0 +#define IAcoherent 1 + uint32_t CoherencyType; + uint32_t SurfaceMinLOD; + uint32_t MIPCountLOD; + uint32_t AuxiliarySurfaceQPitch; + uint32_t AuxiliarySurfacePitch; +#define AUX_NONE 0 +#define AUX_MCS 1 +#define AUX_APPEND 2 +#define AUX_HIZ 3 + uint32_t AuxiliarySurfaceMode; + bool SeparateUVPlaneEnable; + uint32_t XOffsetforUorUVPlane; + uint32_t YOffsetforUorUVPlane; + uint32_t RedClearColor; + uint32_t GreenClearColor; + uint32_t BlueClearColor; + uint32_t AlphaClearColor; + uint32_t ShaderChannelSelectRed; + uint32_t ShaderChannelSelectGreen; + uint32_t ShaderChannelSelectBlue; + uint32_t ShaderChannelSelectAlpha; + float ResourceMinLOD; + __gen_address_type SurfaceBaseAddress; + uint32_t XOffsetforVPlane; + uint32_t YOffsetforVPlane; + uint32_t AuxiliaryTableIndexforMediaCompressedSurface; + __gen_address_type AuxiliarySurfaceBaseAddress; +}; + +static inline void +GEN8_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_RENDER_SURFACE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->SurfaceArray, 28, 28) | + __gen_field(values->SurfaceFormat, 18, 26) | + __gen_field(values->SurfaceVerticalAlignment, 16, 17) | + __gen_field(values->SurfaceHorizontalAlignment, 14, 15) | + __gen_field(values->TileMode, 12, 13) | + __gen_field(values->VerticalLineStride, 11, 11) | + __gen_field(values->VerticalLineStrideOffset, 10, 10) | + __gen_field(values->SamplerL2BypassModeDisable, 9, 9) | + __gen_field(values->RenderCacheReadWriteMode, 8, 8) | + __gen_field(values->MediaBoundaryPixelMode, 6, 7) | + __gen_field(values->CubeFaceEnablePositiveZ, 0, 0) | + __gen_field(values->CubeFaceEnableNegativeZ, 1, 1) | + __gen_field(values->CubeFaceEnablePositiveY, 2, 2) | + __gen_field(values->CubeFaceEnableNegativeY, 3, 3) | + __gen_field(values->CubeFaceEnablePositiveX, 4, 4) | + __gen_field(values->CubeFaceEnableNegativeX, 5, 5) | + 0; + + uint32_t dw_MemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + dw[1] = + __gen_field(dw_MemoryObjectControlState, 24, 30) | + __gen_field(values->BaseMipLevel * (1 << 1), 19, 23) | + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + + dw[2] = + __gen_field(values->Height, 16, 29) | + __gen_field(values->Width, 0, 13) | + 0; + + dw[3] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + dw[4] = + __gen_field(values->RenderTargetAndSampleUnormRotation, 29, 30) | + __gen_field(values->MinimumArrayElement, 18, 28) | + __gen_field(values->RenderTargetViewExtent, 7, 17) | + __gen_field(values->MultisampledSurfaceStorageFormat, 6, 6) | + __gen_field(values->NumberofMultisamples, 3, 5) | + __gen_field(values->MultisamplePositionPaletteIndex, 0, 2) | + 0; + + dw[5] = + __gen_offset(values->XOffset, 25, 31) | + __gen_offset(values->YOffset, 21, 23) | + __gen_field(values->EWADisableForCube, 20, 20) | + __gen_field(values->CoherencyType, 14, 14) | + __gen_field(values->SurfaceMinLOD, 4, 7) | + __gen_field(values->MIPCountLOD, 0, 3) | + 0; + + dw[6] = + __gen_field(values->AuxiliarySurfaceQPitch, 16, 30) | + __gen_field(values->AuxiliarySurfacePitch, 3, 11) | + __gen_field(values->AuxiliarySurfaceMode, 0, 2) | + __gen_field(values->SeparateUVPlaneEnable, 31, 31) | + __gen_field(values->XOffsetforUorUVPlane, 16, 29) | + __gen_field(values->YOffsetforUorUVPlane, 0, 13) | + 0; + + dw[7] = + __gen_field(values->RedClearColor, 31, 31) | + __gen_field(values->GreenClearColor, 30, 30) | + __gen_field(values->BlueClearColor, 29, 29) | + __gen_field(values->AlphaClearColor, 28, 28) | + __gen_field(values->ShaderChannelSelectRed, 25, 27) | + __gen_field(values->ShaderChannelSelectGreen, 22, 24) | + __gen_field(values->ShaderChannelSelectBlue, 19, 21) | + __gen_field(values->ShaderChannelSelectAlpha, 16, 18) | + __gen_field(values->ResourceMinLOD * (1 << 8), 0, 11) | + 0; + + uint32_t dw8 = + 0; + + uint64_t qw8 = + __gen_combine_address(data, &dw[8], values->SurfaceBaseAddress, dw8); + + dw[8] = qw8; + dw[9] = qw8 >> 32; + + uint32_t dw10 = + __gen_field(values->XOffsetforVPlane, 48, 61) | + __gen_field(values->YOffsetforVPlane, 32, 45) | + __gen_field(values->AuxiliaryTableIndexforMediaCompressedSurface, 21, 31) | + 0; + + uint64_t qw10 = + __gen_combine_address(data, &dw[10], values->AuxiliarySurfaceBaseAddress, dw10); + + dw[10] = qw10; + dw[11] = qw10 >> 32; + + dw[12] = + 0; + + dw[13] = + 0; + + dw[14] = + 0; + + dw[15] = + 0; + +} + +#define GEN8_FILTER_COEFFICIENT_length 0x00000001 + +struct GEN8_FILTER_COEFFICIENT { + uint32_t FilterCoefficient; +}; + +static inline void +GEN8_FILTER_COEFFICIENT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_FILTER_COEFFICIENT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->FilterCoefficient, 0, 7) | + 0; + +} + +#define GEN8_SAMPLER_STATE_length 0x00000004 + +struct GEN8_SAMPLER_STATE { + bool SamplerDisable; +#define DX10OGL 0 +#define DX9 1 + uint32_t TextureBorderColorMode; +#define CLAMP_NONE 0 +#define CLAMP_OGL 2 + uint32_t LODPreClampMode; + float BaseMipLevel; +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 + uint32_t MipModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MagModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MinModeFilter; + float TextureLODBias; +#define LEGACY 0 +#define EWAApproximation 1 + uint32_t AnisotropicAlgorithm; + float MinLOD; + float MaxLOD; + bool ChromaKeyEnable; + uint32_t ChromaKeyIndex; +#define KEYFILTER_KILL_ON_ANY_MATCH 0 +#define KEYFILTER_REPLACE_BLACK 1 + uint32_t ChromaKeyMode; +#define PREFILTEROPALWAYS 0 +#define PREFILTEROPNEVER 1 +#define PREFILTEROPLESS 2 +#define PREFILTEROPEQUAL 3 +#define PREFILTEROPLEQUAL 4 +#define PREFILTEROPGREATER 5 +#define PREFILTEROPNOTEQUAL 6 +#define PREFILTEROPGEQUAL 7 + uint32_t ShadowFunction; +#define PROGRAMMED 0 +#define OVERRIDE 1 + uint32_t CubeSurfaceControlMode; + uint32_t IndirectStatePointer; +#define MIPNONE 0 +#define MIPFILTER 1 + uint32_t LODClampMagnificationMode; +#define RATIO21 0 +#define RATIO41 1 +#define RATIO61 2 +#define RATIO81 3 +#define RATIO101 4 +#define RATIO121 5 +#define RATIO141 6 +#define RATIO161 7 + uint32_t MaximumAnisotropy; + bool RAddressMinFilterRoundingEnable; + bool RAddressMagFilterRoundingEnable; + bool VAddressMinFilterRoundingEnable; + bool VAddressMagFilterRoundingEnable; + bool UAddressMinFilterRoundingEnable; + bool UAddressMagFilterRoundingEnable; +#define FULL 0 +#define HIGH 1 +#define MED 2 +#define LOW 3 + uint32_t TrilinearFilterQuality; + bool NonnormalizedCoordinateEnable; + uint32_t TCXAddressControlMode; + uint32_t TCYAddressControlMode; + uint32_t TCZAddressControlMode; +}; + +static inline void +GEN8_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SAMPLER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SamplerDisable, 31, 31) | + __gen_field(values->TextureBorderColorMode, 29, 29) | + __gen_field(values->LODPreClampMode, 27, 28) | + __gen_field(values->BaseMipLevel * (1 << 1), 22, 26) | + __gen_field(values->MipModeFilter, 20, 21) | + __gen_field(values->MagModeFilter, 17, 19) | + __gen_field(values->MinModeFilter, 14, 16) | + __gen_fixed(values->TextureLODBias, 1, 13, true, 8) | + __gen_field(values->AnisotropicAlgorithm, 0, 0) | + 0; + + dw[1] = + __gen_field(values->MinLOD * (1 << 8), 20, 31) | + __gen_field(values->MaxLOD * (1 << 8), 8, 19) | + __gen_field(values->ChromaKeyEnable, 7, 7) | + __gen_field(values->ChromaKeyIndex, 5, 6) | + __gen_field(values->ChromaKeyMode, 4, 4) | + __gen_field(values->ShadowFunction, 1, 3) | + __gen_field(values->CubeSurfaceControlMode, 0, 0) | + 0; + + dw[2] = + __gen_field(values->IndirectStatePointer, 6, 23) | + __gen_field(values->LODClampMagnificationMode, 0, 0) | + 0; + + dw[3] = + __gen_field(values->MaximumAnisotropy, 19, 21) | + __gen_field(values->RAddressMinFilterRoundingEnable, 13, 13) | + __gen_field(values->RAddressMagFilterRoundingEnable, 14, 14) | + __gen_field(values->VAddressMinFilterRoundingEnable, 15, 15) | + __gen_field(values->VAddressMagFilterRoundingEnable, 16, 16) | + __gen_field(values->UAddressMinFilterRoundingEnable, 17, 17) | + __gen_field(values->UAddressMagFilterRoundingEnable, 18, 18) | + __gen_field(values->TrilinearFilterQuality, 11, 12) | + __gen_field(values->NonnormalizedCoordinateEnable, 10, 10) | + __gen_field(values->TCXAddressControlMode, 6, 8) | + __gen_field(values->TCYAddressControlMode, 3, 5) | + __gen_field(values->TCZAddressControlMode, 0, 2) | + 0; + +} + +#define GEN8_SAMPLER_STATE_8X8_AVS_COEFFICIENTS_length 0x00000008 + +struct GEN8_SAMPLER_STATE_8X8_AVS_COEFFICIENTS { + uint32_t Table0YFilterCoefficientn1; + uint32_t Table0XFilterCoefficientn1; + uint32_t Table0YFilterCoefficientn0; + uint32_t Table0XFilterCoefficientn0; + uint32_t Table0YFilterCoefficientn3; + uint32_t Table0XFilterCoefficientn3; + uint32_t Table0YFilterCoefficientn2; + uint32_t Table0XFilterCoefficientn2; + uint32_t Table0YFilterCoefficientn5; + uint32_t Table0XFilterCoefficientn5; + uint32_t Table0YFilterCoefficientn4; + uint32_t Table0XFilterCoefficientn4; + uint32_t Table0YFilterCoefficientn7; + uint32_t Table0XFilterCoefficientn7; + uint32_t Table0YFilterCoefficientn6; + uint32_t Table0XFilterCoefficientn6; + uint32_t Table1XFilterCoefficientn3; + uint32_t Table1XFilterCoefficientn2; + uint32_t Table1XFilterCoefficientn5; + uint32_t Table1XFilterCoefficientn4; + uint32_t Table1YFilterCoefficientn3; + uint32_t Table1YFilterCoefficientn2; + uint32_t Table1YFilterCoefficientn5; + uint32_t Table1YFilterCoefficientn4; +}; + +static inline void +GEN8_SAMPLER_STATE_8X8_AVS_COEFFICIENTS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SAMPLER_STATE_8X8_AVS_COEFFICIENTS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->Table0YFilterCoefficientn1, 24, 31) | + __gen_field(values->Table0XFilterCoefficientn1, 16, 23) | + __gen_field(values->Table0YFilterCoefficientn0, 8, 15) | + __gen_field(values->Table0XFilterCoefficientn0, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Table0YFilterCoefficientn3, 24, 31) | + __gen_field(values->Table0XFilterCoefficientn3, 16, 23) | + __gen_field(values->Table0YFilterCoefficientn2, 8, 15) | + __gen_field(values->Table0XFilterCoefficientn2, 0, 7) | + 0; + + dw[2] = + __gen_field(values->Table0YFilterCoefficientn5, 24, 31) | + __gen_field(values->Table0XFilterCoefficientn5, 16, 23) | + __gen_field(values->Table0YFilterCoefficientn4, 8, 15) | + __gen_field(values->Table0XFilterCoefficientn4, 0, 7) | + 0; + + dw[3] = + __gen_field(values->Table0YFilterCoefficientn7, 24, 31) | + __gen_field(values->Table0XFilterCoefficientn7, 16, 23) | + __gen_field(values->Table0YFilterCoefficientn6, 8, 15) | + __gen_field(values->Table0XFilterCoefficientn6, 0, 7) | + 0; + + dw[4] = + __gen_field(values->Table1XFilterCoefficientn3, 24, 31) | + __gen_field(values->Table1XFilterCoefficientn2, 16, 23) | + 0; + + dw[5] = + __gen_field(values->Table1XFilterCoefficientn5, 8, 15) | + __gen_field(values->Table1XFilterCoefficientn4, 0, 7) | + 0; + + dw[6] = + __gen_field(values->Table1YFilterCoefficientn3, 24, 31) | + __gen_field(values->Table1YFilterCoefficientn2, 16, 23) | + 0; + + dw[7] = + __gen_field(values->Table1YFilterCoefficientn5, 8, 15) | + __gen_field(values->Table1YFilterCoefficientn4, 0, 7) | + 0; + +} + +/* Enum 3D_Prim_Topo_Type */ +#define _3DPRIM_POINTLIST 1 +#define _3DPRIM_LINELIST 2 +#define _3DPRIM_LINESTRIP 3 +#define _3DPRIM_TRILIST 4 +#define _3DPRIM_TRISTRIP 5 +#define _3DPRIM_TRIFAN 6 +#define _3DPRIM_QUADLIST 7 +#define _3DPRIM_QUADSTRIP 8 +#define _3DPRIM_LINELIST_ADJ 9 +#define _3DPRIM_LINESTRIP_ADJ 10 +#define _3DPRIM_TRILIST_ADJ 11 +#define _3DPRIM_TRISTRIP_ADJ 12 +#define _3DPRIM_TRISTRIP_REVERSE 13 +#define _3DPRIM_POLYGON 14 +#define _3DPRIM_RECTLIST 15 +#define _3DPRIM_LINELOOP 16 +#define _3DPRIM_POINTLIST_BF 17 +#define _3DPRIM_LINESTRIP_CONT 18 +#define _3DPRIM_LINESTRIP_BF 19 +#define _3DPRIM_LINESTRIP_CONT_BF 20 +#define _3DPRIM_TRIFAN_NOSTIPPLE 22 +#define _3DPRIM_PATCHLIST_1 32 +#define _3DPRIM_PATCHLIST_2 33 +#define _3DPRIM_PATCHLIST_3 34 +#define _3DPRIM_PATCHLIST_4 35 +#define _3DPRIM_PATCHLIST_5 36 +#define _3DPRIM_PATCHLIST_6 37 +#define _3DPRIM_PATCHLIST_7 38 +#define _3DPRIM_PATCHLIST_8 39 +#define _3DPRIM_PATCHLIST_9 40 +#define _3DPRIM_PATCHLIST_10 41 +#define _3DPRIM_PATCHLIST_11 42 +#define _3DPRIM_PATCHLIST_12 43 +#define _3DPRIM_PATCHLIST_13 44 +#define _3DPRIM_PATCHLIST_14 45 +#define _3DPRIM_PATCHLIST_15 46 +#define _3DPRIM_PATCHLIST_16 47 +#define _3DPRIM_PATCHLIST_17 48 +#define _3DPRIM_PATCHLIST_18 49 +#define _3DPRIM_PATCHLIST_19 50 +#define _3DPRIM_PATCHLIST_20 51 +#define _3DPRIM_PATCHLIST_21 52 +#define _3DPRIM_PATCHLIST_22 53 +#define _3DPRIM_PATCHLIST_23 54 +#define _3DPRIM_PATCHLIST_24 55 +#define _3DPRIM_PATCHLIST_25 56 +#define _3DPRIM_PATCHLIST_26 57 +#define _3DPRIM_PATCHLIST_27 58 +#define _3DPRIM_PATCHLIST_28 59 +#define _3DPRIM_PATCHLIST_29 60 +#define _3DPRIM_PATCHLIST_30 61 +#define _3DPRIM_PATCHLIST_31 62 +#define _3DPRIM_PATCHLIST_32 63 + +/* Enum 3D_Vertex_Component_Control */ +#define VFCOMP_NOSTORE 0 +#define VFCOMP_STORE_SRC 1 +#define VFCOMP_STORE_0 2 +#define VFCOMP_STORE_1_FP 3 +#define VFCOMP_STORE_1_INT 4 +#define VFCOMP_STORE_PID 7 + +/* Enum WRAP_SHORTEST_ENABLE */ +#define WSE_X 1 +#define WSE_Y 2 +#define WSE_XY 3 +#define WSE_Z 4 +#define WSE_XZ 5 +#define WSE_YZ 6 +#define WSE_XYZ 7 +#define WSE_W 8 +#define WSE_XW 9 +#define WSE_YW 10 +#define WSE_XYW 11 +#define WSE_ZW 12 +#define WSE_XZW 13 +#define WSE_YZW 14 +#define WSE_XYZW 15 + +/* Enum 3D_Stencil_Operation */ +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 1 +#define STENCILOP_REPLACE 2 +#define STENCILOP_INCRSAT 3 +#define STENCILOP_DECRSAT 4 +#define STENCILOP_INCR 5 +#define STENCILOP_DECR 6 +#define STENCILOP_INVERT 7 + +/* Enum 3D_Color_Buffer_Blend_Factor */ +#define BLENDFACTOR_ONE 1 +#define BLENDFACTOR_SRC_COLOR 2 +#define BLENDFACTOR_SRC_ALPHA 3 +#define BLENDFACTOR_DST_ALPHA 4 +#define BLENDFACTOR_DST_COLOR 5 +#define BLENDFACTOR_SRC_ALPHA_SATURATE 6 +#define BLENDFACTOR_CONST_COLOR 7 +#define BLENDFACTOR_CONST_ALPHA 8 +#define BLENDFACTOR_SRC1_COLOR 9 +#define BLENDFACTOR_SRC1_ALPHA 10 +#define BLENDFACTOR_ZERO 17 +#define BLENDFACTOR_INV_SRC_COLOR 18 +#define BLENDFACTOR_INV_SRC_ALPHA 19 +#define BLENDFACTOR_INV_DST_ALPHA 20 +#define BLENDFACTOR_INV_DST_COLOR 21 +#define BLENDFACTOR_INV_CONST_COLOR 23 +#define BLENDFACTOR_INV_CONST_ALPHA 24 +#define BLENDFACTOR_INV_SRC1_COLOR 25 +#define BLENDFACTOR_INV_SRC1_ALPHA 26 + +/* Enum 3D_Color_Buffer_Blend_Function */ +#define BLENDFUNCTION_ADD 0 +#define BLENDFUNCTION_SUBTRACT 1 +#define BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BLENDFUNCTION_MIN 3 +#define BLENDFUNCTION_MAX 4 + +/* Enum 3D_Compare_Function */ +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + +/* Enum 3D_Logic_Op_Function */ +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 1 +#define LOGICOP_AND_INVERTED 2 +#define LOGICOP_COPY_INVERTED 3 +#define LOGICOP_AND_REVERSE 4 +#define LOGICOP_INVERT 5 +#define LOGICOP_XOR 6 +#define LOGICOP_NAND 7 +#define LOGICOP_AND 8 +#define LOGICOP_EQUIV 9 +#define LOGICOP_NOOP 10 +#define LOGICOP_OR_INVERTED 11 +#define LOGICOP_COPY 12 +#define LOGICOP_OR_REVERSE 13 +#define LOGICOP_OR 14 +#define LOGICOP_SET 15 + +/* Enum SURFACE_FORMAT */ +#define R32G32B32A32_FLOAT 0 +#define R32G32B32A32_SINT 1 +#define R32G32B32A32_UINT 2 +#define R32G32B32A32_UNORM 3 +#define R32G32B32A32_SNORM 4 +#define R64G64_FLOAT 5 +#define R32G32B32X32_FLOAT 6 +#define R32G32B32A32_SSCALED 7 +#define R32G32B32A32_USCALED 8 +#define R32G32B32A32_SFIXED 32 +#define R64G64_PASSTHRU 33 +#define R32G32B32_FLOAT 64 +#define R32G32B32_SINT 65 +#define R32G32B32_UINT 66 +#define R32G32B32_UNORM 67 +#define R32G32B32_SNORM 68 +#define R32G32B32_SSCALED 69 +#define R32G32B32_USCALED 70 +#define R32G32B32_SFIXED 80 +#define R16G16B16A16_UNORM 128 +#define R16G16B16A16_SNORM 129 +#define R16G16B16A16_SINT 130 +#define R16G16B16A16_UINT 131 +#define R16G16B16A16_FLOAT 132 +#define R32G32_FLOAT 133 +#define R32G32_SINT 134 +#define R32G32_UINT 135 +#define R32_FLOAT_X8X24_TYPELESS 136 +#define X32_TYPELESS_G8X24_UINT 137 +#define L32A32_FLOAT 138 +#define R32G32_UNORM 139 +#define R32G32_SNORM 140 +#define R64_FLOAT 141 +#define R16G16B16X16_UNORM 142 +#define R16G16B16X16_FLOAT 143 +#define A32X32_FLOAT 144 +#define L32X32_FLOAT 145 +#define I32X32_FLOAT 146 +#define R16G16B16A16_SSCALED 147 +#define R16G16B16A16_USCALED 148 +#define R32G32_SSCALED 149 +#define R32G32_USCALED 150 +#define R32G32_SFIXED 160 +#define R64_PASSTHRU 161 +#define B8G8R8A8_UNORM 192 +#define B8G8R8A8_UNORM_SRGB 193 +#define R10G10B10A2_UNORM 194 +#define R10G10B10A2_UNORM_SRGB 195 +#define R10G10B10A2_UINT 196 +#define R10G10B10_SNORM_A2_UNORM 197 +#define R8G8B8A8_UNORM 199 +#define R8G8B8A8_UNORM_SRGB 200 +#define R8G8B8A8_SNORM 201 +#define R8G8B8A8_SINT 202 +#define R8G8B8A8_UINT 203 +#define R16G16_UNORM 204 +#define R16G16_SNORM 205 +#define R16G16_SINT 206 +#define R16G16_UINT 207 +#define R16G16_FLOAT 208 +#define B10G10R10A2_UNORM 209 +#define B10G10R10A2_UNORM_SRGB 210 +#define R11G11B10_FLOAT 211 +#define R32_SINT 214 +#define R32_UINT 215 +#define R32_FLOAT 216 +#define R24_UNORM_X8_TYPELESS 217 +#define X24_TYPELESS_G8_UINT 218 +#define L32_UNORM 221 +#define A32_UNORM 222 +#define L16A16_UNORM 223 +#define I24X8_UNORM 224 +#define L24X8_UNORM 225 +#define A24X8_UNORM 226 +#define I32_FLOAT 227 +#define L32_FLOAT 228 +#define A32_FLOAT 229 +#define X8B8_UNORM_G8R8_SNORM 230 +#define A8X8_UNORM_G8R8_SNORM 231 +#define B8X8_UNORM_G8R8_SNORM 232 +#define B8G8R8X8_UNORM 233 +#define B8G8R8X8_UNORM_SRGB 234 +#define R8G8B8X8_UNORM 235 +#define R8G8B8X8_UNORM_SRGB 236 +#define R9G9B9E5_SHAREDEXP 237 +#define B10G10R10X2_UNORM 238 +#define L16A16_FLOAT 240 +#define R32_UNORM 241 +#define R32_SNORM 242 +#define R10G10B10X2_USCALED 243 +#define R8G8B8A8_SSCALED 244 +#define R8G8B8A8_USCALED 245 +#define R16G16_SSCALED 246 +#define R16G16_USCALED 247 +#define R32_SSCALED 248 +#define R32_USCALED 249 +#define B5G6R5_UNORM 256 +#define B5G6R5_UNORM_SRGB 257 +#define B5G5R5A1_UNORM 258 +#define B5G5R5A1_UNORM_SRGB 259 +#define B4G4R4A4_UNORM 260 +#define B4G4R4A4_UNORM_SRGB 261 +#define R8G8_UNORM 262 +#define R8G8_SNORM 263 +#define R8G8_SINT 264 +#define R8G8_UINT 265 +#define R16_UNORM 266 +#define R16_SNORM 267 +#define R16_SINT 268 +#define R16_UINT 269 +#define R16_FLOAT 270 +#define A8P8_UNORM_PALETTE0 271 +#define A8P8_UNORM_PALETTE1 272 +#define I16_UNORM 273 +#define L16_UNORM 274 +#define A16_UNORM 275 +#define L8A8_UNORM 276 +#define I16_FLOAT 277 +#define L16_FLOAT 278 +#define A16_FLOAT 279 +#define L8A8_UNORM_SRGB 280 +#define R5G5_SNORM_B6_UNORM 281 +#define B5G5R5X1_UNORM 282 +#define B5G5R5X1_UNORM_SRGB 283 +#define R8G8_SSCALED 284 +#define R8G8_USCALED 285 +#define R16_SSCALED 286 +#define R16_USCALED 287 +#define P8A8_UNORM_PALETTE0 290 +#define P8A8_UNORM_PALETTE1 291 +#define A1B5G5R5_UNORM 292 +#define A4B4G4R4_UNORM 293 +#define L8A8_UINT 294 +#define L8A8_SINT 295 +#define R8_UNORM 320 +#define R8_SNORM 321 +#define R8_SINT 322 +#define R8_UINT 323 +#define A8_UNORM 324 +#define I8_UNORM 325 +#define L8_UNORM 326 +#define P4A4_UNORM_PALETTE0 327 +#define A4P4_UNORM_PALETTE0 328 +#define R8_SSCALED 329 +#define R8_USCALED 330 +#define P8_UNORM_PALETTE0 331 +#define L8_UNORM_SRGB 332 +#define P8_UNORM_PALETTE1 333 +#define P4A4_UNORM_PALETTE1 334 +#define A4P4_UNORM_PALETTE1 335 +#define Y8_UNORM 336 +#define L8_UINT 338 +#define L8_SINT 339 +#define I8_UINT 340 +#define I8_SINT 341 +#define DXT1_RGB_SRGB 384 +#define R1_UNORM 385 +#define YCRCB_NORMAL 386 +#define YCRCB_SWAPUVY 387 +#define P2_UNORM_PALETTE0 388 +#define P2_UNORM_PALETTE1 389 +#define BC1_UNORM 390 +#define BC2_UNORM 391 +#define BC3_UNORM 392 +#define BC4_UNORM 393 +#define BC5_UNORM 394 +#define BC1_UNORM_SRGB 395 +#define BC2_UNORM_SRGB 396 +#define BC3_UNORM_SRGB 397 +#define MONO8 398 +#define YCRCB_SWAPUV 399 +#define YCRCB_SWAPY 400 +#define DXT1_RGB 401 +#define FXT1 402 +#define R8G8B8_UNORM 403 +#define R8G8B8_SNORM 404 +#define R8G8B8_SSCALED 405 +#define R8G8B8_USCALED 406 +#define R64G64B64A64_FLOAT 407 +#define R64G64B64_FLOAT 408 +#define BC4_SNORM 409 +#define BC5_SNORM 410 +#define R16G16B16_FLOAT 411 +#define R16G16B16_UNORM 412 +#define R16G16B16_SNORM 413 +#define R16G16B16_SSCALED 414 +#define R16G16B16_USCALED 415 +#define BC6H_SF16 417 +#define BC7_UNORM 418 +#define BC7_UNORM_SRGB 419 +#define BC6H_UF16 420 +#define PLANAR_420_8 421 +#define R8G8B8_UNORM_SRGB 424 +#define ETC1_RGB8 425 +#define ETC2_RGB8 426 +#define EAC_R11 427 +#define EAC_RG11 428 +#define EAC_SIGNED_R11 429 +#define EAC_SIGNED_RG11 430 +#define ETC2_SRGB8 431 +#define R16G16B16_UINT 432 +#define R16G16B16_SINT 433 +#define R32_SFIXED 434 +#define R10G10B10A2_SNORM 435 +#define R10G10B10A2_USCALED 436 +#define R10G10B10A2_SSCALED 437 +#define R10G10B10A2_SINT 438 +#define B10G10R10A2_SNORM 439 +#define B10G10R10A2_USCALED 440 +#define B10G10R10A2_SSCALED 441 +#define B10G10R10A2_UINT 442 +#define B10G10R10A2_SINT 443 +#define R64G64B64A64_PASSTHRU 444 +#define R64G64B64_PASSTHRU 445 +#define ETC2_RGB8_PTA 448 +#define ETC2_SRGB8_PTA 449 +#define ETC2_EAC_RGBA8 450 +#define ETC2_EAC_SRGB8_A8 451 +#define R8G8B8_UINT 456 +#define R8G8B8_SINT 457 +#define RAW 511 + +/* Enum Shader Channel Select */ +#define SCS_ZERO 0 +#define SCS_ONE 1 +#define SCS_RED 4 +#define SCS_GREEN 5 +#define SCS_BLUE 6 +#define SCS_ALPHA 7 + +/* Enum Clear Color */ +#define CC_ZERO 0 +#define CC_ONE 1 + +/* Enum Texture Coordinate Mode */ +#define TCM_WRAP 0 +#define TCM_MIRROR 1 +#define TCM_CLAMP 2 +#define TCM_CUBE 3 +#define TCM_CLAMP_BORDER 4 +#define TCM_MIRROR_ONCE 5 +#define TCM_HALF_BORDER 6 + diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c new file mode 100644 index 00000000000..fce2331873c --- /dev/null +++ b/src/vulkan/gen8_pipeline.c @@ -0,0 +1,669 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +#include "gen8_pack.h" +#include "gen9_pack.h" + +#include "genX_pipeline_util.h" + +static void +emit_ia_state(struct anv_pipeline *pipeline, + const VkPipelineInputAssemblyStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) +{ + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_TOPOLOGY), + .PrimitiveTopologyType = pipeline->topology); +} + +static void +emit_rs_state(struct anv_pipeline *pipeline, + const VkPipelineRasterizationStateCreateInfo *info, + const VkPipelineMultisampleStateCreateInfo *ms_info, + const struct anv_graphics_pipeline_create_info *extra) +{ + uint32_t samples = 1; + + if (ms_info) + samples = ms_info->rasterizationSamples; + + struct GENX(3DSTATE_SF) sf = { + GENX(3DSTATE_SF_header), + .ViewportTransformEnable = !(extra && extra->disable_viewport), + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + .PointWidthSource = pipeline->writes_point_size ? Vertex : State, + .PointWidth = 1.0, + }; + + /* FINISHME: VkBool32 rasterizerDiscardEnable; */ + + GENX(3DSTATE_SF_pack)(NULL, pipeline->gen8.sf, &sf); + + struct GENX(3DSTATE_RASTER) raster = { + GENX(3DSTATE_RASTER_header), + + /* For details on 3DSTATE_RASTER multisample state, see the BSpec table + * "Multisample Modes State". + */ + .DXMultisampleRasterizationEnable = samples > 1, + .ForcedSampleCount = FSC_NUMRASTSAMPLES_0, + .ForceMultisampling = false, + + .FrontWinding = vk_to_gen_front_face[info->frontFace], + .CullMode = vk_to_gen_cullmode[info->cullMode], + .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], + .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], + .ScissorRectangleEnable = !(extra && extra->disable_scissor), +#if ANV_GEN == 8 + .ViewportZClipTestEnable = true, +#else + /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */ + .ViewportZFarClipTestEnable = true, + .ViewportZNearClipTestEnable = true, +#endif + }; + + GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gen8.raster, &raster); +} + +static void +emit_cb_state(struct anv_pipeline *pipeline, + const VkPipelineColorBlendStateCreateInfo *info, + const VkPipelineMultisampleStateCreateInfo *ms_info) +{ + struct anv_device *device = pipeline->device; + + uint32_t num_dwords = GENX(BLEND_STATE_length); + pipeline->blend_state = + anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); + + struct GENX(BLEND_STATE) blend_state = { + .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, + .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, + }; + + for (uint32_t i = 0; i < info->attachmentCount; i++) { + const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; + + if (a->srcColorBlendFactor != a->srcAlphaBlendFactor || + a->dstColorBlendFactor != a->dstAlphaBlendFactor || + a->colorBlendOp != a->alphaBlendOp) { + blend_state.IndependentAlphaBlendEnable = true; + } + + blend_state.Entry[i] = (struct GENX(BLEND_STATE_ENTRY)) { + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + .ColorBufferBlendEnable = a->blendEnable, + .PreBlendSourceOnlyClampEnable = false, + .ColorClampRange = COLORCLAMP_RTFORMAT, + .PreBlendColorClampEnable = true, + .PostBlendColorClampEnable = true, + .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], + .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], + .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], + .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], + .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), + .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), + .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), + .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), + }; + + /* Our hardware applies the blend factor prior to the blend function + * regardless of what function is used. Technically, this means the + * hardware can do MORE than GL or Vulkan specify. However, it also + * means that, for MIN and MAX, we have to stomp the blend factor to + * ONE to make it a no-op. + */ + if (a->colorBlendOp == VK_BLEND_OP_MIN || + a->colorBlendOp == VK_BLEND_OP_MAX) { + blend_state.Entry[i].SourceBlendFactor = BLENDFACTOR_ONE; + blend_state.Entry[i].DestinationBlendFactor = BLENDFACTOR_ONE; + } + if (a->alphaBlendOp == VK_BLEND_OP_MIN || + a->alphaBlendOp == VK_BLEND_OP_MAX) { + blend_state.Entry[i].SourceAlphaBlendFactor = BLENDFACTOR_ONE; + blend_state.Entry[i].DestinationAlphaBlendFactor = BLENDFACTOR_ONE; + } + } + + GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); + if (!device->info.has_llc) + anv_state_clflush(pipeline->blend_state); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), + .BlendStatePointer = pipeline->blend_state.offset, + .BlendStatePointerValid = true); +} + +static void +emit_ds_state(struct anv_pipeline *pipeline, + const VkPipelineDepthStencilStateCreateInfo *info) +{ + uint32_t *dw = ANV_GEN == 8 ? + pipeline->gen8.wm_depth_stencil : pipeline->gen9.wm_depth_stencil; + + if (info == NULL) { + /* We're going to OR this together with the dynamic state. We need + * to make sure it's initialized to something useful. + */ + memset(pipeline->gen8.wm_depth_stencil, 0, + sizeof(pipeline->gen8.wm_depth_stencil)); + memset(pipeline->gen9.wm_depth_stencil, 0, + sizeof(pipeline->gen9.wm_depth_stencil)); + return; + } + + /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */ + + struct GENX(3DSTATE_WM_DEPTH_STENCIL) wm_depth_stencil = { + .DepthTestEnable = info->depthTestEnable, + .DepthBufferWriteEnable = info->depthWriteEnable, + .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], + .DoubleSidedStencilEnable = true, + + .StencilTestEnable = info->stencilTestEnable, + .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp], + .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp], + .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp], + .StencilTestFunction = vk_to_gen_compare_op[info->front.compareOp], + .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.failOp], + .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.passOp], + .BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info->back.depthFailOp], + .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp], + }; + + GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dw, &wm_depth_stencil); +} + +static void +emit_ms_state(struct anv_pipeline *pipeline, + const VkPipelineMultisampleStateCreateInfo *info) +{ + uint32_t samples = 1; + uint32_t log2_samples = 0; + + /* From the Vulkan 1.0 spec: + * If pSampleMask is NULL, it is treated as if the mask has all bits + * enabled, i.e. no coverage is removed from fragments. + * + * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits. + */ + uint32_t sample_mask = 0xffff; + + if (info) { + samples = info->rasterizationSamples; + log2_samples = __builtin_ffs(samples) - 1; + } + + if (info && info->pSampleMask) + sample_mask &= info->pSampleMask[0]; + + if (info && info->sampleShadingEnable) + anv_finishme("VkPipelineMultisampleStateCreateInfo::sampleShadingEnable"); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), + + /* The PRM says that this bit is valid only for DX9: + * + * SW can choose to set this bit only for DX9 API. DX10/OGL API's + * should not have any effect by setting or not setting this bit. + */ + .PixelPositionOffsetEnable = false, + + .PixelLocation = CENTER, + .NumberofMultisamples = log2_samples); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), + .SampleMask = sample_mask); + + /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and + * VkPhysicalDeviceFeatures::standardSampleLocations. + */ + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_PATTERN), + ._1xSample0XOffset = 0.5, + ._1xSample0YOffset = 0.5, + ._2xSample0XOffset = 0.25, + ._2xSample0YOffset = 0.25, + ._2xSample1XOffset = 0.75, + ._2xSample1YOffset = 0.75, + ._4xSample0XOffset = 0.375, + ._4xSample0YOffset = 0.125, + ._4xSample1XOffset = 0.875, + ._4xSample1YOffset = 0.375, + ._4xSample2XOffset = 0.125, + ._4xSample2YOffset = 0.625, + ._4xSample3XOffset = 0.625, + ._4xSample3YOffset = 0.875, + ._8xSample0XOffset = 0.5625, + ._8xSample0YOffset = 0.3125, + ._8xSample1XOffset = 0.4375, + ._8xSample1YOffset = 0.6875, + ._8xSample2XOffset = 0.8125, + ._8xSample2YOffset = 0.5625, + ._8xSample3XOffset = 0.3125, + ._8xSample3YOffset = 0.1875, + ._8xSample4XOffset = 0.1875, + ._8xSample4YOffset = 0.8125, + ._8xSample5XOffset = 0.0625, + ._8xSample5YOffset = 0.4375, + ._8xSample6XOffset = 0.6875, + ._8xSample6YOffset = 0.9375, + ._8xSample7XOffset = 0.9375, + ._8xSample7YOffset = 0.0625, +#if ANV_GEN >= 9 + ._16xSample0XOffset = 0.5625, + ._16xSample0YOffset = 0.5625, + ._16xSample1XOffset = 0.4375, + ._16xSample1YOffset = 0.3125, + ._16xSample2XOffset = 0.3125, + ._16xSample2YOffset = 0.6250, + ._16xSample3XOffset = 0.7500, + ._16xSample3YOffset = 0.4375, + ._16xSample4XOffset = 0.1875, + ._16xSample4YOffset = 0.3750, + ._16xSample5XOffset = 0.6250, + ._16xSample5YOffset = 0.8125, + ._16xSample6XOffset = 0.8125, + ._16xSample6YOffset = 0.6875, + ._16xSample7XOffset = 0.6875, + ._16xSample7YOffset = 0.1875, + ._16xSample8XOffset = 0.3750, + ._16xSample8YOffset = 0.8750, + ._16xSample9XOffset = 0.5000, + ._16xSample9YOffset = 0.0625, + ._16xSample10XOffset = 0.2500, + ._16xSample10YOffset = 0.1250, + ._16xSample11XOffset = 0.1250, + ._16xSample11YOffset = 0.7500, + ._16xSample12XOffset = 0.0000, + ._16xSample12YOffset = 0.5000, + ._16xSample13XOffset = 0.9375, + ._16xSample13YOffset = 0.2500, + ._16xSample14XOffset = 0.8750, + ._16xSample14YOffset = 0.9375, + ._16xSample15XOffset = 0.0625, + ._16xSample15YOffset = 0.0000, +#endif + ); +} + +VkResult +genX(graphics_pipeline_create)( + VkDevice _device, + struct anv_pipeline_cache * cache, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + uint32_t offset, length; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_pipeline_init(pipeline, device, cache, + pCreateInfo, extra, pAllocator); + if (result != VK_SUCCESS) { + anv_free2(&device->alloc, pAllocator, pipeline); + return result; + } + + assert(pCreateInfo->pVertexInputState); + emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra); + assert(pCreateInfo->pInputAssemblyState); + emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra); + assert(pCreateInfo->pRasterizationState); + emit_rs_state(pipeline, pCreateInfo->pRasterizationState, + pCreateInfo->pMultisampleState, extra); + emit_ms_state(pipeline, pCreateInfo->pMultisampleState); + emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); + emit_cb_state(pipeline, pCreateInfo->pColorBlendState, + pCreateInfo->pMultisampleState); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_STATISTICS), + .StatisticsEnable = true); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_HS), .Enable = false); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_TE), .TEEnable = false); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_DS), .FunctionEnable = false); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), + .ConstantBufferOffset = 0, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_GS), + .ConstantBufferOffset = 4, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS), + .ConstantBufferOffset = 8, + .ConstantBufferSize = 4); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM_CHROMAKEY), + .ChromaKeyKillEnable = false); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_AA_LINE_PARAMETERS)); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), + .ClipEnable = true, + .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + .MinimumPointWidth = 0.125, + .MaximumPointWidth = 255.875, + .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), + .StatisticsEnable = true, + .LineEndCapAntialiasingRegionWidth = _05pixels, + .LineAntialiasingRegionWidth = _10pixels, + .EarlyDepthStencilControl = NORMAL, + .ForceThreadDispatchEnable = NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT, + .BarycentricInterpolationMode = + pipeline->ps_ksp0 == NO_KERNEL ? + 0 : pipeline->wm_prog_data.barycentric_interp_modes); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS), + .VSURBStartingAddress = pipeline->urb.vs_start, + .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, + .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_GS), + .GSURBStartingAddress = pipeline->urb.gs_start, + .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, + .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_HS), + .HSURBStartingAddress = pipeline->urb.vs_start, + .HSURBEntryAllocationSize = 0, + .HSNumberofURBEntries = 0); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_DS), + .DSURBStartingAddress = pipeline->urb.vs_start, + .DSURBEntryAllocationSize = 0, + .DSNumberofURBEntries = 0); + + const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; + offset = 1; + length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; + + if (pipeline->gs_kernel == NO_KERNEL) + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .Enable = false); + else + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), + .SingleProgramFlow = false, + .KernelStartPointer = pipeline->gs_kernel, + .VectorMaskEnable = Dmask, + .SamplerCount = 0, + .BindingTableEntryCount = 0, + .ExpectedVertexCount = pipeline->gs_vertex_count, + + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY], + .PerThreadScratchSpace = ffs(gs_prog_data->base.base.total_scratch / 2048), + + .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, + .OutputTopology = gs_prog_data->output_topology, + .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, + .IncludeVertexHandles = gs_prog_data->base.include_vue_handles, + .DispatchGRFStartRegisterForURBData = + gs_prog_data->base.base.dispatch_grf_start_reg, + + .MaximumNumberofThreads = device->info.max_gs_threads / 2 - 1, + .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, + .DispatchMode = gs_prog_data->base.dispatch_mode, + .StatisticsEnable = true, + .IncludePrimitiveID = gs_prog_data->include_primitive_id, + .ReorderMode = TRAILING, + .Enable = true, + + .ControlDataFormat = gs_prog_data->control_data_format, + + .StaticOutput = gs_prog_data->static_vertex_count >= 0, + .StaticOutputVertexCount = + gs_prog_data->static_vertex_count >= 0 ? + gs_prog_data->static_vertex_count : 0, + + /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled: + * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v) + * UserClipDistanceCullTestEnableBitmask(v) + */ + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length); + + const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + /* Skip the VUE header and position slots */ + offset = 1; + length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; + + uint32_t vs_start = pipeline->vs_simd8 != NO_KERNEL ? pipeline->vs_simd8 : + pipeline->vs_vec4; + + if (vs_start == NO_KERNEL || (extra && extra->disable_vs)) + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), + .FunctionEnable = false, + /* Even if VS is disabled, SBE still gets the amount of + * vertex data to read from this field. */ + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length); + else + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), + .KernelStartPointer = vs_start, + .SingleVertexDispatch = Multiple, + .VectorMaskEnable = Dmask, + .SamplerCount = 0, + .BindingTableEntryCount = + vue_prog_data->base.binding_table.size_bytes / 4, + .ThreadDispatchPriority = Normal, + .FloatingPointMode = IEEE754, + .IllegalOpcodeExceptionEnable = false, + .AccessesUAV = false, + .SoftwareExceptionEnable = false, + + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_VERTEX], + .PerThreadScratchSpace = ffs(vue_prog_data->base.total_scratch / 2048), + + .DispatchGRFStartRegisterForURBData = + vue_prog_data->base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + .VertexURBEntryReadOffset = 0, + + .MaximumNumberofThreads = device->info.max_vs_threads - 1, + .StatisticsEnable = false, + .SIMD8DispatchEnable = pipeline->vs_simd8 != NO_KERNEL, + .VertexCacheDisable = false, + .FunctionEnable = true, + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length, + .UserClipDistanceClipTestEnableBitmask = 0, + .UserClipDistanceCullTestEnableBitmask = 0); + + const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + + const int num_thread_bias = ANV_GEN == 8 ? 2 : 1; + if (pipeline->ps_ksp0 == NO_KERNEL) { + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), + .PixelShaderValid = false); + } else { + /* TODO: We should clean this up. Among other things, this is mostly + * shared with other gens. + */ + const struct brw_vue_map *fs_input_map; + if (pipeline->gs_kernel == NO_KERNEL) + fs_input_map = &vue_prog_data->vue_map; + else + fs_input_map = &gs_prog_data->base.vue_map; + + struct GENX(3DSTATE_SBE_SWIZ) swiz = { + GENX(3DSTATE_SBE_SWIZ_header), + }; + + int max_source_attr = 0; + for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { + int input_index = wm_prog_data->urb_setup[attr]; + + if (input_index < 0) + continue; + + int source_attr = fs_input_map->varying_to_slot[attr]; + max_source_attr = MAX2(max_source_attr, source_attr); + + if (input_index >= 16) + continue; + + if (source_attr == -1) { + /* This attribute does not exist in the VUE--that means that the + * vertex shader did not write to it. It could be that it's a + * regular varying read by the fragment shader but not written by + * the vertex shader or it's gl_PrimitiveID. In the first case the + * value is undefined, in the second it needs to be + * gl_PrimitiveID. + */ + swiz.Attribute[input_index].ConstantSource = PRIM_ID; + swiz.Attribute[input_index].ComponentOverrideX = true; + swiz.Attribute[input_index].ComponentOverrideY = true; + swiz.Attribute[input_index].ComponentOverrideZ = true; + swiz.Attribute[input_index].ComponentOverrideW = true; + } else { + /* We have to subtract two slots to accout for the URB entry output + * read offset in the VS and GS stages. + */ + swiz.Attribute[input_index].SourceAttribute = source_attr - 2; + } + } + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), + .AttributeSwizzleEnable = true, + .ForceVertexURBEntryReadLength = false, + .ForceVertexURBEntryReadOffset = false, + .VertexURBEntryReadLength = + DIV_ROUND_UP(max_source_attr + 1, 2), + .PointSpriteTextureCoordinateOrigin = UPPERLEFT, + .NumberofSFOutputAttributes = + wm_prog_data->num_varying_inputs, + +#if ANV_GEN >= 9 + .Attribute0ActiveComponentFormat = ACF_XYZW, + .Attribute1ActiveComponentFormat = ACF_XYZW, + .Attribute2ActiveComponentFormat = ACF_XYZW, + .Attribute3ActiveComponentFormat = ACF_XYZW, + .Attribute4ActiveComponentFormat = ACF_XYZW, + .Attribute5ActiveComponentFormat = ACF_XYZW, + .Attribute6ActiveComponentFormat = ACF_XYZW, + .Attribute7ActiveComponentFormat = ACF_XYZW, + .Attribute8ActiveComponentFormat = ACF_XYZW, + .Attribute9ActiveComponentFormat = ACF_XYZW, + .Attribute10ActiveComponentFormat = ACF_XYZW, + .Attribute11ActiveComponentFormat = ACF_XYZW, + .Attribute12ActiveComponentFormat = ACF_XYZW, + .Attribute13ActiveComponentFormat = ACF_XYZW, + .Attribute14ActiveComponentFormat = ACF_XYZW, + .Attribute15ActiveComponentFormat = ACF_XYZW, + /* wow, much field, very attribute */ + .Attribute16ActiveComponentFormat = ACF_XYZW, + .Attribute17ActiveComponentFormat = ACF_XYZW, + .Attribute18ActiveComponentFormat = ACF_XYZW, + .Attribute19ActiveComponentFormat = ACF_XYZW, + .Attribute20ActiveComponentFormat = ACF_XYZW, + .Attribute21ActiveComponentFormat = ACF_XYZW, + .Attribute22ActiveComponentFormat = ACF_XYZW, + .Attribute23ActiveComponentFormat = ACF_XYZW, + .Attribute24ActiveComponentFormat = ACF_XYZW, + .Attribute25ActiveComponentFormat = ACF_XYZW, + .Attribute26ActiveComponentFormat = ACF_XYZW, + .Attribute27ActiveComponentFormat = ACF_XYZW, + .Attribute28ActiveComponentFormat = ACF_XYZW, + .Attribute29ActiveComponentFormat = ACF_XYZW, + .Attribute28ActiveComponentFormat = ACF_XYZW, + .Attribute29ActiveComponentFormat = ACF_XYZW, + .Attribute30ActiveComponentFormat = ACF_XYZW, +#endif + ); + + uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch, + GENX(3DSTATE_SBE_SWIZ_length)); + GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), + .KernelStartPointer0 = pipeline->ps_ksp0, + + .SingleProgramFlow = false, + .VectorMaskEnable = true, + .SamplerCount = 1, + + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], + .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048), + + .MaximumNumberofThreadsPerPSD = 64 - num_thread_bias, + .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? + POSOFFSET_SAMPLE: POSOFFSET_NONE, + .PushConstantEnable = wm_prog_data->base.nr_params > 0, + ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, + ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, + ._32PixelDispatchEnable = false, + + .DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0, + .DispatchGRFStartRegisterForConstantSetupData1 = 0, + .DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2, + + .KernelStartPointer1 = 0, + .KernelStartPointer2 = pipeline->ps_ksp2); + + bool per_sample_ps = pCreateInfo->pMultisampleState && + pCreateInfo->pMultisampleState->sampleShadingEnable; + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), + .PixelShaderValid = true, + .PixelShaderKillsPixel = wm_prog_data->uses_kill, + .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .AttributeEnable = wm_prog_data->num_varying_inputs > 0, + .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, + .PixelShaderIsPerSample = per_sample_ps, +#if ANV_GEN >= 9 + .PixelShaderPullsBary = wm_prog_data->pulls_bary, + .InputCoverageMaskState = ICMS_NONE +#endif + ); + } + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c new file mode 100644 index 00000000000..c749cbfed0c --- /dev/null +++ b/src/vulkan/gen8_state.c @@ -0,0 +1,391 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +#include "gen8_pack.h" +#include "gen9_pack.h" + +#include "genX_state_util.h" + +static const uint32_t +isl_to_gen_multisample_layout[] = { + [ISL_MSAA_LAYOUT_NONE] = MSS, + [ISL_MSAA_LAYOUT_INTERLEAVED] = DEPTH_STENCIL, + [ISL_MSAA_LAYOUT_ARRAY] = MSS, +}; + +void +genX(fill_buffer_surface_state)(void *state, enum isl_format format, + uint32_t offset, uint32_t range, uint32_t stride) +{ + uint32_t num_elements = range / stride; + + struct GENX(RENDER_SURFACE_STATE) surface_state = { + .SurfaceType = SURFTYPE_BUFFER, + .SurfaceArray = false, + .SurfaceFormat = format, + .SurfaceVerticalAlignment = VALIGN4, + .SurfaceHorizontalAlignment = HALIGN4, + .TileMode = LINEAR, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .MemoryObjectControlState = GENX(MOCS), + .Height = ((num_elements - 1) >> 7) & 0x3fff, + .Width = (num_elements - 1) & 0x7f, + .Depth = ((num_elements - 1) >> 21) & 0x3f, + .SurfacePitch = stride - 1, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, + /* FIXME: We assume that the image must be bound at this time. */ + .SurfaceBaseAddress = { NULL, offset }, + }; + + GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); +} + +static const uint8_t anv_halign[] = { + [4] = HALIGN4, + [8] = HALIGN8, + [16] = HALIGN16, +}; + +static const uint8_t anv_valign[] = { + [4] = VALIGN4, + [8] = VALIGN8, + [16] = VALIGN16, +}; + +/** + * Get the values to pack into RENDER_SUFFACE_STATE.SurfaceHorizontalAlignment + * and SurfaceVerticalAlignment. + */ +static void +get_halign_valign(const struct isl_surf *surf, uint32_t *halign, uint32_t *valign) +{ + #if ANV_GENx10 >= 90 + if (isl_tiling_is_std_y(surf->tiling) || + surf->dim_layout == ISL_DIM_LAYOUT_GEN9_1D) { + /* The hardware ignores the alignment values. Anyway, the surface's + * true alignment is likely outside the enum range of HALIGN* and + * VALIGN*. + */ + *halign = 0; + *valign = 0; + } else { + /* In Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in units + * of surface elements (not pixels nor samples). For compressed formats, + * a "surface element" is defined as a compression block. For example, + * if SurfaceVerticalAlignment is VALIGN_4 and SurfaceFormat is an ETC2 + * format (ETC2 has a block height of 4), then the vertical alignment is + * 4 compression blocks or, equivalently, 16 pixels. + */ + struct isl_extent3d image_align_el + = isl_surf_get_image_alignment_el(surf); + + *halign = anv_halign[image_align_el.width]; + *valign = anv_valign[image_align_el.height]; + } + #else + /* Pre-Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in + * units of surface samples. For example, if SurfaceVerticalAlignment + * is VALIGN_4 and the surface is singlesampled, then for any surface + * format (compressed or not) the vertical alignment is + * 4 pixels. + */ + struct isl_extent3d image_align_sa + = isl_surf_get_image_alignment_sa(surf); + + *halign = anv_halign[image_align_sa.width]; + *valign = anv_valign[image_align_sa.height]; + #endif +} + +static uint32_t +get_qpitch(const struct isl_surf *surf) +{ + switch (surf->dim) { + default: + unreachable(!"bad isl_surf_dim"); + case ISL_SURF_DIM_1D: + #if ANV_GENx10 >= 90 + /* QPitch is usually expressed as rows of surface elements (where + * a surface element is an compression block or a single surface + * sample). Skylake 1D is an outlier. + * + * From the Skylake BSpec >> Memory Views >> Common Surface + * Formats >> Surface Layout and Tiling >> 1D Surfaces: + * + * Surface QPitch specifies the distance in pixels between array + * slices. + */ + return isl_surf_get_array_pitch_el(surf); + #else + return isl_surf_get_array_pitch_el_rows(surf); + #endif + case ISL_SURF_DIM_2D: + case ISL_SURF_DIM_3D: + #if ANV_GEN >= 9 + return isl_surf_get_array_pitch_el_rows(surf); + #else + /* From the Broadwell PRM for RENDER_SURFACE_STATE.QPitch + * + * "This field must be set to an integer multiple of the Surface + * Vertical Alignment. For compressed textures (BC*, FXT1, + * ETC*, and EAC* Surface Formats), this field is in units of + * rows in the uncompressed surface, and must be set to an + * integer multiple of the vertical alignment parameter "j" + * defined in the Common Surface Formats section." + */ + return isl_surf_get_array_pitch_sa_rows(surf); + #endif + } +} + +void +genX(fill_image_surface_state)(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage) +{ + assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)); + assert(util_is_power_of_two(usage)); + + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + bool is_storage = (usage == VK_IMAGE_USAGE_STORAGE_BIT); + struct anv_surface *surface = + anv_image_get_surface_for_aspect_mask(image, range->aspectMask); + + static const uint8_t isl_to_gen_tiling[] = { + [ISL_TILING_LINEAR] = LINEAR, + [ISL_TILING_X] = XMAJOR, + [ISL_TILING_Y0] = YMAJOR, + [ISL_TILING_Yf] = YMAJOR, + [ISL_TILING_Ys] = YMAJOR, + [ISL_TILING_W] = WMAJOR, + }; + + uint32_t halign, valign; + get_halign_valign(&surface->isl, &halign, &valign); + + struct GENX(RENDER_SURFACE_STATE) template = { + .SurfaceType = anv_surftype(image, pCreateInfo->viewType, is_storage), + .SurfaceArray = image->array_size > 1, + .SurfaceFormat = anv_surface_format(device, iview->format, is_storage), + .SurfaceVerticalAlignment = valign, + .SurfaceHorizontalAlignment = halign, + .TileMode = isl_to_gen_tiling[surface->isl.tiling], + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .CubeFaceEnablePositiveZ = 1, + .CubeFaceEnableNegativeZ = 1, + .CubeFaceEnablePositiveY = 1, + .CubeFaceEnableNegativeY = 1, + .CubeFaceEnablePositiveX = 1, + .CubeFaceEnableNegativeX = 1, + .MemoryObjectControlState = GENX(MOCS), + + /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in + * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have + * both Base Mip Level fields nonzero". + */ + .BaseMipLevel = 0.0, + + .SurfaceQPitch = get_qpitch(&surface->isl) >> 2, + .Height = iview->level_0_extent.height - 1, + .Width = iview->level_0_extent.width - 1, + .Depth = 0, /* TEMPLATE */ + .SurfacePitch = surface->isl.row_pitch - 1, + .RenderTargetViewExtent = 0, /* TEMPLATE */ + .MinimumArrayElement = 0, /* TEMPLATE */ + .MultisampledSurfaceStorageFormat = + isl_to_gen_multisample_layout[surface->isl.msaa_layout], + .NumberofMultisamples = ffs(surface->isl.samples) - 1, + .MultisamplePositionPaletteIndex = 0, /* UNUSED */ + .XOffset = 0, + .YOffset = 0, + + .MIPCountLOD = 0, /* TEMPLATE */ + .SurfaceMinLOD = 0, /* TEMPLATE */ + + .AuxiliarySurfaceMode = AUX_NONE, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ShaderChannelSelectRed = vk_to_gen_swizzle[iview->swizzle.r], + .ShaderChannelSelectGreen = vk_to_gen_swizzle[iview->swizzle.g], + .ShaderChannelSelectBlue = vk_to_gen_swizzle[iview->swizzle.b], + .ShaderChannelSelectAlpha = vk_to_gen_swizzle[iview->swizzle.a], + .ResourceMinLOD = 0.0, + .SurfaceBaseAddress = { NULL, iview->offset }, + }; + + switch (template.SurfaceType) { + case SURFTYPE_1D: + case SURFTYPE_2D: + template.MinimumArrayElement = range->baseArrayLayer; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: + * + * For SURFTYPE_1D, 2D, and CUBE: The range of this field is reduced + * by one for each increase from zero of Minimum Array Element. For + * example, if Minimum Array Element is set to 1024 on a 2D surface, + * the range of this field is reduced to [0,1023]. + * + * In other words, 'Depth' is the number of array layers. + */ + template.Depth = range->layerCount - 1; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: + * + * For Render Target and Typed Dataport 1D and 2D Surfaces: + * This field must be set to the same value as the Depth field. + */ + template.RenderTargetViewExtent = template.Depth; + break; + case SURFTYPE_CUBE: + template.MinimumArrayElement = range->baseArrayLayer; + /* Same as SURFTYPE_2D, but divided by 6 */ + template.Depth = range->layerCount / 6 - 1; + template.RenderTargetViewExtent = template.Depth; + break; + case SURFTYPE_3D: + template.MinimumArrayElement = range->baseArrayLayer; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: + * + * If the volume texture is MIP-mapped, this field specifies the + * depth of the base MIP level. + */ + template.Depth = image->extent.depth - 1; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: + * + * For Render Target and Typed Dataport 3D Surfaces: This field + * indicates the extent of the accessible 'R' coordinates minus 1 on + * the LOD currently being rendered to. + */ + template.RenderTargetViewExtent = iview->extent.depth - 1; + break; + default: + unreachable(!"bad SurfaceType"); + } + + if (usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + /* For render target surfaces, the hardware interprets field + * MIPCount/LOD as LOD. The Broadwell PRM says: + * + * MIPCountLOD defines the LOD that will be rendered into. + * SurfaceMinLOD is ignored. + */ + template.MIPCountLOD = range->baseMipLevel; + template.SurfaceMinLOD = 0; + } else { + /* For non render target surfaces, the hardware interprets field + * MIPCount/LOD as MIPCount. The range of levels accessible by the + * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. + */ + template.SurfaceMinLOD = range->baseMipLevel; + template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; + } + + GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); +} + +VkResult genX(CreateSampler)( + VkDevice _device, + const VkSamplerCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSampler* pSampler) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_sampler *sampler; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!sampler) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + uint32_t border_color_offset = device->border_colors.offset + + pCreateInfo->borderColor * 64; + + struct GENX(SAMPLER_STATE) sampler_state = { + .SamplerDisable = false, + .TextureBorderColorMode = DX10OGL, + .LODPreClampMode = CLAMP_OGL, +#if ANV_GEN == 8 + .BaseMipLevel = 0.0, +#endif + .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], + .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, pCreateInfo->anisotropyEnable), + .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter, pCreateInfo->anisotropyEnable), + .TextureLODBias = anv_clamp_f(pCreateInfo->mipLodBias, -16, 15.996), + .AnisotropicAlgorithm = EWAApproximation, + .MinLOD = anv_clamp_f(pCreateInfo->minLod, 0, 14), + .MaxLOD = anv_clamp_f(pCreateInfo->maxLod, 0, 14), + .ChromaKeyEnable = 0, + .ChromaKeyIndex = 0, + .ChromaKeyMode = 0, + .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], + .CubeSurfaceControlMode = OVERRIDE, + + .IndirectStatePointer = border_color_offset >> 6, + + .LODClampMagnificationMode = MIPNONE, + .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), + .RAddressMinFilterRoundingEnable = 0, + .RAddressMagFilterRoundingEnable = 0, + .VAddressMinFilterRoundingEnable = 0, + .VAddressMagFilterRoundingEnable = 0, + .UAddressMinFilterRoundingEnable = 0, + .UAddressMagFilterRoundingEnable = 0, + .TrilinearFilterQuality = 0, + .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, + .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], + .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], + .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], + }; + + GENX(SAMPLER_STATE_pack)(NULL, sampler->state, &sampler_state); + + *pSampler = anv_sampler_to_handle(sampler); + + return VK_SUCCESS; +} diff --git a/src/vulkan/gen9_pack.h b/src/vulkan/gen9_pack.h new file mode 100644 index 00000000000..df295f4900a --- /dev/null +++ b/src/vulkan/gen9_pack.h @@ -0,0 +1,9797 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +/* Instructions, enums and structures for SKL. + * + * This file has been generated, do not hand edit. + */ + +#pragma once + +#include <stdio.h> +#include <assert.h> + +#ifndef __gen_validate_value +#define __gen_validate_value(x) +#endif + +#ifndef __gen_field_functions +#define __gen_field_functions + +union __gen_value { + float f; + uint32_t dw; +}; + +static inline uint64_t +__gen_mbo(uint32_t start, uint32_t end) +{ + return (~0ull >> (64 - (end - start + 1))) << start; +} + +static inline uint64_t +__gen_field(uint64_t v, uint32_t start, uint32_t end) +{ + __gen_validate_value(v); +#if DEBUG + if (end - start + 1 < 64) + assert(v < 1ull << (end - start + 1)); +#endif + + return v << start; +} + +static inline uint64_t +__gen_fixed(float v, uint32_t start, uint32_t end, + bool is_signed, uint32_t fract_bits) +{ + __gen_validate_value(v); + + const float factor = (1 << fract_bits); + + float max, min; + if (is_signed) { + max = ((1 << (end - start)) - 1) / factor; + min = -(1 << (end - start)) / factor; + } else { + max = ((1 << (end - start + 1)) - 1) / factor; + min = 0.0f; + } + + if (v > max) + v = max; + else if (v < min) + v = min; + + int32_t int_val = roundf(v * factor); + + if (is_signed) + int_val &= (1 << (end - start + 1)) - 1; + + return int_val << start; +} + +static inline uint64_t +__gen_offset(uint64_t v, uint32_t start, uint32_t end) +{ + __gen_validate_value(v); +#if DEBUG + uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start; + + assert((v & ~mask) == 0); +#endif + + return v; +} + +static inline uint32_t +__gen_float(float v) +{ + __gen_validate_value(v); + return ((union __gen_value) { .f = (v) }).dw; +} + +#ifndef __gen_address_type +#error #define __gen_address_type before including this file +#endif + +#ifndef __gen_user_data +#error #define __gen_combine_address before including this file +#endif + +#endif + +#define GEN9_3DSTATE_URB_VS_length_bias 0x00000002 +#define GEN9_3DSTATE_URB_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 48, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_URB_VS_length 0x00000002 + +struct GEN9_3DSTATE_URB_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t VSURBStartingAddress; + uint32_t VSURBEntryAllocationSize; + uint32_t VSNumberofURBEntries; +}; + +static inline void +GEN9_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_URB_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->VSURBStartingAddress, 25, 31) | + __gen_field(values->VSURBEntryAllocationSize, 16, 24) | + __gen_field(values->VSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN9_3DSTATE_VS_length_bias 0x00000002 +#define GEN9_3DSTATE_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 16, \ + .DwordLength = 7 + +#define GEN9_3DSTATE_VS_length 0x00000009 + +#define __gen_prefix(name) GEN9_ ## name + +struct __gen_prefix(3DSTATE_VS) { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleVertexDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool AccessesUAV; + bool SoftwareExceptionEnable; + uint64_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + bool StatisticsEnable; + bool SIMD8DispatchEnable; + bool VertexCacheDisable; + bool FunctionEnable; + uint32_t VertexURBEntryOutputReadOffset; + uint32_t VertexURBEntryOutputLength; + uint32_t UserClipDistanceClipTestEnableBitmask; + uint32_t UserClipDistanceCullTestEnableBitmask; +}; + +static inline void +GEN9_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint64_t qw1 = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->SingleVertexDispatch, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->AccessesUAV, 12, 12) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + uint64_t qw4 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + dw[6] = + __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[7] = + __gen_field(values->MaximumNumberofThreads, 23, 31) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->SIMD8DispatchEnable, 2, 2) | + __gen_field(values->VertexCacheDisable, 1, 1) | + __gen_field(values->FunctionEnable, 0, 0) | + 0; + + dw[8] = + __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | + __gen_field(values->VertexURBEntryOutputLength, 16, 20) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + +} + +#define GEN9_GPGPU_CSR_BASE_ADDRESS_length_bias 0x00000002 +#define GEN9_GPGPU_CSR_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 1 + +#define GEN9_GPGPU_CSR_BASE_ADDRESS_length 0x00000003 + +struct GEN9_GPGPU_CSR_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GPGPUCSRBaseAddress; +}; + +static inline void +GEN9_GPGPU_CSR_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_GPGPU_CSR_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->GPGPUCSRBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + +} + +#define GEN9_MI_ATOMIC_length_bias 0x00000002 +#define GEN9_MI_ATOMIC_header \ + .CommandType = 0, \ + .MICommandOpcode = 47 + +#define GEN9_MI_ATOMIC_length 0x00000003 + +struct __gen_prefix(MI_ATOMIC) { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t MemoryType; + uint32_t PostSyncOperation; +#define DWORD 0 +#define QWORD 1 +#define OCTWORD 2 +#define RESERVED 3 + uint32_t DataSize; + uint32_t InlineData; + uint32_t CSSTALL; + uint32_t ReturnDataControl; + uint32_t ATOMICOPCODE; + uint32_t DwordLength; + __gen_address_type MemoryAddress; + uint32_t Operand1DataDword0; + uint32_t Operand2DataDword0; + uint32_t Operand1DataDword1; + uint32_t Operand2DataDword1; + uint32_t Operand1DataDword2; + uint32_t Operand2DataDword2; + uint32_t Operand1DataDword3; + uint32_t Operand2DataDword3; +}; + +static inline void +GEN9_MI_ATOMIC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_ATOMIC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->MemoryType, 22, 22) | + __gen_field(values->PostSyncOperation, 21, 21) | + __gen_field(values->DataSize, 19, 20) | + __gen_field(values->InlineData, 18, 18) | + __gen_field(values->CSSTALL, 17, 17) | + __gen_field(values->ReturnDataControl, 16, 16) | + __gen_field(values->ATOMICOPCODE, 8, 15) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->MemoryAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->Operand1DataDword0, 0, 31) | + 0; + + dw[4] = + __gen_field(values->Operand2DataDword0, 0, 31) | + 0; + + dw[5] = + __gen_field(values->Operand1DataDword1, 0, 31) | + 0; + + dw[6] = + __gen_field(values->Operand2DataDword1, 0, 31) | + 0; + + dw[7] = + __gen_field(values->Operand1DataDword2, 0, 31) | + 0; + + dw[8] = + __gen_field(values->Operand2DataDword2, 0, 31) | + 0; + + dw[9] = + __gen_field(values->Operand1DataDword3, 0, 31) | + 0; + + dw[10] = + __gen_field(values->Operand2DataDword3, 0, 31) | + 0; + +} + +#define GEN9_MI_BATCH_BUFFER_START_length_bias 0x00000002 +#define GEN9_MI_BATCH_BUFFER_START_header \ + .CommandType = 0, \ + .MICommandOpcode = 49, \ + .DwordLength = 1 + +#define GEN9_MI_BATCH_BUFFER_START_length 0x00000003 + +struct GEN9_MI_BATCH_BUFFER_START { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define Firstlevelbatch 0 +#define Secondlevelbatch 1 + uint32_t SecondLevelBatchBuffer; + bool AddOffsetEnable; + uint32_t PredicationEnable; + bool ResourceStreamerEnable; +#define ASI_GGTT 0 +#define ASI_PPGTT 1 + uint32_t AddressSpaceIndicator; + uint32_t DwordLength; + __gen_address_type BatchBufferStartAddress; +}; + +static inline void +GEN9_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_BATCH_BUFFER_START * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->SecondLevelBatchBuffer, 22, 22) | + __gen_field(values->AddOffsetEnable, 16, 16) | + __gen_field(values->PredicationEnable, 15, 15) | + __gen_field(values->ResourceStreamerEnable, 10, 10) | + __gen_field(values->AddressSpaceIndicator, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->BatchBufferStartAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + +} + +#define GEN9_MI_CONDITIONAL_BATCH_BUFFER_END_length_bias 0x00000002 +#define GEN9_MI_CONDITIONAL_BATCH_BUFFER_END_header\ + .CommandType = 0, \ + .MICommandOpcode = 54, \ + .UseGlobalGTT = 0, \ + .CompareSemaphore = 0, \ + .DwordLength = 2 + +#define GEN9_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000004 + +struct GEN9_MI_CONDITIONAL_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t CompareSemaphore; +#define CompareMaskModeDisabled 0 +#define CompareMaskModeEnabled 1 + uint32_t CompareMaskMode; + uint32_t DwordLength; + uint32_t CompareDataDword; + __gen_address_type CompareAddress; +}; + +static inline void +GEN9_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->CompareSemaphore, 21, 21) | + __gen_field(values->CompareMaskMode, 19, 19) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CompareDataDword, 0, 31) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->CompareAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN9_MI_FORCE_WAKEUP_length_bias 0x00000002 +#define GEN9_MI_FORCE_WAKEUP_header \ + .CommandType = 0, \ + .MICommandOpcode = 29, \ + .DwordLength = 0 + +#define GEN9_MI_FORCE_WAKEUP_length 0x00000002 + +struct GEN9_MI_FORCE_WAKEUP { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t MaskBits; + uint32_t ForceRenderAwake; + uint32_t ForceMediaAwake; +}; + +static inline void +GEN9_MI_FORCE_WAKEUP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_FORCE_WAKEUP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->MaskBits, 16, 31) | + __gen_field(values->ForceRenderAwake, 1, 1) | + __gen_field(values->ForceMediaAwake, 0, 0) | + 0; + +} + +#define GEN9_MI_LOAD_REGISTER_IMM_length_bias 0x00000002 +#define GEN9_MI_LOAD_REGISTER_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 34, \ + .DwordLength = 1 + +#define GEN9_MI_LOAD_REGISTER_IMM_length 0x00000003 + +struct GEN9_MI_LOAD_REGISTER_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t ByteWriteDisables; + uint32_t DwordLength; + uint32_t RegisterOffset; + uint32_t DataDWord; +}; + +static inline void +GEN9_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_LOAD_REGISTER_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ByteWriteDisables, 8, 11) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterOffset, 2, 22) | + 0; + + dw[2] = + __gen_field(values->DataDWord, 0, 31) | + 0; + +} + +#define GEN9_MI_LOAD_REGISTER_REG_length_bias 0x00000002 +#define GEN9_MI_LOAD_REGISTER_REG_header \ + .CommandType = 0, \ + .MICommandOpcode = 42, \ + .DwordLength = 1 + +#define GEN9_MI_LOAD_REGISTER_REG_length 0x00000003 + +struct GEN9_MI_LOAD_REGISTER_REG { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t SourceRegisterAddress; + uint32_t DestinationRegisterAddress; +}; + +static inline void +GEN9_MI_LOAD_REGISTER_REG_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_LOAD_REGISTER_REG * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SourceRegisterAddress, 2, 22) | + 0; + + dw[2] = + __gen_offset(values->DestinationRegisterAddress, 2, 22) | + 0; + +} + +#define GEN9_MI_SEMAPHORE_SIGNAL_length_bias 0x00000002 +#define GEN9_MI_SEMAPHORE_SIGNAL_header \ + .CommandType = 0, \ + .MICommandOpcode = 27, \ + .DwordLength = 0 + +#define GEN9_MI_SEMAPHORE_SIGNAL_length 0x00000002 + +struct GEN9_MI_SEMAPHORE_SIGNAL { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t PostSyncOperation; +#define RCS 0 +#define VCS0 1 +#define BCS 2 +#define VECS 3 +#define VCS1 4 + uint32_t TargetEngineSelect; + uint32_t DwordLength; + uint32_t TargetContextID; +}; + +static inline void +GEN9_MI_SEMAPHORE_SIGNAL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_SEMAPHORE_SIGNAL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->PostSyncOperation, 21, 21) | + __gen_field(values->TargetEngineSelect, 15, 17) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->TargetContextID, 0, 31) | + 0; + +} + +#define GEN9_MI_SEMAPHORE_WAIT_length_bias 0x00000002 +#define GEN9_MI_SEMAPHORE_WAIT_header \ + .CommandType = 0, \ + .MICommandOpcode = 28, \ + .DwordLength = 2 + +#define GEN9_MI_SEMAPHORE_WAIT_length 0x00000004 + +struct GEN9_MI_SEMAPHORE_WAIT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t MemoryType; +#define PollingMode 1 +#define SignalMode 0 + uint32_t WaitMode; +#define SAD_GREATER_THAN_SDD 0 +#define SAD_GREATER_THAN_OR_EQUAL_SDD 1 +#define SAD_LESS_THAN_SDD 2 +#define SAD_LESS_THAN_OR_EQUAL_SDD 3 +#define SAD_EQUAL_SDD 4 +#define SAD_NOT_EQUAL_SDD 5 + uint32_t CompareOperation; + uint32_t DwordLength; + uint32_t SemaphoreDataDword; + __gen_address_type SemaphoreAddress; +}; + +static inline void +GEN9_MI_SEMAPHORE_WAIT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_SEMAPHORE_WAIT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->MemoryType, 22, 22) | + __gen_field(values->WaitMode, 15, 15) | + __gen_field(values->CompareOperation, 12, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SemaphoreDataDword, 0, 31) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SemaphoreAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN9_MI_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN9_MI_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 32, \ + .DwordLength = 2 + +#define GEN9_MI_STORE_DATA_IMM_length 0x00000004 + +struct GEN9_MI_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + bool StoreQword; + uint32_t DwordLength; + __gen_address_type Address; + uint32_t CoreModeEnable; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN9_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->StoreQword, 21, 21) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->Address, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[4] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN9_MI_STORE_REGISTER_MEM_length_bias 0x00000002 +#define GEN9_MI_STORE_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 36, \ + .DwordLength = 2 + +#define GEN9_MI_STORE_REGISTER_MEM_length 0x00000004 + +struct GEN9_MI_STORE_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t PredicateEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN9_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_STORE_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->PredicateEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN9_PIPELINE_SELECT_length_bias 0x00000001 +#define GEN9_PIPELINE_SELECT_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4 + +#define GEN9_PIPELINE_SELECT_length 0x00000001 + +struct GEN9_PIPELINE_SELECT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t MaskBits; + uint32_t ForceMediaAwake; + uint32_t MediaSamplerDOPClockGateEnable; +#define _3D 0 +#define Media 1 +#define GPGPU 2 + uint32_t PipelineSelection; +}; + +static inline void +GEN9_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_PIPELINE_SELECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->MaskBits, 8, 15) | + __gen_field(values->ForceMediaAwake, 5, 5) | + __gen_field(values->MediaSamplerDOPClockGateEnable, 4, 4) | + __gen_field(values->PipelineSelection, 0, 1) | + 0; + +} + +#define GEN9_STATE_BASE_ADDRESS_length_bias 0x00000002 +#define GEN9_STATE_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 1, \ + .DwordLength = 17 + +#define GEN9_STATE_BASE_ADDRESS_length 0x00000013 + +#define GEN9_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 + +struct GEN9_MEMORY_OBJECT_CONTROL_STATE { + uint32_t IndextoMOCSTables; +}; + +static inline void +GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MEMORY_OBJECT_CONTROL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->IndextoMOCSTables, 1, 6) | + 0; + +} + +struct GEN9_STATE_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GeneralStateBaseAddress; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE GeneralStateMemoryObjectControlState; + bool GeneralStateBaseAddressModifyEnable; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE StatelessDataPortAccessMemoryObjectControlState; + __gen_address_type SurfaceStateBaseAddress; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE SurfaceStateMemoryObjectControlState; + bool SurfaceStateBaseAddressModifyEnable; + __gen_address_type DynamicStateBaseAddress; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE DynamicStateMemoryObjectControlState; + bool DynamicStateBaseAddressModifyEnable; + __gen_address_type IndirectObjectBaseAddress; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE IndirectObjectMemoryObjectControlState; + bool IndirectObjectBaseAddressModifyEnable; + __gen_address_type InstructionBaseAddress; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE InstructionMemoryObjectControlState; + bool InstructionBaseAddressModifyEnable; + uint32_t GeneralStateBufferSize; + bool GeneralStateBufferSizeModifyEnable; + uint32_t DynamicStateBufferSize; + bool DynamicStateBufferSizeModifyEnable; + uint32_t IndirectObjectBufferSize; + bool IndirectObjectBufferSizeModifyEnable; + uint32_t InstructionBufferSize; + bool InstructionBuffersizeModifyEnable; + __gen_address_type BindlessSurfaceStateBaseAddress; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE BindlessSurfaceStateMemoryObjectControlState; + bool BindlessSurfaceStateBaseAddressModifyEnable; + uint32_t BindlessSurfaceStateSize; +}; + +static inline void +GEN9_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_STATE_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_GeneralStateMemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_GeneralStateMemoryObjectControlState, &values->GeneralStateMemoryObjectControlState); + uint32_t dw1 = + __gen_field(dw_GeneralStateMemoryObjectControlState, 4, 10) | + __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + uint32_t dw_StatelessDataPortAccessMemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StatelessDataPortAccessMemoryObjectControlState, &values->StatelessDataPortAccessMemoryObjectControlState); + dw[3] = + __gen_field(dw_StatelessDataPortAccessMemoryObjectControlState, 16, 22) | + 0; + + uint32_t dw_SurfaceStateMemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceStateMemoryObjectControlState, &values->SurfaceStateMemoryObjectControlState); + uint32_t dw4 = + __gen_field(dw_SurfaceStateMemoryObjectControlState, 4, 10) | + __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw4 = + __gen_combine_address(data, &dw[4], values->SurfaceStateBaseAddress, dw4); + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + uint32_t dw_DynamicStateMemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DynamicStateMemoryObjectControlState, &values->DynamicStateMemoryObjectControlState); + uint32_t dw6 = + __gen_field(dw_DynamicStateMemoryObjectControlState, 4, 10) | + __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw6 = + __gen_combine_address(data, &dw[6], values->DynamicStateBaseAddress, dw6); + + dw[6] = qw6; + dw[7] = qw6 >> 32; + + uint32_t dw_IndirectObjectMemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_IndirectObjectMemoryObjectControlState, &values->IndirectObjectMemoryObjectControlState); + uint32_t dw8 = + __gen_field(dw_IndirectObjectMemoryObjectControlState, 4, 10) | + __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw8 = + __gen_combine_address(data, &dw[8], values->IndirectObjectBaseAddress, dw8); + + dw[8] = qw8; + dw[9] = qw8 >> 32; + + uint32_t dw_InstructionMemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_InstructionMemoryObjectControlState, &values->InstructionMemoryObjectControlState); + uint32_t dw10 = + __gen_field(dw_InstructionMemoryObjectControlState, 4, 10) | + __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw10 = + __gen_combine_address(data, &dw[10], values->InstructionBaseAddress, dw10); + + dw[10] = qw10; + dw[11] = qw10 >> 32; + + dw[12] = + __gen_field(values->GeneralStateBufferSize, 12, 31) | + __gen_field(values->GeneralStateBufferSizeModifyEnable, 0, 0) | + 0; + + dw[13] = + __gen_field(values->DynamicStateBufferSize, 12, 31) | + __gen_field(values->DynamicStateBufferSizeModifyEnable, 0, 0) | + 0; + + dw[14] = + __gen_field(values->IndirectObjectBufferSize, 12, 31) | + __gen_field(values->IndirectObjectBufferSizeModifyEnable, 0, 0) | + 0; + + dw[15] = + __gen_field(values->InstructionBufferSize, 12, 31) | + __gen_field(values->InstructionBuffersizeModifyEnable, 0, 0) | + 0; + + uint32_t dw_BindlessSurfaceStateMemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_BindlessSurfaceStateMemoryObjectControlState, &values->BindlessSurfaceStateMemoryObjectControlState); + uint32_t dw16 = + __gen_field(dw_BindlessSurfaceStateMemoryObjectControlState, 4, 10) | + __gen_field(values->BindlessSurfaceStateBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw16 = + __gen_combine_address(data, &dw[16], values->BindlessSurfaceStateBaseAddress, dw16); + + dw[16] = qw16; + dw[17] = qw16 >> 32; + + dw[18] = + __gen_field(values->BindlessSurfaceStateSize, 12, 31) | + 0; + +} + +#define GEN9_STATE_PREFETCH_length_bias 0x00000002 +#define GEN9_STATE_PREFETCH_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 3, \ + .DwordLength = 0 + +#define GEN9_STATE_PREFETCH_length 0x00000002 + +struct GEN9_STATE_PREFETCH { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type PrefetchPointer; + uint32_t PrefetchCount; +}; + +static inline void +GEN9_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_STATE_PREFETCH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->PrefetchCount, 0, 2) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->PrefetchPointer, dw1); + +} + +#define GEN9_STATE_SIP_length_bias 0x00000002 +#define GEN9_STATE_SIP_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2, \ + .DwordLength = 1 + +#define GEN9_STATE_SIP_length 0x00000003 + +struct GEN9_STATE_SIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t SystemInstructionPointer; +}; + +static inline void +GEN9_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_STATE_SIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint64_t qw1 = + __gen_offset(values->SystemInstructionPointer, 4, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + +} + +#define GEN9_3DPRIMITIVE_length_bias 0x00000002 +#define GEN9_3DPRIMITIVE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 3, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 5 + +#define GEN9_3DPRIMITIVE_length 0x00000007 + +struct GEN9_3DPRIMITIVE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool IndirectParameterEnable; + uint32_t UAVCoherencyRequired; + bool PredicateEnable; + uint32_t DwordLength; + bool EndOffsetEnable; +#define SEQUENTIAL 0 +#define RANDOM 1 + uint32_t VertexAccessType; + uint32_t PrimitiveTopologyType; + uint32_t VertexCountPerInstance; + uint32_t StartVertexLocation; + uint32_t InstanceCount; + uint32_t StartInstanceLocation; + uint32_t BaseVertexLocation; +}; + +static inline void +GEN9_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DPRIMITIVE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->UAVCoherencyRequired, 9, 9) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->EndOffsetEnable, 9, 9) | + __gen_field(values->VertexAccessType, 8, 8) | + __gen_field(values->PrimitiveTopologyType, 0, 5) | + 0; + + dw[2] = + __gen_field(values->VertexCountPerInstance, 0, 31) | + 0; + + dw[3] = + __gen_field(values->StartVertexLocation, 0, 31) | + 0; + + dw[4] = + __gen_field(values->InstanceCount, 0, 31) | + 0; + + dw[5] = + __gen_field(values->StartInstanceLocation, 0, 31) | + 0; + + dw[6] = + __gen_field(values->BaseVertexLocation, 0, 31) | + 0; + +} + +#define GEN9_3DSTATE_AA_LINE_PARAMETERS_length_bias 0x00000002 +#define GEN9_3DSTATE_AA_LINE_PARAMETERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 1 + +#define GEN9_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 + +struct GEN9_3DSTATE_AA_LINE_PARAMETERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float AAPointCoverageBias; + float AACoverageBias; + float AAPointCoverageSlope; + float AACoverageSlope; + float AAPointCoverageEndCapBias; + float AACoverageEndCapBias; + float AAPointCoverageEndCapSlope; + float AACoverageEndCapSlope; +}; + +static inline void +GEN9_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_AA_LINE_PARAMETERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AAPointCoverageBias * (1 << 8), 24, 31) | + __gen_field(values->AACoverageBias * (1 << 8), 16, 23) | + __gen_field(values->AAPointCoverageSlope * (1 << 8), 8, 15) | + __gen_field(values->AACoverageSlope * (1 << 8), 0, 7) | + 0; + + dw[2] = + __gen_field(values->AAPointCoverageEndCapBias * (1 << 8), 24, 31) | + __gen_field(values->AACoverageEndCapBias * (1 << 8), 16, 23) | + __gen_field(values->AAPointCoverageEndCapSlope * (1 << 8), 8, 15) | + __gen_field(values->AACoverageEndCapSlope * (1 << 8), 0, 7) | + 0; + +} + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_DS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 70 + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_DS_length 0x00000000 + +#define GEN9_BINDING_TABLE_EDIT_ENTRY_length 0x00000001 + +struct GEN9_BINDING_TABLE_EDIT_ENTRY { + uint32_t BindingTableIndex; + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN9_BINDING_TABLE_EDIT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_BINDING_TABLE_EDIT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BindingTableIndex, 16, 23) | + __gen_offset(values->SurfaceStatePointer, 0, 15) | + 0; + +} + +struct GEN9_3DSTATE_BINDING_TABLE_EDIT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_EDIT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_EDIT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_GS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 68 + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_GS_length 0x00000000 + +struct GEN9_3DSTATE_BINDING_TABLE_EDIT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_EDIT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_EDIT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_HS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 69 + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_HS_length 0x00000000 + +struct GEN9_3DSTATE_BINDING_TABLE_EDIT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_EDIT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_EDIT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_PS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 71 + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_PS_length 0x00000000 + +struct GEN9_3DSTATE_BINDING_TABLE_EDIT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_EDIT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_EDIT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_VS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 67 + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_VS_length 0x00000000 + +struct GEN9_3DSTATE_BINDING_TABLE_EDIT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_EDIT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_EDIT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 40, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 + +struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSBindingTable; +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSBindingTable, 5, 15) | + 0; + +} + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 41, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 + +struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSBindingTable; +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSBindingTable, 5, 15) | + 0; + +} + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 39, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 + +struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSBindingTable; +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSBindingTable, 5, 15) | + 0; + +} + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 42, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 + +struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSBindingTable; +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSBindingTable, 5, 15) | + 0; + +} + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 38, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 + +struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSBindingTable; +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSBindingTable, 5, 15) | + 0; + +} + +#define GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC_length_bias 0x00000002 +#define GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 2 + +#define GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC_length 0x00000004 + +struct GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type BindingTablePoolBaseAddress; + uint32_t BindingTablePoolEnable; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE SurfaceObjectControlState; +#define NoValidData 0 + uint32_t BindingTablePoolBufferSize; +}; + +static inline void +GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_SurfaceObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); + uint32_t dw1 = + __gen_field(values->BindingTablePoolEnable, 11, 11) | + __gen_field(dw_SurfaceObjectControlState, 0, 6) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->BindingTablePoolBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->BindingTablePoolBufferSize, 12, 31) | + 0; + +} + +#define GEN9_3DSTATE_BLEND_STATE_POINTERS_length_bias 0x00000002 +#define GEN9_3DSTATE_BLEND_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 36, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 + +struct GEN9_3DSTATE_BLEND_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BlendStatePointer; + bool BlendStatePointerValid; +}; + +static inline void +GEN9_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_BLEND_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->BlendStatePointer, 6, 31) | + __gen_field(values->BlendStatePointerValid, 0, 0) | + 0; + +} + +#define GEN9_3DSTATE_CC_STATE_POINTERS_length_bias 0x00000002 +#define GEN9_3DSTATE_CC_STATE_POINTERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 14, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_CC_STATE_POINTERS_length 0x00000002 + +struct GEN9_3DSTATE_CC_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ColorCalcStatePointer; + bool ColorCalcStatePointerValid; +}; + +static inline void +GEN9_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CC_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ColorCalcStatePointer, 6, 31) | + __gen_field(values->ColorCalcStatePointerValid, 0, 0) | + 0; + +} + +#define GEN9_3DSTATE_CHROMA_KEY_length_bias 0x00000002 +#define GEN9_3DSTATE_CHROMA_KEY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 2 + +#define GEN9_3DSTATE_CHROMA_KEY_length 0x00000004 + +struct GEN9_3DSTATE_CHROMA_KEY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ChromaKeyTableIndex; + uint32_t ChromaKeyLowValue; + uint32_t ChromaKeyHighValue; +}; + +static inline void +GEN9_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CHROMA_KEY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ChromaKeyTableIndex, 30, 31) | + 0; + + dw[2] = + __gen_field(values->ChromaKeyLowValue, 0, 31) | + 0; + + dw[3] = + __gen_field(values->ChromaKeyHighValue, 0, 31) | + 0; + +} + +#define GEN9_3DSTATE_CLEAR_PARAMS_length_bias 0x00000002 +#define GEN9_3DSTATE_CLEAR_PARAMS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 1 + +#define GEN9_3DSTATE_CLEAR_PARAMS_length 0x00000003 + +struct GEN9_3DSTATE_CLEAR_PARAMS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float DepthClearValue; + bool DepthClearValueValid; +}; + +static inline void +GEN9_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CLEAR_PARAMS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_float(values->DepthClearValue) | + 0; + + dw[2] = + __gen_field(values->DepthClearValueValid, 0, 0) | + 0; + +} + +#define GEN9_3DSTATE_CLIP_length_bias 0x00000002 +#define GEN9_3DSTATE_CLIP_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 2 + +#define GEN9_3DSTATE_CLIP_length 0x00000004 + +struct GEN9_3DSTATE_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define Normal 0 +#define Force 1 + bool ForceUserClipDistanceCullTestEnableBitmask; +#define _8Bit 0 +#define _4Bit 1 + uint32_t VertexSubPixelPrecisionSelect; + bool EarlyCullEnable; +#define Normal 0 +#define Force 1 + bool ForceUserClipDistanceClipTestEnableBitmask; +#define Normal 0 +#define Force 1 + bool ForceClipMode; + bool ClipperStatisticsEnable; + uint32_t UserClipDistanceCullTestEnableBitmask; + bool ClipEnable; +#define API_OGL 0 + uint32_t APIMode; + bool ViewportXYClipTestEnable; + bool GuardbandClipTestEnable; + uint32_t UserClipDistanceClipTestEnableBitmask; +#define NORMAL 0 +#define REJECT_ALL 3 +#define ACCEPT_ALL 4 + uint32_t ClipMode; + bool PerspectiveDivideDisable; + bool NonPerspectiveBarycentricEnable; + uint32_t TriangleStripListProvokingVertexSelect; + uint32_t LineStripListProvokingVertexSelect; + uint32_t TriangleFanProvokingVertexSelect; + float MinimumPointWidth; + float MaximumPointWidth; + bool ForceZeroRTAIndexEnable; + uint32_t MaximumVPIndex; +}; + +static inline void +GEN9_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ForceUserClipDistanceCullTestEnableBitmask, 20, 20) | + __gen_field(values->VertexSubPixelPrecisionSelect, 19, 19) | + __gen_field(values->EarlyCullEnable, 18, 18) | + __gen_field(values->ForceUserClipDistanceClipTestEnableBitmask, 17, 17) | + __gen_field(values->ForceClipMode, 16, 16) | + __gen_field(values->ClipperStatisticsEnable, 10, 10) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + + dw[2] = + __gen_field(values->ClipEnable, 31, 31) | + __gen_field(values->APIMode, 30, 30) | + __gen_field(values->ViewportXYClipTestEnable, 28, 28) | + __gen_field(values->GuardbandClipTestEnable, 26, 26) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 16, 23) | + __gen_field(values->ClipMode, 13, 15) | + __gen_field(values->PerspectiveDivideDisable, 9, 9) | + __gen_field(values->NonPerspectiveBarycentricEnable, 8, 8) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 4, 5) | + __gen_field(values->LineStripListProvokingVertexSelect, 2, 3) | + __gen_field(values->TriangleFanProvokingVertexSelect, 0, 1) | + 0; + + dw[3] = + __gen_field(values->MinimumPointWidth * (1 << 3), 17, 27) | + __gen_field(values->MaximumPointWidth * (1 << 3), 6, 16) | + __gen_field(values->ForceZeroRTAIndexEnable, 5, 5) | + __gen_field(values->MaximumVPIndex, 0, 3) | + 0; + +} + +#define GEN9_3DSTATE_CONSTANT_DS_length_bias 0x00000002 +#define GEN9_3DSTATE_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 9 + +#define GEN9_3DSTATE_CONSTANT_DS_length 0x0000000b + +#define GEN9_3DSTATE_CONSTANT_BODY_length 0x0000000a + +struct GEN9_3DSTATE_CONSTANT_BODY { + uint32_t ConstantBuffer1ReadLength; + uint32_t ConstantBuffer0ReadLength; + uint32_t ConstantBuffer3ReadLength; + uint32_t ConstantBuffer2ReadLength; + __gen_address_type PointerToConstantBuffer0; + __gen_address_type PointerToConstantBuffer1; + __gen_address_type PointerToConstantBuffer2; + __gen_address_type PointerToConstantBuffer3; +}; + +static inline void +GEN9_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CONSTANT_BODY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + uint32_t dw4 = + 0; + + uint64_t qw4 = + __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer1, dw4); + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + uint32_t dw6 = + 0; + + uint64_t qw6 = + __gen_combine_address(data, &dw[6], values->PointerToConstantBuffer2, dw6); + + dw[6] = qw6; + dw[7] = qw6 >> 32; + + uint32_t dw8 = + 0; + + uint64_t qw8 = + __gen_combine_address(data, &dw[8], values->PointerToConstantBuffer3, dw8); + + dw[8] = qw8; + dw[9] = qw8 >> 32; + +} + +struct GEN9_3DSTATE_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN9_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN9_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN9_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN9_3DSTATE_CONSTANT_GS_length_bias 0x00000002 +#define GEN9_3DSTATE_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 9 + +#define GEN9_3DSTATE_CONSTANT_GS_length 0x0000000b + +struct GEN9_3DSTATE_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN9_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN9_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CONSTANT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN9_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN9_3DSTATE_CONSTANT_HS_length_bias 0x00000002 +#define GEN9_3DSTATE_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 9 + +#define GEN9_3DSTATE_CONSTANT_HS_length 0x0000000b + +struct GEN9_3DSTATE_CONSTANT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN9_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN9_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CONSTANT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN9_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN9_3DSTATE_CONSTANT_PS_length_bias 0x00000002 +#define GEN9_3DSTATE_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 23, \ + .DwordLength = 9 + +#define GEN9_3DSTATE_CONSTANT_PS_length 0x0000000b + +struct GEN9_3DSTATE_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN9_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN9_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN9_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN9_3DSTATE_CONSTANT_VS_length_bias 0x00000002 +#define GEN9_3DSTATE_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 9 + +#define GEN9_3DSTATE_CONSTANT_VS_length 0x0000000b + +struct GEN9_3DSTATE_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN9_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN9_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN9_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN9_3DSTATE_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN9_3DSTATE_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 5, \ + .DwordLength = 6 + +#define GEN9_3DSTATE_DEPTH_BUFFER_length 0x00000008 + +struct GEN9_3DSTATE_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define SURFTYPE_2D 1 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + bool DepthWriteEnable; + bool StencilWriteEnable; + bool HierarchicalDepthBufferEnable; +#define D32_FLOAT 1 +#define D24_UNORM_X8_UINT 3 +#define D16_UNORM 5 + uint32_t SurfaceFormat; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t Height; + uint32_t Width; + uint32_t LOD; + uint32_t Depth; + uint32_t MinimumArrayElement; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE DepthBufferObjectControlState; +#define NONE 0 +#define TILEYF 1 +#define TILEYS 2 + uint32_t TiledResourceMode; + uint32_t MipTailStartLOD; + uint32_t RenderTargetViewExtent; + uint32_t SurfaceQPitch; +}; + +static inline void +GEN9_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->DepthWriteEnable, 28, 28) | + __gen_field(values->StencilWriteEnable, 27, 27) | + __gen_field(values->HierarchicalDepthBufferEnable, 22, 22) | + __gen_field(values->SurfaceFormat, 18, 20) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->Height, 18, 31) | + __gen_field(values->Width, 4, 17) | + __gen_field(values->LOD, 0, 3) | + 0; + + uint32_t dw_DepthBufferObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DepthBufferObjectControlState, &values->DepthBufferObjectControlState); + dw[5] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->MinimumArrayElement, 10, 20) | + __gen_field(dw_DepthBufferObjectControlState, 0, 6) | + 0; + + dw[6] = + __gen_field(values->TiledResourceMode, 30, 31) | + __gen_field(values->MipTailStartLOD, 26, 29) | + 0; + + dw[7] = + __gen_field(values->RenderTargetViewExtent, 21, 31) | + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + +} + +#define GEN9_3DSTATE_DRAWING_RECTANGLE_length_bias 0x00000002 +#define GEN9_3DSTATE_DRAWING_RECTANGLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 2 + +#define GEN9_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 + +struct GEN9_3DSTATE_DRAWING_RECTANGLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; +#define Legacy 0 +#define Core0Enabled 1 +#define Core1Enabled 2 + uint32_t CoreModeSelect; + uint32_t DwordLength; + uint32_t ClippedDrawingRectangleYMin; + uint32_t ClippedDrawingRectangleXMin; + uint32_t ClippedDrawingRectangleYMax; + uint32_t ClippedDrawingRectangleXMax; + uint32_t DrawingRectangleOriginY; + uint32_t DrawingRectangleOriginX; +}; + +static inline void +GEN9_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_DRAWING_RECTANGLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->CoreModeSelect, 14, 15) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ClippedDrawingRectangleYMin, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMin, 0, 15) | + 0; + + dw[2] = + __gen_field(values->ClippedDrawingRectangleYMax, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMax, 0, 15) | + 0; + + dw[3] = + __gen_field(values->DrawingRectangleOriginY, 16, 31) | + __gen_field(values->DrawingRectangleOriginX, 0, 15) | + 0; + +} + +#define GEN9_3DSTATE_DS_length_bias 0x00000002 +#define GEN9_3DSTATE_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 29, \ + .DwordLength = 9 + +#define GEN9_3DSTATE_DS_length 0x0000000b + +struct GEN9_3DSTATE_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t KernelStartPointer; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool AccessesUAV; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; + uint64_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t PatchURBEntryReadLength; + uint32_t PatchURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + bool StatisticsEnable; +#define SIMD4X2 0 +#define SIMD8_SINGLE_PATCH 1 +#define SIMD8_SINGLE_OR_DUAL_PATCH 2 + uint32_t DispatchMode; + bool ComputeWCoordinateEnable; + bool CacheDisable; + bool FunctionEnable; + uint32_t VertexURBEntryOutputReadOffset; + uint32_t VertexURBEntryOutputLength; + uint32_t UserClipDistanceClipTestEnableBitmask; + uint32_t UserClipDistanceCullTestEnableBitmask; + uint64_t DUAL_PATCHKernelStartPointer; +}; + +static inline void +GEN9_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint64_t qw1 = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->AccessesUAV, 14, 14) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + uint64_t qw4 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + dw[6] = + __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | + __gen_field(values->PatchURBEntryReadLength, 11, 17) | + __gen_field(values->PatchURBEntryReadOffset, 4, 9) | + 0; + + dw[7] = + __gen_field(values->MaximumNumberofThreads, 21, 29) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->DispatchMode, 3, 4) | + __gen_field(values->ComputeWCoordinateEnable, 2, 2) | + __gen_field(values->CacheDisable, 1, 1) | + __gen_field(values->FunctionEnable, 0, 0) | + 0; + + dw[8] = + __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | + __gen_field(values->VertexURBEntryOutputLength, 16, 20) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + + uint64_t qw9 = + __gen_offset(values->DUAL_PATCHKernelStartPointer, 6, 63) | + 0; + + dw[9] = qw9; + dw[10] = qw9 >> 32; + +} + +#define GEN9_3DSTATE_GATHER_CONSTANT_DS_length_bias 0x00000002 +#define GEN9_3DSTATE_GATHER_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 55 + +#define GEN9_3DSTATE_GATHER_CONSTANT_DS_length 0x00000000 + +#define GEN9_GATHER_CONSTANT_ENTRY_length 0x00000001 + +struct GEN9_GATHER_CONSTANT_ENTRY { + uint32_t ConstantBufferOffset; + uint32_t ChannelMask; + uint32_t BindingTableIndexOffset; +}; + +static inline void +GEN9_GATHER_CONSTANT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_GATHER_CONSTANT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->ConstantBufferOffset, 8, 15) | + __gen_field(values->ChannelMask, 4, 7) | + __gen_field(values->BindingTableIndexOffset, 0, 3) | + 0; + +} + +struct GEN9_3DSTATE_GATHER_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; +#define CommitGather 0 +#define NonCommitGather 1 + uint32_t UpdateGatherTableOnly; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; +#define Load 0 +#define Read 1 + uint32_t OnDieTable; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_GATHER_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_GATHER_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + __gen_field(values->UpdateGatherTableOnly, 1, 1) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + __gen_field(values->OnDieTable, 3, 3) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_GATHER_CONSTANT_GS_length_bias 0x00000002 +#define GEN9_3DSTATE_GATHER_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 53 + +#define GEN9_3DSTATE_GATHER_CONSTANT_GS_length 0x00000000 + +struct GEN9_3DSTATE_GATHER_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; +#define CommitGather 0 +#define NonCommitGather 1 + uint32_t UpdateGatherTableOnly; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; +#define Load 0 +#define Read 1 + uint32_t OnDieTable; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_GATHER_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_GATHER_CONSTANT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + __gen_field(values->UpdateGatherTableOnly, 1, 1) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + __gen_field(values->OnDieTable, 3, 3) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_GATHER_CONSTANT_HS_length_bias 0x00000002 +#define GEN9_3DSTATE_GATHER_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 54 + +#define GEN9_3DSTATE_GATHER_CONSTANT_HS_length 0x00000000 + +struct GEN9_3DSTATE_GATHER_CONSTANT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; +#define CommitGather 0 +#define NonCommitGather 1 + uint32_t UpdateGatherTableOnly; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; +#define Load 0 +#define Read 1 + uint32_t OnDieTable; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_GATHER_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_GATHER_CONSTANT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + __gen_field(values->UpdateGatherTableOnly, 1, 1) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + __gen_field(values->OnDieTable, 3, 3) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_GATHER_CONSTANT_PS_length_bias 0x00000002 +#define GEN9_3DSTATE_GATHER_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 56 + +#define GEN9_3DSTATE_GATHER_CONSTANT_PS_length 0x00000000 + +struct GEN9_3DSTATE_GATHER_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; +#define CommitGather 0 +#define NonCommitGather 1 + uint32_t UpdateGatherTableOnly; + bool DX9OnDieRegisterReadEnable; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; + bool ConstantBufferDx9Enable; +#define Load 0 +#define Read 1 + uint32_t OnDieTable; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_GATHER_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_GATHER_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + __gen_field(values->UpdateGatherTableOnly, 1, 1) | + __gen_field(values->DX9OnDieRegisterReadEnable, 0, 0) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + __gen_field(values->ConstantBufferDx9Enable, 4, 4) | + __gen_field(values->OnDieTable, 3, 3) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_GATHER_CONSTANT_VS_length_bias 0x00000002 +#define GEN9_3DSTATE_GATHER_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 52 + +#define GEN9_3DSTATE_GATHER_CONSTANT_VS_length 0x00000000 + +struct GEN9_3DSTATE_GATHER_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; +#define CommitGather 0 +#define NonCommitGather 1 + uint32_t UpdateGatherTableOnly; + bool DX9OnDieRegisterReadEnable; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; + bool ConstantBufferDx9Enable; +#define Load 0 +#define Read 1 + uint32_t OnDieTable; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_GATHER_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_GATHER_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + __gen_field(values->UpdateGatherTableOnly, 1, 1) | + __gen_field(values->DX9OnDieRegisterReadEnable, 0, 0) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + __gen_field(values->ConstantBufferDx9Enable, 4, 4) | + __gen_field(values->OnDieTable, 3, 3) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_GATHER_POOL_ALLOC_length_bias 0x00000002 +#define GEN9_3DSTATE_GATHER_POOL_ALLOC_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 2 + +#define GEN9_3DSTATE_GATHER_POOL_ALLOC_length 0x00000004 + +struct GEN9_3DSTATE_GATHER_POOL_ALLOC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GatherPoolBaseAddress; + bool GatherPoolEnable; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; + uint32_t GatherPoolBufferSize; +}; + +static inline void +GEN9_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_GATHER_POOL_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_MemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + uint32_t dw1 = + __gen_field(values->GatherPoolEnable, 11, 11) | + __gen_field(dw_MemoryObjectControlState, 0, 6) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->GatherPoolBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->GatherPoolBufferSize, 12, 31) | + 0; + +} + +#define GEN9_3DSTATE_GS_length_bias 0x00000002 +#define GEN9_3DSTATE_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 8 + +#define GEN9_3DSTATE_GS_length 0x0000000a + +struct GEN9_3DSTATE_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t KernelStartPointer; + uint32_t SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool AccessesUAV; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t ExpectedVertexCount; + uint64_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData54; + uint32_t OutputVertexSize; + uint32_t OutputTopology; + uint32_t VertexURBEntryReadLength; + bool IncludeVertexHandles; + uint32_t VertexURBEntryReadOffset; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t ControlDataHeaderSize; + uint32_t InstanceControl; + uint32_t DefaultStreamId; +#define DispatchModeSingle 0 +#define DispatchModeDualInstance 1 +#define DispatchModeDualObject 2 +#define DispatchModeSIMD8 3 + uint32_t DispatchMode; + bool StatisticsEnable; + uint32_t InvocationsIncrementValue; + bool IncludePrimitiveID; + uint32_t Hint; +#define LEADING 0 +#define TRAILING 1 + uint32_t ReorderMode; + bool DiscardAdjacency; + bool Enable; +#define CUT 0 +#define SID 1 + uint32_t ControlDataFormat; + bool StaticOutput; + uint32_t StaticOutputVertexCount; + uint32_t MaximumNumberofThreads; + uint32_t VertexURBEntryOutputReadOffset; + uint32_t VertexURBEntryOutputLength; + uint32_t UserClipDistanceClipTestEnableBitmask; + uint32_t UserClipDistanceCullTestEnableBitmask; +}; + +static inline void +GEN9_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint64_t qw1 = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->SingleProgramFlow, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->AccessesUAV, 12, 12) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + __gen_field(values->ExpectedVertexCount, 0, 5) | + 0; + + uint64_t qw4 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + dw[6] = + __gen_field(values->DispatchGRFStartRegisterForURBData54, 29, 30) | + __gen_field(values->OutputVertexSize, 23, 28) | + __gen_field(values->OutputTopology, 17, 22) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->IncludeVertexHandles, 10, 10) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + __gen_field(values->DispatchGRFStartRegisterForURBData, 0, 3) | + 0; + + dw[7] = + __gen_field(values->ControlDataHeaderSize, 20, 23) | + __gen_field(values->InstanceControl, 15, 19) | + __gen_field(values->DefaultStreamId, 13, 14) | + __gen_field(values->DispatchMode, 11, 12) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->InvocationsIncrementValue, 5, 9) | + __gen_field(values->IncludePrimitiveID, 4, 4) | + __gen_field(values->Hint, 3, 3) | + __gen_field(values->ReorderMode, 2, 2) | + __gen_field(values->DiscardAdjacency, 1, 1) | + __gen_field(values->Enable, 0, 0) | + 0; + + dw[8] = + __gen_field(values->ControlDataFormat, 31, 31) | + __gen_field(values->StaticOutput, 30, 30) | + __gen_field(values->StaticOutputVertexCount, 16, 26) | + __gen_field(values->MaximumNumberofThreads, 0, 8) | + 0; + + dw[9] = + __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | + __gen_field(values->VertexURBEntryOutputLength, 16, 20) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + +} + +#define GEN9_3DSTATE_HIER_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN9_3DSTATE_HIER_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 3 + +#define GEN9_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000005 + +struct GEN9_3DSTATE_HIER_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE HierarchicalDepthBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t SurfaceQPitch; +}; + +static inline void +GEN9_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_HIER_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_HierarchicalDepthBufferObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_HierarchicalDepthBufferObjectControlState, &values->HierarchicalDepthBufferObjectControlState); + dw[1] = + __gen_field(dw_HierarchicalDepthBufferObjectControlState, 25, 31) | + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + +} + +#define GEN9_3DSTATE_HS_length_bias 0x00000002 +#define GEN9_3DSTATE_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 27, \ + .DwordLength = 7 + +#define GEN9_3DSTATE_HS_length 0x00000009 + +struct GEN9_3DSTATE_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; + bool Enable; + bool StatisticsEnable; + uint32_t MaximumNumberofThreads; + uint32_t InstanceCount; + uint64_t KernelStartPointer; + uint64_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData5; + bool SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; + bool AccessesUAV; + bool IncludeVertexHandles; + uint32_t DispatchGRFStartRegisterForURBData; +#define SINGLE_PATCH 0 +#define DUAL_PATCH 1 +#define _8_PATCH 2 + uint32_t DispatchMode; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + bool IncludePrimitiveID; +}; + +static inline void +GEN9_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 12, 12) | + 0; + + dw[2] = + __gen_field(values->Enable, 31, 31) | + __gen_field(values->StatisticsEnable, 29, 29) | + __gen_field(values->MaximumNumberofThreads, 8, 16) | + __gen_field(values->InstanceCount, 0, 3) | + 0; + + uint64_t qw3 = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[3] = qw3; + dw[4] = qw3 >> 32; + + uint64_t qw5 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[5] = qw5; + dw[6] = qw5 >> 32; + + dw[7] = + __gen_field(values->DispatchGRFStartRegisterForURBData5, 28, 28) | + __gen_field(values->SingleProgramFlow, 27, 27) | + __gen_field(values->VectorMaskEnable, 26, 26) | + __gen_field(values->AccessesUAV, 25, 25) | + __gen_field(values->IncludeVertexHandles, 24, 24) | + __gen_field(values->DispatchGRFStartRegisterForURBData, 19, 23) | + __gen_field(values->DispatchMode, 17, 18) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + __gen_field(values->IncludePrimitiveID, 0, 0) | + 0; + + dw[8] = + 0; + +} + +#define GEN9_3DSTATE_INDEX_BUFFER_length_bias 0x00000002 +#define GEN9_3DSTATE_INDEX_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 3 + +#define GEN9_3DSTATE_INDEX_BUFFER_length 0x00000005 + +struct GEN9_3DSTATE_INDEX_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define INDEX_BYTE 0 +#define INDEX_WORD 1 +#define INDEX_DWORD 2 + uint32_t IndexFormat; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; + __gen_address_type BufferStartingAddress; + uint32_t BufferSize; +}; + +static inline void +GEN9_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_INDEX_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_MemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + dw[1] = + __gen_field(values->IndexFormat, 8, 9) | + __gen_field(dw_MemoryObjectControlState, 0, 6) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->BufferStartingAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->BufferSize, 0, 31) | + 0; + +} + +#define GEN9_3DSTATE_LINE_STIPPLE_length_bias 0x00000002 +#define GEN9_3DSTATE_LINE_STIPPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 8, \ + .DwordLength = 1 + +#define GEN9_3DSTATE_LINE_STIPPLE_length 0x00000003 + +struct GEN9_3DSTATE_LINE_STIPPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool ModifyEnableCurrentRepeatCounterCurrentStippleIndex; + uint32_t CurrentRepeatCounter; + uint32_t CurrentStippleIndex; + uint32_t LineStipplePattern; + float LineStippleInverseRepeatCount; + uint32_t LineStippleRepeatCount; +}; + +static inline void +GEN9_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_LINE_STIPPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ModifyEnableCurrentRepeatCounterCurrentStippleIndex, 31, 31) | + __gen_field(values->CurrentRepeatCounter, 21, 29) | + __gen_field(values->CurrentStippleIndex, 16, 19) | + __gen_field(values->LineStipplePattern, 0, 15) | + 0; + + dw[2] = + __gen_field(values->LineStippleInverseRepeatCount * (1 << 16), 15, 31) | + __gen_field(values->LineStippleRepeatCount, 0, 8) | + 0; + +} + +#define GEN9_3DSTATE_MONOFILTER_SIZE_length_bias 0x00000002 +#define GEN9_3DSTATE_MONOFILTER_SIZE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_MONOFILTER_SIZE_length 0x00000002 + +struct GEN9_3DSTATE_MONOFILTER_SIZE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t MonochromeFilterWidth; + uint32_t MonochromeFilterHeight; +}; + +static inline void +GEN9_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_MONOFILTER_SIZE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->MonochromeFilterWidth, 3, 5) | + __gen_field(values->MonochromeFilterHeight, 0, 2) | + 0; + +} + +#define GEN9_3DSTATE_MULTISAMPLE_length_bias 0x00000002 +#define GEN9_3DSTATE_MULTISAMPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 13, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_MULTISAMPLE_length 0x00000002 + +struct GEN9_3DSTATE_MULTISAMPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PixelPositionOffsetEnable; +#define CENTER 0 +#define UL_CORNER 1 + uint32_t PixelLocation; + uint32_t NumberofMultisamples; +}; + +static inline void +GEN9_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_MULTISAMPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PixelPositionOffsetEnable, 5, 5) | + __gen_field(values->PixelLocation, 4, 4) | + __gen_field(values->NumberofMultisamples, 1, 3) | + 0; + +} + +#define GEN9_3DSTATE_POLY_STIPPLE_OFFSET_length_bias 0x00000002 +#define GEN9_3DSTATE_POLY_STIPPLE_OFFSET_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 + +struct GEN9_3DSTATE_POLY_STIPPLE_OFFSET { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PolygonStippleXOffset; + uint32_t PolygonStippleYOffset; +}; + +static inline void +GEN9_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PolygonStippleXOffset, 8, 12) | + __gen_field(values->PolygonStippleYOffset, 0, 4) | + 0; + +} + +#define GEN9_3DSTATE_POLY_STIPPLE_PATTERN_length_bias 0x00000002 +#define GEN9_3DSTATE_POLY_STIPPLE_PATTERN_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 31 + +#define GEN9_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 + +struct GEN9_3DSTATE_POLY_STIPPLE_PATTERN { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PatternRow[32]; +}; + +static inline void +GEN9_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + for (uint32_t i = 0, j = 1; i < 32; i += 1, j++) { + dw[j] = + __gen_field(values->PatternRow[i + 0], 0, 31) | + 0; + } + +} + +#define GEN9_3DSTATE_PS_length_bias 0x00000002 +#define GEN9_3DSTATE_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 32, \ + .DwordLength = 10 + +#define GEN9_3DSTATE_PS_length 0x0000000c + +struct GEN9_3DSTATE_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t KernelStartPointer0; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; +#define FlushedtoZero 0 +#define Retained 1 + uint32_t SinglePrecisionDenormalMode; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint64_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t MaximumNumberofThreadsPerPSD; + bool PushConstantEnable; + bool RenderTargetFastClearEnable; +#define RESOLVE_DISABLED 0 +#define RESOLVE_PARTIAL 1 +#define RESOLVE_FULL 3 + uint32_t RenderTargetResolveType; +#define POSOFFSET_NONE 0 +#define POSOFFSET_CENTROID 2 +#define POSOFFSET_SAMPLE 3 + uint32_t PositionXYOffsetSelect; + bool _32PixelDispatchEnable; + bool _16PixelDispatchEnable; + bool _8PixelDispatchEnable; + uint32_t DispatchGRFStartRegisterForConstantSetupData0; + uint32_t DispatchGRFStartRegisterForConstantSetupData1; + uint32_t DispatchGRFStartRegisterForConstantSetupData2; + uint64_t KernelStartPointer1; + uint64_t KernelStartPointer2; +}; + +static inline void +GEN9_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint64_t qw1 = + __gen_offset(values->KernelStartPointer0, 6, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->SingleProgramFlow, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->SinglePrecisionDenormalMode, 26, 26) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->RoundingMode, 14, 15) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + uint64_t qw4 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + dw[6] = + __gen_field(values->MaximumNumberofThreadsPerPSD, 23, 31) | + __gen_field(values->PushConstantEnable, 11, 11) | + __gen_field(values->RenderTargetFastClearEnable, 8, 8) | + __gen_field(values->RenderTargetResolveType, 6, 7) | + __gen_field(values->PositionXYOffsetSelect, 3, 4) | + __gen_field(values->_32PixelDispatchEnable, 2, 2) | + __gen_field(values->_16PixelDispatchEnable, 1, 1) | + __gen_field(values->_8PixelDispatchEnable, 0, 0) | + 0; + + dw[7] = + __gen_field(values->DispatchGRFStartRegisterForConstantSetupData0, 16, 22) | + __gen_field(values->DispatchGRFStartRegisterForConstantSetupData1, 8, 14) | + __gen_field(values->DispatchGRFStartRegisterForConstantSetupData2, 0, 6) | + 0; + + uint64_t qw8 = + __gen_offset(values->KernelStartPointer1, 6, 63) | + 0; + + dw[8] = qw8; + dw[9] = qw8 >> 32; + + uint64_t qw10 = + __gen_offset(values->KernelStartPointer2, 6, 63) | + 0; + + dw[10] = qw10; + dw[11] = qw10 >> 32; + +} + +#define GEN9_3DSTATE_PS_BLEND_length_bias 0x00000002 +#define GEN9_3DSTATE_PS_BLEND_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 77, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_PS_BLEND_length 0x00000002 + +struct GEN9_3DSTATE_PS_BLEND { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool AlphaToCoverageEnable; + bool HasWriteableRT; + bool ColorBufferBlendEnable; + uint32_t SourceAlphaBlendFactor; + uint32_t DestinationAlphaBlendFactor; + uint32_t SourceBlendFactor; + uint32_t DestinationBlendFactor; + bool AlphaTestEnable; + bool IndependentAlphaBlendEnable; +}; + +static inline void +GEN9_3DSTATE_PS_BLEND_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_PS_BLEND * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AlphaToCoverageEnable, 31, 31) | + __gen_field(values->HasWriteableRT, 30, 30) | + __gen_field(values->ColorBufferBlendEnable, 29, 29) | + __gen_field(values->SourceAlphaBlendFactor, 24, 28) | + __gen_field(values->DestinationAlphaBlendFactor, 19, 23) | + __gen_field(values->SourceBlendFactor, 14, 18) | + __gen_field(values->DestinationBlendFactor, 9, 13) | + __gen_field(values->AlphaTestEnable, 8, 8) | + __gen_field(values->IndependentAlphaBlendEnable, 7, 7) | + 0; + +} + +#define GEN9_3DSTATE_PS_EXTRA_length_bias 0x00000002 +#define GEN9_3DSTATE_PS_EXTRA_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 79, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_PS_EXTRA_length 0x00000002 + +struct GEN9_3DSTATE_PS_EXTRA { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool PixelShaderValid; + bool PixelShaderDoesnotwritetoRT; + bool oMaskPresenttoRenderTarget; + bool PixelShaderKillsPixel; +#define PSCDEPTH_OFF 0 +#define PSCDEPTH_ON 1 +#define PSCDEPTH_ON_GE 2 +#define PSCDEPTH_ON_LE 3 + uint32_t PixelShaderComputedDepthMode; + bool ForceComputedDepth; + bool PixelShaderUsesSourceDepth; + bool PixelShaderUsesSourceW; + uint32_t Removed; + bool AttributeEnable; + bool PixelShaderDisablesAlphaToCoverage; + bool PixelShaderIsPerSample; + bool PixelShaderComputesStencil; + bool PixelShaderPullsBary; + bool PixelShaderHasUAV; +#define ICMS_NONE 0 +#define ICMS_NORMAL 1 +#define ICMS_INNER_CONSERVATIVE 2 +#define ICMS_DEPTH_COVERAGE 3 + uint32_t InputCoverageMaskState; +}; + +static inline void +GEN9_3DSTATE_PS_EXTRA_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_PS_EXTRA * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PixelShaderValid, 31, 31) | + __gen_field(values->PixelShaderDoesnotwritetoRT, 30, 30) | + __gen_field(values->oMaskPresenttoRenderTarget, 29, 29) | + __gen_field(values->PixelShaderKillsPixel, 28, 28) | + __gen_field(values->PixelShaderComputedDepthMode, 26, 27) | + __gen_field(values->ForceComputedDepth, 25, 25) | + __gen_field(values->PixelShaderUsesSourceDepth, 24, 24) | + __gen_field(values->PixelShaderUsesSourceW, 23, 23) | + __gen_field(values->Removed, 17, 17) | + __gen_field(values->AttributeEnable, 8, 8) | + __gen_field(values->PixelShaderDisablesAlphaToCoverage, 7, 7) | + __gen_field(values->PixelShaderIsPerSample, 6, 6) | + __gen_field(values->PixelShaderComputesStencil, 5, 5) | + __gen_field(values->PixelShaderPullsBary, 3, 3) | + __gen_field(values->PixelShaderHasUAV, 2, 2) | + __gen_field(values->InputCoverageMaskState, 0, 1) | + 0; + +} + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length_bias 0x00000002 +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 + +struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length_bias 0x00000002 +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 + +struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length_bias 0x00000002 +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 + +struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length_bias 0x00000002 +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 + +struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length_bias 0x00000002 +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 + +struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN9_3DSTATE_RASTER_length_bias 0x00000002 +#define GEN9_3DSTATE_RASTER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 80, \ + .DwordLength = 3 + +#define GEN9_3DSTATE_RASTER_length 0x00000005 + +struct GEN9_3DSTATE_RASTER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool ViewportZFarClipTestEnable; + bool ConservativeRasterizationEnable; +#define DX9OGL 0 +#define DX100 1 +#define DX101 2 + uint32_t APIMode; +#define Clockwise 0 +#define CounterClockwise 1 + uint32_t FrontWinding; +#define FSC_NUMRASTSAMPLES_0 0 +#define FSC_NUMRASTSAMPLES_1 1 +#define FSC_NUMRASTSAMPLES_2 2 +#define FSC_NUMRASTSAMPLES_4 3 +#define FSC_NUMRASTSAMPLES_8 4 +#define FSC_NUMRASTSAMPLES_16 5 + uint32_t ForcedSampleCount; +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_FRONT 2 +#define CULLMODE_BACK 3 + uint32_t CullMode; +#define Normal 0 +#define Force 1 + uint32_t ForceMultisampling; + bool SmoothPointEnable; + bool DXMultisampleRasterizationEnable; +#define MSRASTMODE_OFF_PIXEL 0 +#define MSRASTMODE_OFF_PATTERN 1 +#define MSRASTMODE_ON_PIXEL 2 +#define MSRASTMODE_ON_PATTERN 3 + uint32_t DXMultisampleRasterizationMode; + bool GlobalDepthOffsetEnableSolid; + bool GlobalDepthOffsetEnableWireframe; + bool GlobalDepthOffsetEnablePoint; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t FrontFaceFillMode; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t BackFaceFillMode; + bool AntialiasingEnable; + bool ScissorRectangleEnable; + bool ViewportZNearClipTestEnable; + float GlobalDepthOffsetConstant; + float GlobalDepthOffsetScale; + float GlobalDepthOffsetClamp; +}; + +static inline void +GEN9_3DSTATE_RASTER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_RASTER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ViewportZFarClipTestEnable, 26, 26) | + __gen_field(values->ConservativeRasterizationEnable, 24, 24) | + __gen_field(values->APIMode, 22, 23) | + __gen_field(values->FrontWinding, 21, 21) | + __gen_field(values->ForcedSampleCount, 18, 20) | + __gen_field(values->CullMode, 16, 17) | + __gen_field(values->ForceMultisampling, 14, 14) | + __gen_field(values->SmoothPointEnable, 13, 13) | + __gen_field(values->DXMultisampleRasterizationEnable, 12, 12) | + __gen_field(values->DXMultisampleRasterizationMode, 10, 11) | + __gen_field(values->GlobalDepthOffsetEnableSolid, 9, 9) | + __gen_field(values->GlobalDepthOffsetEnableWireframe, 8, 8) | + __gen_field(values->GlobalDepthOffsetEnablePoint, 7, 7) | + __gen_field(values->FrontFaceFillMode, 5, 6) | + __gen_field(values->BackFaceFillMode, 3, 4) | + __gen_field(values->AntialiasingEnable, 2, 2) | + __gen_field(values->ScissorRectangleEnable, 1, 1) | + __gen_field(values->ViewportZNearClipTestEnable, 0, 0) | + 0; + + dw[2] = + __gen_float(values->GlobalDepthOffsetConstant) | + 0; + + dw[3] = + __gen_float(values->GlobalDepthOffsetScale) | + 0; + + dw[4] = + __gen_float(values->GlobalDepthOffsetClamp) | + 0; + +} + +#define GEN9_3DSTATE_RS_CONSTANT_POINTER_length_bias 0x00000002 +#define GEN9_3DSTATE_RS_CONSTANT_POINTER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 84, \ + .DwordLength = 2 + +#define GEN9_3DSTATE_RS_CONSTANT_POINTER_length 0x00000004 + +struct GEN9_3DSTATE_RS_CONSTANT_POINTER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define VS 0 +#define PS 4 + uint32_t ShaderSelect; +#define RS_STORE 0 +#define RS_LOAD 1 + uint32_t OperationLoadorStore; + __gen_address_type GlobalConstantBufferAddress; + __gen_address_type GlobalConstantBufferAddressHigh; +}; + +static inline void +GEN9_3DSTATE_RS_CONSTANT_POINTER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_RS_CONSTANT_POINTER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ShaderSelect, 28, 30) | + __gen_field(values->OperationLoadorStore, 12, 12) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->GlobalConstantBufferAddress, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->GlobalConstantBufferAddressHigh, dw3); + +} + +#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD0_length_bias 0x00000002 +#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD0_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2 + +#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD0_length 0x00000000 + +#define GEN9_PALETTE_ENTRY_length 0x00000001 + +struct GEN9_PALETTE_ENTRY { + uint32_t Alpha; + uint32_t Red; + uint32_t Green; + uint32_t Blue; +}; + +static inline void +GEN9_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_PALETTE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->Alpha, 24, 31) | + __gen_field(values->Red, 16, 23) | + __gen_field(values->Green, 8, 15) | + __gen_field(values->Blue, 0, 7) | + 0; + +} + +struct GEN9_3DSTATE_SAMPLER_PALETTE_LOAD0 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1_length_bias 0x00000002 +#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 12 + +#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1_length 0x00000000 + +struct GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS_length_bias 0x00000002 +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 45, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 + +struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSSamplerState; +}; + +static inline void +GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSSamplerState, 5, 31) | + 0; + +} + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS_length_bias 0x00000002 +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 46, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 + +struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSSamplerState; +}; + +static inline void +GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSSamplerState, 5, 31) | + 0; + +} + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS_length_bias 0x00000002 +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 44, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 + +struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSSamplerState; +}; + +static inline void +GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSSamplerState, 5, 31) | + 0; + +} + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS_length_bias 0x00000002 +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 47, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 + +struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSSamplerState; +}; + +static inline void +GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSSamplerState, 5, 31) | + 0; + +} + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS_length_bias 0x00000002 +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 43, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 + +struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSSamplerState; +}; + +static inline void +GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSSamplerState, 5, 31) | + 0; + +} + +#define GEN9_3DSTATE_SAMPLE_MASK_length_bias 0x00000002 +#define GEN9_3DSTATE_SAMPLE_MASK_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_SAMPLE_MASK_length 0x00000002 + +struct GEN9_3DSTATE_SAMPLE_MASK { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SampleMask; +}; + +static inline void +GEN9_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SAMPLE_MASK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SampleMask, 0, 15) | + 0; + +} + +#define GEN9_3DSTATE_SAMPLE_PATTERN_length_bias 0x00000002 +#define GEN9_3DSTATE_SAMPLE_PATTERN_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 28, \ + .DwordLength = 7 + +#define GEN9_3DSTATE_SAMPLE_PATTERN_length 0x00000009 + +struct GEN9_3DSTATE_SAMPLE_PATTERN { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float _16xSample3XOffset; + float _16xSample3YOffset; + float _16xSample2XOffset; + float _16xSample2YOffset; + float _16xSample1XOffset; + float _16xSample1YOffset; + float _16xSample0XOffset; + float _16xSample0YOffset; + float _16xSample7XOffset; + float _16xSample7YOffset; + float _16xSample6XOffset; + float _16xSample6YOffset; + float _16xSample5XOffset; + float _16xSample5YOffset; + float _16xSample4XOffset; + float _16xSample4YOffset; + float _16xSample11XOffset; + float _16xSample11YOffset; + float _16xSample10XOffset; + float _16xSample10YOffset; + float _16xSample9XOffset; + float _16xSample9YOffset; + float _16xSample8XOffset; + float _16xSample8YOffset; + float _16xSample15XOffset; + float _16xSample15YOffset; + float _16xSample14XOffset; + float _16xSample14YOffset; + float _16xSample13XOffset; + float _16xSample13YOffset; + float _16xSample12XOffset; + float _16xSample12YOffset; + float _8xSample7XOffset; + float _8xSample7YOffset; + float _8xSample6XOffset; + float _8xSample6YOffset; + float _8xSample5XOffset; + float _8xSample5YOffset; + float _8xSample4XOffset; + float _8xSample4YOffset; + float _8xSample3XOffset; + float _8xSample3YOffset; + float _8xSample2XOffset; + float _8xSample2YOffset; + float _8xSample1XOffset; + float _8xSample1YOffset; + float _8xSample0XOffset; + float _8xSample0YOffset; + float _4xSample3XOffset; + float _4xSample3YOffset; + float _4xSample2XOffset; + float _4xSample2YOffset; + float _4xSample1XOffset; + float _4xSample1YOffset; + float _4xSample0XOffset; + float _4xSample0YOffset; + float _1xSample0XOffset; + float _1xSample0YOffset; + float _2xSample1XOffset; + float _2xSample1YOffset; + float _2xSample0XOffset; + float _2xSample0YOffset; +}; + +static inline void +GEN9_3DSTATE_SAMPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SAMPLE_PATTERN * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->_16xSample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->_16xSample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->_16xSample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->_16xSample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->_16xSample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->_16xSample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->_16xSample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->_16xSample0YOffset * (1 << 4), 0, 3) | + 0; + + dw[2] = + __gen_field(values->_16xSample7XOffset * (1 << 4), 28, 31) | + __gen_field(values->_16xSample7YOffset * (1 << 4), 24, 27) | + __gen_field(values->_16xSample6XOffset * (1 << 4), 20, 23) | + __gen_field(values->_16xSample6YOffset * (1 << 4), 16, 19) | + __gen_field(values->_16xSample5XOffset * (1 << 4), 12, 15) | + __gen_field(values->_16xSample5YOffset * (1 << 4), 8, 11) | + __gen_field(values->_16xSample4XOffset * (1 << 4), 4, 7) | + __gen_field(values->_16xSample4YOffset * (1 << 4), 0, 3) | + 0; + + dw[3] = + __gen_field(values->_16xSample11XOffset * (1 << 4), 28, 31) | + __gen_field(values->_16xSample11YOffset * (1 << 4), 24, 27) | + __gen_field(values->_16xSample10XOffset * (1 << 4), 20, 23) | + __gen_field(values->_16xSample10YOffset * (1 << 4), 16, 19) | + __gen_field(values->_16xSample9XOffset * (1 << 4), 12, 15) | + __gen_field(values->_16xSample9YOffset * (1 << 4), 8, 11) | + __gen_field(values->_16xSample8XOffset * (1 << 4), 4, 7) | + __gen_field(values->_16xSample8YOffset * (1 << 4), 0, 3) | + 0; + + dw[4] = + __gen_field(values->_16xSample15XOffset * (1 << 4), 28, 31) | + __gen_field(values->_16xSample15YOffset * (1 << 4), 24, 27) | + __gen_field(values->_16xSample14XOffset * (1 << 4), 20, 23) | + __gen_field(values->_16xSample14YOffset * (1 << 4), 16, 19) | + __gen_field(values->_16xSample13XOffset * (1 << 4), 12, 15) | + __gen_field(values->_16xSample13YOffset * (1 << 4), 8, 11) | + __gen_field(values->_16xSample12XOffset * (1 << 4), 4, 7) | + __gen_field(values->_16xSample12YOffset * (1 << 4), 0, 3) | + 0; + + dw[5] = + __gen_field(values->_8xSample7XOffset * (1 << 4), 28, 31) | + __gen_field(values->_8xSample7YOffset * (1 << 4), 24, 27) | + __gen_field(values->_8xSample6XOffset * (1 << 4), 20, 23) | + __gen_field(values->_8xSample6YOffset * (1 << 4), 16, 19) | + __gen_field(values->_8xSample5XOffset * (1 << 4), 12, 15) | + __gen_field(values->_8xSample5YOffset * (1 << 4), 8, 11) | + __gen_field(values->_8xSample4XOffset * (1 << 4), 4, 7) | + __gen_field(values->_8xSample4YOffset * (1 << 4), 0, 3) | + 0; + + dw[6] = + __gen_field(values->_8xSample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->_8xSample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->_8xSample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->_8xSample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->_8xSample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->_8xSample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->_8xSample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->_8xSample0YOffset * (1 << 4), 0, 3) | + 0; + + dw[7] = + __gen_field(values->_4xSample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->_4xSample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->_4xSample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->_4xSample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->_4xSample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->_4xSample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->_4xSample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->_4xSample0YOffset * (1 << 4), 0, 3) | + 0; + + dw[8] = + __gen_field(values->_1xSample0XOffset * (1 << 4), 20, 23) | + __gen_field(values->_1xSample0YOffset * (1 << 4), 16, 19) | + __gen_field(values->_2xSample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->_2xSample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->_2xSample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->_2xSample0YOffset * (1 << 4), 0, 3) | + 0; + +} + +#define GEN9_3DSTATE_SBE_length_bias 0x00000002 +#define GEN9_3DSTATE_SBE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 31, \ + .DwordLength = 4 + +#define GEN9_3DSTATE_SBE_length 0x00000006 + +struct GEN9_3DSTATE_SBE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool ForceVertexURBEntryReadLength; + bool ForceVertexURBEntryReadOffset; + uint32_t NumberofSFOutputAttributes; + bool AttributeSwizzleEnable; +#define UPPERLEFT 0 +#define LOWERLEFT 1 + uint32_t PointSpriteTextureCoordinateOrigin; + bool PrimitiveIDOverrideComponentW; + bool PrimitiveIDOverrideComponentZ; + bool PrimitiveIDOverrideComponentY; + bool PrimitiveIDOverrideComponentX; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t PrimitiveIDOverrideAttributeSelect; + uint32_t PointSpriteTextureCoordinateEnable; + uint32_t ConstantInterpolationEnable; + uint32_t Attribute15ActiveComponentFormat; + uint32_t Attribute14ActiveComponentFormat; + uint32_t Attribute13ActiveComponentFormat; + uint32_t Attribute12ActiveComponentFormat; + uint32_t Attribute11ActiveComponentFormat; + uint32_t Attribute10ActiveComponentFormat; + uint32_t Attribute9ActiveComponentFormat; + uint32_t Attribute8ActiveComponentFormat; + uint32_t Attribute7ActiveComponentFormat; + uint32_t Attribute6ActiveComponentFormat; + uint32_t Attribute5ActiveComponentFormat; + uint32_t Attribute4ActiveComponentFormat; + uint32_t Attribute3ActiveComponentFormat; + uint32_t Attribute2ActiveComponentFormat; + uint32_t Attribute1ActiveComponentFormat; + uint32_t Attribute0ActiveComponentFormat; + uint32_t Attribute31ActiveComponentFormat; + uint32_t Attribute30ActiveComponentFormat; + uint32_t Attribute29ActiveComponentFormat; + uint32_t Attribute28ActiveComponentFormat; + uint32_t Attribute27ActiveComponentFormat; + uint32_t Attribute26ActiveComponentFormat; + uint32_t Attribute25ActiveComponentFormat; + uint32_t Attribute24ActiveComponentFormat; + uint32_t Attribute23ActiveComponentFormat; + uint32_t Attribute22ActiveComponentFormat; + uint32_t Attribute21ActiveComponentFormat; + uint32_t Attribute20ActiveComponentFormat; + uint32_t Attribute19ActiveComponentFormat; + uint32_t Attribute18ActiveComponentFormat; + uint32_t Attribute17ActiveComponentFormat; + uint32_t Attribute16ActiveComponentFormat; +}; + +static inline void +GEN9_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SBE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ForceVertexURBEntryReadLength, 29, 29) | + __gen_field(values->ForceVertexURBEntryReadOffset, 28, 28) | + __gen_field(values->NumberofSFOutputAttributes, 22, 27) | + __gen_field(values->AttributeSwizzleEnable, 21, 21) | + __gen_field(values->PointSpriteTextureCoordinateOrigin, 20, 20) | + __gen_field(values->PrimitiveIDOverrideComponentW, 19, 19) | + __gen_field(values->PrimitiveIDOverrideComponentZ, 18, 18) | + __gen_field(values->PrimitiveIDOverrideComponentY, 17, 17) | + __gen_field(values->PrimitiveIDOverrideComponentX, 16, 16) | + __gen_field(values->VertexURBEntryReadLength, 11, 15) | + __gen_field(values->VertexURBEntryReadOffset, 5, 10) | + __gen_field(values->PrimitiveIDOverrideAttributeSelect, 0, 4) | + 0; + + dw[2] = + __gen_field(values->PointSpriteTextureCoordinateEnable, 0, 31) | + 0; + + dw[3] = + __gen_field(values->ConstantInterpolationEnable, 0, 31) | + 0; + + dw[4] = + __gen_field(values->Attribute15ActiveComponentFormat, 30, 31) | + __gen_field(values->Attribute14ActiveComponentFormat, 28, 29) | + __gen_field(values->Attribute13ActiveComponentFormat, 26, 27) | + __gen_field(values->Attribute12ActiveComponentFormat, 24, 25) | + __gen_field(values->Attribute11ActiveComponentFormat, 22, 23) | + __gen_field(values->Attribute10ActiveComponentFormat, 20, 21) | + __gen_field(values->Attribute9ActiveComponentFormat, 18, 19) | + __gen_field(values->Attribute8ActiveComponentFormat, 16, 17) | + __gen_field(values->Attribute7ActiveComponentFormat, 14, 15) | + __gen_field(values->Attribute6ActiveComponentFormat, 12, 13) | + __gen_field(values->Attribute5ActiveComponentFormat, 10, 11) | + __gen_field(values->Attribute4ActiveComponentFormat, 8, 9) | + __gen_field(values->Attribute3ActiveComponentFormat, 6, 7) | + __gen_field(values->Attribute2ActiveComponentFormat, 4, 5) | + __gen_field(values->Attribute1ActiveComponentFormat, 2, 3) | + __gen_field(values->Attribute0ActiveComponentFormat, 0, 1) | + 0; + + dw[5] = + __gen_field(values->Attribute31ActiveComponentFormat, 30, 31) | + __gen_field(values->Attribute30ActiveComponentFormat, 28, 29) | + __gen_field(values->Attribute29ActiveComponentFormat, 26, 27) | + __gen_field(values->Attribute28ActiveComponentFormat, 24, 25) | + __gen_field(values->Attribute27ActiveComponentFormat, 22, 23) | + __gen_field(values->Attribute26ActiveComponentFormat, 20, 21) | + __gen_field(values->Attribute25ActiveComponentFormat, 18, 19) | + __gen_field(values->Attribute24ActiveComponentFormat, 16, 17) | + __gen_field(values->Attribute23ActiveComponentFormat, 14, 15) | + __gen_field(values->Attribute22ActiveComponentFormat, 12, 13) | + __gen_field(values->Attribute21ActiveComponentFormat, 10, 11) | + __gen_field(values->Attribute20ActiveComponentFormat, 8, 9) | + __gen_field(values->Attribute19ActiveComponentFormat, 6, 7) | + __gen_field(values->Attribute18ActiveComponentFormat, 4, 5) | + __gen_field(values->Attribute17ActiveComponentFormat, 2, 3) | + __gen_field(values->Attribute16ActiveComponentFormat, 0, 1) | + 0; + +} + +#define GEN9_3DSTATE_SBE_SWIZ_length_bias 0x00000002 +#define GEN9_3DSTATE_SBE_SWIZ_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 81, \ + .DwordLength = 9 + +#define GEN9_3DSTATE_SBE_SWIZ_length 0x0000000b + +#define GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL_length 0x00000001 + +struct GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL { + bool ComponentOverrideW; + bool ComponentOverrideZ; + bool ComponentOverrideY; + bool ComponentOverrideX; + uint32_t SwizzleControlMode; +#define CONST_0000 0 +#define CONST_0001_FLOAT 1 +#define CONST_1111_FLOAT 2 +#define PRIM_ID 3 + uint32_t ConstantSource; +#define INPUTATTR 0 +#define INPUTATTR_FACING 1 +#define INPUTATTR_W 2 +#define INPUTATTR_FACING_W 3 + uint32_t SwizzleSelect; + uint32_t SourceAttribute; +}; + +static inline void +GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ComponentOverrideW, 15, 15) | + __gen_field(values->ComponentOverrideZ, 14, 14) | + __gen_field(values->ComponentOverrideY, 13, 13) | + __gen_field(values->ComponentOverrideX, 12, 12) | + __gen_field(values->SwizzleControlMode, 11, 11) | + __gen_field(values->ConstantSource, 9, 10) | + __gen_field(values->SwizzleSelect, 6, 7) | + __gen_field(values->SourceAttribute, 0, 4) | + 0; + +} + +struct GEN9_3DSTATE_SBE_SWIZ { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL Attribute[16]; + uint32_t AttributeWrapShortestEnables[16]; +}; + +static inline void +GEN9_3DSTATE_SBE_SWIZ_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SBE_SWIZ * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + for (uint32_t i = 0, j = 1; i < 16; i += 2, j++) { + uint32_t dw_Attribute0; + GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(data, &dw_Attribute0, &values->Attribute[i + 0]); + uint32_t dw_Attribute1; + GEN9_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(data, &dw_Attribute1, &values->Attribute[i + 1]); + dw[j] = + __gen_field(dw_Attribute0, 0, 15) | + __gen_field(dw_Attribute1, 16, 31) | + 0; + } + + for (uint32_t i = 0, j = 9; i < 16; i += 8, j++) { + dw[j] = + __gen_field(values->AttributeWrapShortestEnables[i + 0], 0, 3) | + __gen_field(values->AttributeWrapShortestEnables[i + 1], 4, 7) | + __gen_field(values->AttributeWrapShortestEnables[i + 2], 8, 11) | + __gen_field(values->AttributeWrapShortestEnables[i + 3], 12, 15) | + __gen_field(values->AttributeWrapShortestEnables[i + 4], 16, 19) | + __gen_field(values->AttributeWrapShortestEnables[i + 5], 20, 23) | + __gen_field(values->AttributeWrapShortestEnables[i + 6], 24, 27) | + __gen_field(values->AttributeWrapShortestEnables[i + 7], 28, 31) | + 0; + } + +} + +#define GEN9_3DSTATE_SCISSOR_STATE_POINTERS_length_bias 0x00000002 +#define GEN9_3DSTATE_SCISSOR_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 15, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 + +struct GEN9_3DSTATE_SCISSOR_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ScissorRectPointer; +}; + +static inline void +GEN9_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ScissorRectPointer, 5, 31) | + 0; + +} + +#define GEN9_3DSTATE_SF_length_bias 0x00000002 +#define GEN9_3DSTATE_SF_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 2 + +#define GEN9_3DSTATE_SF_length 0x00000004 + +struct GEN9_3DSTATE_SF { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float LineWidth; + bool LegacyGlobalDepthBiasEnable; + bool StatisticsEnable; + bool ViewportTransformEnable; +#define _05pixels 0 +#define _10pixels 1 +#define _20pixels 2 +#define _40pixels 3 + uint32_t LineEndCapAntialiasingRegionWidth; + bool LastPixelEnable; + uint32_t TriangleStripListProvokingVertexSelect; + uint32_t LineStripListProvokingVertexSelect; + uint32_t TriangleFanProvokingVertexSelect; +#define AALINEDISTANCE_TRUE 1 + uint32_t AALineDistanceMode; + bool SmoothPointEnable; + uint32_t VertexSubPixelPrecisionSelect; +#define Vertex 0 +#define State 1 + uint32_t PointWidthSource; + float PointWidth; +}; + +static inline void +GEN9_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->LineWidth * (1 << 7), 12, 29) | + __gen_field(values->LegacyGlobalDepthBiasEnable, 11, 11) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->ViewportTransformEnable, 1, 1) | + 0; + + dw[2] = + __gen_field(values->LineEndCapAntialiasingRegionWidth, 16, 17) | + 0; + + dw[3] = + __gen_field(values->LastPixelEnable, 31, 31) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 29, 30) | + __gen_field(values->LineStripListProvokingVertexSelect, 27, 28) | + __gen_field(values->TriangleFanProvokingVertexSelect, 25, 26) | + __gen_field(values->AALineDistanceMode, 14, 14) | + __gen_field(values->SmoothPointEnable, 13, 13) | + __gen_field(values->VertexSubPixelPrecisionSelect, 12, 12) | + __gen_field(values->PointWidthSource, 11, 11) | + __gen_field(values->PointWidth * (1 << 3), 0, 10) | + 0; + +} + +#define GEN9_3DSTATE_SO_BUFFER_length_bias 0x00000002 +#define GEN9_3DSTATE_SO_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 6 + +#define GEN9_3DSTATE_SO_BUFFER_length 0x00000008 + +struct GEN9_3DSTATE_SO_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool SOBufferEnable; + uint32_t SOBufferIndex; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE SOBufferObjectControlState; + bool StreamOffsetWriteEnable; + bool StreamOutputBufferOffsetAddressEnable; + __gen_address_type SurfaceBaseAddress; + uint32_t SurfaceSize; + __gen_address_type StreamOutputBufferOffsetAddress; + uint32_t StreamOffset; +}; + +static inline void +GEN9_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SO_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_SOBufferObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SOBufferObjectControlState, &values->SOBufferObjectControlState); + dw[1] = + __gen_field(values->SOBufferEnable, 31, 31) | + __gen_field(values->SOBufferIndex, 29, 30) | + __gen_field(dw_SOBufferObjectControlState, 22, 28) | + __gen_field(values->StreamOffsetWriteEnable, 21, 21) | + __gen_field(values->StreamOutputBufferOffsetAddressEnable, 20, 20) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->SurfaceSize, 0, 29) | + 0; + + uint32_t dw5 = + 0; + + uint64_t qw5 = + __gen_combine_address(data, &dw[5], values->StreamOutputBufferOffsetAddress, dw5); + + dw[5] = qw5; + dw[6] = qw5 >> 32; + + dw[7] = + __gen_field(values->StreamOffset, 0, 31) | + 0; + +} + +#define GEN9_3DSTATE_SO_DECL_LIST_length_bias 0x00000002 +#define GEN9_3DSTATE_SO_DECL_LIST_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 23 + +#define GEN9_3DSTATE_SO_DECL_LIST_length 0x00000000 + +#define GEN9_SO_DECL_ENTRY_length 0x00000002 + +#define GEN9_SO_DECL_length 0x00000001 + +struct GEN9_SO_DECL { + uint32_t OutputBufferSlot; + uint32_t HoleFlag; + uint32_t RegisterIndex; + uint32_t ComponentMask; +}; + +static inline void +GEN9_SO_DECL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_SO_DECL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->OutputBufferSlot, 12, 13) | + __gen_field(values->HoleFlag, 11, 11) | + __gen_field(values->RegisterIndex, 4, 9) | + __gen_field(values->ComponentMask, 0, 3) | + 0; + +} + +struct GEN9_SO_DECL_ENTRY { + struct GEN9_SO_DECL Stream3Decl; + struct GEN9_SO_DECL Stream2Decl; + struct GEN9_SO_DECL Stream1Decl; + struct GEN9_SO_DECL Stream0Decl; +}; + +static inline void +GEN9_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_SO_DECL_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_Stream3Decl; + GEN9_SO_DECL_pack(data, &dw_Stream3Decl, &values->Stream3Decl); + uint32_t dw_Stream2Decl; + GEN9_SO_DECL_pack(data, &dw_Stream2Decl, &values->Stream2Decl); + uint32_t dw_Stream1Decl; + GEN9_SO_DECL_pack(data, &dw_Stream1Decl, &values->Stream1Decl); + uint32_t dw_Stream0Decl; + GEN9_SO_DECL_pack(data, &dw_Stream0Decl, &values->Stream0Decl); + uint64_t qw0 = + __gen_field(dw_Stream3Decl, 48, 63) | + __gen_field(dw_Stream2Decl, 32, 47) | + __gen_field(dw_Stream1Decl, 16, 31) | + __gen_field(dw_Stream0Decl, 0, 15) | + 0; + + dw[0] = qw0; + dw[1] = qw0 >> 32; + +} + +struct GEN9_3DSTATE_SO_DECL_LIST { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StreamtoBufferSelects3; + uint32_t StreamtoBufferSelects2; + uint32_t StreamtoBufferSelects1; + uint32_t StreamtoBufferSelects0; + uint32_t NumEntries3; + uint32_t NumEntries2; + uint32_t NumEntries1; + uint32_t NumEntries0; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_SO_DECL_LIST * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->StreamtoBufferSelects3, 12, 15) | + __gen_field(values->StreamtoBufferSelects2, 8, 11) | + __gen_field(values->StreamtoBufferSelects1, 4, 7) | + __gen_field(values->StreamtoBufferSelects0, 0, 3) | + 0; + + dw[2] = + __gen_field(values->NumEntries3, 24, 31) | + __gen_field(values->NumEntries2, 16, 23) | + __gen_field(values->NumEntries1, 8, 15) | + __gen_field(values->NumEntries0, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_STENCIL_BUFFER_length_bias 0x00000002 +#define GEN9_3DSTATE_STENCIL_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 3 + +#define GEN9_3DSTATE_STENCIL_BUFFER_length 0x00000005 + +struct GEN9_3DSTATE_STENCIL_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StencilBufferEnable; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE StencilBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t SurfaceQPitch; +}; + +static inline void +GEN9_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_STENCIL_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_StencilBufferObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StencilBufferObjectControlState, &values->StencilBufferObjectControlState); + dw[1] = + __gen_field(values->StencilBufferEnable, 31, 31) | + __gen_field(dw_StencilBufferObjectControlState, 22, 28) | + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + +} + +#define GEN9_3DSTATE_STREAMOUT_length_bias 0x00000002 +#define GEN9_3DSTATE_STREAMOUT_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 30, \ + .DwordLength = 3 + +#define GEN9_3DSTATE_STREAMOUT_length 0x00000005 + +struct GEN9_3DSTATE_STREAMOUT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SOFunctionEnable; + uint32_t APIRenderingDisable; + uint32_t RenderStreamSelect; +#define LEADING 0 +#define TRAILING 1 + uint32_t ReorderMode; + bool SOStatisticsEnable; +#define Normal 0 +#define Resreved 1 +#define Force_Off 2 +#define Force_on 3 + uint32_t ForceRendering; + uint32_t Stream3VertexReadOffset; + uint32_t Stream3VertexReadLength; + uint32_t Stream2VertexReadOffset; + uint32_t Stream2VertexReadLength; + uint32_t Stream1VertexReadOffset; + uint32_t Stream1VertexReadLength; + uint32_t Stream0VertexReadOffset; + uint32_t Stream0VertexReadLength; + uint32_t Buffer1SurfacePitch; + uint32_t Buffer0SurfacePitch; + uint32_t Buffer3SurfacePitch; + uint32_t Buffer2SurfacePitch; +}; + +static inline void +GEN9_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_STREAMOUT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SOFunctionEnable, 31, 31) | + __gen_field(values->APIRenderingDisable, 30, 30) | + __gen_field(values->RenderStreamSelect, 27, 28) | + __gen_field(values->ReorderMode, 26, 26) | + __gen_field(values->SOStatisticsEnable, 25, 25) | + __gen_field(values->ForceRendering, 23, 24) | + 0; + + dw[2] = + __gen_field(values->Stream3VertexReadOffset, 29, 29) | + __gen_field(values->Stream3VertexReadLength, 24, 28) | + __gen_field(values->Stream2VertexReadOffset, 21, 21) | + __gen_field(values->Stream2VertexReadLength, 16, 20) | + __gen_field(values->Stream1VertexReadOffset, 13, 13) | + __gen_field(values->Stream1VertexReadLength, 8, 12) | + __gen_field(values->Stream0VertexReadOffset, 5, 5) | + __gen_field(values->Stream0VertexReadLength, 0, 4) | + 0; + + dw[3] = + __gen_field(values->Buffer1SurfacePitch, 16, 27) | + __gen_field(values->Buffer0SurfacePitch, 0, 11) | + 0; + + dw[4] = + __gen_field(values->Buffer3SurfacePitch, 16, 27) | + __gen_field(values->Buffer2SurfacePitch, 0, 11) | + 0; + +} + +#define GEN9_3DSTATE_TE_length_bias 0x00000002 +#define GEN9_3DSTATE_TE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 28, \ + .DwordLength = 2 + +#define GEN9_3DSTATE_TE_length 0x00000004 + +struct GEN9_3DSTATE_TE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define INTEGER 0 +#define ODD_FRACTIONAL 1 +#define EVEN_FRACTIONAL 2 + uint32_t Partitioning; +#define POINT 0 +#define OUTPUT_LINE 1 +#define OUTPUT_TRI_CW 2 +#define OUTPUT_TRI_CCW 3 + uint32_t OutputTopology; +#define QUAD 0 +#define TRI 1 +#define ISOLINE 2 + uint32_t TEDomain; +#define HW_TESS 0 + uint32_t TEMode; + bool TEEnable; + float MaximumTessellationFactorOdd; + float MaximumTessellationFactorNotOdd; +}; + +static inline void +GEN9_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_TE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Partitioning, 12, 13) | + __gen_field(values->OutputTopology, 8, 9) | + __gen_field(values->TEDomain, 4, 5) | + __gen_field(values->TEMode, 1, 2) | + __gen_field(values->TEEnable, 0, 0) | + 0; + + dw[2] = + __gen_float(values->MaximumTessellationFactorOdd) | + 0; + + dw[3] = + __gen_float(values->MaximumTessellationFactorNotOdd) | + 0; + +} + +#define GEN9_3DSTATE_URB_CLEAR_length_bias 0x00000002 +#define GEN9_3DSTATE_URB_CLEAR_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 29, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_URB_CLEAR_length 0x00000002 + +struct GEN9_3DSTATE_URB_CLEAR { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t URBClearLength; + uint32_t URBAddress; +}; + +static inline void +GEN9_3DSTATE_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_URB_CLEAR * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBClearLength, 16, 29) | + __gen_offset(values->URBAddress, 0, 14) | + 0; + +} + +#define GEN9_3DSTATE_URB_DS_length_bias 0x00000002 +#define GEN9_3DSTATE_URB_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 50, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_URB_DS_length 0x00000002 + +struct GEN9_3DSTATE_URB_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t DSURBStartingAddress; + uint32_t DSURBEntryAllocationSize; + uint32_t DSNumberofURBEntries; +}; + +static inline void +GEN9_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_URB_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DSURBStartingAddress, 25, 31) | + __gen_field(values->DSURBEntryAllocationSize, 16, 24) | + __gen_field(values->DSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN9_3DSTATE_URB_GS_length_bias 0x00000002 +#define GEN9_3DSTATE_URB_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 51, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_URB_GS_length 0x00000002 + +struct GEN9_3DSTATE_URB_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t GSURBStartingAddress; + uint32_t GSURBEntryAllocationSize; + uint32_t GSNumberofURBEntries; +}; + +static inline void +GEN9_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_URB_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->GSURBStartingAddress, 25, 31) | + __gen_field(values->GSURBEntryAllocationSize, 16, 24) | + __gen_field(values->GSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN9_3DSTATE_URB_HS_length_bias 0x00000002 +#define GEN9_3DSTATE_URB_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 49, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_URB_HS_length 0x00000002 + +struct GEN9_3DSTATE_URB_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t HSURBStartingAddress; + uint32_t HSURBEntryAllocationSize; + uint32_t HSNumberofURBEntries; +}; + +static inline void +GEN9_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_URB_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->HSURBStartingAddress, 25, 31) | + __gen_field(values->HSURBEntryAllocationSize, 16, 24) | + __gen_field(values->HSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN9_3DSTATE_VERTEX_BUFFERS_length_bias 0x00000002 +#define GEN9_3DSTATE_VERTEX_BUFFERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 8 + +#define GEN9_3DSTATE_VERTEX_BUFFERS_length 0x00000000 + +#define GEN9_VERTEX_BUFFER_STATE_length 0x00000004 + +struct GEN9_VERTEX_BUFFER_STATE { + uint32_t VertexBufferIndex; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; + uint32_t AddressModifyEnable; + bool NullVertexBuffer; + uint32_t BufferPitch; + __gen_address_type BufferStartingAddress; + uint32_t BufferSize; +}; + +static inline void +GEN9_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_VERTEX_BUFFER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_MemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(dw_MemoryObjectControlState, 16, 22) | + __gen_field(values->AddressModifyEnable, 14, 14) | + __gen_field(values->NullVertexBuffer, 13, 13) | + __gen_field(values->BufferPitch, 0, 11) | + 0; + + uint32_t dw1 = + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->BufferSize, 0, 31) | + 0; + +} + +struct GEN9_3DSTATE_VERTEX_BUFFERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VERTEX_BUFFERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_VERTEX_ELEMENTS_length_bias 0x00000002 +#define GEN9_3DSTATE_VERTEX_ELEMENTS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 9 + +#define GEN9_3DSTATE_VERTEX_ELEMENTS_length 0x00000000 + +#define GEN9_VERTEX_ELEMENT_STATE_length 0x00000002 + +struct GEN9_VERTEX_ELEMENT_STATE { + uint32_t VertexBufferIndex; + bool Valid; + uint32_t SourceElementFormat; + bool EdgeFlagEnable; + uint32_t SourceElementOffset; + uint32_t Component0Control; + uint32_t Component1Control; + uint32_t Component2Control; + uint32_t Component3Control; +}; + +static inline void +GEN9_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_VERTEX_ELEMENT_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->Valid, 25, 25) | + __gen_field(values->SourceElementFormat, 16, 24) | + __gen_field(values->EdgeFlagEnable, 15, 15) | + __gen_field(values->SourceElementOffset, 0, 11) | + 0; + + dw[1] = + __gen_field(values->Component0Control, 28, 30) | + __gen_field(values->Component1Control, 24, 26) | + __gen_field(values->Component2Control, 20, 22) | + __gen_field(values->Component3Control, 16, 18) | + 0; + +} + +struct GEN9_3DSTATE_VERTEX_ELEMENTS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN9_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VERTEX_ELEMENTS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_3DSTATE_VF_length_bias 0x00000002 +#define GEN9_3DSTATE_VF_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 12, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_VF_length 0x00000002 + +struct GEN9_3DSTATE_VF { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool SequentialDrawCutIndexEnable; + bool ComponentPackingEnable; + bool IndexedDrawCutIndexEnable; + uint32_t DwordLength; + uint32_t CutIndex; +}; + +static inline void +GEN9_3DSTATE_VF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->SequentialDrawCutIndexEnable, 10, 10) | + __gen_field(values->ComponentPackingEnable, 9, 9) | + __gen_field(values->IndexedDrawCutIndexEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CutIndex, 0, 31) | + 0; + +} + +#define GEN9_3DSTATE_VF_COMPONENT_PACKING_length_bias 0x00000002 +#define GEN9_3DSTATE_VF_COMPONENT_PACKING_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 85, \ + .DwordLength = 3 + +#define GEN9_3DSTATE_VF_COMPONENT_PACKING_length 0x00000005 + +struct GEN9_3DSTATE_VF_COMPONENT_PACKING { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t VertexElement07Enables; + uint32_t VertexElement06Enables; + uint32_t VertexElement05Enables; + uint32_t VertexElement04Enables; + uint32_t VertexElement03Enables; + uint32_t VertexElement02Enables; + uint32_t VertexElement01Enables; + uint32_t VertexElement00Enables; + uint32_t VertexElement15Enables; + uint32_t VertexElement14Enables; + uint32_t VertexElement13Enables; + uint32_t VertexElement12Enables; + uint32_t VertexElement11Enables; + uint32_t VertexElement10Enables; + uint32_t VertexElement09Enables; + uint32_t VertexElement08Enables; + uint32_t VertexElement23Enables; + uint32_t VertexElement22Enables; + uint32_t VertexElement21Enables; + uint32_t VertexElement20Enables; + uint32_t VertexElement19Enables; + uint32_t VertexElement18Enables; + uint32_t VertexElement17Enables; + uint32_t VertexElement16Enables; + uint32_t VertexElement31Enables; + uint32_t VertexElement30Enables; + uint32_t VertexElement29Enables; + uint32_t VertexElement28Enables; + uint32_t VertexElement27Enables; + uint32_t VertexElement26Enables; + uint32_t VertexElement25Enables; + uint32_t VertexElement24Enables; +}; + +static inline void +GEN9_3DSTATE_VF_COMPONENT_PACKING_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VF_COMPONENT_PACKING * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->VertexElement07Enables, 28, 31) | + __gen_field(values->VertexElement06Enables, 24, 27) | + __gen_field(values->VertexElement05Enables, 20, 23) | + __gen_field(values->VertexElement04Enables, 16, 19) | + __gen_field(values->VertexElement03Enables, 12, 15) | + __gen_field(values->VertexElement02Enables, 8, 11) | + __gen_field(values->VertexElement01Enables, 4, 7) | + __gen_field(values->VertexElement00Enables, 0, 3) | + 0; + + dw[2] = + __gen_field(values->VertexElement15Enables, 28, 31) | + __gen_field(values->VertexElement14Enables, 24, 27) | + __gen_field(values->VertexElement13Enables, 20, 23) | + __gen_field(values->VertexElement12Enables, 16, 19) | + __gen_field(values->VertexElement11Enables, 12, 15) | + __gen_field(values->VertexElement10Enables, 8, 11) | + __gen_field(values->VertexElement09Enables, 4, 7) | + __gen_field(values->VertexElement08Enables, 0, 3) | + 0; + + dw[3] = + __gen_field(values->VertexElement23Enables, 28, 31) | + __gen_field(values->VertexElement22Enables, 24, 27) | + __gen_field(values->VertexElement21Enables, 20, 23) | + __gen_field(values->VertexElement20Enables, 16, 19) | + __gen_field(values->VertexElement19Enables, 12, 15) | + __gen_field(values->VertexElement18Enables, 8, 11) | + __gen_field(values->VertexElement17Enables, 4, 7) | + __gen_field(values->VertexElement16Enables, 0, 3) | + 0; + + dw[4] = + __gen_field(values->VertexElement31Enables, 28, 31) | + __gen_field(values->VertexElement30Enables, 24, 27) | + __gen_field(values->VertexElement29Enables, 20, 23) | + __gen_field(values->VertexElement28Enables, 16, 19) | + __gen_field(values->VertexElement27Enables, 12, 15) | + __gen_field(values->VertexElement26Enables, 8, 11) | + __gen_field(values->VertexElement25Enables, 4, 7) | + __gen_field(values->VertexElement24Enables, 0, 3) | + 0; + +} + +#define GEN9_3DSTATE_VF_INSTANCING_length_bias 0x00000002 +#define GEN9_3DSTATE_VF_INSTANCING_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 73, \ + .DwordLength = 1 + +#define GEN9_3DSTATE_VF_INSTANCING_length 0x00000003 + +struct GEN9_3DSTATE_VF_INSTANCING { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool InstancingEnable; + uint32_t VertexElementIndex; + uint32_t InstanceDataStepRate; +}; + +static inline void +GEN9_3DSTATE_VF_INSTANCING_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VF_INSTANCING * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InstancingEnable, 8, 8) | + __gen_field(values->VertexElementIndex, 0, 5) | + 0; + + dw[2] = + __gen_field(values->InstanceDataStepRate, 0, 31) | + 0; + +} + +#define GEN9_3DSTATE_VF_SGVS_length_bias 0x00000002 +#define GEN9_3DSTATE_VF_SGVS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 74, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_VF_SGVS_length 0x00000002 + +struct GEN9_3DSTATE_VF_SGVS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool InstanceIDEnable; +#define COMP_0 0 +#define COMP_1 1 +#define COMP_2 2 +#define COMP_3 3 + uint32_t InstanceIDComponentNumber; + uint32_t InstanceIDElementOffset; + bool VertexIDEnable; +#define COMP_0 0 +#define COMP_1 1 +#define COMP_2 2 +#define COMP_3 3 + uint32_t VertexIDComponentNumber; + uint32_t VertexIDElementOffset; +}; + +static inline void +GEN9_3DSTATE_VF_SGVS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VF_SGVS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InstanceIDEnable, 31, 31) | + __gen_field(values->InstanceIDComponentNumber, 29, 30) | + __gen_field(values->InstanceIDElementOffset, 16, 21) | + __gen_field(values->VertexIDEnable, 15, 15) | + __gen_field(values->VertexIDComponentNumber, 13, 14) | + __gen_field(values->VertexIDElementOffset, 0, 5) | + 0; + +} + +#define GEN9_3DSTATE_VF_STATISTICS_length_bias 0x00000001 +#define GEN9_3DSTATE_VF_STATISTICS_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 11 + +#define GEN9_3DSTATE_VF_STATISTICS_length 0x00000001 + +struct GEN9_3DSTATE_VF_STATISTICS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool StatisticsEnable; +}; + +static inline void +GEN9_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VF_STATISTICS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->StatisticsEnable, 0, 0) | + 0; + +} + +#define GEN9_3DSTATE_VF_TOPOLOGY_length_bias 0x00000002 +#define GEN9_3DSTATE_VF_TOPOLOGY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 75, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_VF_TOPOLOGY_length 0x00000002 + +struct GEN9_3DSTATE_VF_TOPOLOGY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PrimitiveTopologyType; +}; + +static inline void +GEN9_3DSTATE_VF_TOPOLOGY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VF_TOPOLOGY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PrimitiveTopologyType, 0, 5) | + 0; + +} + +#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length_bias 0x00000002 +#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 35, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 + +struct GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t CCViewportPointer; +}; + +static inline void +GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->CCViewportPointer, 5, 31) | + 0; + +} + +#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length_bias 0x00000002 +#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 33, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 + +struct GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SFClipViewportPointer; +}; + +static inline void +GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SFClipViewportPointer, 6, 31) | + 0; + +} + +#define GEN9_3DSTATE_WM_length_bias 0x00000002 +#define GEN9_3DSTATE_WM_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_WM_length 0x00000002 + +struct GEN9_3DSTATE_WM { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool StatisticsEnable; + bool LegacyDepthBufferClearEnable; + bool LegacyDepthBufferResolveEnable; + bool LegacyHierarchicalDepthBufferResolveEnable; + bool LegacyDiamondLineRasterization; +#define NORMAL 0 +#define PSEXEC 1 +#define PREPS 2 + uint32_t EarlyDepthStencilControl; +#define Normal 0 +#define ForceOff 1 +#define ForceON 2 + uint32_t ForceThreadDispatchEnable; +#define INTERP_PIXEL 0 +#define INTERP_CENTROID 2 +#define INTERP_SAMPLE 3 + uint32_t PositionZWInterpolationMode; + uint32_t BarycentricInterpolationMode; +#define _05pixels 0 +#define _10pixels 1 +#define _20pixels 2 +#define _40pixels 3 + uint32_t LineEndCapAntialiasingRegionWidth; +#define _05pixels 0 +#define _10pixels 1 +#define _20pixels 2 +#define _40pixels 3 + uint32_t LineAntialiasingRegionWidth; + bool PolygonStippleEnable; + bool LineStippleEnable; +#define RASTRULE_UPPER_LEFT 0 +#define RASTRULE_UPPER_RIGHT 1 + uint32_t PointRasterizationRule; +#define Normal 0 +#define ForceOff 1 +#define ForceON 2 + uint32_t ForceKillPixelEnable; +}; + +static inline void +GEN9_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_WM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StatisticsEnable, 31, 31) | + __gen_field(values->LegacyDepthBufferClearEnable, 30, 30) | + __gen_field(values->LegacyDepthBufferResolveEnable, 28, 28) | + __gen_field(values->LegacyHierarchicalDepthBufferResolveEnable, 27, 27) | + __gen_field(values->LegacyDiamondLineRasterization, 26, 26) | + __gen_field(values->EarlyDepthStencilControl, 21, 22) | + __gen_field(values->ForceThreadDispatchEnable, 19, 20) | + __gen_field(values->PositionZWInterpolationMode, 17, 18) | + __gen_field(values->BarycentricInterpolationMode, 11, 16) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 8, 9) | + __gen_field(values->LineAntialiasingRegionWidth, 6, 7) | + __gen_field(values->PolygonStippleEnable, 4, 4) | + __gen_field(values->LineStippleEnable, 3, 3) | + __gen_field(values->PointRasterizationRule, 2, 2) | + __gen_field(values->ForceKillPixelEnable, 0, 1) | + 0; + +} + +#define GEN9_3DSTATE_WM_CHROMAKEY_length_bias 0x00000002 +#define GEN9_3DSTATE_WM_CHROMAKEY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 76, \ + .DwordLength = 0 + +#define GEN9_3DSTATE_WM_CHROMAKEY_length 0x00000002 + +struct GEN9_3DSTATE_WM_CHROMAKEY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool ChromaKeyKillEnable; +}; + +static inline void +GEN9_3DSTATE_WM_CHROMAKEY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_WM_CHROMAKEY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ChromaKeyKillEnable, 31, 31) | + 0; + +} + +#define GEN9_3DSTATE_WM_DEPTH_STENCIL_length_bias 0x00000002 +#define GEN9_3DSTATE_WM_DEPTH_STENCIL_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 78, \ + .DwordLength = 2 + +#define GEN9_3DSTATE_WM_DEPTH_STENCIL_length 0x00000004 + +struct GEN9_3DSTATE_WM_DEPTH_STENCIL { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StencilFailOp; + uint32_t StencilPassDepthFailOp; + uint32_t StencilPassDepthPassOp; + uint32_t BackfaceStencilTestFunction; + uint32_t BackfaceStencilFailOp; + uint32_t BackfaceStencilPassDepthFailOp; + uint32_t BackfaceStencilPassDepthPassOp; + uint32_t StencilTestFunction; + uint32_t DepthTestFunction; + bool DoubleSidedStencilEnable; + bool StencilTestEnable; + bool StencilBufferWriteEnable; + bool DepthTestEnable; + bool DepthBufferWriteEnable; + uint32_t StencilTestMask; + uint32_t StencilWriteMask; + uint32_t BackfaceStencilTestMask; + uint32_t BackfaceStencilWriteMask; + uint32_t StencilReferenceValue; + uint32_t BackfaceStencilReferenceValue; +}; + +static inline void +GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_WM_DEPTH_STENCIL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StencilFailOp, 29, 31) | + __gen_field(values->StencilPassDepthFailOp, 26, 28) | + __gen_field(values->StencilPassDepthPassOp, 23, 25) | + __gen_field(values->BackfaceStencilTestFunction, 20, 22) | + __gen_field(values->BackfaceStencilFailOp, 17, 19) | + __gen_field(values->BackfaceStencilPassDepthFailOp, 14, 16) | + __gen_field(values->BackfaceStencilPassDepthPassOp, 11, 13) | + __gen_field(values->StencilTestFunction, 8, 10) | + __gen_field(values->DepthTestFunction, 5, 7) | + __gen_field(values->DoubleSidedStencilEnable, 4, 4) | + __gen_field(values->StencilTestEnable, 3, 3) | + __gen_field(values->StencilBufferWriteEnable, 2, 2) | + __gen_field(values->DepthTestEnable, 1, 1) | + __gen_field(values->DepthBufferWriteEnable, 0, 0) | + 0; + + dw[2] = + __gen_field(values->StencilTestMask, 24, 31) | + __gen_field(values->StencilWriteMask, 16, 23) | + __gen_field(values->BackfaceStencilTestMask, 8, 15) | + __gen_field(values->BackfaceStencilWriteMask, 0, 7) | + 0; + + dw[3] = + __gen_field(values->StencilReferenceValue, 8, 15) | + __gen_field(values->BackfaceStencilReferenceValue, 0, 7) | + 0; + +} + +#define GEN9_3DSTATE_WM_HZ_OP_length_bias 0x00000002 +#define GEN9_3DSTATE_WM_HZ_OP_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 82, \ + .DwordLength = 3 + +#define GEN9_3DSTATE_WM_HZ_OP_length 0x00000005 + +struct GEN9_3DSTATE_WM_HZ_OP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool StencilBufferClearEnable; + bool DepthBufferClearEnable; + bool ScissorRectangleEnable; + bool DepthBufferResolveEnable; + bool HierarchicalDepthBufferResolveEnable; + uint32_t PixelPositionOffsetEnable; + bool FullSurfaceDepthClear; + uint32_t StencilClearValue; + uint32_t NumberofMultisamples; + uint32_t ClearRectangleYMin; + uint32_t ClearRectangleXMin; + uint32_t ClearRectangleYMax; + uint32_t ClearRectangleXMax; + uint32_t SampleMask; +}; + +static inline void +GEN9_3DSTATE_WM_HZ_OP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_3DSTATE_WM_HZ_OP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StencilBufferClearEnable, 31, 31) | + __gen_field(values->DepthBufferClearEnable, 30, 30) | + __gen_field(values->ScissorRectangleEnable, 29, 29) | + __gen_field(values->DepthBufferResolveEnable, 28, 28) | + __gen_field(values->HierarchicalDepthBufferResolveEnable, 27, 27) | + __gen_field(values->PixelPositionOffsetEnable, 26, 26) | + __gen_field(values->FullSurfaceDepthClear, 25, 25) | + __gen_field(values->StencilClearValue, 16, 23) | + __gen_field(values->NumberofMultisamples, 13, 15) | + 0; + + dw[2] = + __gen_field(values->ClearRectangleYMin, 16, 31) | + __gen_field(values->ClearRectangleXMin, 0, 15) | + 0; + + dw[3] = + __gen_field(values->ClearRectangleYMax, 16, 31) | + __gen_field(values->ClearRectangleXMax, 0, 15) | + 0; + + dw[4] = + __gen_field(values->SampleMask, 0, 15) | + 0; + +} + +#define GEN9_GPGPU_WALKER_length_bias 0x00000002 +#define GEN9_GPGPU_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 5, \ + .DwordLength = 13 + +#define GEN9_GPGPU_WALKER_length 0x0000000f + +struct GEN9_GPGPU_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + bool IndirectParameterEnable; + bool PredicateEnable; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; +#define SIMD8 0 +#define SIMD16 1 +#define SIMD32 2 + uint32_t SIMDSize; + uint32_t ThreadDepthCounterMaximum; + uint32_t ThreadHeightCounterMaximum; + uint32_t ThreadWidthCounterMaximum; + uint32_t ThreadGroupIDStartingX; + uint32_t ThreadGroupIDXDimension; + uint32_t ThreadGroupIDStartingY; + uint32_t ThreadGroupIDYDimension; + uint32_t ThreadGroupIDStartingResumeZ; + uint32_t ThreadGroupIDZDimension; + uint32_t RightExecutionMask; + uint32_t BottomExecutionMask; +}; + +static inline void +GEN9_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_GPGPU_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 6, 31) | + 0; + + dw[4] = + __gen_field(values->SIMDSize, 30, 31) | + __gen_field(values->ThreadDepthCounterMaximum, 16, 21) | + __gen_field(values->ThreadHeightCounterMaximum, 8, 13) | + __gen_field(values->ThreadWidthCounterMaximum, 0, 5) | + 0; + + dw[5] = + __gen_field(values->ThreadGroupIDStartingX, 0, 31) | + 0; + + dw[6] = + 0; + + dw[7] = + __gen_field(values->ThreadGroupIDXDimension, 0, 31) | + 0; + + dw[8] = + __gen_field(values->ThreadGroupIDStartingY, 0, 31) | + 0; + + dw[9] = + 0; + + dw[10] = + __gen_field(values->ThreadGroupIDYDimension, 0, 31) | + 0; + + dw[11] = + __gen_field(values->ThreadGroupIDStartingResumeZ, 0, 31) | + 0; + + dw[12] = + __gen_field(values->ThreadGroupIDZDimension, 0, 31) | + 0; + + dw[13] = + __gen_field(values->RightExecutionMask, 0, 31) | + 0; + + dw[14] = + __gen_field(values->BottomExecutionMask, 0, 31) | + 0; + +} + +#define GEN9_MEDIA_CURBE_LOAD_length_bias 0x00000002 +#define GEN9_MEDIA_CURBE_LOAD_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 1, \ + .DwordLength = 2 + +#define GEN9_MEDIA_CURBE_LOAD_length 0x00000004 + +struct GEN9_MEDIA_CURBE_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t CURBETotalDataLength; + uint32_t CURBEDataStartAddress; +}; + +static inline void +GEN9_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MEDIA_CURBE_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->CURBETotalDataLength, 0, 16) | + 0; + + dw[3] = + __gen_field(values->CURBEDataStartAddress, 0, 31) | + 0; + +} + +#define GEN9_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length_bias 0x00000002 +#define GEN9_MEDIA_INTERFACE_DESCRIPTOR_LOAD_header\ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 2, \ + .DwordLength = 2 + +#define GEN9_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 + +struct GEN9_MEDIA_INTERFACE_DESCRIPTOR_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorTotalLength; + uint32_t InterfaceDescriptorDataStartAddress; +}; + +static inline void +GEN9_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->InterfaceDescriptorTotalLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->InterfaceDescriptorDataStartAddress, 0, 31) | + 0; + +} + +#define GEN9_MEDIA_OBJECT_length_bias 0x00000002 +#define GEN9_MEDIA_OBJECT_header \ + .CommandType = 3, \ + .MediaCommandPipeline = 2, \ + .MediaCommandOpcode = 1, \ + .MediaCommandSubOpcode = 0 + +#define GEN9_MEDIA_OBJECT_length 0x00000000 + +struct GEN9_MEDIA_OBJECT { + uint32_t CommandType; + uint32_t MediaCommandPipeline; + uint32_t MediaCommandOpcode; + uint32_t MediaCommandSubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; + uint32_t SliceDestinationSelectMSBs; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; + uint32_t ForceDestination; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; +#define Slice0 0 +#define Slice1 1 +#define Slice2 2 + uint32_t SliceDestinationSelect; +#define Subslice3 3 +#define SubSlice2 2 +#define SubSlice1 1 +#define SubSlice0 0 + uint32_t SubSliceDestinationSelect; + uint32_t IndirectDataLength; + __gen_address_type IndirectDataStartAddress; + uint32_t ScoredboardY; + uint32_t ScoreboardX; + uint32_t ScoreboardColor; + bool ScoreboardMask; + /* variable length fields follow */ +}; + +static inline void +GEN9_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MEDIA_OBJECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MediaCommandPipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->MediaCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->SliceDestinationSelectMSBs, 25, 26) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->ForceDestination, 22, 22) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->SliceDestinationSelect, 19, 20) | + __gen_field(values->SubSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); + + dw[4] = + __gen_field(values->ScoredboardY, 16, 24) | + __gen_field(values->ScoreboardX, 0, 8) | + 0; + + dw[5] = + __gen_field(values->ScoreboardColor, 16, 19) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_MEDIA_OBJECT_GRPID_length_bias 0x00000002 +#define GEN9_MEDIA_OBJECT_GRPID_header \ + .CommandType = 3, \ + .MediaCommandPipeline = 2, \ + .MediaCommandOpcode = 1, \ + .MediaCommandSubOpcode = 6 + +#define GEN9_MEDIA_OBJECT_GRPID_length 0x00000000 + +struct GEN9_MEDIA_OBJECT_GRPID { + uint32_t CommandType; + uint32_t MediaCommandPipeline; + uint32_t MediaCommandOpcode; + uint32_t MediaCommandSubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t SliceDestinationSelectMSB; + uint32_t EndofThreadGroup; + uint32_t ForceDestination; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; +#define Slice0 0 +#define Slice1 1 +#define Slice2 2 + uint32_t SliceDestinationSelect; +#define Subslice3 3 +#define SubSlice2 2 +#define SubSlice1 1 +#define SubSlice0 0 + uint32_t SubSliceDestinationSelect; + uint32_t IndirectDataLength; + __gen_address_type IndirectDataStartAddress; + uint32_t ScoreboardY; + uint32_t ScoreboardX; + uint32_t ScoreboardColor; + bool ScoreboardMask; + uint32_t GroupID; + /* variable length fields follow */ +}; + +static inline void +GEN9_MEDIA_OBJECT_GRPID_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MEDIA_OBJECT_GRPID * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MediaCommandPipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->MediaCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->SliceDestinationSelectMSB, 24, 24) | + __gen_field(values->EndofThreadGroup, 23, 23) | + __gen_field(values->ForceDestination, 22, 22) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->SliceDestinationSelect, 19, 20) | + __gen_field(values->SubSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); + + dw[4] = + __gen_field(values->ScoreboardY, 16, 24) | + __gen_field(values->ScoreboardX, 0, 8) | + 0; + + dw[5] = + __gen_field(values->ScoreboardColor, 16, 19) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->GroupID, 0, 31) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_MEDIA_OBJECT_PRT_length_bias 0x00000002 +#define GEN9_MEDIA_OBJECT_PRT_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 2, \ + .DwordLength = 14 + +#define GEN9_MEDIA_OBJECT_PRT_length 0x00000010 + +struct GEN9_MEDIA_OBJECT_PRT { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; + bool PRT_FenceNeeded; +#define Rootthreadqueue 0 +#define VFEstateflush 1 + uint32_t PRT_FenceType; + uint32_t InlineData[12]; +}; + +static inline void +GEN9_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MEDIA_OBJECT_PRT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->PRT_FenceNeeded, 23, 23) | + __gen_field(values->PRT_FenceType, 22, 22) | + 0; + + dw[3] = + 0; + + for (uint32_t i = 0, j = 4; i < 12; i += 1, j++) { + dw[j] = + __gen_field(values->InlineData[i + 0], 0, 31) | + 0; + } + +} + +#define GEN9_MEDIA_OBJECT_WALKER_length_bias 0x00000002 +#define GEN9_MEDIA_OBJECT_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 3 + +#define GEN9_MEDIA_OBJECT_WALKER_length 0x00000000 + +struct GEN9_MEDIA_OBJECT_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; + uint32_t MaskedDispatch; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; + uint32_t GroupIDLoopSelect; + bool ScoreboardMask; + uint32_t ColorCountMinusOne; + uint32_t MiddleLoopExtraSteps; + uint32_t LocalMidLoopUnitY; + uint32_t MidLoopUnitX; + uint32_t GlobalLoopExecCount; + uint32_t LocalLoopExecCount; + uint32_t BlockResolutionY; + uint32_t BlockResolutionX; + uint32_t LocalStartY; + uint32_t LocalStartX; + uint32_t LocalOuterLoopStrideY; + uint32_t LocalOuterLoopStrideX; + uint32_t LocalInnerLoopUnitY; + uint32_t LocalInnerLoopUnitX; + uint32_t GlobalResolutionY; + uint32_t GlobalResolutionX; + uint32_t GlobalStartY; + uint32_t GlobalStartX; + uint32_t GlobalOuterLoopStrideY; + uint32_t GlobalOuterLoopStrideX; + uint32_t GlobalInnerLoopUnitY; + uint32_t GlobalInnerLoopUnitX; + /* variable length fields follow */ +}; + +static inline void +GEN9_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MEDIA_OBJECT_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->MaskedDispatch, 22, 23) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_field(values->IndirectDataStartAddress, 0, 31) | + 0; + + dw[4] = + 0; + + dw[5] = + __gen_field(values->GroupIDLoopSelect, 8, 31) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->ColorCountMinusOne, 24, 27) | + __gen_field(values->MiddleLoopExtraSteps, 16, 20) | + __gen_field(values->LocalMidLoopUnitY, 12, 13) | + __gen_field(values->MidLoopUnitX, 8, 9) | + 0; + + dw[7] = + __gen_field(values->GlobalLoopExecCount, 16, 27) | + __gen_field(values->LocalLoopExecCount, 0, 11) | + 0; + + dw[8] = + __gen_field(values->BlockResolutionY, 16, 26) | + __gen_field(values->BlockResolutionX, 0, 10) | + 0; + + dw[9] = + __gen_field(values->LocalStartY, 16, 26) | + __gen_field(values->LocalStartX, 0, 10) | + 0; + + dw[10] = + 0; + + dw[11] = + __gen_field(values->LocalOuterLoopStrideY, 16, 27) | + __gen_field(values->LocalOuterLoopStrideX, 0, 11) | + 0; + + dw[12] = + __gen_field(values->LocalInnerLoopUnitY, 16, 27) | + __gen_field(values->LocalInnerLoopUnitX, 0, 11) | + 0; + + dw[13] = + __gen_field(values->GlobalResolutionY, 16, 26) | + __gen_field(values->GlobalResolutionX, 0, 10) | + 0; + + dw[14] = + __gen_field(values->GlobalStartY, 16, 27) | + __gen_field(values->GlobalStartX, 0, 11) | + 0; + + dw[15] = + __gen_field(values->GlobalOuterLoopStrideY, 16, 27) | + __gen_field(values->GlobalOuterLoopStrideX, 0, 11) | + 0; + + dw[16] = + __gen_field(values->GlobalInnerLoopUnitY, 16, 27) | + __gen_field(values->GlobalInnerLoopUnitX, 0, 11) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_MEDIA_STATE_FLUSH_length_bias 0x00000002 +#define GEN9_MEDIA_STATE_FLUSH_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 4, \ + .DwordLength = 0 + +#define GEN9_MEDIA_STATE_FLUSH_length 0x00000002 + +struct GEN9_MEDIA_STATE_FLUSH { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + bool FlushtoGO; + uint32_t WatermarkRequired; + uint32_t InterfaceDescriptorOffset; +}; + +static inline void +GEN9_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MEDIA_STATE_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->FlushtoGO, 7, 7) | + __gen_field(values->WatermarkRequired, 6, 6) | + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + +} + +#define GEN9_MEDIA_VFE_STATE_length_bias 0x00000002 +#define GEN9_MEDIA_VFE_STATE_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 0, \ + .DwordLength = 7 + +#define GEN9_MEDIA_VFE_STATE_length 0x00000009 + +struct GEN9_MEDIA_VFE_STATE { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t ScratchSpaceBasePointer; + uint32_t StackSize; + uint32_t PerThreadScratchSpace; + uint32_t ScratchSpaceBasePointerHigh; + uint32_t MaximumNumberofThreads; + uint32_t NumberofURBEntries; +#define Maintainingtheexistingtimestampstate 0 +#define Resettingrelativetimerandlatchingtheglobaltimestamp 1 + uint32_t ResetGatewayTimer; + uint32_t SliceDisable; + uint32_t URBEntryAllocationSize; + uint32_t CURBEAllocationSize; +#define Scoreboarddisabled 0 +#define Scoreboardenabled 1 + uint32_t ScoreboardEnable; +#define StallingScoreboard 0 +#define NonStallingScoreboard 1 + uint32_t ScoreboardType; + uint32_t ScoreboardMask; + uint32_t Scoreboard3DeltaY; + uint32_t Scoreboard3DeltaX; + uint32_t Scoreboard2DeltaY; + uint32_t Scoreboard2DeltaX; + uint32_t Scoreboard1DeltaY; + uint32_t Scoreboard1DeltaX; + uint32_t Scoreboard0DeltaY; + uint32_t Scoreboard0DeltaX; + uint32_t Scoreboard7DeltaY; + uint32_t Scoreboard7DeltaX; + uint32_t Scoreboard6DeltaY; + uint32_t Scoreboard6DeltaX; + uint32_t Scoreboard5DeltaY; + uint32_t Scoreboard5DeltaX; + uint32_t Scoreboard4DeltaY; + uint32_t Scoreboard4DeltaX; +}; + +static inline void +GEN9_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MEDIA_VFE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->StackSize, 4, 7) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[2] = + __gen_offset(values->ScratchSpaceBasePointerHigh, 0, 15) | + 0; + + dw[3] = + __gen_field(values->MaximumNumberofThreads, 16, 31) | + __gen_field(values->NumberofURBEntries, 8, 15) | + __gen_field(values->ResetGatewayTimer, 7, 7) | + 0; + + dw[4] = + __gen_field(values->SliceDisable, 0, 1) | + 0; + + dw[5] = + __gen_field(values->URBEntryAllocationSize, 16, 31) | + __gen_field(values->CURBEAllocationSize, 0, 15) | + 0; + + dw[6] = + __gen_field(values->ScoreboardEnable, 31, 31) | + __gen_field(values->ScoreboardType, 30, 30) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[7] = + __gen_field(values->Scoreboard3DeltaY, 28, 31) | + __gen_field(values->Scoreboard3DeltaX, 24, 27) | + __gen_field(values->Scoreboard2DeltaY, 20, 23) | + __gen_field(values->Scoreboard2DeltaX, 16, 19) | + __gen_field(values->Scoreboard1DeltaY, 12, 15) | + __gen_field(values->Scoreboard1DeltaX, 8, 11) | + __gen_field(values->Scoreboard0DeltaY, 4, 7) | + __gen_field(values->Scoreboard0DeltaX, 0, 3) | + 0; + + dw[8] = + __gen_field(values->Scoreboard7DeltaY, 28, 31) | + __gen_field(values->Scoreboard7DeltaX, 24, 27) | + __gen_field(values->Scoreboard6DeltaY, 20, 23) | + __gen_field(values->Scoreboard6DeltaX, 16, 19) | + __gen_field(values->Scoreboard5DeltaY, 12, 15) | + __gen_field(values->Scoreboard5DeltaX, 8, 11) | + __gen_field(values->Scoreboard4DeltaY, 4, 7) | + __gen_field(values->Scoreboard4DeltaX, 0, 3) | + 0; + +} + +#define GEN9_MI_ARB_CHECK_length_bias 0x00000001 +#define GEN9_MI_ARB_CHECK_header \ + .CommandType = 0, \ + .MICommandOpcode = 5 + +#define GEN9_MI_ARB_CHECK_length 0x00000001 + +struct GEN9_MI_ARB_CHECK { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN9_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_ARB_CHECK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN9_MI_BATCH_BUFFER_END_length_bias 0x00000001 +#define GEN9_MI_BATCH_BUFFER_END_header \ + .CommandType = 0, \ + .MICommandOpcode = 10 + +#define GEN9_MI_BATCH_BUFFER_END_length 0x00000001 + +struct GEN9_MI_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN9_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN9_MI_CLFLUSH_length_bias 0x00000002 +#define GEN9_MI_CLFLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 39 + +#define GEN9_MI_CLFLUSH_length 0x00000000 + +struct GEN9_MI_CLFLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTT; + uint32_t DwordLength; + __gen_address_type PageBaseAddress; + uint32_t StartingCachelineOffset; + /* variable length fields follow */ +}; + +static inline void +GEN9_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_CLFLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + __gen_field(values->StartingCachelineOffset, 6, 11) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->PageBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + /* variable length fields follow */ +} + +#define GEN9_MI_COPY_MEM_MEM_length_bias 0x00000002 +#define GEN9_MI_COPY_MEM_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 46, \ + .DwordLength = 3 + +#define GEN9_MI_COPY_MEM_MEM_length 0x00000005 + +struct GEN9_MI_COPY_MEM_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTTSource; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTTDestination; + uint32_t DwordLength; + __gen_address_type DestinationMemoryAddress; + __gen_address_type SourceMemoryAddress; +}; + +static inline void +GEN9_MI_COPY_MEM_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_COPY_MEM_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTTSource, 22, 22) | + __gen_field(values->UseGlobalGTTDestination, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->DestinationMemoryAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + uint32_t dw3 = + 0; + + uint64_t qw3 = + __gen_combine_address(data, &dw[3], values->SourceMemoryAddress, dw3); + + dw[3] = qw3; + dw[4] = qw3 >> 32; + +} + +#define GEN9_MI_DISPLAY_FLIP_length_bias 0x00000002 +#define GEN9_MI_DISPLAY_FLIP_header \ + .CommandType = 0, \ + .MICommandOpcode = 20 + +#define GEN9_MI_DISPLAY_FLIP_length 0x00000003 + +struct GEN9_MI_DISPLAY_FLIP { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool AsyncFlipIndicator; +#define DisplayPlane1 0 +#define DisplayPlane2 1 +#define DisplayPlane3 2 +#define DisplayPlane4 4 +#define DisplayPlane5 5 +#define DisplayPlane6 6 +#define DisplayPlane7 7 +#define DisplayPlane8 8 +#define DisplayPlane9 9 +#define DisplayPlane10 10 +#define DisplayPlane11 11 +#define DisplayPlane12 12 + uint32_t DisplayPlaneSelect; + uint32_t DwordLength; + bool Stereoscopic3DMode; + uint32_t DisplayBufferPitch; +#define Linear 0 +#define TiledX 1 +#define TiledYLegacyYB 4 +#define TiledYF 5 + bool TileParameter; + __gen_address_type DisplayBufferBaseAddress; +#define SyncFlip 0 +#define AsyncFlip 1 +#define Stereo3DFlip 2 + uint32_t FlipType; + __gen_address_type LeftEyeDisplayBufferBaseAddress; +}; + +static inline void +GEN9_MI_DISPLAY_FLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_DISPLAY_FLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->AsyncFlipIndicator, 22, 22) | + __gen_field(values->DisplayPlaneSelect, 8, 12) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Stereoscopic3DMode, 31, 31) | + __gen_field(values->DisplayBufferPitch, 6, 15) | + __gen_field(values->TileParameter, 0, 2) | + 0; + + uint32_t dw2 = + __gen_field(values->FlipType, 0, 1) | + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->DisplayBufferBaseAddress, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->LeftEyeDisplayBufferBaseAddress, dw3); + +} + +#define GEN9_MI_LOAD_REGISTER_MEM_length_bias 0x00000002 +#define GEN9_MI_LOAD_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 41, \ + .DwordLength = 2 + +#define GEN9_MI_LOAD_REGISTER_MEM_length 0x00000004 + +struct GEN9_MI_LOAD_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t AsyncModeEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN9_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_LOAD_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->AsyncModeEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN9_MI_LOAD_SCAN_LINES_EXCL_length_bias 0x00000002 +#define GEN9_MI_LOAD_SCAN_LINES_EXCL_header \ + .CommandType = 0, \ + .MICommandOpcode = 19, \ + .DwordLength = 0 + +#define GEN9_MI_LOAD_SCAN_LINES_EXCL_length 0x00000002 + +struct GEN9_MI_LOAD_SCAN_LINES_EXCL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define DisplayPlaneA 0 +#define DisplayPlaneB 1 +#define DisplayPlaneC 4 + uint32_t DisplayPlaneSelect; + uint32_t DwordLength; + uint32_t StartScanLineNumber; + uint32_t EndScanLineNumber; +}; + +static inline void +GEN9_MI_LOAD_SCAN_LINES_EXCL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_LOAD_SCAN_LINES_EXCL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlaneSelect, 19, 21) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->StartScanLineNumber, 16, 28) | + __gen_field(values->EndScanLineNumber, 0, 12) | + 0; + +} + +#define GEN9_MI_LOAD_SCAN_LINES_INCL_length_bias 0x00000002 +#define GEN9_MI_LOAD_SCAN_LINES_INCL_header \ + .CommandType = 0, \ + .MICommandOpcode = 18, \ + .DwordLength = 0 + +#define GEN9_MI_LOAD_SCAN_LINES_INCL_length 0x00000002 + +struct GEN9_MI_LOAD_SCAN_LINES_INCL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define DisplayPlane1A 0 +#define DisplayPlane1B 1 +#define DisplayPlane1C 4 + uint32_t DisplayPlaneSelect; +#define NeverForward 0 +#define AlwaysForward 1 +#define ConditionallyForward 2 + bool ScanLineEventDoneForward; + uint32_t DwordLength; + uint32_t StartScanLineNumber; + uint32_t EndScanLineNumber; +}; + +static inline void +GEN9_MI_LOAD_SCAN_LINES_INCL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_LOAD_SCAN_LINES_INCL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlaneSelect, 19, 21) | + __gen_field(values->ScanLineEventDoneForward, 17, 18) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->StartScanLineNumber, 16, 28) | + __gen_field(values->EndScanLineNumber, 0, 12) | + 0; + +} + +#define GEN9_MI_LOAD_URB_MEM_length_bias 0x00000002 +#define GEN9_MI_LOAD_URB_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 44, \ + .DwordLength = 2 + +#define GEN9_MI_LOAD_URB_MEM_length 0x00000004 + +struct GEN9_MI_LOAD_URB_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN9_MI_LOAD_URB_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_LOAD_URB_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBAddress, 2, 14) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN9_MI_MATH_length_bias 0x00000002 +#define GEN9_MI_MATH_header \ + .CommandType = 0, \ + .MICommandOpcode = 26 + +#define GEN9_MI_MATH_length 0x00000000 + +struct GEN9_MI_MATH { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t ALUINSTRUCTION1; + uint32_t ALUINSTRUCTION2; + /* variable length fields follow */ +}; + +static inline void +GEN9_MI_MATH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_MATH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ALUINSTRUCTION1, 0, 31) | + 0; + + dw[2] = + __gen_field(values->ALUINSTRUCTION2, 0, 31) | + 0; + + /* variable length fields follow */ +} + +#define GEN9_MI_NOOP_length_bias 0x00000001 +#define GEN9_MI_NOOP_header \ + .CommandType = 0, \ + .MICommandOpcode = 0 + +#define GEN9_MI_NOOP_length 0x00000001 + +struct GEN9_MI_NOOP { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool IdentificationNumberRegisterWriteEnable; + uint32_t IdentificationNumber; +}; + +static inline void +GEN9_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_NOOP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->IdentificationNumberRegisterWriteEnable, 22, 22) | + __gen_field(values->IdentificationNumber, 0, 21) | + 0; + +} + +#define GEN9_MI_PREDICATE_length_bias 0x00000001 +#define GEN9_MI_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 12 + +#define GEN9_MI_PREDICATE_length 0x00000001 + +struct GEN9_MI_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define LOAD_KEEP 0 +#define LOAD_LOAD 2 +#define LOAD_LOADINV 3 + uint32_t LoadOperation; +#define COMBINE_SET 0 +#define COMBINE_AND 1 +#define COMBINE_OR 2 +#define COMBINE_XOR 3 + uint32_t CombineOperation; +#define COMPARE_SRCS_EQUAL 2 +#define COMPARE_DELTAS_EQUAL 3 + uint32_t CompareOperation; +}; + +static inline void +GEN9_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->LoadOperation, 6, 7) | + __gen_field(values->CombineOperation, 3, 4) | + __gen_field(values->CompareOperation, 0, 1) | + 0; + +} + +#define GEN9_MI_REPORT_HEAD_length_bias 0x00000001 +#define GEN9_MI_REPORT_HEAD_header \ + .CommandType = 0, \ + .MICommandOpcode = 7 + +#define GEN9_MI_REPORT_HEAD_length 0x00000001 + +struct GEN9_MI_REPORT_HEAD { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN9_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_REPORT_HEAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN9_MI_RS_CONTEXT_length_bias 0x00000001 +#define GEN9_MI_RS_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 15 + +#define GEN9_MI_RS_CONTEXT_length 0x00000001 + +struct GEN9_MI_RS_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define RS_RESTORE 0 +#define RS_SAVE 1 + uint32_t ResourceStreamerSave; +}; + +static inline void +GEN9_MI_RS_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_RS_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ResourceStreamerSave, 0, 0) | + 0; + +} + +#define GEN9_MI_RS_CONTROL_length_bias 0x00000001 +#define GEN9_MI_RS_CONTROL_header \ + .CommandType = 0, \ + .MICommandOpcode = 6 + +#define GEN9_MI_RS_CONTROL_length 0x00000001 + +struct GEN9_MI_RS_CONTROL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define RS_STOP 0 +#define RS_START 1 + uint32_t ResourceStreamerControl; +}; + +static inline void +GEN9_MI_RS_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_RS_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ResourceStreamerControl, 0, 0) | + 0; + +} + +#define GEN9_MI_RS_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN9_MI_RS_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 43, \ + .DwordLength = 2 + +#define GEN9_MI_RS_STORE_DATA_IMM_length 0x00000004 + +struct GEN9_MI_RS_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type DestinationAddress; + uint32_t CoreModeEnable; + uint32_t DataDWord0; +}; + +static inline void +GEN9_MI_RS_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_RS_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->DestinationAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + +} + +#define GEN9_MI_SET_CONTEXT_length_bias 0x00000002 +#define GEN9_MI_SET_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 24, \ + .DwordLength = 0 + +#define GEN9_MI_SET_CONTEXT_length 0x00000002 + +struct GEN9_MI_SET_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type LogicalContextAddress; + uint32_t ReservedMustbe1; + bool CoreModeEnable; + bool ResourceStreamerStateSaveEnable; + bool ResourceStreamerStateRestoreEnable; + uint32_t ForceRestore; + uint32_t RestoreInhibit; +}; + +static inline void +GEN9_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_SET_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->ReservedMustbe1, 8, 8) | + __gen_field(values->CoreModeEnable, 4, 4) | + __gen_field(values->ResourceStreamerStateSaveEnable, 3, 3) | + __gen_field(values->ResourceStreamerStateRestoreEnable, 2, 2) | + __gen_field(values->ForceRestore, 1, 1) | + __gen_field(values->RestoreInhibit, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->LogicalContextAddress, dw1); + +} + +#define GEN9_MI_SET_PREDICATE_length_bias 0x00000001 +#define GEN9_MI_SET_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 1 + +#define GEN9_MI_SET_PREDICATE_length 0x00000001 + +struct GEN9_MI_SET_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define NOOPNever 0 +#define NOOPonResult2clear 1 +#define NOOPonResult2set 2 +#define NOOPonResultclear 3 +#define NOOPonResultset 4 +#define Executewhenonesliceenabled 5 +#define Executewhentwoslicesareenabled 6 +#define Executewhenthreeslicesareenabled 7 +#define NOOPAlways 15 + uint32_t PREDICATEENABLE; +}; + +static inline void +GEN9_MI_SET_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_SET_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->PREDICATEENABLE, 0, 3) | + 0; + +} + +#define GEN9_MI_STORE_DATA_INDEX_length_bias 0x00000002 +#define GEN9_MI_STORE_DATA_INDEX_header \ + .CommandType = 0, \ + .MICommandOpcode = 33, \ + .DwordLength = 1 + +#define GEN9_MI_STORE_DATA_INDEX_length 0x00000003 + +struct GEN9_MI_STORE_DATA_INDEX { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UsePerProcessHardwareStatusPage; + uint32_t DwordLength; + uint32_t Offset; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN9_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_STORE_DATA_INDEX * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UsePerProcessHardwareStatusPage, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Offset, 2, 11) | + 0; + + dw[2] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[3] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN9_MI_STORE_URB_MEM_length_bias 0x00000002 +#define GEN9_MI_STORE_URB_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 45, \ + .DwordLength = 2 + +#define GEN9_MI_STORE_URB_MEM_length 0x00000004 + +struct GEN9_MI_STORE_URB_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN9_MI_STORE_URB_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_STORE_URB_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBAddress, 2, 14) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN9_MI_SUSPEND_FLUSH_length_bias 0x00000001 +#define GEN9_MI_SUSPEND_FLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 11 + +#define GEN9_MI_SUSPEND_FLUSH_length 0x00000001 + +struct GEN9_MI_SUSPEND_FLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool SuspendFlush; +}; + +static inline void +GEN9_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_SUSPEND_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->SuspendFlush, 0, 0) | + 0; + +} + +#define GEN9_MI_TOPOLOGY_FILTER_length_bias 0x00000001 +#define GEN9_MI_TOPOLOGY_FILTER_header \ + .CommandType = 0, \ + .MICommandOpcode = 13 + +#define GEN9_MI_TOPOLOGY_FILTER_length 0x00000001 + +struct GEN9_MI_TOPOLOGY_FILTER { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t TopologyFilterValue; +}; + +static inline void +GEN9_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_TOPOLOGY_FILTER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->TopologyFilterValue, 0, 5) | + 0; + +} + +#define GEN9_MI_UPDATE_GTT_length_bias 0x00000002 +#define GEN9_MI_UPDATE_GTT_header \ + .CommandType = 0, \ + .MICommandOpcode = 35 + +#define GEN9_MI_UPDATE_GTT_length 0x00000000 + +struct GEN9_MI_UPDATE_GTT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type EntryAddress; + /* variable length fields follow */ +}; + +static inline void +GEN9_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_UPDATE_GTT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->EntryAddress, dw1); + + /* variable length fields follow */ +} + +#define GEN9_MI_URB_ATOMIC_ALLOC_length_bias 0x00000001 +#define GEN9_MI_URB_ATOMIC_ALLOC_header \ + .CommandType = 0, \ + .MICommandOpcode = 9 + +#define GEN9_MI_URB_ATOMIC_ALLOC_length 0x00000001 + +struct GEN9_MI_URB_ATOMIC_ALLOC { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t URBAtomicStorageOffset; + uint32_t URBAtomicStorageSize; +}; + +static inline void +GEN9_MI_URB_ATOMIC_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_URB_ATOMIC_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->URBAtomicStorageOffset, 12, 19) | + __gen_field(values->URBAtomicStorageSize, 0, 8) | + 0; + +} + +#define GEN9_MI_USER_INTERRUPT_length_bias 0x00000001 +#define GEN9_MI_USER_INTERRUPT_header \ + .CommandType = 0, \ + .MICommandOpcode = 2 + +#define GEN9_MI_USER_INTERRUPT_length 0x00000001 + +struct GEN9_MI_USER_INTERRUPT { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN9_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_USER_INTERRUPT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN9_MI_WAIT_FOR_EVENT_length_bias 0x00000001 +#define GEN9_MI_WAIT_FOR_EVENT_header \ + .CommandType = 0, \ + .MICommandOpcode = 3 + +#define GEN9_MI_WAIT_FOR_EVENT_length 0x00000001 + +struct GEN9_MI_WAIT_FOR_EVENT { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool DisplayPlane1CVerticalBlankWaitEnable; + bool DisplayPlane6FlipPendingWaitEnable; + bool DisplayPlane12FlipPendingWaitEnable; + bool DisplayPlane11FlipPendingWaitEnable; + bool DisplayPlane10FlipPendingWaitEnable; + bool DisplayPlane9FlipPendingWaitEnable; + bool DisplayPlane3FlipPendingWaitEnable; + bool DisplayPlane1CScanLineWaitEnable; + bool DisplayPlane1BVerticalBlankWaitEnable; + bool DisplayPlane5FlipPendingWaitEnable; + bool DisplayPlane2FlipPendingWaitEnable; + bool DisplayPlane1BScanLineWaitEnable; + bool DisplayPlane8FlipPendingWaitEnable; + bool DisplayPlane7FlipPendingWaitEnable; + bool DisplayPlane1AVerticalBlankWaitEnable; + bool DisplayPlane4FlipPendingWaitEnable; + bool DisplayPlane1FlipPendingWaitEnable; + bool DisplayPlnae1AScanLineWaitEnable; +}; + +static inline void +GEN9_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_MI_WAIT_FOR_EVENT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlane1CVerticalBlankWaitEnable, 21, 21) | + __gen_field(values->DisplayPlane6FlipPendingWaitEnable, 20, 20) | + __gen_field(values->DisplayPlane12FlipPendingWaitEnable, 19, 19) | + __gen_field(values->DisplayPlane11FlipPendingWaitEnable, 18, 18) | + __gen_field(values->DisplayPlane10FlipPendingWaitEnable, 17, 17) | + __gen_field(values->DisplayPlane9FlipPendingWaitEnable, 16, 16) | + __gen_field(values->DisplayPlane3FlipPendingWaitEnable, 15, 15) | + __gen_field(values->DisplayPlane1CScanLineWaitEnable, 14, 14) | + __gen_field(values->DisplayPlane1BVerticalBlankWaitEnable, 11, 11) | + __gen_field(values->DisplayPlane5FlipPendingWaitEnable, 10, 10) | + __gen_field(values->DisplayPlane2FlipPendingWaitEnable, 9, 9) | + __gen_field(values->DisplayPlane1BScanLineWaitEnable, 8, 8) | + __gen_field(values->DisplayPlane8FlipPendingWaitEnable, 7, 7) | + __gen_field(values->DisplayPlane7FlipPendingWaitEnable, 6, 6) | + __gen_field(values->DisplayPlane1AVerticalBlankWaitEnable, 3, 3) | + __gen_field(values->DisplayPlane4FlipPendingWaitEnable, 2, 2) | + __gen_field(values->DisplayPlane1FlipPendingWaitEnable, 1, 1) | + __gen_field(values->DisplayPlnae1AScanLineWaitEnable, 0, 0) | + 0; + +} + +#define GEN9_PIPE_CONTROL_length_bias 0x00000002 +#define GEN9_PIPE_CONTROL_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 2, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 4 + +#define GEN9_PIPE_CONTROL_length 0x00000006 + +struct GEN9_PIPE_CONTROL { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool FlushLLC; +#define DAT_PPGTT 0 +#define DAT_GGTT 1 + uint32_t DestinationAddressType; +#define NoLRIOperation 0 +#define MMIOWriteImmediateData 1 + uint32_t LRIPostSyncOperation; + uint32_t StoreDataIndex; + uint32_t CommandStreamerStallEnable; +#define DontReset 0 +#define Reset 1 + uint32_t GlobalSnapshotCountReset; + uint32_t TLBInvalidate; + bool GenericMediaStateClear; +#define NoWrite 0 +#define WriteImmediateData 1 +#define WritePSDepthCount 2 +#define WriteTimestamp 3 + uint32_t PostSyncOperation; + bool DepthStallEnable; +#define DisableFlush 0 +#define EnableFlush 1 + bool RenderTargetCacheFlushEnable; + bool InstructionCacheInvalidateEnable; + bool TextureCacheInvalidationEnable; + bool IndirectStatePointersDisable; + bool NotifyEnable; + bool PipeControlFlushEnable; + bool DCFlushEnable; + bool VFCacheInvalidationEnable; + bool ConstantCacheInvalidationEnable; + bool StateCacheInvalidationEnable; + bool StallAtPixelScoreboard; +#define FlushDisabled 0 +#define FlushEnabled 1 + bool DepthCacheFlushEnable; + __gen_address_type Address; + uint64_t ImmediateData; +}; + +static inline void +GEN9_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_PIPE_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->FlushLLC, 26, 26) | + __gen_field(values->DestinationAddressType, 24, 24) | + __gen_field(values->LRIPostSyncOperation, 23, 23) | + __gen_field(values->StoreDataIndex, 21, 21) | + __gen_field(values->CommandStreamerStallEnable, 20, 20) | + __gen_field(values->GlobalSnapshotCountReset, 19, 19) | + __gen_field(values->TLBInvalidate, 18, 18) | + __gen_field(values->GenericMediaStateClear, 16, 16) | + __gen_field(values->PostSyncOperation, 14, 15) | + __gen_field(values->DepthStallEnable, 13, 13) | + __gen_field(values->RenderTargetCacheFlushEnable, 12, 12) | + __gen_field(values->InstructionCacheInvalidateEnable, 11, 11) | + __gen_field(values->TextureCacheInvalidationEnable, 10, 10) | + __gen_field(values->IndirectStatePointersDisable, 9, 9) | + __gen_field(values->NotifyEnable, 8, 8) | + __gen_field(values->PipeControlFlushEnable, 7, 7) | + __gen_field(values->DCFlushEnable, 5, 5) | + __gen_field(values->VFCacheInvalidationEnable, 4, 4) | + __gen_field(values->ConstantCacheInvalidationEnable, 3, 3) | + __gen_field(values->StateCacheInvalidationEnable, 2, 2) | + __gen_field(values->StallAtPixelScoreboard, 1, 1) | + __gen_field(values->DepthCacheFlushEnable, 0, 0) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->Address, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + uint64_t qw4 = + __gen_field(values->ImmediateData, 0, 63) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + +} + +#define GEN9_SCISSOR_RECT_length 0x00000002 + +struct GEN9_SCISSOR_RECT { + uint32_t ScissorRectangleYMin; + uint32_t ScissorRectangleXMin; + uint32_t ScissorRectangleYMax; + uint32_t ScissorRectangleXMax; +}; + +static inline void +GEN9_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_SCISSOR_RECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ScissorRectangleYMin, 16, 31) | + __gen_field(values->ScissorRectangleXMin, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ScissorRectangleYMax, 16, 31) | + __gen_field(values->ScissorRectangleXMax, 0, 15) | + 0; + +} + +#define GEN9_SF_CLIP_VIEWPORT_length 0x00000010 + +struct GEN9_SF_CLIP_VIEWPORT { + float ViewportMatrixElementm00; + float ViewportMatrixElementm11; + float ViewportMatrixElementm22; + float ViewportMatrixElementm30; + float ViewportMatrixElementm31; + float ViewportMatrixElementm32; + float XMinClipGuardband; + float XMaxClipGuardband; + float YMinClipGuardband; + float YMaxClipGuardband; + float XMinViewPort; + float XMaxViewPort; + float YMinViewPort; + float YMaxViewPort; +}; + +static inline void +GEN9_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_SF_CLIP_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->ViewportMatrixElementm00) | + 0; + + dw[1] = + __gen_float(values->ViewportMatrixElementm11) | + 0; + + dw[2] = + __gen_float(values->ViewportMatrixElementm22) | + 0; + + dw[3] = + __gen_float(values->ViewportMatrixElementm30) | + 0; + + dw[4] = + __gen_float(values->ViewportMatrixElementm31) | + 0; + + dw[5] = + __gen_float(values->ViewportMatrixElementm32) | + 0; + + dw[6] = + 0; + + dw[7] = + 0; + + dw[8] = + __gen_float(values->XMinClipGuardband) | + 0; + + dw[9] = + __gen_float(values->XMaxClipGuardband) | + 0; + + dw[10] = + __gen_float(values->YMinClipGuardband) | + 0; + + dw[11] = + __gen_float(values->YMaxClipGuardband) | + 0; + + dw[12] = + __gen_float(values->XMinViewPort) | + 0; + + dw[13] = + __gen_float(values->XMaxViewPort) | + 0; + + dw[14] = + __gen_float(values->YMinViewPort) | + 0; + + dw[15] = + __gen_float(values->YMaxViewPort) | + 0; + +} + +#define GEN9_BLEND_STATE_length 0x00000011 + +#define GEN9_BLEND_STATE_ENTRY_length 0x00000002 + +struct GEN9_BLEND_STATE_ENTRY { + bool LogicOpEnable; + uint32_t LogicOpFunction; + uint32_t PreBlendSourceOnlyClampEnable; +#define COLORCLAMP_UNORM 0 +#define COLORCLAMP_SNORM 1 +#define COLORCLAMP_RTFORMAT 2 + uint32_t ColorClampRange; + bool PreBlendColorClampEnable; + bool PostBlendColorClampEnable; + bool ColorBufferBlendEnable; + uint32_t SourceBlendFactor; + uint32_t DestinationBlendFactor; + uint32_t ColorBlendFunction; + uint32_t SourceAlphaBlendFactor; + uint32_t DestinationAlphaBlendFactor; + uint32_t AlphaBlendFunction; + bool WriteDisableAlpha; + bool WriteDisableRed; + bool WriteDisableGreen; + bool WriteDisableBlue; +}; + +static inline void +GEN9_BLEND_STATE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_BLEND_STATE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint64_t qw0 = + __gen_field(values->LogicOpEnable, 63, 63) | + __gen_field(values->LogicOpFunction, 59, 62) | + __gen_field(values->PreBlendSourceOnlyClampEnable, 36, 36) | + __gen_field(values->ColorClampRange, 34, 35) | + __gen_field(values->PreBlendColorClampEnable, 33, 33) | + __gen_field(values->PostBlendColorClampEnable, 32, 32) | + __gen_field(values->ColorBufferBlendEnable, 31, 31) | + __gen_field(values->SourceBlendFactor, 26, 30) | + __gen_field(values->DestinationBlendFactor, 21, 25) | + __gen_field(values->ColorBlendFunction, 18, 20) | + __gen_field(values->SourceAlphaBlendFactor, 13, 17) | + __gen_field(values->DestinationAlphaBlendFactor, 8, 12) | + __gen_field(values->AlphaBlendFunction, 5, 7) | + __gen_field(values->WriteDisableAlpha, 3, 3) | + __gen_field(values->WriteDisableRed, 2, 2) | + __gen_field(values->WriteDisableGreen, 1, 1) | + __gen_field(values->WriteDisableBlue, 0, 0) | + 0; + + dw[0] = qw0; + dw[1] = qw0 >> 32; + +} + +struct GEN9_BLEND_STATE { + bool AlphaToCoverageEnable; + bool IndependentAlphaBlendEnable; + bool AlphaToOneEnable; + bool AlphaToCoverageDitherEnable; + bool AlphaTestEnable; + uint32_t AlphaTestFunction; + bool ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; + struct GEN9_BLEND_STATE_ENTRY Entry[8]; +}; + +static inline void +GEN9_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_BLEND_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->AlphaToCoverageEnable, 31, 31) | + __gen_field(values->IndependentAlphaBlendEnable, 30, 30) | + __gen_field(values->AlphaToOneEnable, 29, 29) | + __gen_field(values->AlphaToCoverageDitherEnable, 28, 28) | + __gen_field(values->AlphaTestEnable, 27, 27) | + __gen_field(values->AlphaTestFunction, 24, 26) | + __gen_field(values->ColorDitherEnable, 23, 23) | + __gen_field(values->XDitherOffset, 21, 22) | + __gen_field(values->YDitherOffset, 19, 20) | + 0; + + for (uint32_t i = 0, j = 1; i < 8; i++, j += 2) + GEN9_BLEND_STATE_ENTRY_pack(data, &dw[j], &values->Entry[i]); +} + +#define GEN9_CC_VIEWPORT_length 0x00000002 + +struct GEN9_CC_VIEWPORT { + float MinimumDepth; + float MaximumDepth; +}; + +static inline void +GEN9_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_CC_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->MinimumDepth) | + 0; + + dw[1] = + __gen_float(values->MaximumDepth) | + 0; + +} + +#define GEN9_COLOR_CALC_STATE_length 0x00000006 + +struct GEN9_COLOR_CALC_STATE { +#define Cancelled 0 +#define NotCancelled 1 + uint32_t RoundDisableFunctionDisable; +#define ALPHATEST_UNORM8 0 +#define ALPHATEST_FLOAT32 1 + uint32_t AlphaTestFormat; + uint32_t AlphaReferenceValueAsUNORM8; + float AlphaReferenceValueAsFLOAT32; + float BlendConstantColorRed; + float BlendConstantColorGreen; + float BlendConstantColorBlue; + float BlendConstantColorAlpha; +}; + +static inline void +GEN9_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_COLOR_CALC_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->RoundDisableFunctionDisable, 15, 15) | + __gen_field(values->AlphaTestFormat, 0, 0) | + 0; + + dw[1] = + __gen_field(values->AlphaReferenceValueAsUNORM8, 0, 31) | + __gen_float(values->AlphaReferenceValueAsFLOAT32) | + 0; + + dw[2] = + __gen_float(values->BlendConstantColorRed) | + 0; + + dw[3] = + __gen_float(values->BlendConstantColorGreen) | + 0; + + dw[4] = + __gen_float(values->BlendConstantColorBlue) | + 0; + + dw[5] = + __gen_float(values->BlendConstantColorAlpha) | + 0; + +} + +#define GEN9_EXECUTION_UNIT_EXTENDED_MESSAGE_DESCRIPTOR_length 0x00000001 + +struct GEN9_EXECUTION_UNIT_EXTENDED_MESSAGE_DESCRIPTOR { + uint32_t ExtendedMessageLength; +#define NoTermination 0 +#define EOT 1 + uint32_t EndOfThread; + uint32_t TargetFunctionID; +}; + +static inline void +GEN9_EXECUTION_UNIT_EXTENDED_MESSAGE_DESCRIPTOR_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_EXECUTION_UNIT_EXTENDED_MESSAGE_DESCRIPTOR * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ExtendedMessageLength, 6, 9) | + __gen_field(values->EndOfThread, 5, 5) | + __gen_field(values->TargetFunctionID, 0, 3) | + 0; + +} + +#define GEN9_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 + +struct GEN9_INTERFACE_DESCRIPTOR_DATA { + uint32_t KernelStartPointer; + uint32_t KernelStartPointerHigh; +#define Ftz 0 +#define SetByKernel 1 + uint32_t DenormMode; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlow; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t SamplerStatePointer; +#define Nosamplersused 0 +#define Between1and4samplersused 1 +#define Between5and8samplersused 2 +#define Between9and12samplersused 3 +#define Between13and16samplersused 4 + uint32_t SamplerCount; + uint32_t BindingTablePointer; + uint32_t BindingTableEntryCount; + uint32_t ConstantIndirectURBEntryReadLength; + uint32_t ConstantURBEntryReadOffset; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + bool BarrierEnable; +#define Encodes0K 0 +#define Encodes1K 1 +#define Encodes2K 2 +#define Encodes4K 3 +#define Encodes8K 4 +#define Encodes16K 5 +#define Encodes32K 6 +#define Encodes64K 7 + uint32_t SharedLocalMemorySize; + bool GlobalBarrierEnable; + uint32_t NumberofThreadsinGPGPUThreadGroup; + uint32_t CrossThreadConstantDataReadLength; +}; + +static inline void +GEN9_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_INTERFACE_DESCRIPTOR_DATA * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointerHigh, 0, 15) | + 0; + + dw[2] = + __gen_field(values->DenormMode, 19, 19) | + __gen_field(values->SingleProgramFlow, 18, 18) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->SamplerStatePointer, 5, 31) | + __gen_field(values->SamplerCount, 2, 4) | + 0; + + dw[4] = + __gen_offset(values->BindingTablePointer, 5, 15) | + __gen_field(values->BindingTableEntryCount, 0, 4) | + 0; + + dw[5] = + __gen_field(values->ConstantIndirectURBEntryReadLength, 16, 31) | + __gen_field(values->ConstantURBEntryReadOffset, 0, 15) | + 0; + + dw[6] = + __gen_field(values->RoundingMode, 22, 23) | + __gen_field(values->BarrierEnable, 21, 21) | + __gen_field(values->SharedLocalMemorySize, 16, 20) | + __gen_field(values->GlobalBarrierEnable, 15, 15) | + __gen_field(values->NumberofThreadsinGPGPUThreadGroup, 0, 9) | + 0; + + dw[7] = + __gen_field(values->CrossThreadConstantDataReadLength, 0, 7) | + 0; + +} + +#define GEN9_ROUNDINGPRECISIONTABLE_3_BITS_length 0x00000001 + +struct GEN9_ROUNDINGPRECISIONTABLE_3_BITS { +#define _116 0 +#define _216 1 +#define _316 2 +#define _416 3 +#define _516 4 +#define _616 5 +#define _716 6 +#define _816 7 + uint32_t RoundingPrecision; +}; + +static inline void +GEN9_ROUNDINGPRECISIONTABLE_3_BITS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_ROUNDINGPRECISIONTABLE_3_BITS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->RoundingPrecision, 0, 2) | + 0; + +} + +#define GEN9_BINDING_TABLE_STATE_length 0x00000001 + +struct GEN9_BINDING_TABLE_STATE { + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN9_BINDING_TABLE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_BINDING_TABLE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->SurfaceStatePointer, 6, 31) | + 0; + +} + +#define GEN9_RENDER_SURFACE_STATE_length 0x00000010 + +struct GEN9_RENDER_SURFACE_STATE { +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_BUFFER 4 +#define SURFTYPE_STRBUF 5 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + bool SurfaceArray; + bool ASTC_Enable; + uint32_t SurfaceFormat; +#define VALIGN4 1 +#define VALIGN8 2 +#define VALIGN16 3 + uint32_t SurfaceVerticalAlignment; +#define HALIGN4 1 +#define HALIGN8 2 +#define HALIGN16 3 + uint32_t SurfaceHorizontalAlignment; +#define LINEAR 0 +#define WMAJOR 1 +#define XMAJOR 2 +#define YMAJOR 3 + uint32_t TileMode; + uint32_t VerticalLineStride; + uint32_t VerticalLineStrideOffset; + bool SamplerL2BypassModeDisable; +#define WriteOnlyCache 0 +#define ReadWriteCache 1 + uint32_t RenderCacheReadWriteMode; +#define NORMAL_MODE 0 +#define PROGRESSIVE_FRAME 2 +#define INTERLACED_FRAME 3 + uint32_t MediaBoundaryPixelMode; + bool CubeFaceEnablePositiveZ; + bool CubeFaceEnableNegativeZ; + bool CubeFaceEnablePositiveY; + bool CubeFaceEnableNegativeY; + bool CubeFaceEnablePositiveX; + bool CubeFaceEnableNegativeX; + struct GEN9_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; + float BaseMipLevel; + uint32_t SurfaceQPitch; + uint32_t Height; + uint32_t Width; + uint32_t Depth; + uint32_t SurfacePitch; +#define _0DEG 0 +#define _90DEG 1 +#define _180DEG 2 +#define _270DEG 3 + uint32_t RenderTargetAndSampleUnormRotation; + uint32_t MinimumArrayElement; + uint32_t RenderTargetViewExtent; +#define MSS 0 +#define DEPTH_STENCIL 1 + uint32_t MultisampledSurfaceStorageFormat; +#define MULTISAMPLECOUNT_1 0 +#define MULTISAMPLECOUNT_2 1 +#define MULTISAMPLECOUNT_4 2 +#define MULTISAMPLECOUNT_8 3 +#define MULTISAMPLECOUNT_16 4 + uint32_t NumberofMultisamples; + uint32_t MultisamplePositionPaletteIndex; + uint32_t XOffset; + uint32_t YOffset; + bool EWADisableForCube; +#define NONE 0 +#define _4KB 1 +#define _64KB 2 +#define TILEYF 1 +#define TILEYS 2 + uint32_t TiledResourceMode; +#define GPUcoherent 0 +#define IAcoherent 1 + uint32_t CoherencyType; + uint32_t MipTailStartLOD; + uint32_t SurfaceMinLOD; + uint32_t MIPCountLOD; + uint32_t AuxiliarySurfaceQPitch; + uint32_t AuxiliarySurfacePitch; +#define AUX_NONE 0 +#define AUX_CCS_D 1 +#define AUX_APPEND 2 +#define AUX_HIZ 3 +#define AUX_CCS_E 5 + uint32_t AuxiliarySurfaceMode; + bool SeparateUVPlaneEnable; + uint32_t XOffsetforUorUVPlane; + uint32_t YOffsetforUorUVPlane; +#define Horizontal 0 +#define Vertical 1 + uint32_t MemoryCompressionMode; + bool MemoryCompressionEnable; + uint32_t ShaderChannelSelectRed; + uint32_t ShaderChannelSelectGreen; + uint32_t ShaderChannelSelectBlue; + uint32_t ShaderChannelSelectAlpha; + float ResourceMinLOD; + __gen_address_type SurfaceBaseAddress; + uint32_t XOffsetforVPlane; + uint32_t YOffsetforVPlane; + uint32_t AuxiliaryTableIndexforMediaCompressedSurface; + __gen_address_type AuxiliarySurfaceBaseAddress; + uint32_t QuiltHeight; + uint32_t QuiltWidth; + float HierarchicalDepthClearValue; + uint32_t RedClearColor; + uint32_t GreenClearColor; + uint32_t BlueClearColor; + uint32_t AlphaClearColor; +}; + +static inline void +GEN9_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_RENDER_SURFACE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->SurfaceArray, 28, 28) | + __gen_field(values->ASTC_Enable, 27, 27) | + __gen_field(values->SurfaceFormat, 18, 26) | + __gen_field(values->SurfaceVerticalAlignment, 16, 17) | + __gen_field(values->SurfaceHorizontalAlignment, 14, 15) | + __gen_field(values->TileMode, 12, 13) | + __gen_field(values->VerticalLineStride, 11, 11) | + __gen_field(values->VerticalLineStrideOffset, 10, 10) | + __gen_field(values->SamplerL2BypassModeDisable, 9, 9) | + __gen_field(values->RenderCacheReadWriteMode, 8, 8) | + __gen_field(values->MediaBoundaryPixelMode, 6, 7) | + __gen_field(values->CubeFaceEnablePositiveZ, 0, 0) | + __gen_field(values->CubeFaceEnableNegativeZ, 1, 1) | + __gen_field(values->CubeFaceEnablePositiveY, 2, 2) | + __gen_field(values->CubeFaceEnableNegativeY, 3, 3) | + __gen_field(values->CubeFaceEnablePositiveX, 4, 4) | + __gen_field(values->CubeFaceEnableNegativeX, 5, 5) | + 0; + + uint32_t dw_MemoryObjectControlState; + GEN9_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + dw[1] = + __gen_field(dw_MemoryObjectControlState, 24, 30) | + __gen_field(values->BaseMipLevel * (1 << 1), 19, 23) | + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + + dw[2] = + __gen_field(values->Height, 16, 29) | + __gen_field(values->Width, 0, 13) | + 0; + + dw[3] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + dw[4] = + __gen_field(values->RenderTargetAndSampleUnormRotation, 29, 30) | + __gen_field(values->MinimumArrayElement, 18, 28) | + __gen_field(values->RenderTargetViewExtent, 7, 17) | + __gen_field(values->MultisampledSurfaceStorageFormat, 6, 6) | + __gen_field(values->NumberofMultisamples, 3, 5) | + __gen_field(values->MultisamplePositionPaletteIndex, 0, 2) | + 0; + + dw[5] = + __gen_offset(values->XOffset, 25, 31) | + __gen_offset(values->YOffset, 21, 23) | + __gen_field(values->EWADisableForCube, 20, 20) | + __gen_field(values->TiledResourceMode, 18, 19) | + __gen_field(values->CoherencyType, 14, 14) | + __gen_field(values->MipTailStartLOD, 8, 11) | + __gen_field(values->SurfaceMinLOD, 4, 7) | + __gen_field(values->MIPCountLOD, 0, 3) | + 0; + + dw[6] = + __gen_field(values->AuxiliarySurfaceQPitch, 16, 30) | + __gen_field(values->AuxiliarySurfacePitch, 3, 11) | + __gen_field(values->AuxiliarySurfaceMode, 0, 2) | + __gen_field(values->SeparateUVPlaneEnable, 31, 31) | + __gen_field(values->XOffsetforUorUVPlane, 16, 29) | + __gen_field(values->YOffsetforUorUVPlane, 0, 13) | + 0; + + dw[7] = + __gen_field(values->MemoryCompressionMode, 31, 31) | + __gen_field(values->MemoryCompressionEnable, 30, 30) | + __gen_field(values->ShaderChannelSelectRed, 25, 27) | + __gen_field(values->ShaderChannelSelectGreen, 22, 24) | + __gen_field(values->ShaderChannelSelectBlue, 19, 21) | + __gen_field(values->ShaderChannelSelectAlpha, 16, 18) | + __gen_field(values->ResourceMinLOD * (1 << 8), 0, 11) | + 0; + + uint32_t dw8 = + 0; + + uint64_t qw8 = + __gen_combine_address(data, &dw[8], values->SurfaceBaseAddress, dw8); + + dw[8] = qw8; + dw[9] = qw8 >> 32; + + uint32_t dw10 = + __gen_field(values->XOffsetforVPlane, 48, 61) | + __gen_field(values->YOffsetforVPlane, 32, 45) | + __gen_field(values->AuxiliaryTableIndexforMediaCompressedSurface, 21, 31) | + __gen_field(values->QuiltHeight, 5, 9) | + __gen_field(values->QuiltWidth, 0, 4) | + 0; + + uint64_t qw10 = + __gen_combine_address(data, &dw[10], values->AuxiliarySurfaceBaseAddress, dw10); + + dw[10] = qw10; + dw[11] = qw10 >> 32; + + dw[12] = + __gen_float(values->HierarchicalDepthClearValue) | + __gen_field(values->RedClearColor, 0, 31) | + 0; + + dw[13] = + __gen_field(values->GreenClearColor, 0, 31) | + 0; + + dw[14] = + __gen_field(values->BlueClearColor, 0, 31) | + 0; + + dw[15] = + __gen_field(values->AlphaClearColor, 0, 31) | + 0; + +} + +#define GEN9_FILTER_COEFFICIENT_length 0x00000001 + +struct GEN9_FILTER_COEFFICIENT { + uint32_t FilterCoefficient; +}; + +static inline void +GEN9_FILTER_COEFFICIENT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_FILTER_COEFFICIENT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->FilterCoefficient, 0, 7) | + 0; + +} + +#define GEN9_SAMPLER_STATE_length 0x00000004 + +struct GEN9_SAMPLER_STATE { + bool SamplerDisable; +#define DX10OGL 0 +#define DX9 1 + uint32_t TextureBorderColorMode; +#define CLAMP_NONE 0 +#define CLAMP_OGL 2 + uint32_t LODPreClampMode; + uint32_t CoarseLODQualityMode; +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 + uint32_t MipModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MagModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MinModeFilter; + float TextureLODBias; +#define LEGACY 0 +#define EWAApproximation 1 + uint32_t AnisotropicAlgorithm; + float MinLOD; + float MaxLOD; + bool ChromaKeyEnable; + uint32_t ChromaKeyIndex; +#define KEYFILTER_KILL_ON_ANY_MATCH 0 +#define KEYFILTER_REPLACE_BLACK 1 + uint32_t ChromaKeyMode; +#define PREFILTEROPALWAYS 0 +#define PREFILTEROPNEVER 1 +#define PREFILTEROPLESS 2 +#define PREFILTEROPEQUAL 3 +#define PREFILTEROPLEQUAL 4 +#define PREFILTEROPGREATER 5 +#define PREFILTEROPNOTEQUAL 6 +#define PREFILTEROPGEQUAL 7 + uint32_t ShadowFunction; +#define PROGRAMMED 0 +#define OVERRIDE 1 + uint32_t CubeSurfaceControlMode; + uint32_t IndirectStatePointer; +#define MIPNONE 0 +#define MIPFILTER 1 + uint32_t LODClampMagnificationMode; +#define STD_FILTER 0 +#define COMPARISON 1 +#define MINIMUM 2 +#define MAXIMUM 3 + uint32_t ReductionType; +#define RATIO21 0 +#define RATIO41 1 +#define RATIO61 2 +#define RATIO81 3 +#define RATIO101 4 +#define RATIO121 5 +#define RATIO141 6 +#define RATIO161 7 + uint32_t MaximumAnisotropy; + bool RAddressMinFilterRoundingEnable; + bool RAddressMagFilterRoundingEnable; + bool VAddressMinFilterRoundingEnable; + bool VAddressMagFilterRoundingEnable; + bool UAddressMinFilterRoundingEnable; + bool UAddressMagFilterRoundingEnable; +#define FULL 0 +#define HIGH 1 +#define MED 2 +#define LOW 3 + uint32_t TrilinearFilterQuality; + bool NonnormalizedCoordinateEnable; + bool ReductionTypeEnable; + uint32_t TCXAddressControlMode; + uint32_t TCYAddressControlMode; + uint32_t TCZAddressControlMode; +}; + +static inline void +GEN9_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_SAMPLER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SamplerDisable, 31, 31) | + __gen_field(values->TextureBorderColorMode, 29, 29) | + __gen_field(values->LODPreClampMode, 27, 28) | + __gen_field(values->CoarseLODQualityMode, 22, 26) | + __gen_field(values->MipModeFilter, 20, 21) | + __gen_field(values->MagModeFilter, 17, 19) | + __gen_field(values->MinModeFilter, 14, 16) | + __gen_fixed(values->TextureLODBias, 1, 13, true, 8) | + __gen_field(values->AnisotropicAlgorithm, 0, 0) | + 0; + + dw[1] = + __gen_field(values->MinLOD * (1 << 8), 20, 31) | + __gen_field(values->MaxLOD * (1 << 8), 8, 19) | + __gen_field(values->ChromaKeyEnable, 7, 7) | + __gen_field(values->ChromaKeyIndex, 5, 6) | + __gen_field(values->ChromaKeyMode, 4, 4) | + __gen_field(values->ShadowFunction, 1, 3) | + __gen_field(values->CubeSurfaceControlMode, 0, 0) | + 0; + + dw[2] = + __gen_field(values->IndirectStatePointer, 6, 23) | + __gen_field(values->LODClampMagnificationMode, 0, 0) | + 0; + + dw[3] = + __gen_field(values->ReductionType, 22, 23) | + __gen_field(values->MaximumAnisotropy, 19, 21) | + __gen_field(values->RAddressMinFilterRoundingEnable, 13, 13) | + __gen_field(values->RAddressMagFilterRoundingEnable, 14, 14) | + __gen_field(values->VAddressMinFilterRoundingEnable, 15, 15) | + __gen_field(values->VAddressMagFilterRoundingEnable, 16, 16) | + __gen_field(values->UAddressMinFilterRoundingEnable, 17, 17) | + __gen_field(values->UAddressMagFilterRoundingEnable, 18, 18) | + __gen_field(values->TrilinearFilterQuality, 11, 12) | + __gen_field(values->NonnormalizedCoordinateEnable, 10, 10) | + __gen_field(values->ReductionTypeEnable, 9, 9) | + __gen_field(values->TCXAddressControlMode, 6, 8) | + __gen_field(values->TCYAddressControlMode, 3, 5) | + __gen_field(values->TCZAddressControlMode, 0, 2) | + 0; + +} + +#define GEN9_SAMPLER_STATE_8X8_AVS_COEFFICIENTS_length 0x00000008 + +struct GEN9_SAMPLER_STATE_8X8_AVS_COEFFICIENTS { + uint32_t Table0YFilterCoefficientn1; + uint32_t Table0XFilterCoefficientn1; + uint32_t Table0YFilterCoefficientn0; + uint32_t Table0XFilterCoefficientn0; + uint32_t Table0YFilterCoefficientn3; + uint32_t Table0XFilterCoefficientn3; + uint32_t Table0YFilterCoefficientn2; + uint32_t Table0XFilterCoefficientn2; + uint32_t Table0YFilterCoefficientn5; + uint32_t Table0XFilterCoefficientn5; + uint32_t Table0YFilterCoefficientn4; + uint32_t Table0XFilterCoefficientn4; + uint32_t Table0YFilterCoefficientn7; + uint32_t Table0XFilterCoefficientn7; + uint32_t Table0YFilterCoefficientn6; + uint32_t Table0XFilterCoefficientn6; + uint32_t Table1XFilterCoefficientn3; + uint32_t Table1XFilterCoefficientn2; + uint32_t Table1XFilterCoefficientn5; + uint32_t Table1XFilterCoefficientn4; + uint32_t Table1YFilterCoefficientn3; + uint32_t Table1YFilterCoefficientn2; + uint32_t Table1YFilterCoefficientn5; + uint32_t Table1YFilterCoefficientn4; +}; + +static inline void +GEN9_SAMPLER_STATE_8X8_AVS_COEFFICIENTS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN9_SAMPLER_STATE_8X8_AVS_COEFFICIENTS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->Table0YFilterCoefficientn1, 24, 31) | + __gen_field(values->Table0XFilterCoefficientn1, 16, 23) | + __gen_field(values->Table0YFilterCoefficientn0, 8, 15) | + __gen_field(values->Table0XFilterCoefficientn0, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Table0YFilterCoefficientn3, 24, 31) | + __gen_field(values->Table0XFilterCoefficientn3, 16, 23) | + __gen_field(values->Table0YFilterCoefficientn2, 8, 15) | + __gen_field(values->Table0XFilterCoefficientn2, 0, 7) | + 0; + + dw[2] = + __gen_field(values->Table0YFilterCoefficientn5, 24, 31) | + __gen_field(values->Table0XFilterCoefficientn5, 16, 23) | + __gen_field(values->Table0YFilterCoefficientn4, 8, 15) | + __gen_field(values->Table0XFilterCoefficientn4, 0, 7) | + 0; + + dw[3] = + __gen_field(values->Table0YFilterCoefficientn7, 24, 31) | + __gen_field(values->Table0XFilterCoefficientn7, 16, 23) | + __gen_field(values->Table0YFilterCoefficientn6, 8, 15) | + __gen_field(values->Table0XFilterCoefficientn6, 0, 7) | + 0; + + dw[4] = + __gen_field(values->Table1XFilterCoefficientn3, 24, 31) | + __gen_field(values->Table1XFilterCoefficientn2, 16, 23) | + 0; + + dw[5] = + __gen_field(values->Table1XFilterCoefficientn5, 8, 15) | + __gen_field(values->Table1XFilterCoefficientn4, 0, 7) | + 0; + + dw[6] = + __gen_field(values->Table1YFilterCoefficientn3, 24, 31) | + __gen_field(values->Table1YFilterCoefficientn2, 16, 23) | + 0; + + dw[7] = + __gen_field(values->Table1YFilterCoefficientn5, 8, 15) | + __gen_field(values->Table1YFilterCoefficientn4, 0, 7) | + 0; + +} + +/* Enum 3D_Prim_Topo_Type */ +#define _3DPRIM_POINTLIST 1 +#define _3DPRIM_LINELIST 2 +#define _3DPRIM_LINESTRIP 3 +#define _3DPRIM_TRILIST 4 +#define _3DPRIM_TRISTRIP 5 +#define _3DPRIM_TRIFAN 6 +#define _3DPRIM_QUADLIST 7 +#define _3DPRIM_QUADSTRIP 8 +#define _3DPRIM_LINELIST_ADJ 9 +#define _3DPRIM_LINESTRIP_ADJ 10 +#define _3DPRIM_TRILIST_ADJ 11 +#define _3DPRIM_TRISTRIP_ADJ 12 +#define _3DPRIM_TRISTRIP_REVERSE 13 +#define _3DPRIM_POLYGON 14 +#define _3DPRIM_RECTLIST 15 +#define _3DPRIM_LINELOOP 16 +#define _3DPRIM_POINTLIST_BF 17 +#define _3DPRIM_LINESTRIP_CONT 18 +#define _3DPRIM_LINESTRIP_BF 19 +#define _3DPRIM_LINESTRIP_CONT_BF 20 +#define _3DPRIM_TRIFAN_NOSTIPPLE 22 +#define _3DPRIM_PATCHLIST_1 32 +#define _3DPRIM_PATCHLIST_2 33 +#define _3DPRIM_PATCHLIST_3 34 +#define _3DPRIM_PATCHLIST_4 35 +#define _3DPRIM_PATCHLIST_5 36 +#define _3DPRIM_PATCHLIST_6 37 +#define _3DPRIM_PATCHLIST_7 38 +#define _3DPRIM_PATCHLIST_8 39 +#define _3DPRIM_PATCHLIST_9 40 +#define _3DPRIM_PATCHLIST_10 41 +#define _3DPRIM_PATCHLIST_11 42 +#define _3DPRIM_PATCHLIST_12 43 +#define _3DPRIM_PATCHLIST_13 44 +#define _3DPRIM_PATCHLIST_14 45 +#define _3DPRIM_PATCHLIST_15 46 +#define _3DPRIM_PATCHLIST_16 47 +#define _3DPRIM_PATCHLIST_17 48 +#define _3DPRIM_PATCHLIST_18 49 +#define _3DPRIM_PATCHLIST_19 50 +#define _3DPRIM_PATCHLIST_20 51 +#define _3DPRIM_PATCHLIST_21 52 +#define _3DPRIM_PATCHLIST_22 53 +#define _3DPRIM_PATCHLIST_23 54 +#define _3DPRIM_PATCHLIST_24 55 +#define _3DPRIM_PATCHLIST_25 56 +#define _3DPRIM_PATCHLIST_26 57 +#define _3DPRIM_PATCHLIST_27 58 +#define _3DPRIM_PATCHLIST_28 59 +#define _3DPRIM_PATCHLIST_29 60 +#define _3DPRIM_PATCHLIST_30 61 +#define _3DPRIM_PATCHLIST_31 62 +#define _3DPRIM_PATCHLIST_32 63 + +/* Enum 3D_Vertex_Component_Control */ +#define VFCOMP_NOSTORE 0 +#define VFCOMP_STORE_SRC 1 +#define VFCOMP_STORE_0 2 +#define VFCOMP_STORE_1_FP 3 +#define VFCOMP_STORE_1_INT 4 +#define VFCOMP_STORE_PID 7 + +/* Enum COMPONENT_ENABLES */ +#define CE_NONE 0 +#define CE_X 1 +#define CE_Y 2 +#define CE_XY 3 +#define CE_Z 4 +#define CE_XZ 5 +#define CE_YZ 6 +#define CE_XYZ 7 +#define CE_W 8 +#define CE_XW 9 +#define CE_YW 10 +#define CE_XYW 11 +#define CE_ZW 12 +#define CE_XZW 13 +#define CE_YZW 14 +#define CE_XYZW 15 + +/* Enum Attribute_Component_Format */ +#define ACF_DISABLED 0 +#define ACF_XY 1 +#define ACF_XYZ 2 +#define ACF_XYZW 3 + +/* Enum WRAP_SHORTEST_ENABLE */ +#define WSE_X 1 +#define WSE_Y 2 +#define WSE_XY 3 +#define WSE_Z 4 +#define WSE_XZ 5 +#define WSE_YZ 6 +#define WSE_XYZ 7 +#define WSE_W 8 +#define WSE_XW 9 +#define WSE_YW 10 +#define WSE_XYW 11 +#define WSE_ZW 12 +#define WSE_XZW 13 +#define WSE_YZW 14 +#define WSE_XYZW 15 + +/* Enum 3D_Stencil_Operation */ +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 1 +#define STENCILOP_REPLACE 2 +#define STENCILOP_INCRSAT 3 +#define STENCILOP_DECRSAT 4 +#define STENCILOP_INCR 5 +#define STENCILOP_DECR 6 +#define STENCILOP_INVERT 7 + +/* Enum 3D_Color_Buffer_Blend_Factor */ +#define BLENDFACTOR_ONE 1 +#define BLENDFACTOR_SRC_COLOR 2 +#define BLENDFACTOR_SRC_ALPHA 3 +#define BLENDFACTOR_DST_ALPHA 4 +#define BLENDFACTOR_DST_COLOR 5 +#define BLENDFACTOR_SRC_ALPHA_SATURATE 6 +#define BLENDFACTOR_CONST_COLOR 7 +#define BLENDFACTOR_CONST_ALPHA 8 +#define BLENDFACTOR_SRC1_COLOR 9 +#define BLENDFACTOR_SRC1_ALPHA 10 +#define BLENDFACTOR_ZERO 17 +#define BLENDFACTOR_INV_SRC_COLOR 18 +#define BLENDFACTOR_INV_SRC_ALPHA 19 +#define BLENDFACTOR_INV_DST_ALPHA 20 +#define BLENDFACTOR_INV_DST_COLOR 21 +#define BLENDFACTOR_INV_CONST_COLOR 23 +#define BLENDFACTOR_INV_CONST_ALPHA 24 +#define BLENDFACTOR_INV_SRC1_COLOR 25 +#define BLENDFACTOR_INV_SRC1_ALPHA 26 + +/* Enum 3D_Color_Buffer_Blend_Function */ +#define BLENDFUNCTION_ADD 0 +#define BLENDFUNCTION_SUBTRACT 1 +#define BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BLENDFUNCTION_MIN 3 +#define BLENDFUNCTION_MAX 4 + +/* Enum 3D_Compare_Function */ +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + +/* Enum 3D_Logic_Op_Function */ +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 1 +#define LOGICOP_AND_INVERTED 2 +#define LOGICOP_COPY_INVERTED 3 +#define LOGICOP_AND_REVERSE 4 +#define LOGICOP_INVERT 5 +#define LOGICOP_XOR 6 +#define LOGICOP_NAND 7 +#define LOGICOP_AND 8 +#define LOGICOP_EQUIV 9 +#define LOGICOP_NOOP 10 +#define LOGICOP_OR_INVERTED 11 +#define LOGICOP_COPY 12 +#define LOGICOP_OR_REVERSE 13 +#define LOGICOP_OR 14 +#define LOGICOP_SET 15 + +/* Enum SURFACE_FORMAT */ +#define R32G32B32A32_FLOAT 0 +#define R32G32B32A32_SINT 1 +#define R32G32B32A32_UINT 2 +#define R32G32B32A32_UNORM 3 +#define R32G32B32A32_SNORM 4 +#define R64G64_FLOAT 5 +#define R32G32B32X32_FLOAT 6 +#define R32G32B32A32_SSCALED 7 +#define R32G32B32A32_USCALED 8 +#define R32G32B32A32_SFIXED 32 +#define R64G64_PASSTHRU 33 +#define R32G32B32_FLOAT 64 +#define R32G32B32_SINT 65 +#define R32G32B32_UINT 66 +#define R32G32B32_UNORM 67 +#define R32G32B32_SNORM 68 +#define R32G32B32_SSCALED 69 +#define R32G32B32_USCALED 70 +#define R32G32B32_SFIXED 80 +#define R16G16B16A16_UNORM 128 +#define R16G16B16A16_SNORM 129 +#define R16G16B16A16_SINT 130 +#define R16G16B16A16_UINT 131 +#define R16G16B16A16_FLOAT 132 +#define R32G32_FLOAT 133 +#define R32G32_SINT 134 +#define R32G32_UINT 135 +#define R32_FLOAT_X8X24_TYPELESS 136 +#define X32_TYPELESS_G8X24_UINT 137 +#define L32A32_FLOAT 138 +#define R32G32_UNORM 139 +#define R32G32_SNORM 140 +#define R64_FLOAT 141 +#define R16G16B16X16_UNORM 142 +#define R16G16B16X16_FLOAT 143 +#define A32X32_FLOAT 144 +#define L32X32_FLOAT 145 +#define I32X32_FLOAT 146 +#define R16G16B16A16_SSCALED 147 +#define R16G16B16A16_USCALED 148 +#define R32G32_SSCALED 149 +#define R32G32_USCALED 150 +#define R32G32_SFIXED 160 +#define R64_PASSTHRU 161 +#define B8G8R8A8_UNORM 192 +#define B8G8R8A8_UNORM_SRGB 193 +#define R10G10B10A2_UNORM 194 +#define R10G10B10A2_UNORM_SRGB 195 +#define R10G10B10A2_UINT 196 +#define R10G10B10_SNORM_A2_UNORM 197 +#define R8G8B8A8_UNORM 199 +#define R8G8B8A8_UNORM_SRGB 200 +#define R8G8B8A8_SNORM 201 +#define R8G8B8A8_SINT 202 +#define R8G8B8A8_UINT 203 +#define R16G16_UNORM 204 +#define R16G16_SNORM 205 +#define R16G16_SINT 206 +#define R16G16_UINT 207 +#define R16G16_FLOAT 208 +#define B10G10R10A2_UNORM 209 +#define B10G10R10A2_UNORM_SRGB 210 +#define R11G11B10_FLOAT 211 +#define R32_SINT 214 +#define R32_UINT 215 +#define R32_FLOAT 216 +#define R24_UNORM_X8_TYPELESS 217 +#define X24_TYPELESS_G8_UINT 218 +#define L32_UNORM 221 +#define A32_UNORM 222 +#define L16A16_UNORM 223 +#define I24X8_UNORM 224 +#define L24X8_UNORM 225 +#define A24X8_UNORM 226 +#define I32_FLOAT 227 +#define L32_FLOAT 228 +#define A32_FLOAT 229 +#define X8B8_UNORM_G8R8_SNORM 230 +#define A8X8_UNORM_G8R8_SNORM 231 +#define B8X8_UNORM_G8R8_SNORM 232 +#define B8G8R8X8_UNORM 233 +#define B8G8R8X8_UNORM_SRGB 234 +#define R8G8B8X8_UNORM 235 +#define R8G8B8X8_UNORM_SRGB 236 +#define R9G9B9E5_SHAREDEXP 237 +#define B10G10R10X2_UNORM 238 +#define L16A16_FLOAT 240 +#define R32_UNORM 241 +#define R32_SNORM 242 +#define R10G10B10X2_USCALED 243 +#define R8G8B8A8_SSCALED 244 +#define R8G8B8A8_USCALED 245 +#define R16G16_SSCALED 246 +#define R16G16_USCALED 247 +#define R32_SSCALED 248 +#define R32_USCALED 249 +#define B5G6R5_UNORM 256 +#define B5G6R5_UNORM_SRGB 257 +#define B5G5R5A1_UNORM 258 +#define B5G5R5A1_UNORM_SRGB 259 +#define B4G4R4A4_UNORM 260 +#define B4G4R4A4_UNORM_SRGB 261 +#define R8G8_UNORM 262 +#define R8G8_SNORM 263 +#define R8G8_SINT 264 +#define R8G8_UINT 265 +#define R16_UNORM 266 +#define R16_SNORM 267 +#define R16_SINT 268 +#define R16_UINT 269 +#define R16_FLOAT 270 +#define A8P8_UNORM_PALETTE0 271 +#define A8P8_UNORM_PALETTE1 272 +#define I16_UNORM 273 +#define L16_UNORM 274 +#define A16_UNORM 275 +#define L8A8_UNORM 276 +#define I16_FLOAT 277 +#define L16_FLOAT 278 +#define A16_FLOAT 279 +#define L8A8_UNORM_SRGB 280 +#define R5G5_SNORM_B6_UNORM 281 +#define B5G5R5X1_UNORM 282 +#define B5G5R5X1_UNORM_SRGB 283 +#define R8G8_SSCALED 284 +#define R8G8_USCALED 285 +#define R16_SSCALED 286 +#define R16_USCALED 287 +#define P8A8_UNORM_PALETTE0 290 +#define P8A8_UNORM_PALETTE1 291 +#define A1B5G5R5_UNORM 292 +#define A4B4G4R4_UNORM 293 +#define L8A8_UINT 294 +#define L8A8_SINT 295 +#define R8_UNORM 320 +#define R8_SNORM 321 +#define R8_SINT 322 +#define R8_UINT 323 +#define A8_UNORM 324 +#define I8_UNORM 325 +#define L8_UNORM 326 +#define P4A4_UNORM_PALETTE0 327 +#define A4P4_UNORM_PALETTE0 328 +#define R8_SSCALED 329 +#define R8_USCALED 330 +#define P8_UNORM_PALETTE0 331 +#define L8_UNORM_SRGB 332 +#define P8_UNORM_PALETTE1 333 +#define P4A4_UNORM_PALETTE1 334 +#define A4P4_UNORM_PALETTE1 335 +#define Y8_UNORM 336 +#define L8_UINT 338 +#define L8_SINT 339 +#define I8_UINT 340 +#define I8_SINT 341 +#define DXT1_RGB_SRGB 384 +#define R1_UNORM 385 +#define YCRCB_NORMAL 386 +#define YCRCB_SWAPUVY 387 +#define P2_UNORM_PALETTE0 388 +#define P2_UNORM_PALETTE1 389 +#define BC1_UNORM 390 +#define BC2_UNORM 391 +#define BC3_UNORM 392 +#define BC4_UNORM 393 +#define BC5_UNORM 394 +#define BC1_UNORM_SRGB 395 +#define BC2_UNORM_SRGB 396 +#define BC3_UNORM_SRGB 397 +#define MONO8 398 +#define YCRCB_SWAPUV 399 +#define YCRCB_SWAPY 400 +#define DXT1_RGB 401 +#define FXT1 402 +#define R8G8B8_UNORM 403 +#define R8G8B8_SNORM 404 +#define R8G8B8_SSCALED 405 +#define R8G8B8_USCALED 406 +#define R64G64B64A64_FLOAT 407 +#define R64G64B64_FLOAT 408 +#define BC4_SNORM 409 +#define BC5_SNORM 410 +#define R16G16B16_FLOAT 411 +#define R16G16B16_UNORM 412 +#define R16G16B16_SNORM 413 +#define R16G16B16_SSCALED 414 +#define R16G16B16_USCALED 415 +#define BC6H_SF16 417 +#define BC7_UNORM 418 +#define BC7_UNORM_SRGB 419 +#define BC6H_UF16 420 +#define PLANAR_420_8 421 +#define R8G8B8_UNORM_SRGB 424 +#define ETC1_RGB8 425 +#define ETC2_RGB8 426 +#define EAC_R11 427 +#define EAC_RG11 428 +#define EAC_SIGNED_R11 429 +#define EAC_SIGNED_RG11 430 +#define ETC2_SRGB8 431 +#define R16G16B16_UINT 432 +#define R16G16B16_SINT 433 +#define R32_SFIXED 434 +#define R10G10B10A2_SNORM 435 +#define R10G10B10A2_USCALED 436 +#define R10G10B10A2_SSCALED 437 +#define R10G10B10A2_SINT 438 +#define B10G10R10A2_SNORM 439 +#define B10G10R10A2_USCALED 440 +#define B10G10R10A2_SSCALED 441 +#define B10G10R10A2_UINT 442 +#define B10G10R10A2_SINT 443 +#define R64G64B64A64_PASSTHRU 444 +#define R64G64B64_PASSTHRU 445 +#define ETC2_RGB8_PTA 448 +#define ETC2_SRGB8_PTA 449 +#define ETC2_EAC_RGBA8 450 +#define ETC2_EAC_SRGB8_A8 451 +#define R8G8B8_UINT 456 +#define R8G8B8_SINT 457 +#define RAW 511 + +/* Enum Shader Channel Select */ +#define SCS_ZERO 0 +#define SCS_ONE 1 +#define SCS_RED 4 +#define SCS_GREEN 5 +#define SCS_BLUE 6 +#define SCS_ALPHA 7 + +/* Enum Texture Coordinate Mode */ +#define TCM_WRAP 0 +#define TCM_MIRROR 1 +#define TCM_CLAMP 2 +#define TCM_CUBE 3 +#define TCM_CLAMP_BORDER 4 +#define TCM_MIRROR_ONCE 5 +#define TCM_HALF_BORDER 6 + diff --git a/src/vulkan/genX_cmd_buffer.c b/src/vulkan/genX_cmd_buffer.c new file mode 100644 index 00000000000..1ab17470932 --- /dev/null +++ b/src/vulkan/genX_cmd_buffer.c @@ -0,0 +1,275 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> + +#include "anv_private.h" + +#if (ANV_GEN == 9) +# include "gen9_pack.h" +#elif (ANV_GEN == 8) +# include "gen8_pack.h" +#elif (ANV_IS_HASWELL) +# include "gen75_pack.h" +#elif (ANV_GEN == 7) +# include "gen7_pack.h" +#endif + +void +genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_bo *scratch_bo = NULL; + + cmd_buffer->state.scratch_size = + anv_block_pool_size(&device->scratch_block_pool); + if (cmd_buffer->state.scratch_size > 0) + scratch_bo = &device->scratch_block_pool.bo; + +/* XXX: Do we need this on more than just BDW? */ +#if (ANV_GEN >= 8) + /* Emit a render target cache flush. + * + * This isn't documented anywhere in the PRM. However, it seems to be + * necessary prior to changing the surface state base adress. Without + * this, we get GPU hangs when using multi-level command buffers which + * clear depth, reset state base address, and then go render stuff. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .RenderTargetCacheFlushEnable = true); +#endif + + anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), + .GeneralStateBaseAddress = { scratch_bo, 0 }, + .GeneralStateMemoryObjectControlState = GENX(MOCS), + .GeneralStateBaseAddressModifyEnable = true, + + .SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer), + .SurfaceStateMemoryObjectControlState = GENX(MOCS), + .SurfaceStateBaseAddressModifyEnable = true, + + .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, + .DynamicStateMemoryObjectControlState = GENX(MOCS), + .DynamicStateBaseAddressModifyEnable = true, + + .IndirectObjectBaseAddress = { NULL, 0 }, + .IndirectObjectMemoryObjectControlState = GENX(MOCS), + .IndirectObjectBaseAddressModifyEnable = true, + + .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, + .InstructionMemoryObjectControlState = GENX(MOCS), + .InstructionBaseAddressModifyEnable = true, + +# if (ANV_GEN >= 8) + /* Broadwell requires that we specify a buffer size for a bunch of + * these fields. However, since we will be growing the BO's live, we + * just set them all to the maximum. + */ + .GeneralStateBufferSize = 0xfffff, + .GeneralStateBufferSizeModifyEnable = true, + .DynamicStateBufferSize = 0xfffff, + .DynamicStateBufferSizeModifyEnable = true, + .IndirectObjectBufferSize = 0xfffff, + .IndirectObjectBufferSizeModifyEnable = true, + .InstructionBufferSize = 0xfffff, + .InstructionBuffersizeModifyEnable = true, +# endif + ); + + /* After re-setting the surface state base address, we have to do some + * cache flusing so that the sampler engine will pick up the new + * SURFACE_STATE objects and binding tables. From the Broadwell PRM, + * Shared Function > 3D Sampler > State > State Caching (page 96): + * + * Coherency with system memory in the state cache, like the texture + * cache is handled partially by software. It is expected that the + * command stream or shader will issue Cache Flush operation or + * Cache_Flush sampler message to ensure that the L1 cache remains + * coherent with system memory. + * + * [...] + * + * Whenever the value of the Dynamic_State_Base_Addr, + * Surface_State_Base_Addr are altered, the L1 state cache must be + * invalidated to ensure the new surface or sampler state is fetched + * from system memory. + * + * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit + * which, according the PIPE_CONTROL instruction documentation in the + * Broadwell PRM: + * + * Setting this bit is independent of any other bit in this packet. + * This bit controls the invalidation of the L1 and L2 state caches + * at the top of the pipe i.e. at the parsing time. + * + * Unfortunately, experimentation seems to indicate that state cache + * invalidation through a PIPE_CONTROL does nothing whatsoever in + * regards to surface state and binding tables. In stead, it seems that + * invalidating the texture cache is what is actually needed. + * + * XXX: As far as we have been able to determine through + * experimentation, shows that flush the texture cache appears to be + * sufficient. The theory here is that all of the sampling/rendering + * units cache the binding table in the texture cache. However, we have + * yet to be able to actually confirm this. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .TextureCacheInvalidationEnable = true); +} + +void genX(CmdPipelineBarrier)( + VkCommandBuffer commandBuffer, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + VkBool32 byRegion, + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + uint32_t b, *dw; + + struct GENX(PIPE_CONTROL) cmd = { + GENX(PIPE_CONTROL_header), + .PostSyncOperation = NoWrite, + }; + + /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */ + + if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) { + /* This is just what PIPE_CONTROL does */ + } + + if (anv_clear_mask(&srcStageMask, + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) { + cmd.StallAtPixelScoreboard = true; + } + + if (anv_clear_mask(&srcStageMask, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_TRANSFER_BIT)) { + cmd.CommandStreamerStallEnable = true; + } + + if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) { + anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT"); + } + + /* On our hardware, all stages will wait for execution as needed. */ + (void)destStageMask; + + /* We checked all known VkPipeEventFlags. */ + anv_assert(srcStageMask == 0); + + /* XXX: Right now, we're really dumb and just flush whatever categories + * the app asks for. One of these days we may make this a bit better + * but right now that's all the hardware allows for in most areas. + */ + VkAccessFlags src_flags = 0; + VkAccessFlags dst_flags = 0; + + for (uint32_t i = 0; i < memoryBarrierCount; i++) { + src_flags |= pMemoryBarriers[i].srcAccessMask; + dst_flags |= pMemoryBarriers[i].dstAccessMask; + } + + for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) { + src_flags |= pBufferMemoryBarriers[i].srcAccessMask; + dst_flags |= pBufferMemoryBarriers[i].dstAccessMask; + } + + for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { + src_flags |= pImageMemoryBarriers[i].srcAccessMask; + dst_flags |= pImageMemoryBarriers[i].dstAccessMask; + } + + /* The src flags represent how things were used previously. This is + * what we use for doing flushes. + */ + for_each_bit(b, src_flags) { + switch ((VkAccessFlagBits)(1 << b)) { + case VK_ACCESS_SHADER_WRITE_BIT: + cmd.DCFlushEnable = true; + break; + case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: + cmd.RenderTargetCacheFlushEnable = true; + break; + case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: + cmd.DepthCacheFlushEnable = true; + break; + case VK_ACCESS_TRANSFER_WRITE_BIT: + cmd.RenderTargetCacheFlushEnable = true; + cmd.DepthCacheFlushEnable = true; + break; + default: + /* Doesn't require a flush */ + break; + } + } + + /* The dst flags represent how things will be used in the fugure. This + * is what we use for doing cache invalidations. + */ + for_each_bit(b, dst_flags) { + switch ((VkAccessFlagBits)(1 << b)) { + case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: + case VK_ACCESS_INDEX_READ_BIT: + case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: + cmd.VFCacheInvalidationEnable = true; + break; + case VK_ACCESS_UNIFORM_READ_BIT: + cmd.ConstantCacheInvalidationEnable = true; + /* fallthrough */ + case VK_ACCESS_SHADER_READ_BIT: + cmd.TextureCacheInvalidationEnable = true; + break; + case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: + cmd.TextureCacheInvalidationEnable = true; + break; + case VK_ACCESS_TRANSFER_READ_BIT: + cmd.TextureCacheInvalidationEnable = true; + break; + case VK_ACCESS_MEMORY_READ_BIT: + break; /* XXX: What is this? */ + default: + /* Doesn't require a flush */ + break; + } + } + + dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length)); + GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &cmd); +} diff --git a/src/vulkan/genX_pipeline.c b/src/vulkan/genX_pipeline.c new file mode 100644 index 00000000000..b62e401c4ee --- /dev/null +++ b/src/vulkan/genX_pipeline.c @@ -0,0 +1,126 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +#if (ANV_GEN == 9) +# include "gen9_pack.h" +#elif (ANV_GEN == 8) +# include "gen8_pack.h" +#elif (ANV_IS_HASWELL) +# include "gen75_pack.h" +#elif (ANV_GEN == 7) +# include "gen7_pack.h" +#endif + +VkResult +genX(compute_pipeline_create)( + VkDevice _device, + struct anv_pipeline_cache * cache, + const VkComputePipelineCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); + + pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + + pipeline->blend_state.map = NULL; + + result = anv_reloc_list_init(&pipeline->batch_relocs, + pAllocator ? pAllocator : &device->alloc); + if (result != VK_SUCCESS) { + anv_free2(&device->alloc, pAllocator, pipeline); + return result; + } + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; + + /* When we free the pipeline, we detect stages based on the NULL status + * of various prog_data pointers. Make them NULL by default. + */ + memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); + + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = NO_KERNEL; + pipeline->gs_kernel = NO_KERNEL; + + pipeline->active_stages = 0; + pipeline->total_scratch = 0; + + assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT); + ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module); + anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module, + pCreateInfo->stage.pName, + pCreateInfo->stage.pSpecializationInfo); + + pipeline->use_repclear = false; + + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + + anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_COMPUTE], + .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), +#if ANV_GEN > 7 + .ScratchSpaceBasePointerHigh = 0, + .StackSize = 0, +#else + .GPGPUMode = true, +#endif + .MaximumNumberofThreads = device->info.max_cs_threads - 1, + .NumberofURBEntries = ANV_GEN <= 7 ? 0 : 2, + .ResetGatewayTimer = true, +#if ANV_GEN <= 8 + .BypassGatewayControl = true, +#endif + .URBEntryAllocationSize = ANV_GEN <= 7 ? 0 : 2, + .CURBEAllocationSize = 0); + + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + uint32_t group_size = prog_data->local_size[0] * + prog_data->local_size[1] * prog_data->local_size[2]; + pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size); + uint32_t remainder = group_size & (prog_data->simd_size - 1); + + if (remainder > 0) + pipeline->cs_right_mask = ~0u >> (32 - remainder); + else + pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size); + + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} diff --git a/src/vulkan/genX_pipeline_util.h b/src/vulkan/genX_pipeline_util.h new file mode 100644 index 00000000000..da61a1e5d39 --- /dev/null +++ b/src/vulkan/genX_pipeline_util.h @@ -0,0 +1,269 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +static uint32_t +vertex_element_comp_control(enum isl_format format, unsigned comp) +{ + uint8_t bits; + switch (comp) { + case 0: bits = isl_format_layouts[format].channels.r.bits; break; + case 1: bits = isl_format_layouts[format].channels.g.bits; break; + case 2: bits = isl_format_layouts[format].channels.b.bits; break; + case 3: bits = isl_format_layouts[format].channels.a.bits; break; + default: unreachable("Invalid component"); + } + + if (bits) { + return VFCOMP_STORE_SRC; + } else if (comp < 3) { + return VFCOMP_STORE_0; + } else if (isl_format_layouts[format].channels.r.type == ISL_UINT || + isl_format_layouts[format].channels.r.type == ISL_SINT) { + assert(comp == 3); + return VFCOMP_STORE_1_INT; + } else { + assert(comp == 3); + return VFCOMP_STORE_1_FP; + } +} + +static void +emit_vertex_input(struct anv_pipeline *pipeline, + const VkPipelineVertexInputStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) +{ + uint32_t elements; + if (extra && extra->disable_vs) { + /* If the VS is disabled, just assume the user knows what they're + * doing and apply the layout blindly. This can only come from + * meta, so this *should* be safe. + */ + elements = 0; + for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) + elements |= (1 << info->pVertexAttributeDescriptions[i].location); + } else { + /* Pull inputs_read out of the VS prog data */ + uint64_t inputs_read = pipeline->vs_prog_data.inputs_read; + assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0); + elements = inputs_read >> VERT_ATTRIB_GENERIC0; + } + +#if ANV_GEN >= 8 + /* On BDW+, we only need to allocate space for base ids. Setting up + * the actual vertex and instance id is a separate packet. + */ + const bool needs_svgs_elem = pipeline->vs_prog_data.uses_basevertex || + pipeline->vs_prog_data.uses_baseinstance; +#else + /* On Haswell and prior, vertex and instance id are created by using the + * ComponentControl fields, so we need an element for any of them. + */ + const bool needs_svgs_elem = pipeline->vs_prog_data.uses_vertexid || + pipeline->vs_prog_data.uses_instanceid || + pipeline->vs_prog_data.uses_basevertex || + pipeline->vs_prog_data.uses_baseinstance; +#endif + + uint32_t elem_count = __builtin_popcount(elements) + needs_svgs_elem; + + uint32_t *p; + if (elem_count > 0) { + const uint32_t num_dwords = 1 + elem_count * 2; + p = anv_batch_emitn(&pipeline->batch, num_dwords, + GENX(3DSTATE_VERTEX_ELEMENTS)); + memset(p + 1, 0, (num_dwords - 1) * 4); + } + + for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) { + const VkVertexInputAttributeDescription *desc = + &info->pVertexAttributeDescriptions[i]; + enum isl_format format = anv_get_isl_format(desc->format, + VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_TILING_LINEAR, + NULL); + + assert(desc->binding < 32); + + if ((elements & (1 << desc->location)) == 0) + continue; /* Binding unused */ + + uint32_t slot = __builtin_popcount(elements & ((1 << desc->location) - 1)); + + struct GENX(VERTEX_ELEMENT_STATE) element = { + .VertexBufferIndex = desc->binding, + .Valid = true, + .SourceElementFormat = format, + .EdgeFlagEnable = false, + .SourceElementOffset = desc->offset, + .Component0Control = vertex_element_comp_control(format, 0), + .Component1Control = vertex_element_comp_control(format, 1), + .Component2Control = vertex_element_comp_control(format, 2), + .Component3Control = vertex_element_comp_control(format, 3), + }; + GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element); + +#if ANV_GEN >= 8 + /* On Broadwell and later, we have a separate VF_INSTANCING packet + * that controls instancing. On Haswell and prior, that's part of + * VERTEX_BUFFER_STATE which we emit later. + */ + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), + .InstancingEnable = pipeline->instancing_enable[desc->binding], + .VertexElementIndex = slot, + /* Vulkan so far doesn't have an instance divisor, so + * this is always 1 (ignored if not instancing). */ + .InstanceDataStepRate = 1); +#endif + } + + const uint32_t id_slot = __builtin_popcount(elements); + if (needs_svgs_elem) { + /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum: + * "Within a VERTEX_ELEMENT_STATE structure, if a Component + * Control field is set to something other than VFCOMP_STORE_SRC, + * no higher-numbered Component Control fields may be set to + * VFCOMP_STORE_SRC" + * + * This means, that if we have BaseInstance, we need BaseVertex as + * well. Just do all or nothing. + */ + uint32_t base_ctrl = (pipeline->vs_prog_data.uses_basevertex || + pipeline->vs_prog_data.uses_baseinstance) ? + VFCOMP_STORE_SRC : VFCOMP_STORE_0; + + struct GENX(VERTEX_ELEMENT_STATE) element = { + .VertexBufferIndex = 32, /* Reserved for this */ + .Valid = true, + .SourceElementFormat = ISL_FORMAT_R32G32_UINT, + .Component0Control = base_ctrl, + .Component1Control = base_ctrl, +#if ANV_GEN >= 8 + .Component2Control = VFCOMP_STORE_0, + .Component3Control = VFCOMP_STORE_0, +#else + .Component2Control = VFCOMP_STORE_VID, + .Component3Control = VFCOMP_STORE_IID, +#endif + }; + GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element); + } + +#if ANV_GEN >= 8 + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), + .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, + .VertexIDComponentNumber = 2, + .VertexIDElementOffset = id_slot, + .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, + .InstanceIDComponentNumber = 3, + .InstanceIDElementOffset = id_slot); +#endif +} + +static const uint32_t vk_to_gen_cullmode[] = { + [VK_CULL_MODE_NONE] = CULLMODE_NONE, + [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT, + [VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK, + [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH +}; + +static const uint32_t vk_to_gen_fillmode[] = { + [VK_POLYGON_MODE_FILL] = RASTER_SOLID, + [VK_POLYGON_MODE_LINE] = RASTER_WIREFRAME, + [VK_POLYGON_MODE_POINT] = RASTER_POINT, +}; + +static const uint32_t vk_to_gen_front_face[] = { + [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1, + [VK_FRONT_FACE_CLOCKWISE] = 0 +}; + +static const uint32_t vk_to_gen_logic_op[] = { + [VK_LOGIC_OP_COPY] = LOGICOP_COPY, + [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, + [VK_LOGIC_OP_AND] = LOGICOP_AND, + [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, + [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, + [VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP, + [VK_LOGIC_OP_XOR] = LOGICOP_XOR, + [VK_LOGIC_OP_OR] = LOGICOP_OR, + [VK_LOGIC_OP_NOR] = LOGICOP_NOR, + [VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV, + [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, + [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, + [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, + [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, + [VK_LOGIC_OP_NAND] = LOGICOP_NAND, + [VK_LOGIC_OP_SET] = LOGICOP_SET, +}; + +static const uint32_t vk_to_gen_blend[] = { + [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO, + [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE, + [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, + [VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR, + [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, + [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, + [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR, + [VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA, + [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, + [VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, + [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, +}; + +static const uint32_t vk_to_gen_blend_op[] = { + [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, + [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, + [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, + [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, + [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, +}; + +static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, +}; + +static const uint32_t vk_to_gen_stencil_op[] = { + [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP, + [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO, + [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE, + [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT, + [VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT, + [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT, + [VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR, + [VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR, +}; diff --git a/src/vulkan/genX_state_util.h b/src/vulkan/genX_state_util.h new file mode 100644 index 00000000000..67f798ab66e --- /dev/null +++ b/src/vulkan/genX_state_util.h @@ -0,0 +1,112 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +static const uint8_t +anv_surftype(const struct anv_image *image, VkImageViewType view_type, + bool storage) +{ + switch (view_type) { + default: + unreachable("bad VkImageViewType"); + case VK_IMAGE_VIEW_TYPE_1D: + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + assert(image->type == VK_IMAGE_TYPE_1D); + return SURFTYPE_1D; + case VK_IMAGE_VIEW_TYPE_CUBE: + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + assert(image->type == VK_IMAGE_TYPE_2D); + return storage ? SURFTYPE_2D : SURFTYPE_CUBE; + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + assert(image->type == VK_IMAGE_TYPE_2D); + return SURFTYPE_2D; + case VK_IMAGE_VIEW_TYPE_3D: + assert(image->type == VK_IMAGE_TYPE_3D); + return SURFTYPE_3D; + } +} + +static enum isl_format +anv_surface_format(const struct anv_device *device, enum isl_format format, + bool storage) +{ + if (storage) { + return isl_lower_storage_image_format(&device->isl_dev, format); + } else { + return format; + } +} + +#if ANV_GEN > 7 || ANV_IS_HASWELL +static const uint32_t vk_to_gen_swizzle[] = { + [VK_COMPONENT_SWIZZLE_ZERO] = SCS_ZERO, + [VK_COMPONENT_SWIZZLE_ONE] = SCS_ONE, + [VK_COMPONENT_SWIZZLE_R] = SCS_RED, + [VK_COMPONENT_SWIZZLE_G] = SCS_GREEN, + [VK_COMPONENT_SWIZZLE_B] = SCS_BLUE, + [VK_COMPONENT_SWIZZLE_A] = SCS_ALPHA +}; +#endif + +static inline uint32_t +vk_to_gen_tex_filter(VkFilter filter, bool anisotropyEnable) +{ + switch (filter) { + default: + assert(!"Invalid filter"); + case VK_FILTER_NEAREST: + return MAPFILTER_NEAREST; + case VK_FILTER_LINEAR: + return anisotropyEnable ? MAPFILTER_ANISOTROPIC : MAPFILTER_LINEAR; + } +} + +static inline uint32_t +vk_to_gen_max_anisotropy(float ratio) +{ + return (anv_clamp_f(ratio, 2, 16) - 2) / 2; +} + +static const uint32_t vk_to_gen_mipmap_mode[] = { + [VK_SAMPLER_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, + [VK_SAMPLER_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR +}; + +static const uint32_t vk_to_gen_tex_address[] = { + [VK_SAMPLER_ADDRESS_MODE_REPEAT] = TCM_WRAP, + [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = TCM_CLAMP, + [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, +}; + +static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, +}; diff --git a/src/vulkan/tests/.gitignore b/src/vulkan/tests/.gitignore new file mode 100644 index 00000000000..5d054055685 --- /dev/null +++ b/src/vulkan/tests/.gitignore @@ -0,0 +1,5 @@ +block_pool +block_pool_no_free +state_pool +state_pool_free_list_only +state_pool_no_free diff --git a/src/vulkan/tests/Makefile.am b/src/vulkan/tests/Makefile.am new file mode 100644 index 00000000000..883013d86c6 --- /dev/null +++ b/src/vulkan/tests/Makefile.am @@ -0,0 +1,46 @@ +# Copyright © 2009 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# on the rights to use, copy, modify, merge, publish, distribute, sub +# license, and/or sell copies of the Software, and to permit persons to whom +# the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +# ADAM JACKSON BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +AM_CPPFLAGS = \ + $(INTEL_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $(DEFINES) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ + -I$(top_srcdir)/src/mesa/drivers/dri/common \ + -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/isl/ \ + -I$(top_srcdir)/src/vulkan + +LDADD = \ + $(top_builddir)/src/vulkan/libvulkan-test.la \ + $(PTHREAD_LIBS) -lm -lstdc++ + +check_PROGRAMS = \ + block_pool_no_free \ + state_pool_no_free \ + state_pool_free_list_only \ + state_pool + +TESTS = $(check_PROGRAMS) diff --git a/src/vulkan/tests/block_pool_no_free.c b/src/vulkan/tests/block_pool_no_free.c new file mode 100644 index 00000000000..86d1a76151f --- /dev/null +++ b/src/vulkan/tests/block_pool_no_free.c @@ -0,0 +1,144 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <pthread.h> + +#include "anv_private.h" + +#define NUM_THREADS 16 +#define BLOCKS_PER_THREAD 1024 +#define NUM_RUNS 64 + +struct job { + pthread_t thread; + unsigned id; + struct anv_block_pool *pool; + uint32_t blocks[BLOCKS_PER_THREAD]; + uint32_t back_blocks[BLOCKS_PER_THREAD]; +} jobs[NUM_THREADS]; + + +static void *alloc_blocks(void *_job) +{ + struct job *job = _job; + int32_t block, *data; + + for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { + block = anv_block_pool_alloc(job->pool); + data = job->pool->map + block; + *data = block; + assert(block >= 0); + job->blocks[i] = block; + + block = anv_block_pool_alloc_back(job->pool); + data = job->pool->map + block; + *data = block; + assert(block < 0); + job->back_blocks[i] = -block; + } + + for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { + block = job->blocks[i]; + data = job->pool->map + block; + assert(*data == block); + + block = -job->back_blocks[i]; + data = job->pool->map + block; + assert(*data == block); + } + + return NULL; +} + +static void validate_monotonic(uint32_t **blocks) +{ + /* A list of indices, one per thread */ + unsigned next[NUM_THREADS]; + memset(next, 0, sizeof(next)); + + int highest = -1; + while (true) { + /* First, we find which thread has the highest next element */ + int thread_max = -1; + int max_thread_idx = -1; + for (unsigned i = 0; i < NUM_THREADS; i++) { + if (next[i] >= BLOCKS_PER_THREAD) + continue; + + if (thread_max < blocks[i][next[i]]) { + thread_max = blocks[i][next[i]]; + max_thread_idx = i; + } + } + + /* The only way this can happen is if all of the next[] values are at + * BLOCKS_PER_THREAD, in which case, we're done. + */ + if (thread_max == -1) + break; + + /* That next element had better be higher than the previous highest */ + assert(blocks[max_thread_idx][next[max_thread_idx]] > highest); + + highest = blocks[max_thread_idx][next[max_thread_idx]]; + next[max_thread_idx]++; + } +} + +static void run_test() +{ + struct anv_device device; + struct anv_block_pool pool; + + pthread_mutex_init(&device.mutex, NULL); + anv_block_pool_init(&pool, &device, 16); + + for (unsigned i = 0; i < NUM_THREADS; i++) { + jobs[i].pool = &pool; + jobs[i].id = i; + pthread_create(&jobs[i].thread, NULL, alloc_blocks, &jobs[i]); + } + + for (unsigned i = 0; i < NUM_THREADS; i++) + pthread_join(jobs[i].thread, NULL); + + /* Validate that the block allocations were monotonic */ + uint32_t *block_ptrs[NUM_THREADS]; + for (unsigned i = 0; i < NUM_THREADS; i++) + block_ptrs[i] = jobs[i].blocks; + validate_monotonic(block_ptrs); + + /* Validate that the back block allocations were monotonic */ + for (unsigned i = 0; i < NUM_THREADS; i++) + block_ptrs[i] = jobs[i].back_blocks; + validate_monotonic(block_ptrs); + + anv_block_pool_finish(&pool); + pthread_mutex_destroy(&device.mutex); +} + +int main(int argc, char **argv) +{ + for (unsigned i = 0; i < NUM_RUNS; i++) + run_test(); +} diff --git a/src/vulkan/tests/state_pool.c b/src/vulkan/tests/state_pool.c new file mode 100644 index 00000000000..878ec19a595 --- /dev/null +++ b/src/vulkan/tests/state_pool.c @@ -0,0 +1,57 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <pthread.h> + +#include "anv_private.h" + +#define NUM_THREADS 8 +#define STATES_PER_THREAD_LOG2 10 +#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2) +#define NUM_RUNS 64 + +#include "state_pool_test_helper.h" + +int main(int argc, char **argv) +{ + struct anv_device device; + struct anv_block_pool block_pool; + struct anv_state_pool state_pool; + + pthread_mutex_init(&device.mutex, NULL); + + for (unsigned i = 0; i < NUM_RUNS; i++) { + anv_block_pool_init(&block_pool, &device, 256); + anv_state_pool_init(&state_pool, &block_pool); + + /* Grab one so a zero offset is impossible */ + anv_state_pool_alloc(&state_pool, 16, 16); + + run_state_pool_test(&state_pool); + + anv_state_pool_finish(&state_pool); + anv_block_pool_finish(&block_pool); + } + + pthread_mutex_destroy(&device.mutex); +} diff --git a/src/vulkan/tests/state_pool_free_list_only.c b/src/vulkan/tests/state_pool_free_list_only.c new file mode 100644 index 00000000000..2f4eb47fe45 --- /dev/null +++ b/src/vulkan/tests/state_pool_free_list_only.c @@ -0,0 +1,66 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <pthread.h> + +#include "anv_private.h" + +#define NUM_THREADS 8 +#define STATES_PER_THREAD_LOG2 12 +#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2) + +#include "state_pool_test_helper.h" + +int main(int argc, char **argv) +{ + struct anv_device device; + struct anv_block_pool block_pool; + struct anv_state_pool state_pool; + + pthread_mutex_init(&device.mutex, NULL); + anv_block_pool_init(&block_pool, &device, 4096); + anv_state_pool_init(&state_pool, &block_pool); + + /* Grab one so a zero offset is impossible */ + anv_state_pool_alloc(&state_pool, 16, 16); + + /* Grab and return enough states that the state pool test below won't + * actually ever resize anything. + */ + { + struct anv_state states[NUM_THREADS * STATES_PER_THREAD]; + for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++) { + states[i] = anv_state_pool_alloc(&state_pool, 16, 16); + assert(states[i].offset != 0); + } + + for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++) + anv_state_pool_free(&state_pool, states[i]); + } + + run_state_pool_test(&state_pool); + + anv_state_pool_finish(&state_pool); + anv_block_pool_finish(&block_pool); + pthread_mutex_destroy(&device.mutex); +} diff --git a/src/vulkan/tests/state_pool_no_free.c b/src/vulkan/tests/state_pool_no_free.c new file mode 100644 index 00000000000..4b248c2ee66 --- /dev/null +++ b/src/vulkan/tests/state_pool_no_free.c @@ -0,0 +1,117 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <pthread.h> + +#include "anv_private.h" + +#define NUM_THREADS 16 +#define STATES_PER_THREAD 1024 +#define NUM_RUNS 64 + +struct job { + pthread_t thread; + unsigned id; + struct anv_state_pool *pool; + uint32_t offsets[STATES_PER_THREAD]; +} jobs[NUM_THREADS]; + +pthread_barrier_t barrier; + +static void *alloc_states(void *_job) +{ + struct job *job = _job; + + pthread_barrier_wait(&barrier); + + for (unsigned i = 0; i < STATES_PER_THREAD; i++) { + struct anv_state state = anv_state_pool_alloc(job->pool, 16, 16); + job->offsets[i] = state.offset; + } + + return NULL; +} + +static void run_test() +{ + struct anv_device device; + struct anv_block_pool block_pool; + struct anv_state_pool state_pool; + + pthread_mutex_init(&device.mutex, NULL); + anv_block_pool_init(&block_pool, &device, 64); + anv_state_pool_init(&state_pool, &block_pool); + + pthread_barrier_init(&barrier, NULL, NUM_THREADS); + + for (unsigned i = 0; i < NUM_THREADS; i++) { + jobs[i].pool = &state_pool; + jobs[i].id = i; + pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]); + } + + for (unsigned i = 0; i < NUM_THREADS; i++) + pthread_join(jobs[i].thread, NULL); + + /* A list of indices, one per thread */ + unsigned next[NUM_THREADS]; + memset(next, 0, sizeof(next)); + + int highest = -1; + while (true) { + /* First, we find which thread has the highest next element */ + int thread_max = -1; + int max_thread_idx = -1; + for (unsigned i = 0; i < NUM_THREADS; i++) { + if (next[i] >= STATES_PER_THREAD) + continue; + + if (thread_max < jobs[i].offsets[next[i]]) { + thread_max = jobs[i].offsets[next[i]]; + max_thread_idx = i; + } + } + + /* The only way this can happen is if all of the next[] values are at + * BLOCKS_PER_THREAD, in which case, we're done. + */ + if (thread_max == -1) + break; + + /* That next element had better be higher than the previous highest */ + assert(jobs[max_thread_idx].offsets[next[max_thread_idx]] > highest); + + highest = jobs[max_thread_idx].offsets[next[max_thread_idx]]; + next[max_thread_idx]++; + } + + anv_state_pool_finish(&state_pool); + anv_block_pool_finish(&block_pool); + pthread_mutex_destroy(&device.mutex); +} + +int main(int argc, char **argv) +{ + for (unsigned i = 0; i < NUM_RUNS; i++) + run_test(); +} diff --git a/src/vulkan/tests/state_pool_test_helper.h b/src/vulkan/tests/state_pool_test_helper.h new file mode 100644 index 00000000000..0e56431303f --- /dev/null +++ b/src/vulkan/tests/state_pool_test_helper.h @@ -0,0 +1,71 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <pthread.h> + +struct job { + struct anv_state_pool *pool; + unsigned id; + pthread_t thread; +} jobs[NUM_THREADS]; + +pthread_barrier_t barrier; + +static void *alloc_states(void *void_job) +{ + struct job *job = void_job; + + const unsigned chunk_size = 1 << (job->id % STATES_PER_THREAD_LOG2); + const unsigned num_chunks = STATES_PER_THREAD / chunk_size; + + struct anv_state states[chunk_size]; + + pthread_barrier_wait(&barrier); + + for (unsigned c = 0; c < num_chunks; c++) { + for (unsigned i = 0; i < chunk_size; i++) { + states[i] = anv_state_pool_alloc(job->pool, 16, 16); + memset(states[i].map, 139, 16); + assert(states[i].offset != 0); + } + + for (unsigned i = 0; i < chunk_size; i++) + anv_state_pool_free(job->pool, states[i]); + } + + return NULL; +} + +static void run_state_pool_test(struct anv_state_pool *state_pool) +{ + pthread_barrier_init(&barrier, NULL, NUM_THREADS); + + for (unsigned i = 0; i < NUM_THREADS; i++) { + jobs[i].pool = state_pool; + jobs[i].id = i; + pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]); + } + + for (unsigned i = 0; i < NUM_THREADS; i++) + pthread_join(jobs[i].thread, NULL); +} |